国产探花免费观看_亚洲丰满少妇自慰呻吟_97日韩有码在线_资源在线日韩欧美_一区二区精品毛片,辰东完美世界有声小说,欢乐颂第一季,yy玄幻小说排行榜完本

首頁 > 學院 > 開發設計 > 正文

ik-analysis for lucene 5.x

2019-11-09 21:07:06
字體:
供稿:網友

package org.wltea.analyzer.lucene;

import java.io.IOException;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;

/**
 * Lucene 5.x {@link Tokenizer} adapter around the IK segmenter.
 *
 * <p>Each {@link Lexeme} produced by {@link IKSegmenter} is exposed as one Lucene token
 * carrying its text, offsets and type.
 */
public class IKTokenizer extends Tokenizer {

    /** The IK segmenter that does the actual word segmentation. */
    private final IKSegmenter _IKImplement;

    /** Token text attribute. */
    private final CharTermAttribute termAtt;

    /** Token offset attribute. */
    private final OffsetAttribute offsetAtt;

    /**
     * Token type attribute (values come from {@link Lexeme#getLexemeTypeString()};
     * see the type constants in {@code org.wltea.analyzer.core.Lexeme}).
     */
    private final TypeAttribute typeAtt;

    /** End position of the last lexeme emitted; used to compute the final offset. */
    private int endPosition;

    /**
     * Creates the tokenizer using the reader-less constructor of the Lucene 5.x
     * {@link Tokenizer} API.
     *
     * <p>The segmenter is created against the current {@code input} field and is
     * re-pointed at {@code input} again on every {@link #reset()}.
     *
     * @param useSmart passed straight through to {@link IKSegmenter}
     */
    public IKTokenizer(boolean useSmart) {
        super();
        offsetAtt = addAttribute(OffsetAttribute.class);
        termAtt = addAttribute(CharTermAttribute.class);
        typeAtt = addAttribute(TypeAttribute.class);
        _IKImplement = new IKSegmenter(input, useSmart);
    }

    /**
     * Advances to the next lexeme and copies it into the token attributes.
     *
     * @return {@code true} if a token was produced, {@code false} once the segmenter
     *         returns no more lexemes
     * @throws IOException if the segmenter fails while reading the input
     * @see org.apache.lucene.analysis.TokenStream#incrementToken()
     */
    @Override
    public boolean incrementToken() throws IOException {
        // Start every token from a clean attribute state.
        clearAttributes();
        Lexeme nextLexeme = _IKImplement.next();
        if (nextLexeme == null) {
            // Segmenter is exhausted: tell the consumer there are no more tokens.
            return false;
        }

        // Token text and its length.
        termAtt.append(nextLexeme.getLexemeText());
        termAtt.setLength(nextLexeme.getLength());
        // Token offsets.
        offsetAtt.setOffset(nextLexeme.getBeginPosition(), nextLexeme.getEndPosition());
        // Remember where the last token ended so end() can report the final offset.
        endPosition = nextLexeme.getEndPosition();
        // Token type.
        typeAtt.setType(nextLexeme.getLexemeTypeString());
        return true;
    }

    /**
     * Resets the stream and hands the current {@code input} reader to the segmenter.
     *
     * @throws IOException if the superclass reset fails
     */
    @Override
    public void reset() throws IOException {
        super.reset();
        _IKImplement.reset(input);
        // Drop the previous stream's end position so a reused tokenizer that emits
        // no tokens does not report a stale final offset from end().
        endPosition = 0;
    }

    /**
     * Sets the final offset after the last token has been consumed.
     *
     * @throws IOException declared by {@code TokenStream.end()}, which this method
     *         now delegates to
     */
    @Override
    public final void end() throws IOException {
        // Lucene requires overriding implementations to call super.end() first so the
        // end-of-stream attribute state is set up before the final offset is written.
        super.end();
        int finalOffset = correctOffset(this.endPosition);
        offsetAtt.setOffset(finalOffset, finalOffset);
    }
}

package org.wltea.analyzer.lucene;

import java.io.IOException;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;

/**
 * Lucene 5.x {@link Tokenizer} adapter around the IK segmenter.
 *
 * <p>Each {@link Lexeme} produced by {@link IKSegmenter} is exposed as one Lucene token
 * carrying its text, offsets and type.
 */
public class IKTokenizer extends Tokenizer {

    /** The IK segmenter that does the actual word segmentation. */
    private final IKSegmenter _IKImplement;

    /** Token text attribute. */
    private final CharTermAttribute termAtt;

    /** Token offset attribute. */
    private final OffsetAttribute offsetAtt;

    /**
     * Token type attribute (values come from {@link Lexeme#getLexemeTypeString()};
     * see the type constants in {@code org.wltea.analyzer.core.Lexeme}).
     */
    private final TypeAttribute typeAtt;

    /** End position of the last lexeme emitted; used to compute the final offset. */
    private int endPosition;

    /**
     * Creates the tokenizer using the reader-less constructor of the Lucene 5.x
     * {@link Tokenizer} API.
     *
     * <p>The segmenter is created against the current {@code input} field and is
     * re-pointed at {@code input} again on every {@link #reset()}.
     *
     * @param useSmart passed straight through to {@link IKSegmenter}
     */
    public IKTokenizer(boolean useSmart) {
        super();
        offsetAtt = addAttribute(OffsetAttribute.class);
        termAtt = addAttribute(CharTermAttribute.class);
        typeAtt = addAttribute(TypeAttribute.class);
        _IKImplement = new IKSegmenter(input, useSmart);
    }

    /**
     * Advances to the next lexeme and copies it into the token attributes.
     *
     * @return {@code true} if a token was produced, {@code false} once the segmenter
     *         returns no more lexemes
     * @throws IOException if the segmenter fails while reading the input
     * @see org.apache.lucene.analysis.TokenStream#incrementToken()
     */
    @Override
    public boolean incrementToken() throws IOException {
        // Start every token from a clean attribute state.
        clearAttributes();
        Lexeme nextLexeme = _IKImplement.next();
        if (nextLexeme == null) {
            // Segmenter is exhausted: tell the consumer there are no more tokens.
            return false;
        }

        // Token text and its length.
        termAtt.append(nextLexeme.getLexemeText());
        termAtt.setLength(nextLexeme.getLength());
        // Token offsets.
        offsetAtt.setOffset(nextLexeme.getBeginPosition(), nextLexeme.getEndPosition());
        // Remember where the last token ended so end() can report the final offset.
        endPosition = nextLexeme.getEndPosition();
        // Token type.
        typeAtt.setType(nextLexeme.getLexemeTypeString());
        return true;
    }

    /**
     * Resets the stream and hands the current {@code input} reader to the segmenter.
     *
     * @throws IOException if the superclass reset fails
     */
    @Override
    public void reset() throws IOException {
        super.reset();
        _IKImplement.reset(input);
        // Drop the previous stream's end position so a reused tokenizer that emits
        // no tokens does not report a stale final offset from end().
        endPosition = 0;
    }

    /**
     * Sets the final offset after the last token has been consumed.
     *
     * @throws IOException declared by {@code TokenStream.end()}, which this method
     *         now delegates to
     */
    @Override
    public final void end() throws IOException {
        // Lucene requires overriding implementations to call super.end() first so the
        // end-of-stream attribute state is set up before the final offset is written.
        super.end();
        int finalOffset = correctOffset(this.endPosition);
        offsetAtt.setOffset(finalOffset, finalOffset);
    }
}


發表評論 共有條評論
用戶名: 密碼:
驗證碼: 匿名發表
主站蜘蛛池模板: 高州市| 铜梁县| 奉化市| 大田县| 自治县| 永德县| 耿马| 防城港市| 攀枝花市| 穆棱市| 九寨沟县| 芦山县| 靖安县| 石台县| 沂源县| 本溪市| 门头沟区| 宾川县| 商河县| 日喀则市| 太白县| 石景山区| 安乡县| 施甸县| 莎车县| 福海县| 和顺县| 鹤岗市| 诸城市| 平果县| 和田市| 措勤县| 昌江| 田阳县| 凤城市| 恩平市| 台北市| 勃利县| 鸡东县| 娄烦县| 大新县|