lucene入門

2019-11-15 01:00:02

字體：大中小

來源：轉載

供稿：網友

lucene入門

一、lucene簡介

　　Lucene是apache下的一個高性能的、功能全面的、用純java開發的全文搜索引擎庫。它幾乎適合任何需要全文搜索的應用程序，尤其是跨平臺的應用。lucene是開源的免費工程。lucene使用簡單，但是提供的功能非常強大。相關特點如下：

在現代硬件上的索引速度超過150GB/小時
更小的內存需求，只需要1MB堆空間
增量索引的速度與批量索引一樣快
索引的大小大約為被索引文本的20%-30%

　　lucene下載地址為：http://lucene.apache.org/

　　本文示例工程使用maven構建，lucene版本為5.2.1。相關依賴文件如下：

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.shh</groupId>
    <artifactId>lucene</artifactId>
    <packaging>war</packaging>
    <version>0.0.1-SNAPSHOT</version>
    <name>lucene Maven Webapp</name>
    <url>http://maven.apache.org</url>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <lucene.version>5.2.1</lucene.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <!-- 分詞器 -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-smartcn</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>${lucene.version}</version>
        </dependency>
    </dependencies>
    <build>
        <finalName>lucene</finalName>
    </build>
</project>

二、示例

　　1、索引的創建

　　相關代碼如下：

 1 package com.test.lucene; 2  3 import java.io.IOException;  4 import java.nio.file.Paths;  5  6 import org.apache.lucene.analysis.Analyzer; 7 import org.apache.lucene.analysis.standard.StandardAnalyzer; 8 import org.apache.lucene.document.Document; 9 import org.apache.lucene.document.Field.Store;10 import org.apache.lucene.document.IntField;11 import org.apache.lucene.document.StringField;12 import org.apache.lucene.document.TextField;13 import org.apache.lucene.index.IndexWriter;14 import org.apache.lucene.index.IndexWriterConfig;15 import org.apache.lucene.index.IndexWriterConfig.OpenMode;16 import org.apache.lucene.store.Directory;17 import org.apache.lucene.store.FSDirectory;18 19 /**20  * 創建索引21  */22 public class IndexCreate {23     24     public static void main(String[] args) {25         // 指定分詞技術，這里使用的是標準分詞26         Analyzer analyzer = new StandardAnalyzer();27 28         // indexWriter的配置信息29         IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);30 31         // 索引的打開方式：沒有則創建，有則打開32         indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);33 34         Directory directory = null;35         IndexWriter indexWriter = null;36         try {37             // 索引在硬盤上的存儲路徑38             directory = FSDirectory.open(Paths.get("D://index/test"));39             //indexWriter用來創建索引文件40             indexWriter = new IndexWriter(directory, indexWriterConfig);41         } catch (IOException e) {42             e.printStackTrace();43         }44         45         //創建文檔一46         Document doc1 = new Document();47         doc1.add(new StringField("id", "abcde", Store.YES));48         doc1.add(new TextField("content", "中國廣州", Store.YES));49         doc1.add(new IntField("num", 1, Store.YES));50 51         //創建文檔二52         Document doc2 = new Document();53         doc2.add(new StringField("id", "asdff", Store.YES));54         doc2.add(new TextField("content", "中國上海", Store.YES));55         doc2.add(new IntField("num", 2, Store.YES));56 
57         try {58             //添加需要索引的文檔59             indexWriter.addDocument(doc1);60             indexWriter.addDocument(doc2);61  62             // 將indexWrite操作提交，如果不提交，之前的操作將不會保存到硬盤63             // 但是這一步很消耗系統資源，索引執行該操作需要有一定的策略64             indexWriter.commit();65         } catch (IOException e) {66             e.printStackTrace();67         } finally {68             // 關閉資源69             try { 70                 indexWriter.close();71                 directory.close(); 72             } catch (IOException e) {73                 e.printStackTrace();74             }75         }76     }77 }

　　2、搜索

　　相關代碼如下：

 1 package com.test.lucene; 2  3 import java.io.IOException; 4 import java.nio.file.Paths; 5  6 import org.apache.lucene.analysis.Analyzer; 7 import org.apache.lucene.analysis.standard.StandardAnalyzer; 8 import org.apache.lucene.document.Document; 9 import org.apache.lucene.index.DirectoryReader;10 import org.apache.lucene.queryparser.classic.ParseException;11 import org.apache.lucene.queryparser.classic.QueryParser;12 import org.apache.lucene.search.IndexSearcher;13 import org.apache.lucene.search.Query;14 import org.apache.lucene.search.TopDocs;15 import org.apache.lucene.store.Directory;16 import org.apache.lucene.store.FSDirectory;17 18 /**19  * 搜索20  */21 public class IndexSearch {22     23     public static void main(String[] args) {24         //索引存放的位置25         Directory directory = null;26         try {27             // 索引硬盤存儲路徑28             directory = FSDirectory.open(Paths.get("D://index/test"));29             // 讀取索引30             DirectoryReader directoryReader = DirectoryReader.open(directory);31             // 創建索引檢索對象32             IndexSearcher searcher = new IndexSearcher(directoryReader);33             // 分詞技術34             Analyzer analyzer = new StandardAnalyzer();35             // 創建Query36             QueryParser parser = new QueryParser("content", analyzer);37             Query query = parser.parse("廣州");// 查詢content為廣州的38             // 檢索索引，獲取符合條件的前10條記錄39             TopDocs topDocs = searcher.search(query, 10);40             if (topDocs != null) {41                 System.out.println("符合條件的記錄為： " + topDocs.totalHits);42                 for (int i = 0; i < topDocs.scoreDocs.length; i++) {43                     Document doc = searcher.doc(topDocs.scoreDocs[i].doc);44                     System.out.println("id = " + doc.get("id"));45                     System.out.println("content = " + doc.get("content"));46                     System.out.println("num = " + doc.get("num"));47                 }48             }49             
directory.close();50             directoryReader.close();51         } catch (IOException e) {52             e.printStackTrace();53         } catch (ParseException e) {54             e.printStackTrace();55         }56     }57 }

　　運行結果如下：