一、lucene簡介
Lucene是apache下的一個高性能的、功能全面的、用純java開發的全文搜索引擎庫。它幾乎適合任何需要全文搜索的應用程序,尤其是跨平臺的應用。lucene是開源的免費工程。lucene使用簡單,但是提供的功能非常強大。相關特點如下:
lucene下載地址為:http://lucene.apache.org/
本文示例工程使用maven構建,lucene版本為5.2.1。相關依賴文件如下:
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.shh</groupId>
    <artifactId>lucene</artifactId>
    <packaging>war</packaging>
    <version>0.0.1-SNAPSHOT</version>
    <name>lucene Maven Webapp</name>
    <url>http://maven.apache.org</url>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <lucene.version>5.2.1</lucene.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <!-- 分詞器 -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-smartcn</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>${lucene.version}</version>
        </dependency>
    </dependencies>
    <build>
        <finalName>lucene</finalName>
    </build>
</project>
二、示例
1、索引的創建
相關代碼如下:
1 package com.test.lucene; 2 3 import java.io.IOException; 4 import java.nio.file.Paths; 5 6 import org.apache.lucene.analysis.Analyzer; 7 import org.apache.lucene.analysis.standard.StandardAnalyzer; 8 import org.apache.lucene.document.Document; 9 import org.apache.lucene.document.Field.Store;10 import org.apache.lucene.document.IntField;11 import org.apache.lucene.document.StringField;12 import org.apache.lucene.document.TextField;13 import org.apache.lucene.index.IndexWriter;14 import org.apache.lucene.index.IndexWriterConfig;15 import org.apache.lucene.index.IndexWriterConfig.OpenMode;16 import org.apache.lucene.store.Directory;17 import org.apache.lucene.store.FSDirectory;18 19 /**20 * 創建索引21 */22 public class IndexCreate {23 24 public static void main(String[] args) {25 // 指定分詞技術,這里使用的是標準分詞26 Analyzer analyzer = new StandardAnalyzer();27 28 // indexWriter的配置信息29 IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);30 31 // 索引的打開方式:沒有則創建,有則打開32 indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);33 34 Directory directory = null;35 IndexWriter indexWriter = null;36 try {37 // 索引在硬盤上的存儲路徑38 directory = FSDirectory.open(Paths.get("D://index/test"));39 //indexWriter用來創建索引文件40 indexWriter = new IndexWriter(directory, indexWriterConfig);41 } catch (IOException e) {42 e.printStackTrace();43 }44 45 //創建文檔一46 Document doc1 = new Document();47 doc1.add(new StringField("id", "abcde", Store.YES));48 doc1.add(new TextField("content", "中國廣州", Store.YES));49 doc1.add(new IntField("num", 1, Store.YES));50 51 //創建文檔二52 Document doc2 = new Document();53 doc2.add(new StringField("id", "asdff", Store.YES));54 doc2.add(new TextField("content", "中國上海", Store.YES));55 doc2.add(new IntField("num", 2, Store.YES));56 57 try {58 //添加需要索引的文檔59 indexWriter.addDocument(doc1);60 indexWriter.addDocument(doc2);61 62 // 將indexWrite操作提交,如果不提交,之前的操作將不會保存到硬盤63 // 但是這一步很消耗系統資源,索引執行該操作需要有一定的策略64 indexWriter.commit();65 } catch (IOException e) {66 e.printStackTrace();67 } 
finally {68 // 關閉資源69 try { 70 indexWriter.close();71 directory.close(); 72 } catch (IOException e) {73 e.printStackTrace();74 }75 }76 }77 }2、搜索
相關代碼如下:
package com.test.lucene;

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Searches the index stored at {@code D://index/test} and prints the hits.
 */
public class IndexSearch {

    /**
     * Parses the query text {@code 廣州} against the {@code content} field,
     * retrieves at most 10 hits and prints the total hit count followed by
     * the {@code id}, {@code content} and {@code num} values of each hit.
     *
     * <p>An {@link IOException} or {@link ParseException} is printed to
     * standard error; the method then returns normally.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // try-with-resources closes in reverse declaration order, so the
        // reader is closed before the directory it reads from, and nothing
        // leaks when searching or query parsing throws.
        try (Directory directory = FSDirectory.open(Paths.get("D://index/test"));
             DirectoryReader directoryReader = DirectoryReader.open(directory);
             Analyzer analyzer = new StandardAnalyzer()) {

            // Searcher over the opened index.
            IndexSearcher searcher = new IndexSearcher(directoryReader);

            // Build the query against the "content" field.
            QueryParser parser = new QueryParser("content", analyzer);
            Query query = parser.parse("廣州");

            // Run the search, keeping the top 10 matching records.
            TopDocs topDocs = searcher.search(query, 10);

            System.out.println("符合條件的記錄為: " + topDocs.totalHits);
            for (int i = 0; i < topDocs.scoreDocs.length; i++) {
                Document doc = searcher.doc(topDocs.scoreDocs[i].doc);
                System.out.println("id = " + doc.get("id"));
                System.out.println("content = " + doc.get("content"));
                System.out.println("num = " + doc.get("num"));
            }
        } catch (IOException | ParseException e) {
            e.printStackTrace();
        }
    }
}
運行結果如下:

三、lucene的工作原理
lucene全文搜索分為兩個步驟:
索引創建:將數據(包括數據庫數據、文件等)進行信息提取,并創建索引文件。
搜索索引:根據用戶的搜索請求,對創建的索引進行搜索,并將搜索的結果返回給用戶。
相關示意圖如下:

新聞熱點
疑難解答