`

Lucene为数据库建立索引

阅读更多
package com.lucenedemo;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.sql.*;
import java.util.*;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocCollector;

/*
 * @author zhaozhi3758
 * 为数据库建立索引并查询
 */
public class SearchDb {

	private ArrayList dbList = new ArrayList();

	private String INDEX_DIR = "f:/index";

	/**
	 * 获取数据库数据
	 * 
	 * @return
	 */
	@SuppressWarnings("finally")
	public void getResults() {
		ResultSet rs = null;
		Statement stmt = null;
		Connection conn = null;
		

		try {
			Class.forName("com.mysql.jdbc.Driver");// 加载驱动
			// new oracle.jdbc.driver.OracleDriver();
			String url = "jdbc:mysql://localhost/libraryv1";
			conn = DriverManager.getConnection(url, "root", "123");// 建立连接
			stmt = conn.createStatement();
			rs = stmt.executeQuery("select * from reader");// 发送执行,获得结果集
			while (rs.next()) {// 遍历结果
				HashMap map = new HashMap();
				map.put("name", rs.getString(2));
				map.put("vocation", rs.getString(5));
				map.put("date", rs.getString(6));
				this.dbList.add(map);
			}
		} catch (ClassNotFoundException e) {
			e.printStackTrace();
		} catch (SQLException e) {
			e.printStackTrace();
		} finally {
			try {
				if (rs != null) {// 关闭连接释放资源
					rs.close();
					rs = null;
				}
				if (stmt != null) {
					stmt.close();
					stmt = null;
				}
				if (conn != null) {
					conn.close();
					conn = null;
				}
			} catch (SQLException e) {
				e.printStackTrace();
			}

		}

	}

	/**
	 * 创建索引
	 */
	public void indexDocs() {

		if (this.dbList.size() <= 0)
			return;
		try {
			IndexWriter writer = new IndexWriter(this.INDEX_DIR,
					new StandardAnalyzer(), true);
			createIndex(writer);
			System.out.println("正在优化...");
			writer.optimize();
			writer.close();
		} catch (IOException e) {
			System.out.println(" caught a " + e.getClass()
					+ "\n with message: " + e.getMessage());
		}
	}

	public void createIndex(IndexWriter writer) {
		HashMap map = new HashMap();
		for (Iterator it = this.dbList.iterator(); it.hasNext();) {
			map = (HashMap)it.next();
			Document doc = new Document();//必须放在循环中
			//System.out.println("-->" + map.get("date").toString());
			doc.add(new Field("name", map.get("name").toString(),Field.Store.YES, Field.Index.TOKENIZED));
			doc.add(new Field("vocation", map.get("vocation").toString(),Field.Store.YES, Field.Index.TOKENIZED));
			doc.add(new Field("date", map.get("date").toString(),Field.Store.YES, Field.Index.TOKENIZED));
			//增加共同域
			doc.add(new Field("all", "0",Field.Store.YES, Field.Index.TOKENIZED));
			try {
				writer.addDocument(doc);//添加到索引器writer
			} catch (CorruptIndexException e) {
				e.printStackTrace();
			} catch (IOException e) {
				e.printStackTrace();
			}

		}
	}
	/**
	 * 结果遍历 
	 * @param hits
	 * @param key
	 * @throws Exception
	 */
	 public static void printResult(Hits hits, String key,int num) throws Exception {
		System.out.println("查找 \"" + key + "\" :");
		if (hits != null) {
			if (hits.length() == 0) {
				System.out.println("没有找到任何结果");
			} else {
				System.out.println("找到" + hits.length() + "个结果");
				for (int i = 0; i < hits.length(); i++) {
					Document doc = hits.doc(i);
					System.out.println("姓名"+num+":" + doc.get("name"));
					System.out.println("职业"+num+":"+ doc.get("vocation"));
				}

			}

		}

	}


	/**
	 * 遍历索引[方式一:按词条搜索]
	 * 注意:字段值是区分大小写的,因此在查询时必须注意大小写的匹配
	 */
	public void search_mod1(String type, String keyword)
			throws CorruptIndexException, IOException, ParseException {
		IndexSearcher searcher = new IndexSearcher(this.INDEX_DIR);
		Term term = new Term(type,keyword);
		//Query query=new TermQuery(term);最常用,然后构造hits保存检索结果
		TermDocs docs = searcher.getIndexReader().termDocs(term);
		//System.out.println("count:"+docs.freq());
		while (docs.next()) {
			Document doc = searcher.doc(docs.doc());
			System.out.println("姓名1:" + doc.get("name"));
			System.out.println("职业1:" + doc.get("vocation"));
		}
		  if (searcher != null)searcher.close();
	}
	/**
	 * 遍历索引[方式二]
	 * @param type
	 * @param keyword
	 * @throws Exception
	 */
	public void search_mod2(String type, String keyword) throws Exception{
		    int count=50;//取前50条
	        Searcher searcher = new IndexSearcher(INDEX_DIR);
	        Query query = new QueryParser(type, new StandardAnalyzer()).parse(keyword);
	        
	        TopDocCollector collector = new TopDocCollector(count);
	        searcher.search(query, collector);
	        ScoreDoc[] hits = collector.topDocs().scoreDocs;
	       // System.out.println("共查找到结果:"+hits.length);
	        for (int i = 0; i < hits.length; i++) {
	            int docId = hits[i].doc;
	            Document doc = searcher.doc(docId);
	            //System.out.println("Title:"+doc.get("title"));
	            System.out.println("姓名2:"+doc.get("name"));
	            System.out.println("职业2:"+doc.get("vocation"));
	        }
	        if (searcher != null)searcher.close();
		
		
	}
	/**
	 * 遍历索引[方式三]
	 * @param type
	 * @param keyword
	 * @throws Exception
	 */
	public void search_mod3(String type, String keyword)throws Exception{
		  Searcher searcher = new IndexSearcher(INDEX_DIR);
	      Query query = new QueryParser(type, new StandardAnalyzer()).parse(keyword);
		  Hits hits=searcher.search(query);
		  for(int i=0;i<hits.length();i++){
			  Document doc=hits.doc(i);
			  System.out.println("姓名3:"+doc.get("name"));
	          System.out.println("职业3:"+doc.get("vocation"));
			  
		  }
		  if (searcher != null)searcher.close();
	}
	/**
	 * 遍历索引[方式四:与或搜索]
	 */
	public void search_mod4()throws Exception{
		Query query1 = null;
	    Query query2 = null;
	    BooleanQuery query = new BooleanQuery();// 构造一个布尔查询
	    Hits hits = null;
	    Searcher searcher = new IndexSearcher(INDEX_DIR);
	    query1 = new TermQuery(new Term("name","赵"));
	    query2 = new TermQuery(new Term("vocation","教"));
	    // 添加两个子查询[下面两个子句为或的关系]
	    //MUST、SHOULD、MUST_NOT表示与、或、非
	    query.add(query1,BooleanClause.Occur.SHOULD);
	    query.add(query2,BooleanClause.Occur.SHOULD);
	    hits = searcher.search(query);
	    printResult(hits, "赵和教",4);
	}
	/**
	 * 遍历索引[方式五:在某一范围内搜索]
	 * @throws Exception
	 */
    public void search_mod5()throws Exception{
    	 RangeQuery query = null;
    	 Hits hits = null;
 	     Searcher searcher = new IndexSearcher(INDEX_DIR);
 	     //当第二的参数为null时,后边界无限
 	     query = new RangeQuery(new Term("date","1985-04-05"), 
 	    		 new Term("date","1988-05-02"), true);//true包括边界

 	      hits = searcher.search(query);
	      printResult(hits, "1985-04-05~1988-05-02",5);
    }
    /**
	 * 遍历索引[方式六:多域搜索]
	 * @throws Exception
	 */
    public void search_mod6()throws Exception{
    	Hits hits = null;
        Searcher searcher = new IndexSearcher(INDEX_DIR);
        //指定或的关系
    	BooleanClause.Occur[] flags = new BooleanClause.Occur[] { 
    			              BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
    	Query query= MultiFieldQueryParser.parse(
    			   "赵", new String[] {"name", "vocation"}, flags, new StandardAnalyzer());
    	  hits = searcher.search(query);
	      printResult(hits, "名字或职业含有赵的结果",6);
    }
    /**
	 * 遍历索引[方式七:将索引结果排序]
	 * @throws Exception
	 */
    public void search_mod7()throws Exception{
    	Sort sort = new Sort(new SortField[]{new SortField("date", SortField.AUTO, true)});
    	 RangeQuery query = null;
    	 Hits hits = null;
 	     Searcher searcher = new IndexSearcher(INDEX_DIR);
 	     //当第二的参数为null时,后边界无限
 	     query = new RangeQuery(new Term("date","1985-04-05"), 
 	    		 new Term("date","1988-05-02"), true);//true包括边界

 	      hits = searcher.search(query,sort);
	      printResult(hits, "按时间大小",7);
    	
    }
	public static void main(String[] args) {
		SearchDb sdb = new SearchDb();
		sdb.getResults();
		sdb.indexDocs();
		try {
			 sdb.search_mod1("all","0");//查询所有
			 System.out.println("------------------");
			 sdb.search_mod2("vocation","师");
			 System.out.println("------------------");
			 sdb.search_mod3("vocation","员");
			 System.out.println("------------------");
			 sdb.search_mod4();
			 System.out.println("------------------");
			 sdb.search_mod5();
			 System.out.println("------------------");
			 sdb.search_mod6();
			 System.out.println("------------------");
			 sdb.search_mod7();
		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} catch (Exception e) {
			e.printStackTrace();
		}

	}

}

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics