Commit e5cf2882 authored by yihua.huang's avatar yihua.huang

fix a lucene bug

parent 9ef6de01
......@@ -24,7 +24,7 @@
</dependency>
<dependency>
<groupId>us.codecraft</groupId>
<artifactId>webmagic-core</artifactId>
<artifactId>webmagic-extension</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
......
......@@ -33,16 +33,14 @@ public class LucenePipeline implements Pipeline {
private Directory directory;
private IndexWriter indexWriter;
private Analyzer analyzer;
private IndexWriterConfig config;
private void init() throws IOException {
analyzer = new StandardAnalyzer(Version.LUCENE_44);
directory = new RAMDirectory();
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_44, analyzer);
indexWriter = new IndexWriter(directory, config);
indexWriter.close();
config = new IndexWriterConfig(Version.LUCENE_44, analyzer);
}
public LucenePipeline() {
......@@ -67,7 +65,6 @@ public class LucenePipeline implements Pipeline {
documents.add(hitDoc);
}
ireader.close();
directory.close();
return documents;
}
......@@ -85,7 +82,9 @@ public class LucenePipeline implements Pipeline {
doc.add(new Field(objectEntry.getKey(), objectEntry.getValue().toString(), TextField.TYPE_STORED));
}
try {
IndexWriter indexWriter = new IndexWriter(directory, config);
indexWriter.addDocument(doc);
indexWriter.close();
} catch (IOException e) {
e.printStackTrace();
}
......
package us.codecraft.webmagic.lucene;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryparser.classic.ParseException;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.model.ExtractBy;
import us.codecraft.webmagic.model.OOSpider;
import us.codecraft.webmagic.model.TargetUrl;
import us.codecraft.webmagic.pipeline.LucenePipeline;
import java.io.IOException;
import java.util.List;
/**
* @author code4crafter@gmail.com <br>
* @date: 13-8-2 <br>
* Time: 上午7:52 <br>
*/
@TargetUrl("http://my.oschina.net/flashsword/blog/\\d+")
public class OschinaBlog {
@ExtractBy("//title")
private String title;
@ExtractBy(value = "div.BlogContent", type = ExtractBy.Type.Css)
private String content;
@Override
public String toString() {
return "OschinaBlog{" +
"title='" + title + '\'' +
", content='" + content + '\'' +
'}';
}
public static void main(String[] args) {
LucenePipeline pipeline = new LucenePipeline();
OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog"), OschinaBlog.class).pipeline(pipeline).runAsync();
while (true) {
try {
List<Document> search = pipeline.search("title", "webmagic");
System.out.println(search);
Thread.sleep(3000);
} catch (IOException e) {
e.printStackTrace();
} catch (ParseException e) {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
public String getTitle() {
return title;
}
public String getContent() {
return content;
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment