Package org.apache.lucene.benchmark.byTask.feeds

Examples of org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource$Parser


        super.setUp();
        WorkspaceImpl wsp = (WorkspaceImpl) superuser.getWorkspace();
        QueryManagerImpl qm = (QueryManagerImpl) wsp.getQueryManager();
        QueryObjectModelFactoryImpl factory = (QueryObjectModelFactoryImpl) qm
                .getQOMFactory();
        parser = new Parser(factory, superuser.getValueFactory());
    }
View Full Code Here


  }

  public int index(File wikipediaXML, int numDocs, int batchSize) throws Exception {
    int result = 0;
    if (wikipediaXML != null && wikipediaXML.exists()) {
      EnwikiContentSource contentSource = new EnwikiContentSource();
      Properties properties = new Properties();
      //fileName = config.get("docs.file", null);
      String filePath = wikipediaXML.getAbsolutePath();
      properties.setProperty("docs.file", filePath);
      properties.setProperty("doc.maker.forever", "false");
      contentSource.setConfig(new Config(properties));
      contentSource.resetInputs();
      //docMaker.openFile();
      List<SolrInputDocument> docs = new ArrayList<SolrInputDocument>(1000);
      int i = 0;
      SolrInputDocument sDoc = null;
      long start = System.currentTimeMillis();
      try {
        DocData docData = new DocData();

        while ((docData = contentSource.getNextDocData(docData)) != null && i < numDocs) {
          int mod = i % batchSize;

          sDoc = new SolrInputDocument();
          docs.add(sDoc);
          sDoc.addField("file", filePath + "_" + i);
View Full Code Here

    properties.setProperty("docs.file", wikipedia.getAbsolutePath());
    properties.setProperty("content.source.forever", "false");
    properties.setProperty("keep.image.only.docs", String.valueOf(keepImageOnlyDocs));
    Config config = new Config(properties);

    ContentSource source = new EnwikiContentSource();
    source.setConfig(config);
   
    DocMaker docMaker = new DocMaker();
    docMaker.setConfig(config, source);
    docMaker.resetInputs();
    if (wikipedia.exists()) {
View Full Code Here

    properties.setProperty("docs.file", wikipedia.getAbsolutePath());
    properties.setProperty("content.source.forever", "false");
    properties.setProperty("keep.image.only.docs", String.valueOf(keepImageOnlyDocs));
    Config config = new Config(properties);

    ContentSource source = new EnwikiContentSource();
    source.setConfig(config);
   
    DocMaker docMaker = new DocMaker();
    docMaker.setConfig(config, source);
    docMaker.resetInputs();
    if (wikipedia.exists()) {
View Full Code Here

  private ManifestParseTree parse(InputStream inputStream) throws IOException, TokenizerException, ParserException {
    Preprocessor preprocessor = new ManifestPreprocessor();
    List<InputLine> contents = preprocessor.process(inputStream);
    Tokenizer tokenizer = new ManifestTokenizer(contents);

    Parser parser = new ManifestParser();
    return parser.parse(tokenizer);
  }
View Full Code Here

  private ManifestParseTree parse(InputStream inputStream) throws IOException, TokenizerException, ParserException {
    Preprocessor preprocessor = new ManifestPreprocessor();
    List<InputLine> contents = preprocessor.process(inputStream);
    Tokenizer tokenizer = new ManifestTokenizer(contents);

    Parser parser = new ManifestParser();
    return parser.parse(tokenizer);
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource$Parser

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.