Examples of Parse

com.gtranslate.parsing.Parse
com.owens.oobjloader.parser.Parse
fit.Parse
net.nutch.parse.Parse
The result of parsing a page's raw content. @see Parser#getParse(FetcherOutput,Content)
opennlp.tools.coref.mention.Parse
Interface for syntactic and named-entity information to be used in coreference annotation.
opennlp.tools.parser.Parse
Data structure for holding parse constituents.
org.apache.droids.api.Parse
Wrapper object that encapsulates the result of the parsing of the underlying document. @version 1.0
org.apache.nutch.parse.Parse
The result of parsing a page's raw content. @see Parser#getParse(Content)
org.jbpm.pvm.internal.xml.Parse
information related to one single parse operation, for instructions see {@link Parser}. @author Tom Baeyens
org.jbpm.xml.Parse
combines the parse result, the parse problems and other parse information that is related to a single parse operation, see also {@link Parser}. @author Tom Baeyens
org.jruby.pg.internal.messages.Parse
water.api.ParseHandler.Parse

Examples of opennlp.tools.parser.Parse

    return parse.toString();
  }




  public opennlp.tools.coref.mention.Parse getPreviousToken() {
    Parse parent = parse.getParent();
    Parse node = parse;
    int index=-1;
    //find parent with previous children
    while(parent != null && index < 0) {
      index = parent.indexOf(node)-1;
      if (index < 0) {
        node = parent;
        parent = parent.getParent();
      }
    }
    //find right-most child which is a token
    if (index < 0) {
      return null;
    }
    else {
      Parse p = parent.getChildren()[index];
      while (!p.isPosTag()) {
        Parse[] kids = p.getChildren();
        p = kids[kids.length-1];
      }
      return new DefaultParse(p,sentenceNumber);
    }
  }

View Full Code Here

Examples of org.apache.droids.api.Parse

            if (log.isDebugEnabled()) {
              log.debug("Could not find parser for " + contentType);
            }
          }
          else {
            Parse parse = parser.parse(entity, link);
            if( parse.getOutlinks() != null ) {
              Collection<Link> outlinks = getFilteredOutlinks( parse );
              droid.getQueue().addAll( outlinks );
            }
            entity.setParse(parse);
            handle(entity, link);

View Full Code Here

Examples of org.apache.nutch.parse.Parse

        }
      }


      // get parse metadata
      Metadata metadata = parseData.getContentMeta();
      Parse parse = new ParseImpl(parseText, parseData);


      // handle redirect urls
      Text reprUrlText = (Text)fetchDatum.getMetaData().get(
        Nutch.WRITABLE_REPR_URL_KEY);
      String reprUrl = reprUrlText != null ? reprUrlText.toString() : null;
      String url = key.toString();
      String fieldUrl = (reprUrl != null) ? reprUrl : url;
      String host = URLUtil.getHost(fieldUrl);


      // add segment, used to map from merged index back to segment files
      FieldWritable segField = new FieldWritable(Fields.SEGMENT,
        metadata.get(Nutch.SEGMENT_NAME_KEY), FieldType.CONTENT, false, true,
        false);
      fieldsList.add(segField);


      // add digest, used by dedup
      FieldWritable digestField = new FieldWritable(Fields.DIGEST,
        metadata.get(Nutch.SIGNATURE_KEY), FieldType.CONTENT, false, true,
        false);
      fieldsList.add(digestField);


      // url is both stored and indexed, so it's both searchable and returned
      fieldsList.add(new FieldWritable(Fields.URL, fieldUrl, FieldType.CONTENT,
        true, true, true));
      fieldsList.add(new FieldWritable(Fields.SEG_URL, url, FieldType.CONTENT,
        false, true, false));


      if (reprUrl != null) {
        // also store original url as both stored and indexes
        fieldsList.add(new FieldWritable(Fields.ORIG_URL, url,
          FieldType.CONTENT, true, true, true));
      }


      if (host != null) {
        // add host as un-stored, indexed and tokenized
        FieldWritable hostField = new FieldWritable(Fields.HOST, host,
          FieldType.CONTENT, true, false, true);
        fieldsList.add(hostField);


        // add site as un-stored, indexed and un-tokenized
        FieldWritable siteField = new FieldWritable(Fields.SITE, host,
          FieldType.CONTENT, true, false, false);
        fieldsList.add(siteField);
      }


      // content is indexed, so that it's searchable, but not stored in index
      fieldsList.add(new FieldWritable(Fields.CONTENT, parse.getText(),
        FieldType.CONTENT, true, false, true));


      // title
      String title = parse.getData().getTitle();
      if (title.length() > MAX_TITLE_LENGTH) { // truncate title if needed
        title = title.substring(0, MAX_TITLE_LENGTH);
      }
      // add title indexed and stored so that it can be displayed
      fieldsList.add(new FieldWritable(Fields.TITLE, title, FieldType.CONTENT,
        true, true, true));


      // add cached content/summary display policy, if available
      String caching = parse.getData().getMeta(Nutch.CACHING_FORBIDDEN_KEY);
      if (caching != null && !caching.equals(Nutch.CACHING_FORBIDDEN_NONE)) {
        fieldsList.add(new FieldWritable(Fields.CACHE, caching,
          FieldType.CONTENT, false, true, false));
      }

View Full Code Here

Examples of org.apache.nutch.parse.Parse

  protected void tearDown() {}


  public void testIt() throws ProtocolException, ParseException {
    String urlString;
    Content content;
    Parse parse;
    Configuration conf = NutchConfiguration.create();
    Protocol protocol;
    ProtocolFactory factory = new ProtocolFactory(conf);
    OOParser parser = new OOParser();
    parser.setConf(conf);


    for (int i=0; i<sampleFiles.length; i++) {
      urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];


      protocol = factory.getProtocol(urlString);
      content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum()).getContent();


      parse = parser.getParse(content).get(content.getUrl());


      String text = parse.getText().replaceAll("[ \t\r\n]+", " ");
      assertTrue(expectedText.equals(text));
    }
  }

View Full Code Here

Examples of org.apache.nutch.parse.Parse

  public void testIt() throws ProtocolException, ParseException {


    String urlString;
    Protocol protocol;
    Content content;
    Parse parse;


    Configuration conf = NutchConfiguration.create();
    ParseUtil parser = new ParseUtil(conf);
    ProtocolFactory factory = new ProtocolFactory(conf);
    for (int i = 0; i < sampleFiles.length; i++) {
      urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];


      protocol = factory.getProtocol(urlString);
      content = protocol.getProtocolOutput(new Text(urlString),
                                           new CrawlDatum()).getContent();
      parse = parser.parseByExtensionId("parse-msexcel", content).get(content.getUrl());


      assertTrue(parse.getText().equals(expectedText));
    }
  }

View Full Code Here

Examples of org.apache.nutch.parse.Parse


  public void testIt() throws ProtocolException, ParseException {
    String urlString;
    Protocol protocol;
    Content content;
    Parse parse;


    for (int i = 0; i < sampleFiles.length; i++) {
      urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];


      Configuration conf = NutchConfiguration.create();
      protocol = new ProtocolFactory(conf).getProtocol(urlString);
      content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum()).getContent();
      parse = new ParseUtil(conf).parseByExtensionId("parse-pdf", content).get(content.getUrl());


      int index = parse.getText().indexOf(expectedText);
      assertTrue(index > 0);
    }
  }

View Full Code Here

Examples of org.apache.nutch.parse.Parse

    try {
      ParseUtil parser = new ParseUtil(NutchConfiguration.create());
      /* loop through the test documents and validate result */
      for (int t = 0; t < docs.length; t++) {
        Content content = getContent(docs[t]);
        Parse parse = parser.parse(content).get(content.getUrl());
        assertEquals(metalanguages[t], (String) parse.getData().getParseMeta().get(Metadata.LANGUAGE));
      }
    } catch (Exception e) {
      e.printStackTrace(System.out);
      fail(e.toString());
    }

View Full Code Here

Examples of org.apache.nutch.parse.Parse

            // long startTime = System.currentTimeMillis();


            //Make a content object
            Content content = new Content(url,url, docBody.toString().getBytes(), mimetype, metaData, conf);


            Parse parse = null;
            ParseStatus parseStatus;
            try {
              parse = pu.parse(content);
              parseStatus = parse.getData().getStatus();
            } 
            catch (final Exception e) {
              parseStatus = new ParseStatus(e);
              LOG.error("error: unknown "+parseStatus.toString());
              if(!parseStatus.isSuccess()) {

View Full Code Here

Examples of org.apache.nutch.parse.Parse

              key);
            
            return;
          }


          Parse parse = (Parse)value;


          textOut.append(key, new ParseText(parse.getText()));
          ParseData parseData = parse.getData();


          // recover the signature prepared by Fetcher or ParseSegment
          String sig = parseData.getContentMeta().get(
            Nutch.SIGNATURE_KEY);

View Full Code Here

Examples of org.apache.nutch.parse.Parse

      DataInputStream in = new DataInputStream(new FileInputStream(file));
      
      try
      {
        in.readFully(bytes);
        Parse parse = parser.getParse(new Content(url, url, bytes,
          "application/pdf", new Metadata(), conf));
        System.out.println(parse.getData().getTitle());
      }
      finally
      {
        if (in != null)
        {

View Full Code Here

0 1 2 3 4 5

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.