Examples of ParseText


Examples of kpi.asoiu.parsers.ParseText

    public static void main(String[] args) throws Exception {


//        System.out.println(rules);
        ParseText pt = ModelFactory.getInstance().getParseText();
//
//        pt.parse("The Istanbul meeting faces a mass of dilemmas, contradictions and complexities, with no clear way forward.\n" +
//                "\n" +
//                "Western leaders talk of the need to step up pressure on the regime and support the opposition. But they have no intention of intervening militarily in any way, or even backing the rebels with weaponry.\n" +
//                "\n" +
//                "The opposition is divided, and many of its fighters on the ground have been crushed by a ruthless crackdown. It is under pressure to accept the peace plan of Kofi Annan. The rebels know that in any talks, the balance of power will be heavily against them.\n" +
//                "\n" +
//                "It clearly believes it has little to fear from the Istanbul gathering.");
//        pt.parse("What is the new super-food? Do you live here? ");

        pt.parse("A compound sentence consists of multiple independent clauses. " +
                "Dolphins live in water but they are not fish. " +
                "They need air to breathe. " +
                "The air consists of many elements. " +
                "Dolphins can stay under the water for many minutes but then they must come up to the surface. " +
                "Water is a home for dolphins. " +
View Full Code Here

Examples of kpi.asoiu.parsers.ParseText

    }

    private ModelFactory() {
        try {
            parseSentence = new ParseSentence();
            parseText = new ParseText();
            parsePhrase = new ParsePhrase();
        } catch (IOException e) {

            e.printStackTrace()//ToDo DO THIS!!!
        }
View Full Code Here

Examples of net.nutch.parse.ParseText

      nfs.mkdirs(outDir);
      SegmentWriter sw = new SegmentWriter(nfs, outDir, true);
      LOG.fine(" - opening first output segment in " + outDir.getName());
      FetcherOutput fo = new FetcherOutput();
      Content co = new Content();
      ParseText pt = new ParseText();
      ParseData pd = new ParseData();
      int outputCnt = 0;
      for (int n = 0; n < ir.maxDoc(); n++) {
        if (ir.isDeleted(n)) {
          //System.out.println("-del");
View Full Code Here

Examples of net.nutch.parse.ParseText

    if (!parsed)
      LOG.warning(" - some input segments are non-parsed, forcing non-parsed output!");
    FetcherOutput fo = new FetcherOutput();
    Content co = new Content();
    ParseData pd = new ParseData();
    ParseText pt = new ParseText();
    long outputCnt = 0L;
    int segCnt = 1;
    File outDir = new File(output, SegmentWriter.getNewSegmentName());
    LOG.info("Writing output in " + output);
    try {
View Full Code Here

Examples of net.nutch.parse.ParseText

   */
  public synchronized boolean next(FetcherOutput fo, Content co,
          ParseText pt, ParseData pd) throws IOException {
    boolean valid = true;
    Content rco = (co == null) ? _co : co;
    ParseText rpt = (pt == null) ? _pt : pt;
    ParseData rpd = (pd == null) ? _pd : pd;
    if (fetcherReader.next(fo) == null) valid = false;
    if (contentReader != null)
      if (contentReader.next(rco) == null) valid = false;
    if (parseTextReader != null)
View Full Code Here

Examples of net.nutch.parse.ParseText

  /**
   * Prints every record of this segment to {@code output}, either in reader
   * order or in the order produced by sorting the records' URLs.
   *
   * <p>For each record the record number and the {@link FetcherOutput} are
   * always printed; the {@link Content}, {@link ParseData} and
   * {@link ParseText} sections are printed only when the corresponding reader
   * field is non-null.
   *
   * <p>NOTE(review): this excerpt ends before the method's closing brace, so
   * anything that follows the if/else below is not covered by this comment.
   *
   * @param sorted when {@code false}, records are read sequentially via
   *        {@code next(...)}; when {@code true}, a temporary URL-to-record-number
   *        file is built, sorted, and used to fetch records via {@code get(...)}
   * @param output stream that receives the textual dump
   * @throws Exception declared broadly; nothing is caught inside the visible body
   */
  public synchronized void dump(boolean sorted, PrintStream output) throws Exception {
    // presumably rewinds the underlying readers to the first record — confirm in reset()
    reset();
    // Holder objects reused for every record.
    FetcherOutput fo = new FetcherOutput();
    Content co = new Content();
    ParseData pd = new ParseData();
    ParseText pt = new ParseText();
    long recNo = 0L;
    if (!sorted) {
      // Sequential dump: recNo simply counts the records as they are read.
      while(next(fo, co, pt, pd)) {
        output.println("Recno:: " + recNo++);
        output.println("FetcherOutput::\n" + fo.toString());
        if (contentReader != null)
          output.println("Content::\n" + co.toString());
        if (parseDataReader != null)
          output.println("ParseData::\n" + pd.toString());
        if (parseTextReader != null)
          output.println("ParseText::\n" + pt.toString());
        output.println("");
      }
    } else {
      // Sorted dump, step 1: temporary files live inside the segment directory;
      // any leftovers with the same names are deleted first.
      File unsortedFile = new File(segmentDir, ".unsorted");
      File sortedFile = new File(segmentDir, ".sorted");
      nfs.delete(unsortedFile);
      nfs.delete(sortedFile);
      // Step 2: write one (url, record number) pair per fetcher record.
      // NOTE(review): seqWriter is not closed if an exception is thrown before
      // seqWriter.close() below (no try/finally).
      SequenceFile.Writer seqWriter = new SequenceFile.Writer(nfs,
              unsortedFile.toString(), UTF8.class, LongWritable.class);
      FetchListEntry fle;
      LongWritable rec = new LongWritable();
      UTF8 url = new UTF8();
      String urlString;
      while (fetcherReader.next(fo) != null) {
        fle = fo.getFetchListEntry();
        urlString = fle.getPage().getURL().toString();
        rec.set(recNo);
        url.set(urlString);
        seqWriter.append(url, rec);
        recNo++;
      }
      seqWriter.close();
      // sort the SequenceFile
      long start = System.currentTimeMillis();

      // Step 3: sort using UTF8.Comparator (the key written above is the URL).
      SequenceFile.Sorter sorter = new SequenceFile.Sorter(nfs,
              new UTF8.Comparator(), LongWritable.class);

      sorter.sort(unsortedFile.toString(), sortedFile.toString());

      // At this point recNo holds the number of entries written in step 2.
      float localSecs = (System.currentTimeMillis() - start) / 1000.0f;
      LOG.info(" - sorted: " + recNo + " entries in " + localSecs + "s, "
        + (recNo/localSecs) + " entries/s");

      nfs.delete(unsortedFile);
      // Step 4: walk the sorted file and fetch each record by its stored number.
      // NOTE(review): seqReader is likewise not closed if get(...) or println throws.
      SequenceFile.Reader seqReader = new SequenceFile.Reader(nfs, sortedFile.toString());
      while (seqReader.next(url, rec)) {
        recNo = rec.get();
        get(recNo, fo, co, pt, pd);
        // The post-increment is overwritten by rec.get() on the next iteration,
        // so the printed value is the stored record number.
        output.println("Recno:: " + recNo++);
        output.println("FetcherOutput::\n" + fo.toString());
        if (contentReader != null)
          output.println("Content::\n" + co.toString());
        if (parseDataReader != null)
          output.println("ParseData::\n" + pd.toString());
        if (parseTextReader != null)
          output.println("ParseText::\n" + pt.toString());
        output.println("");
      }
      seqReader.close();
      // Remove the remaining temporary file once the dump is complete.
      nfs.delete(sortedFile);
    }
View Full Code Here

Examples of net.nutch.parse.ParseText

        break;
      case OP_PARSEDATA:
        value = new ParseData();
        break;
      case OP_PARSETEXT:
        value = new ParseText();
        break;
      case OP_FETCHDATE:
        value = new LongWritable();
        break;
      default:
View Full Code Here

Examples of net.nutch.parse.ParseText

        text.append("\nCreated at epoch time: " + System.currentTimeMillis() + ", " + r.nextLong());
      }
      for (int k = 0; k < 10; k++) {
        text.append(k + " lines of text in the queue, " + k + " lines of text...\n");
      }
      ParseText pt = new ParseText(text.toString());
      sw.append(fo, co, pt, pd);
    }
    sw.close();
  }
View Full Code Here

Examples of org.apache.nutch.parse.ParseText

      throws IOException {

      Node nodeDb = null;
      List<CrawlDatum> fetchDatums = new ArrayList<CrawlDatum>();
      ParseData parseData = null;
      ParseText parseText = null;
      List<FieldWritable> fieldsList = new ArrayList<FieldWritable>();

      // assign values, url must be successfully fetched and parsed
      while (values.hasNext()) {
View Full Code Here

Examples of org.apache.nutch.parse.ParseText

            return;
          }

          Parse parse = (Parse)value;

          textOut.append(key, new ParseText(parse.getText()));
          ParseData parseData = parse.getData();

          // recover the signature prepared by Fetcher or ParseSegment
          String sig = parseData.getContentMeta().get(
            Nutch.SIGNATURE_KEY);
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.