Examples of SegmentWriter


Examples of net.nutch.segment.SegmentWriter

      outDirs.add(outDir);
      LOG.info("* Merging all segments into " + output.getName());
      s1 = System.currentTimeMillis();
      delta = s1;
      nfs.mkdirs(outDir);
      SegmentWriter sw = new SegmentWriter(nfs, outDir, true);
      LOG.fine(" - opening first output segment in " + outDir.getName());
      FetcherOutput fo = new FetcherOutput();
      Content co = new Content();
      ParseText pt = new ParseText();
      ParseData pd = new ParseData();
      int outputCnt = 0;
      for (int n = 0; n < ir.maxDoc(); n++) {
        if (ir.isDeleted(n)) {
          //System.out.println("-del");
          continue;
        }
        Document doc = ir.document(n);
        String segDoc = doc.get("sd");
        int idx = segDoc.indexOf('|');
        String segName = segDoc.substring(0, idx);
        String docName = segDoc.substring(idx + 1);
        SegmentReader sr = (SegmentReader) readers.get(segName);
        long docid;
        try {
          docid = Long.parseLong(docName);
        } catch (Exception e) {
          continue;
        }
        try {
          // get data from the reader
          sr.get(docid, fo, co, pt, pd);
        } catch (Throwable thr) {
          // don't break the loop, because only one of the segments
          // may be corrupted...
          LOG.fine(" - corrupt record no. " + docid + " in segment " + sr.segmentDir.getName() + " - skipping.");
          continue;
        }
        sw.append(fo, co, pt, pd);
        outputCnt++;
        processedRecords++;
        if (processedRecords > 0 && (processedRecords % LOG_STEP == 0)) {
          LOG.info(" Processed " + processedRecords + " records (" +
                  (float)(LOG_STEP * 1000)/(float)(System.currentTimeMillis() - delta) + " rec/s)");
          delta = System.currentTimeMillis();
        }
        if (processedRecords % maxCount == 0) {
          sw.close();
          outDir = new File(output, SegmentWriter.getNewSegmentName());
          LOG.fine(" - starting next output segment in " + outDir.getName());
          nfs.mkdirs(outDir);
          sw = new SegmentWriter(nfs, outDir, true);
          outDirs.add(outDir);
        }
      }
      LOG.info("* Merging took " + (System.currentTimeMillis() - s1) + " ms");
      ir.close();
      sw.close();
      FileUtil.fullyDelete(fsmtIndexDir);
      for (Iterator iter = readers.keySet().iterator(); iter.hasNext();) {
        SegmentReader sr = (SegmentReader) readers.get(iter.next());
        sr.close();
      }
View Full Code Here

Examples of net.nutch.segment.SegmentWriter

   * @param unique if true, use unique data per segment, otherwise use the same
   *        data
   * @throws Exception
   */
  protected void createSegmentData(NutchFileSystem nfs, File dir, boolean unique) throws Exception {
    SegmentWriter sw = new SegmentWriter(nfs, dir, true);
    Random r = new Random(System.currentTimeMillis());
    for (int i = 0; i < PAGE_CNT; i++) {
      String url = "http://www.example.com/page-" + i;
      String rnd = "";
      if (unique) {
        rnd = "/" + System.currentTimeMillis();
        url += rnd;
      }
      url += "/example.html";
      FetchListEntry fle = new FetchListEntry(true, new Page(url, 1.0f), new String[] { "test" + rnd });
      FetcherOutput fo = new FetcherOutput(fle, MD5Hash.digest(url), FetcherOutput.SUCCESS);
      StringBuffer content = new StringBuffer("<html><body><h1>Hello from Page " + i + "</h1>");
      if (unique) {
        content.append("<p>Created at epoch time: " + System.currentTimeMillis() + ", " + r.nextLong() + "</p>");
      }
      for (int k = 0; k < 10; k++) {
        content.append("<p>" + k + " lines of text in the queue, " + k + " lines of text...</p>\n");
      }
      content.append("</body></html>");
      Properties meta = new Properties();
      meta.setProperty("Content-Type", "text/html");
      meta.setProperty("Host", "http://localhost");
      meta.setProperty("Connection", "Keep-alive, close");
      Content co = new Content(url, "http://www.example.com", content.toString().getBytes("UTF-8"), "text/html", meta);
      ParseData pd = new ParseData("Hello from Page " + i, new Outlink[0], meta);
      StringBuffer text = new StringBuffer("Hello from Page" + i);
      if (unique) {
        text.append("\nCreated at epoch time: " + System.currentTimeMillis() + ", " + r.nextLong());
      }
      for (int k = 0; k < 10; k++) {
        text.append(k + " lines of text in the queue, " + k + " lines of text...\n");
      }
      ParseText pt = new ParseText(text.toString());
      sw.append(fo, co, pt, pd);
    }
    sw.close();
  }
View Full Code Here

Examples of org.apache.jackrabbit.oak.plugins.segment.SegmentWriter

    public MemoryJournal(SegmentStore store, NodeState head) {
        this.store = checkNotNull(store);
        this.parent = null;

        SegmentWriter writer = store.getWriter();
        RecordId id = writer.writeNode(head).getRecordId();
        writer.flush();

        this.base = id;
        this.head = id;
    }
View Full Code Here

Examples of org.apache.jackrabbit.oak.plugins.segment.SegmentWriter

    }

    @Override
    public synchronized boolean setHead(RecordId base, RecordId head) {
        if (checkNotNull(base).equals(this.head)) {
            SegmentWriter writer = store.getWriter();
            if (writer.getCurrentSegment(head.getSegmentId()) != null) {
                writer.flush();
            }
            this.head = checkNotNull(head);
            return true;
        } else {
            return false;
View Full Code Here

Examples of org.apache.jackrabbit.oak.plugins.segment.SegmentWriter

    }

    @Override
    public synchronized void merge() {
        if (parent != null) {
            SegmentWriter writer = store.getWriter();

            Segment segment = writer.getDummySegment();
            NodeState before = new SegmentNodeState(segment, base);
            NodeState after = new SegmentNodeState(segment, head);

            while (!parent.setHead(base, head)) {
                RecordId newBase = parent.getHead();
                NodeBuilder builder =
                        new SegmentNodeState(segment, newBase).builder();
                after.compareAgainstBaseState(before, new MergeDiff(builder));
                NodeState state = builder.getNodeState();

                RecordId newHead = writer.writeNode(state).getRecordId();

                base = newBase;
                head = newHead;
            }
View Full Code Here

Examples of org.apache.jackrabbit.oak.plugins.segment.SegmentWriter

        this.name = "root";

        DBObject id = new BasicDBObject("_id", "root");
        state = journals.findOne(id, null, primaryPreferred());
        if (state == null) {
            SegmentWriter writer = store.getWriter();
            RecordId headId = writer.writeNode(head).getRecordId();
            writer.flush();
            state = new BasicDBObject(of(
                    "_id""root",
                    "head", headId.toString()));
            try {
                journals.insert(state, concern);
View Full Code Here

Examples of org.apache.jackrabbit.oak.plugins.segment.SegmentWriter

        if (state.containsField("parent")) {
            RecordId base = RecordId.fromString(state.get("base").toString());
            RecordId head = RecordId.fromString(state.get("head").toString());

            SegmentWriter writer = store.getWriter();
            Segment segment = writer.getDummySegment();

            NodeState before = new SegmentNodeState(segment, base);
            NodeState after = new SegmentNodeState(segment, head);

            Journal parent = store.getJournal(state.get("parent").toString());
            while (!parent.setHead(base, head)) {
                RecordId newBase = parent.getHead();
                NodeBuilder builder =
                        new SegmentNodeState(segment, newBase).builder();
                after.compareAgainstBaseState(before, new MergeDiff(builder));
                RecordId newHead =
                        writer.writeNode(builder.getNodeState()).getRecordId();

                base = newBase;
                head = newHead;
            }
View Full Code Here

Examples of org.apache.jackrabbit.oak.plugins.segment.SegmentWriter

        this.name = "root";

        DBObject id = new BasicDBObject("_id", "root");
        state = journals.findOne(id, null, primaryPreferred());
        if (state == null) {
            SegmentWriter writer = store.getWriter();
            RecordId headId = writer.writeNode(head).getRecordId();
            writer.flush();
            state = new BasicDBObject(of(
                    "_id""root",
                    "head", headId.toString()));
            try {
                journals.insert(state, concern);
View Full Code Here

Examples of org.apache.jackrabbit.oak.plugins.segment.SegmentWriter

        if (state.containsField("parent")) {
            RecordId base = RecordId.fromString(state.get("base").toString());
            RecordId head = RecordId.fromString(state.get("head").toString());

            SegmentWriter writer = store.getWriter();
            Segment segment = writer.getDummySegment();

            NodeState before = new SegmentNodeState(segment, base);
            NodeState after = new SegmentNodeState(segment, head);

            Journal parent = store.getJournal(state.get("parent").toString());
            while (!parent.setHead(base, head)) {
                RecordId newBase = parent.getHead();
                NodeBuilder builder =
                        new SegmentNodeState(segment, newBase).builder();
                after.compareAgainstBaseState(before, new MergeDiff(builder));
                RecordId newHead =
                        writer.writeNode(builder.getNodeState()).getRecordId();

                base = newBase;
                head = newHead;
            }
View Full Code Here

Examples of org.apache.jackrabbit.oak.plugins.segment.SegmentWriter

        this.name = "root";

        DBObject id = new BasicDBObject("_id", "root");
        state = journals.findOne(id, null, primaryPreferred());
        if (state == null) {
            SegmentWriter writer = new SegmentWriter(store);
            RecordId head = writer.writeNode(root).getRecordId();
            writer.flush();
            state = new BasicDBObject(of(
                    "_id""root",
                    "head", head.toString()));
            try {
                journals.insert(state, concern);
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.