Examples of cascading.tuple.TupleEntry

cascading.tuple.TupleEntry
Class TupleEntry allows a {@link Tuple} instance and its declaring {@link Fields} instance to be used as a single object.
Once a TupleEntry is created, its Fields cannot be changed, but the Tuple instance it holds can be replaced or modified. The managed Tuple should not have elements added or removed, as this will break the relationship with the associated Fields instance.
If type information is provided on the Fields instance, all setters on this class will use that information to coerce the given object to the expected type.
For example, if position 0 is of type {@code long}, then {@code entry.setString(0, "9")} will coerce the "9" to a long {@code 9}. Thus, {@code entry.getObject(0) == 9L}.
No coercion is performed with the {@link #getObject(Comparable)} and {@link #getObject(int)} methods.
To set a value without coercion, see the {@link #setRaw(Comparable,Object)} and {@link #setRaw(int,Object)} methods. @see Fields @see Tuple

    }
  }
  
  @Override
  public void operate(FlowProcess process, FunctionCall<NullContext> functionCall) {
        TupleEntry arguments = functionCall.getArguments();
        FetchedDatum fetchedDatum = new FetchedDatum(arguments.getTuple());


        if (fetchedDatum.getContentType().startsWith("text/html")) {
          init();


          Metadata metadata = new Metadata();

View Full Code Here

        
        // Test for all valid fetches.
        Tap validate = platform.makeTap(platform.makeBinaryScheme(StatusDatum.FIELDS), statusPath);
        TupleEntryIterator tupleEntryIterator = validate.openForRead(platform.makeFlowProcess());
        while (tupleEntryIterator.hasNext()) {
            TupleEntry entry = tupleEntryIterator.next();
            StatusDatum sd = new StatusDatum(entry);
            if (sd.getStatus() != UrlStatus.FETCHED) {
                LOGGER.error(String.format("Fetched failed! Status is %s for %s", sd.getStatus(), sd.getUrl()));
                BaseFetchException e = sd.getException();
                if (e != null) {

View Full Code Here

        // Test for 10 good fetches.
        Tap validate = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS), contentPath);
        TupleEntryIterator tupleEntryIterator = validate.openForRead(platform.makeFlowProcess());
        int fetchedPages = 0;
        while (tupleEntryIterator.hasNext()) {
            TupleEntry entry = tupleEntryIterator.next();
            new FetchedDatum(entry);
            fetchedPages += 1;
        }


        Assert.assertEquals(10, fetchedPages);

View Full Code Here

        float linkScore = 0;


        String url = null;


        while (iter.hasNext()) {
            TupleEntry entry = iter.next();
            
            boolean isCrawlDatum = entry.getString(CRAWLDBDATUM_URL_FIELD) != null;
            boolean isStatus = entry.getString(STATUSDATUM_URL_FIELD) != null;
            boolean isAnalyzed = entry.getString(ANALYZEDDATUM_URL_FIELD) != null;
            if (isCrawlDatum) {
               Tuple crawlDbTuple = TupleEntry.select(CrawlDbDatum.FIELDS, entry);
               crawlDbDatum = new CrawlDbDatum(crawlDbTuple);
               url = crawlDbDatum.getUrl();
            }
            
            if (isStatus) {
                statusDatum = new StatusDatum(entry);
                url = statusDatum.getUrl();
            }


            if (isAnalyzed) {
                Tuple analyzedTuple = TupleEntry.select(AnalyzedDatum.FIELDS, entry);
                analyzedDatum = new AnalyzedDatum(analyzedTuple);
                url = analyzedDatum.getUrl();
            }


            // we could have either status + link or just link tuple entry
            if (entry.getString(new Fields(LinkDatum.URL_FN)) != null) {
                LinkDatum linkDatum = new LinkDatum(TupleEntry.select(LinkDatum.FIELDS, entry));
                
                pageScore = linkDatum.getPageScore();
                // Add up the link scores
                linkScore += linkDatum.getLinkScore();

View Full Code Here

  @Test
  public void testSplitterWithNonMbox() {
    MboxSplitterFunction splitter = new MboxSplitterFunction();
    
    FetchedDatum datum = new FetchedDatum("baseUrl", "redirectedUrl", 0, new HttpHeaders(), new ContentBytes(), "text/ascii", 0);
    TupleEntry value = new TupleEntry(datum.getTupleEntry());
    
    when(_funcCall.getArguments()).thenReturn(value);
    splitter.operate(_process, _funcCall);
    
    verify(_collector).add(value);

View Full Code Here

    MboxSplitterFunction splitter = new MboxSplitterFunction();


    final String mboxString = "From 1\r\rContent 1\r\rFrom 2\r\rContent 2";
    byte[] mboxContent = mboxString.getBytes("us-ascii");
    FetchedDatum datum = new FetchedDatum("baseUrl", "redirectedUrl", 0, new HttpHeaders(), new ContentBytes(mboxContent), "application/mbox", 0);
    TupleEntry value = new TupleEntry(datum.getTupleEntry());
    
    when(_funcCall.getArguments()).thenReturn(value);
    splitter.operate(_process, _funcCall);


    verify(_collector, times(2)).add(any(TupleEntry.class));

View Full Code Here

  @Override
  public void operate(FlowProcess process, FunctionCall<NullContext> functionCall) {
    init();
    
    // On input we have a FetchedDatum that holds a single email.
        TupleEntry arguments = functionCall.getArguments();
        FetchedDatum fetchedDatum = new FetchedDatum(arguments.getTuple());
        
        // Now, if the FetchedDatum mime-type is application/mbox, we want to parse it and
        // output the results
        if (fetchedDatum.getContentType().equals("application/mbox")) {
          Metadata metadata = new Metadata();

View Full Code Here


    HashSet<String> names = new HashSet<String>();
        
    Iterator<TupleEntry> iter = bufferCall.getArgumentsIterator();
    while (iter.hasNext()) {
      TupleEntry entry = iter.next();
      
      double score = entry.getDouble(FieldNames.SCORE);
      summedScore += score;
      
      String name = entry.getString(FieldNames.EMAIL_NAME);
      if (name != null) {
        name = name.trim();
        if (name.startsWith("\"") && name.endsWith("\"")) {
          name = name.substring(1, name.length() - 1);
        }

View Full Code Here

    String name = null;
    double score = 0.0;
    
    Iterator<TupleEntry> iter = bufferCall.getArgumentsIterator();
    while (iter.hasNext()) {
      TupleEntry entry = iter.next();
      
      String possibleEmail = entry.getString(FieldNames.EMAIL_ADDRESS);
      if (possibleEmail != null) {
        if (email != null) {
          if (!email.equals(possibleEmail)) {
            LOGGER.warn(String.format("Duplicate entry found for email addresses (%s - %s | %s)",
                entry.getString(FieldNames.MESSAGE_ID), email, possibleEmail));
          } else {
            // We occasionally get dup message ids because a msg gets archived twice. 
            // FUTURE KKr - ignore duplicate message ids. Since they typically occur in order, we
            // could do it in our parse handling code.
          }
        } else {
          email = possibleEmail;
          name = entry.getString(FieldNames.EMAIL_NAME);
        }
      }
      
      score += entry.getDouble(FieldNames.SCORE);
    }
    
    if (email != null) {
      bufferCall.getOutputCollector().add(new Tuple(email, name, score));
    }

View Full Code Here

        boolean allOK = false;
        int verifiedCnt = 0;
        Tap sourceTap = platform.makeTap(platform.makeTextScheme(), dataPath);
        TupleEntryIterator tupleEntryIterator = sourceTap.openForRead(platform.makeFlowProcess());
        while (tupleEntryIterator.hasNext()) {
          TupleEntry next = tupleEntryIterator.next();
          String line = next.getString("line");
          String[] split = line.split("\t");
          if (split[0].equals(PAGE1_URL)) {
              allOK |= split[3].equals(PAGE1_SCORE);
              verifiedCnt++;
          } else if (split[0].equals(PAGE2_URL)) {

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of cascading.tuple.TupleEntry

bixo.examples.crawl.DemoStatusTool

bixo.fetcher.FetcherTest

bixo.operations.FetchBuffer$QueuedValues

bixo.operations.FilterAndScoreByUrlAndRobotsTest

bixo.pipes.AbstractFetchPipeTest

cascading.CascadingTestCase

cascading.DistanceUseCasePlatformTest

cascading.flow.stream.element.OperatorStage

cascading.function.FunctionPlatformTest

cascading.lingual.jdbc.JDBCPlatformTestCase

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.