Package org.apache.nutch.metadata

Examples of org.apache.nutch.metadata.Metadata.names()


          return;
        }
        byte[] data = c.getContent();
        LOG.debug("-data len=" + data.length);
        Metadata meta = c.getMetadata();
        String[] names = meta.names();
        LOG.debug("- " + names.length + " meta");
        for (int i = 0; i < names.length; i++) {
          boolean my = true;
          char ch = names[i].charAt(0);
          if (Character.isLetter(ch) && Character.isUpperCase(ch)) {
View Full Code Here


    Parse parse = parseResult.get(content.getUrl());
    Metadata metadata = parse.getData().getParseMeta();

    // check in the metadata first : the tika-parser
    // might have stored the values there already
    for (String mdName : metadata.names()) {
      addIndexedMetatags(metadata, mdName, metadata.getValues(mdName));
    }

    Metadata generalMetaTags = metaTags.getGeneralTags();
    for (String tagName : generalMetaTags.names()) {
View Full Code Here

    for (String mdName : metadata.names()) {
      addIndexedMetatags(metadata, mdName, metadata.getValues(mdName));
    }

    Metadata generalMetaTags = metaTags.getGeneralTags();
    for (String tagName : generalMetaTags.names()) {
      addIndexedMetatags(metadata, tagName, generalMetaTags.getValues(tagName));
    }

    Properties httpequiv = metaTags.getHttpEquivTags();
    for (Enumeration<?> tagNames = httpequiv.propertyNames(); tagNames
View Full Code Here

                            currRec.origurl = value.getUrl();
                            currRec.newurl = value.getBaseUrl();
                            currRec.content = value.getContent();
                            Metadata metadata = value.getMetadata();
                            currRec.header = new HashMap<String,String>();
                            for (String name : metadata.names()) {
                                String data = metadata.get(name);
                                currRec.header.put(name.toLowerCase(),data);
                            }
                            currRec.header.remove("nutch.content.digest");
                            currRec.header.remove("nutch.crawl.score");
View Full Code Here

    Metadata metadata = parse.getData().getParseMeta();

    // check in the metadata first : the tika-parser
    // might have stored the values there already

    for (String mdName : metadata.names()) {
      String value = metadata.get(mdName);
      // check whether the name is in the list of what we want or if
      // specified *
      if (metatagset.contains("*") || metatagset.contains(mdName.toLowerCase())) {
        LOG.debug("Found meta tag : " + mdName + "\t" + value);
View Full Code Here

        metadata.add("metatag." + mdName.toLowerCase(), value);
      }
    }

    Metadata generalMetaTags = metaTags.getGeneralTags();
    for (String tagName : generalMetaTags.names() ) {
    String[] tagValues = generalMetaTags.getValues(tagName);   
 
      for ( String tagValue : tagValues ) {
      // check whether the name is in the list of what we want or if
      // specified *
 
View Full Code Here

  /** Test for <code>names</code> method */
  public void testNames() {
    String[] names = null;
    Metadata meta = new Metadata();
    names = meta.names();
    assertEquals(0, names.length);
   
    meta.add("name-one", "value");
    names = meta.names();
    assertEquals(1, names.length);
View Full Code Here

    Metadata meta = new Metadata();
    names = meta.names();
    assertEquals(0, names.length);
   
    meta.add("name-one", "value");
    names = meta.names();
    assertEquals(1, names.length);
    assertEquals("name-one", names[0]);
    meta.add("name-two", "value");
    names = meta.names();
    assertEquals(2, names.length);
View Full Code Here

    meta.add("name-one", "value");
    names = meta.names();
    assertEquals(1, names.length);
    assertEquals("name-one", names[0]);
    meta.add("name-two", "value");
    names = meta.names();
    assertEquals(2, names.length);
  }
 
  /** Test for <code>remove(String)</code> method */
  public void testRemove() {
View Full Code Here

          return;
        }
        byte[] data = c.getContent();
        LOG.debug("-data len=" + data.length);
        Metadata meta = c.getMetadata();
        String[] names = meta.names();
        LOG.debug("- " + names.length + " meta");
        for (int i = 0; i < names.length; i++) {
          boolean my = true;
          char ch = names[i].charAt(0);
          if (Character.isLetter(ch) && Character.isUpperCase(ch)) {
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Inc. Contact coftware#gmail.com.