Package org.archive.access.nutch.parse

Source Code of org.archive.access.nutch.parse.MetadataOnlyParser

/* MetadataOnlyParser
*
* $Id: MetadataOnlyParser.java 1896 2007-08-01 21:44:31Z jlee-archive $
*
* Copyright (C) 2005 Internet Archive.
*
* This file is part of the archive-access tools project
* (http://sourceforge.net/projects/archive-access).
*
* The archive-access tools are free software; you can redistribute them and/or
* modify them under the terms of the GNU Lesser Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or any
* later version.
*
* The archive-access tools are distributed in the hope that they will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
* Public License for more details.
*
* You should have received a copy of the GNU Lesser Public License along with
* the archive-access tools; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package org.archive.access.nutch.parse;

import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.parse.Outlink;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseData;
import org.apache.nutch.parse.ParseImpl;
import org.apache.nutch.parse.ParseStatus;
import org.apache.nutch.parse.Parser;
import org.apache.nutch.protocol.Content;

/**
* Parser that gets metadata only from passed resource.
*
* @author St.Ack
*/
public class MetadataOnlyParser implements Parser
{
  private Configuration conf;
   
  public Parse getParse(Content content)
  {
//    return new ParseImpl(content.getUrl(),  TODO MC BUG - don't index url as content
    return new ParseImpl("",
      new ParseData(ParseStatus.STATUS_SUCCESS,
      "", new Outlink[0], content.getMetadata()));
  }

  public Configuration getConf()
  {
    return this.conf;
  }

  public void setConf(Configuration conf)
  {
    this.conf = conf;
  }
}
TOP

Related Classes of org.archive.access.nutch.parse.MetadataOnlyParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.