Package com.findwise.hydra.local

Examples of com.findwise.hydra.local.LocalDocument


  @Test
  public void testMetadataExtractionCanBeDisabled() throws Exception {
    TikaStage stage = new TikaStage();
    stage.setAddMetaData(false);
    LocalDocument doc = buildDocumentWithResourceFile("/test.pdf");
    stage.process(doc);
    assertFalse("Document should not contain metadata", doc.hasContentField("test_pdf_Author"));
  }
View Full Code Here


  @Test
  public void testLanguageDetectionCanBeDisabled() throws Exception {
    TikaStage stage = new TikaStage();
    stage.setAddLanguage(false);
    LocalDocument doc = buildDocumentWithResourceFile("/test.pdf");
    stage.process(doc);
    assertFalse("Document should not contain language", doc.hasContentField("test_pdf_language"));
  }
View Full Code Here

  /**
   * Creates a LocalDocument containing an attached DocumentFile whose contents are taken from
   * the given resource.
   *
   */
  private LocalDocument buildDocumentWithResourceFile(String resourcePath) throws IOException, URISyntaxException {
    LocalDocument doc = new LocalDocument();
    doc.setDocumentFileRepository(buildDocumentFileRepositoryWithResource(resourcePath));
    return doc;
  }
View Full Code Here

        + "groovyrunner" + IOUtils.DIR_SEPARATOR
        + "FieldAddingGroovyStage.groovy");
    runner.setGroovyScript(script);
    runner.init();

    LocalDocument doc = new LocalDocument();
    runner.process(doc);
    String actualValue = (String) doc.getContentField("fieldName");
    assertEquals("value", actualValue);
  }
View Full Code Here

  public void postDocuments(int numberToPost) throws JsonException, IOException, HttpException, URISyntaxException {
    RemotePipeline rp = new HttpRemotePipeline("insertStage");
    for(int i=0; i<numberToPost; i++) {
      rp.saveFull(LocalDocumentFactory.getRandomStringDocument("in", "id"));
      RemotePipeline rp2 = new HttpRemotePipeline("fileAdder");
      LocalDocument ld = rp2.getDocument(new LocalQuery());
      File f = getFile();
      FileInputStream fis = new FileInputStream(f);
      DocumentFile<Local> df = new DocumentFile<Local>(ld.getID(), f.getName(), fis);
      df.setEncoding(new InputStreamReader(df.getStream()).getEncoding());
      df.setMimetype("application/msword");
      rp2.saveFile(df);
    }
  }
View Full Code Here

   
    stage.setUrlField("url");
    stage.setContentField("out");
    stage.setMetadatPrefix("meta_");

    doc = new LocalDocument();
  }
View Full Code Here

  @Before
  public void init() {
    stage = new SimpleFetchingTikaStage();
    stage.setUrlFieldPattern(pattern);

    doc = new LocalDocument();
  }
View Full Code Here

   * Usage: key:value is added here;key2:second value here;key3:Third value here [...]
   * @param args
   */
  public static void main(String[] args) {
    RemotePipeline rp1 = new HttpRemotePipeline("127.0.0.1", 12001, "StdinInputNode");
    LocalDocument ld = new LocalDocument();
   
    for (String tuple : StringUtils.join(args, " ").split(";")) {
      String[] keyValue = tuple.split(":");
      if (keyValue.length != 2) {
        logger.error("Wrong input format. Format is 'key:value;key2:value2 [...]'");
        logger.error("Your data was not added");
        return;
      }
      ld.putContentField(keyValue[0], keyValue[1]);
    }
   
    try {
      if (rp1.saveFull(ld)) {
        logger.info("Document added");
View Full Code Here

public class LocalDocumentFactory {
  private static Random random = new Random();
 
  public static LocalDocument getRandomDocument(String ... wantedFields) {
    LocalDocument ld = new LocalDocument();
    ld.setAction(Action.ADD);
   
    for(String field : wantedFields) {
      ld.putContentField(field, getRandomValue());
    }
   
    return ld;
  }
View Full Code Here

   
    return ld;
  }
 
  public static LocalDocument getRandomStringDocument(String ... wantedFields) {
    LocalDocument ld = new LocalDocument();
    ld.setAction(Action.ADD);
   
    for(String field : wantedFields) {
      ld.putContentField(field, getRandomStringValue());
    }
   
    return ld;
  }
View Full Code Here

TOP

Related Classes of com.findwise.hydra.local.LocalDocument

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.