Package it.cnr.isti.hpc.wikipedia.article

Examples of it.cnr.isti.hpc.wikipedia.article.Article


  }
 
 
  @Test
  public void testNotRedirect() throws IOException {
    Article a = new Article();
    String mediawiki = IOUtils.getFileAsUTF8String("./src/test/resources/en/liberalism.txt");
    parser.parse(a, mediawiki);
    System.out.println(a.getRedirect());
    assertTrue(! a.isRedirect());
   
   
  }
View Full Code Here


        return;
      }
      if (page.isMain())
        type = Type.ARTICLE;

      Article article = new Article();
      article.setTitle(title);
      article.setWikiId(Integer.parseInt(id));
      article.setNamespace(namespace);
      article.setIntegerNamespace(integerNamespace);
      article.setTimestamp(timestamp);
      article.setType(type);
      parser.parse(article, page.getText());

      try {
        out.write(article.toJson());
        out.write("\n");
      } catch (IOException e) {
        logger.error("writing the output file {}", e.toString());
        System.exit(-1);
      }
View Full Code Here

  public void testParsing() throws UnsupportedEncodingException, FileNotFoundException, IOException, SAXException {
    URL u = this.getClass().getResource("/en/mercedes.xml");
    WikipediaArticleReader wap = new WikipediaArticleReader(u.getFile(),"/tmp/mercedes.json.gz", Language.EN);
    wap.start();
    String json = IOUtils.getFileAsUTF8String("/tmp/mercedes.json.gz");
    Article a = Article.fromJson(json);
    assertTrue(a.getCleanText().startsWith("Mercedes-Benz"));
    assertEquals(15, a.getCategories().size());
   
   
  }
View Full Code Here

  }

 
  @Test
  public void testInfobox() throws IOException {
    Article articleWithInfobox = new Article();

    String text = readFileAsString("/it/xml-dump/article-with-infobox.txt");
    articleParser.parse(articleWithInfobox, text);
   
    assertTrue(articleWithInfobox.hasInfobox());
    Template infobox = articleWithInfobox.getInfobox();
    assertEquals(12,infobox.getSchema().size());
    assertEquals("Infobox_fiume", infobox.getName());
    assertEquals("Adige", infobox.get("nome"));
    assertEquals("12200", infobox.get("bacino"));
   
View Full Code Here

  }

  @Test
  public void table() throws IOException {
    Article articleWithTable = new Article();
    String text = readFileAsString("/it/xml-dump/table.txt");
    articleParser.parse(articleWithTable, text);
    assertEquals("Nome italiano", articleWithTable.getTables().get(0)
        .getColumn(1).get(0));
    assertEquals("15 agosto", articleWithTable.getTables().get(0)
        .getColumn(0).get(10));

  }
View Full Code Here

  @Test
  public void list() throws IOException {

    String text = readFileAsString("/it/xml-dump/list.txt");
    Article articleWithList = new Article();
    articleParser.parse(articleWithList, text);
    List<String> list = articleWithList.getLists().get(2);
    assertEquals("Antropologia culturale e Antropologia dei simboli", list.get(0));
   
  }
View Full Code Here

  // Parser instance used by the tests below, constructed with Language.EN.
  ArticleParser parser = new ArticleParser(Language.EN);
 
 
  @Test
  public void testParsing() throws IOException {
    Article a = new Article();
    String mediawiki = IOUtils.getFileAsUTF8String("./src/test/resources/en/article.txt");
    parser.parse(a, mediawiki);
    assertTrue("Wrong parsed text",a.getCleanText().trim().startsWith("Albedo (), or reflection coefficient, is the diffuse reflectivity or reflecting power of a surface."));
    assertEquals(5, a.getCategories().size());
    assertEquals(7,a.getSections().size());
    assertEquals(74,a.getLinks().size());
   
  }
View Full Code Here

 

 
  @Test
  public void testMercedes() throws IOException {
    Article a = new Article();
    String mediawiki = IOUtils.getFileAsUTF8String("./src/test/resources/en/mercedes.txt");
    parser.parse(a, mediawiki);
    assertTrue(a.getCleanText().startsWith("Mercedes-Benz"));
    assertEquals(15, a.getCategories().size());
   
  }
View Full Code Here

  }
 
 
  @Test
  public void testDisambiguation() throws IOException {
    Article a = new Article();
    String mediawiki = IOUtils.getFileAsUTF8String("./src/test/resources/en/hdis.txt");
    parser.parse(a, mediawiki);
    assertTrue(a.isDisambiguation());
   
  }
View Full Code Here

TOP

Related Classes of it.cnr.isti.hpc.wikipedia.article.Article

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle. Contact coftware#gmail.com.