Examples of Extractor


Examples of barrysoft.web.Extractor

public class ExtractionTest {
 
  @Test
  public void testExtract() {
   
    Extractor search = new Extractor();
   
    search.getParser().setName("ITSA Search");
   
    ParserRule inr = new ParserRule("(?i)(?s).*?<a href=\"([^<>]+)\">\\s[series]</a>.*?");
    inr.setQuickRule(" [series]<");
    inr.setGroupName("season link", 0);
    inr.addParam(new ParserRuleParam("series", "How I Met Your Mother"));
   
    search.getParser().addRule(inr);
    try {
      search.getDownloader().setUrl("http://www.italiansubs.net/index.php?option=com_remository");
    } catch (MalformedURLException e) {
      fail(e.getMessage());
    }
   
    Extractor season = new Extractor();
   
    season.getParser().setName("ITSA Season");
   
    ParserRule outr = new ParserRule("(?i)(?s).*?<a href=\"([^<>]+)\">\\s[season]</a>.*?");
    outr.setQuickRule(" [season]<");
    outr.setGroupName("episodes link", 0);
    //outr.addParam(new ParserRuleParam("season", "Stagione 2"));
   
    season.getParser().addRule(outr);
   
    Extractor episode = new Extractor();
   
    episode.getParser().setName("ITSA Episode");
   
    outr = new ParserRule("(?i)(?s).*?<a href=\"([^<>]+)\">[episode]</a>.*?");
    outr.setQuickRule("[episode]<");
    outr.setGroupName("subtitle link", 0);
    //outr.addParam(new ParserRuleParam("episode", "How I Met Your Mother 2x06"));
   
    episode.getParser().addRule(outr);
   
    ExtractionJob job = new ExtractionJob("Get ITSA subtitle link");
   
    ExtractionStep estep = new ExtractionStep("Get season's page", search, season);
    estep.connect("season link", ExtractionStep.URL_BINDING);
    job.addStep(estep);
   
    estep = new ExtractionStep("Get subtitle's page", season, episode);
    estep.connect("episodes link", ExtractionStep.URL_BINDING);
    job.addStep(estep);
   
    try {
      job.execute();
    } catch (IllegalStateException e) {
      fail(e.getMessage());
    } catch (MalformedURLException e) {
      fail(e.getMessage());
    } catch (IOException e) {
      fail(e.getMessage());
    }
   
    String[] link;
    try {
      link = episode.getParser().getRule(0).getResults("subtitle link");
      assertEquals(1, link.length);
      System.out.println(link[0]);
    } catch (IndexOutOfBoundsException e) {
      fail(e.getMessage());
    } catch (NoSuchElementException e) {
View Full Code Here

Examples of com.ontometrics.scraper.extraction.Extractor

  private URL baseUrl;

  private String sessionIDName;

  public Scraper() {
    this.extractor = new Extractor();
  }
View Full Code Here

Examples of de.jetwick.tw.Extractor

            int counter = 0;

            @Override
            public String getObject() {
                return new Extractor() {

                    @Override
                    public String createTagMarkup(String tag, String cleanTag) {
//                        String url = Helper.TSURL + cleanTag;
                        return "<a class=\"i-tw-link tw-tag\" "
View Full Code Here

Examples of de.jetwick.tw.Extractor

                        final JTweet tweet = (JTweet) item.getModelObject();
                        final Label label = new Label("tweet", new Model<String>() {

                            @Override
                            public String getObject() {
                                return new Extractor().setTweet(tweet).setText(tweet.getText()).run().toString();
//                                return tweet.getText();
                            }
                        });
                        label.setEscapeModelStrings(false);
                        item.add(label);
View Full Code Here

Examples of de.jetwick.tw.Extractor

                        tw.getFromUser().getScreenName()));

                item.add(new ExternalLink("statusLink",
                        Helper.toTwitterHref(tw.getFromUser().getScreenName(), tw.getTwitterId())));

                Label label = new Label("tweetText", new Extractor().setTweet(tw).run().toString());
                label.setEscapeModelStrings(false);
                item.add(label);
            }
        });
    }
View Full Code Here

Examples of de.jetwick.tw.Extractor

    public Collection<JTweet> findRetweets(Map<Long, JTweet> tweets, final Map<String, JUser> userMap) {
        // 1. check if tweets contains originals which were retweeted -> only done for 'tweets'
        // 2. check if tweets contains retweets -> done for 'tweets' and for tweets in solr

        final Set<JTweet> updatedTweets = new LinkedHashSet<JTweet>();
        Extractor extractor = new Extractor() {

            @Override
            public boolean onNewUser(int index, String user) {
                boolean isRetweet = index >= 3 && text.substring(index - 3, index).equalsIgnoreCase("rt ");
                if (isRetweet) {
                    user = user.toLowerCase();
                    JUser existingUser = userMap.get(user);
                    JTweet resTw = null;

                    // check ifRetweetOf against local tweets
                    if (existingUser != null)
                        for (JTweet tmp : existingUser.getOwnTweets()) {
                            if (tmp.getCreatedAt().getTime() < tweet.getCreatedAt().getTime()
                                    && tweet.isRetweetOf(tmp)) {
                                if (addReplyNoTricks(tmp, tweet)) {
                                    resTw = tmp;
                                    break;
                                }
                            }
                        }

                    // check ifRetweetOf against tweets existing in index
                    if (resTw == null)
                        resTw = connectToOrigTweet(tweet, user);

                    if (resTw != null) {
                        updatedTweets.add(resTw);
                        return false;
                    }
                }

                // break loop of Extractor because we only need the first user!
                return true;
            }
        };

        for (JTweet tw : tweets.values()) {
            if (tw.isRetweet()) {
                extractor.setTweet(tw).run();
            }
        }
        return updatedTweets;
    }
View Full Code Here

Examples of edu.stanford.nlp.ie.machinereading.Extractor

  public RelationExtractorAnnotator(Properties props){
    verbose = Boolean.parseBoolean(props.getProperty("sup.relation.verbose", "false"));
    String relationModel = props.getProperty("sup.relation.model", DefaultPaths.DEFAULT_SUP_RELATION_EX_RELATION_MODEL);
    try {
      Extractor entityExtractor = new RothEntityExtractor();
      BasicRelationExtractor relationExtractor = BasicRelationExtractor.load(relationModel);
     
      System.err.println("Loading relation model from " + relationModel);
      mr = MachineReading.makeMachineReadingForAnnotation(new RothCONLL04Reader(), entityExtractor, relationExtractor, null, null,
          null, true, verbose);
View Full Code Here

Examples of net.sf.cannagrower.data.hardware.Extractor

      culture=new Culture(repository);

      culture.getHardwares().store(new Room());
      culture.getHardwares().store(new Lamp());
      culture.getHardwares().store(new Intractor());
      culture.getHardwares().store(new Extractor());
      culture.getHardwares().store(new MediumSoil());
     
      plantation=new Plantation(culture);
     
      event=new net.sf.cannagrower.data.event.PlantsAdd();
View Full Code Here

Examples of org.apache.any23.extractor.Extractor

public class ExtractionExceptionTest {

    @Test
    public void testPrintStackTrace() throws ExtractionException, IOException {
        final String FAKE_EXTRACTOR_NAME = "fake-extractor-name";
        final Extractor extractor = mock(Extractor.class);
        final ExtractorDescription ed = mock(ExtractorDescription.class);
        when(ed.getExtractorName()).thenReturn(FAKE_EXTRACTOR_NAME);
        when(extractor.getDescription()).thenReturn(ed);

        final TripleHandler th = mock(TripleHandler.class);
        final ExtractionContext extractionContext = new ExtractionContext(
                extractor.getDescription().getExtractorName(),
                new URIImpl("http://fake.document.uri")
        );
        final ExtractionResult er = new ExtractionResultImpl(extractionContext, extractor, th);
        er.notifyIssue(IssueReport.IssueLevel.Fatal  , "Fake fatal error.", 1, 2);
        er.notifyIssue(IssueReport.IssueLevel.Error  , "Fake error."      , 3, 4);
View Full Code Here

Examples of org.apache.jmeter.extractor.Extractor

        }
    }

    private String process(String textToParse) {
        List<String> result = new ArrayList<String>();
        Extractor extractor = HtmlExtractor.getExtractorImpl(cssJqueryLabeledChoice.getText());
        final int nbFound = extractor.extract(
                cssJqueryField.getText(), attributeField.getText(), -1, textToParse, result, 0, null);

        // Construct a multi-line string with all matches
        StringBuilder sb = new StringBuilder();
        sb.append("Match count: ").append(nbFound).append("\n");
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.