Examples of Extractor

barrysoft.web.Extractor
com.ontometrics.scraper.extraction.Extractor
Does the work of iteratively extracting portions of the page.
Note that once the requested elements are specified, this class is held in the scraper and can be successively invoked for jobs that involve iteration (e.g. paging). @author Rob
de.jetwick.tw.Extractor
This class extracts links, users and hashtags of one tweet. Used for UI to render links, users and hashtags but also for indexing to detect users in retweets. @author Peter Karich, peat_hal 'at' users 'dot' sourceforge 'dot' net
edu.stanford.nlp.ie.machinereading.Extractor
net.sf.cannagrower.data.hardware.Extractor
org.apache.any23.extractor.Extractor
It defines the signature of a generic Extractor. @param <Input> the type of the input data to be processed.
org.apache.jmeter.extractor.Extractor
CSS/JQuery based extractor for HTML pages @since 2.9
org.apache.slide.extractor.Extractor
The Extractor interface
org.apache.sqoop.job.etl.Extractor
This allows a connector to extract data from a source system based on each partition.
org.drools.spi.Extractor
org.graylog2.plugin.inputs.Extractor
org.graylog2.restclient.models.Extractor
@author Lennart Koopmann
org.restlet.routing.Extractor
Filter extracting attributes from a call. Multiple extractions can be defined, based on the query string of the resource reference, on the request form (e.g. posted from a browser) or on cookies.

Concurrency note: instances of this class or its subclasses can be invoked by several threads at the same time and therefore must be thread-safe. You should be especially careful when storing state in member variables. @author Jerome Louvel
org.semanticdesktop.aperture.extractor.Extractor
slash.navigation.download.actions.Extractor
Extracts a {@link Download} to a target directory. @author Christian Pesch

Examples of barrysoft.web.Extractor

public class ExtractionTest {
  
  @Test
  public void testExtract() {
    
    Extractor search = new Extractor();
    
    search.getParser().setName("ITSA Search");
    
    ParserRule inr = new ParserRule("(?i)(?s).*?<a href=\"([^<>]+)\">\\s[series]</a>.*?");
    inr.setQuickRule(" [series]<");
    inr.setGroupName("season link", 0);
    inr.addParam(new ParserRuleParam("series", "How I Met Your Mother"));
    
    search.getParser().addRule(inr);
    try {
      search.getDownloader().setUrl("http://www.italiansubs.net/index.php?option=com_remository");
    } catch (MalformedURLException e) {
      fail(e.getMessage());
    }
    
    Extractor season = new Extractor();
    
    season.getParser().setName("ITSA Season");
    
    ParserRule outr = new ParserRule("(?i)(?s).*?<a href=\"([^<>]+)\">\\s[season]</a>.*?");
    outr.setQuickRule(" [season]<");
    outr.setGroupName("episodes link", 0);
    //outr.addParam(new ParserRuleParam("season", "Stagione 2"));
    
    season.getParser().addRule(outr);
    
    Extractor episode = new Extractor();
    
    episode.getParser().setName("ITSA Episode");
    
    outr = new ParserRule("(?i)(?s).*?<a href=\"([^<>]+)\">[episode]</a>.*?");
    outr.setQuickRule("[episode]<");
    outr.setGroupName("subtitle link", 0);
    //outr.addParam(new ParserRuleParam("episode", "How I Met Your Mother 2x06"));
    
    episode.getParser().addRule(outr);
    
    ExtractionJob job = new ExtractionJob("Get ITSA subtitle link");
    
    ExtractionStep estep = new ExtractionStep("Get season's page", search, season);
    estep.connect("season link", ExtractionStep.URL_BINDING);
    job.addStep(estep);
    
    estep = new ExtractionStep("Get subtitle's page", season, episode);
    estep.connect("episodes link", ExtractionStep.URL_BINDING);
    job.addStep(estep);
    
    try {
      job.execute();
    } catch (IllegalStateException e) {
      fail(e.getMessage());
    } catch (MalformedURLException e) {
      fail(e.getMessage());
    } catch (IOException e) {
      fail(e.getMessage());
    }
    
    String[] link;
    try {
      link = episode.getParser().getRule(0).getResults("subtitle link");
      assertEquals(1, link.length);
      System.out.println(link[0]);
    } catch (IndexOutOfBoundsException e) {
      fail(e.getMessage());
    } catch (NoSuchElementException e) {

View Full Code Here

Examples of com.ontometrics.scraper.extraction.Extractor

  private URL baseUrl;


  private String sessionIDName;


  public Scraper() {
    this.extractor = new Extractor();
  }

View Full Code Here

Examples of de.jetwick.tw.Extractor


            int counter = 0;


            @Override
            public String getObject() {
                return new Extractor() {


                    @Override
                    public String createTagMarkup(String tag, String cleanTag) {
//                        String url = Helper.TSURL + cleanTag;
                        return "<a class=\"i-tw-link tw-tag\" "

View Full Code Here

Examples of de.jetwick.tw.Extractor

                        final JTweet tweet = (JTweet) item.getModelObject();
                        final Label label = new Label("tweet", new Model<String>() {


                            @Override
                            public String getObject() {
                                return new Extractor().setTweet(tweet).setText(tweet.getText()).run().toString();
//                                return tweet.getText();
                            }
                        });
                        label.setEscapeModelStrings(false);
                        item.add(label);

View Full Code Here

Examples of de.jetwick.tw.Extractor

                        tw.getFromUser().getScreenName()));


                item.add(new ExternalLink("statusLink",
                        Helper.toTwitterHref(tw.getFromUser().getScreenName(), tw.getTwitterId())));


                Label label = new Label("tweetText", new Extractor().setTweet(tw).run().toString());
                label.setEscapeModelStrings(false);
                item.add(label);
            }
        });
    }

View Full Code Here

Examples of de.jetwick.tw.Extractor

    public Collection<JTweet> findRetweets(Map<Long, JTweet> tweets, final Map<String, JUser> userMap) {
        // 1. check if tweets contains originals which were retweeted -> only done for 'tweets'
        // 2. check if tweets contains retweets -> done for 'tweets' and for tweets in solr


        final Set<JTweet> updatedTweets = new LinkedHashSet<JTweet>();
        Extractor extractor = new Extractor() {


            @Override
            public boolean onNewUser(int index, String user) {
                boolean isRetweet = index >= 3 && text.substring(index - 3, index).equalsIgnoreCase("rt ");
                if (isRetweet) {
                    user = user.toLowerCase();
                    JUser existingUser = userMap.get(user);
                    JTweet resTw = null;


                    // check ifRetweetOf against local tweets
                    if (existingUser != null)
                        for (JTweet tmp : existingUser.getOwnTweets()) {
                            if (tmp.getCreatedAt().getTime() < tweet.getCreatedAt().getTime()
                                    && tweet.isRetweetOf(tmp)) {
                                if (addReplyNoTricks(tmp, tweet)) {
                                    resTw = tmp;
                                    break;
                                }
                            }
                        }


                    // check ifRetweetOf against tweets existing in index
                    if (resTw == null)
                        resTw = connectToOrigTweet(tweet, user);


                    if (resTw != null) {
                        updatedTweets.add(resTw);
                        return false;
                    }
                }


                // break loop of Extractor because we only need the first user!
                return true;
            }
        };


        for (JTweet tw : tweets.values()) {
            if (tw.isRetweet()) {
                extractor.setTweet(tw).run();
            }
        }
        return updatedTweets;
    }

View Full Code Here

Examples of edu.stanford.nlp.ie.machinereading.Extractor


  public RelationExtractorAnnotator(Properties props){
    verbose = Boolean.parseBoolean(props.getProperty("sup.relation.verbose", "false"));
    String relationModel = props.getProperty("sup.relation.model", DefaultPaths.DEFAULT_SUP_RELATION_EX_RELATION_MODEL);
    try {
      Extractor entityExtractor = new RothEntityExtractor();
      BasicRelationExtractor relationExtractor = BasicRelationExtractor.load(relationModel);
      
      System.err.println("Loading relation model from " + relationModel);
      mr = MachineReading.makeMachineReadingForAnnotation(new RothCONLL04Reader(), entityExtractor, relationExtractor, null, null,
          null, true, verbose);

View Full Code Here

Examples of net.sf.cannagrower.data.hardware.Extractor

      culture=new Culture(repository);


      culture.getHardwares().store(new Room());
      culture.getHardwares().store(new Lamp());
      culture.getHardwares().store(new Intractor());
      culture.getHardwares().store(new Extractor());
      culture.getHardwares().store(new MediumSoil());
      
      plantation=new Plantation(culture);
      
      event=new net.sf.cannagrower.data.event.PlantsAdd();

View Full Code Here

Examples of org.apache.any23.extractor.Extractor

public class ExtractionExceptionTest {


    @Test
    public void testPrintStackTrace() throws ExtractionException, IOException {
        final String FAKE_EXTRACTOR_NAME = "fake-extractor-name";
        final Extractor extractor = mock(Extractor.class);
        final ExtractorDescription ed = mock(ExtractorDescription.class);
        when(ed.getExtractorName()).thenReturn(FAKE_EXTRACTOR_NAME);
        when(extractor.getDescription()).thenReturn(ed);


        final TripleHandler th = mock(TripleHandler.class);
        final ExtractionContext extractionContext = new ExtractionContext(
                extractor.getDescription().getExtractorName(),
                new URIImpl("http://fake.document.uri")
        );
        final ExtractionResult er = new ExtractionResultImpl(extractionContext, extractor, th);
        er.notifyIssue(IssueReport.IssueLevel.Fatal  , "Fake fatal error.", 1, 2);
        er.notifyIssue(IssueReport.IssueLevel.Error  , "Fake error."      , 3, 4);

View Full Code Here

Examples of org.apache.jmeter.extractor.Extractor

        }
    }


    private String process(String textToParse) {
        List<String> result = new ArrayList<String>();
        Extractor extractor = HtmlExtractor.getExtractorImpl(cssJqueryLabeledChoice.getText());
        final int nbFound = extractor.extract(
                cssJqueryField.getText(), attributeField.getText(), -1, textToParse, result, 0, null);


        // Construct a multi-line string with all matches
        StringBuilder sb = new StringBuilder();
        sb.append("Match count: ").append(nbFound).append("\n");

View Full Code Here

0 1 2 3 4 5

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.