Package org.archive.net

Examples of org.archive.net.UURI


            String build = getFormat();
            CharSequence dest = link.getUURI();
            String implied = extractImplied(dest, trigger, build);
            if (implied != null) {
                try {
                    UURI target = UURIFactory.getInstance(implied);
                    LinkContext lc = LinkContext.INFERRED_MISC;
                    Hop hop = Hop.INFERRED;
                    addOutlink(curi, target, lc, hop);
                    numberOfLinksExtracted.incrementAndGet();
View Full Code Here


     *
     * @return   a CrawlURI
     * @throws Exception   just in case
     */
    protected CrawlURI defaultURI() throws Exception {
        UURI uuri = UURIFactory.getInstance("http://www.archive.org/start/");
        return new CrawlURI(uuri, null, null, LinkContext.NAVLINK_MISC);
    }
View Full Code Here

    }
   
   
    private void addLink(CrawlURI curi, String hyperlink) {
        try {
            UURI dest = UURIFactory.getInstance(curi.getUURI(), hyperlink);
            LinkContext lc = LinkContext.NAVLINK_MISC;
            addOutlink(curi, hyperlink, lc, Hop.NAVLINK);
        } catch (URIException e1) {
            logUriError(e1, curi.getUURI(), hyperlink);
        }
View Full Code Here

    /* (non-Javadoc)
     * @see java.util.logging.Formatter#format(java.util.logging.LogRecord)
     */
    public String format(LogRecord lr) {
        UURI uuri = (UURI) lr.getParameters()[0];
        String problem = (String) lr.getParameters()[1];

        return ArchiveUtils.getLog17Date()
        + " "
        + ( (uuri ==null) ? "n/a" : uuri.toString() )
        + " \""
        + lr.getMessage()
        + "\" "
        + problem
        + "\n";
View Full Code Here

        for (ArchiveRecordHeader arh: headers) {
            // ignore 'filedesc:' record
            if(arh.getUrl().startsWith("filedesc:")) {
                continue;
            }
            UURI uuri = UURIFactory.getInstance(arh.getUrl());
            String path = uuri.getPath();
            if (path.startsWith("/")) {
                path = path.substring(1);
            }
            if (arh.getUrl().startsWith("http:")) {
                result.add(path);
View Full Code Here

        }
        return linkSet;
    }
    private CrawlURI createTestUri(String urlStr, String resourceFileName) throws URIException,
    UnsupportedEncodingException, IOException {
        UURI testUuri = UURIFactory.getInstance(urlStr);
        CrawlURI testUri = new CrawlURI(testUuri, null, null, LinkContext.NAVLINK_MISC);
       

        File temp = File.createTempFile("test", ".tmp");
        Recorder recorder = new Recorder(temp, 1024, 1024);
View Full Code Here

        // all whois urls in the same queue
        if (curi.getUURI().getScheme().equals("whois")) {
            return "whois...";
        }
       
        UURI basis = curi.getPolicyBasisUURI();
        String candidate = getCoreKey(basis);
       
        if(StringUtils.isEmpty(candidate)) {
            return DEFAULT_CLASS_KEY;
        }
View Full Code Here

        return (ExtractorYoutubeFormatStream)extractor;
    }

    private CrawlURI createTestUri(String urlStr, String resourceFileName) throws URIException,
    UnsupportedEncodingException, IOException {
        UURI testUuri = UURIFactory.getInstance(urlStr);
        CrawlURI testUri = new CrawlURI(testUuri, null, null, LinkContext.NAVLINK_MISC);

        InputStream is = ExtractorYoutubeFormatStreamTest.class.getClassLoader().getResourceAsStream(resourceFileName);
        BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
        StringBuilder content = new StringBuilder();
View Full Code Here

TOP

Related Classes of org.archive.net.UURI

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.