Examples of net.yacy.document.Document

net.yacy.document.Document

            } else {
                parseUsingJava(sourceFile,outputFile);
            }
            
            // return result
            final Document[] docs = new Document[]{new Document(
                    location, // url
                    mimeType, // mime
                    "UTF-8",  // charset
                    this,
                    null,     // languages

View Full Code Here

                
                /*
                 * create the plasmaParserDocument for the database
                 * and set shortText and bodyText properly
                 */
                return new Document[]{new Document(
                        location,
                        mimeType,
                        "UTF-8",
                        this,
                        null,

View Full Code Here

        this.SUPPORTED_EXTENSIONS.add("7z");
        this.SUPPORTED_MIME_TYPES.add("application/x-7z-compressed");
    }


    public Document parse(final MultiProtocolURI location, final String mimeType, final String charset, final IInStream source) throws Parser.Failure, InterruptedException {
        final Document doc = new Document(
                location,
                mimeType,
                charset,
                this,
                null,

View Full Code Here


    public void createBookmark(final LoaderDispatcher loader, final DigestURI url, final String bmk_user, final boolean autotag, final String tagsString, final String foldersString) throws IOException, Failure, RowSpaceExceededException {


      final YMarkEntry bmk_entry = new YMarkEntry(false);
        final YMarkMetadata meta = new YMarkMetadata(url);
    final Document document = meta.loadDocument(loader);
    final EnumMap<YMarkMetadata.METADATA, String> metadata = meta.loadMetadata();
    final String urls = url.toNormalform(true, false);
    bmk_entry.put(YMarkEntry.BOOKMARK.URL.key(), urls);
    if(!this.worktables.has(YMarkTables.TABLES.BOOKMARKS.tablename(bmk_user), YMarkUtil.getBookmarkId(urls))) {
      bmk_entry.put(YMarkEntry.BOOKMARK.PUBLIC.key(), "false");

View Full Code Here

    
    public Document[] parse(final MultiProtocolURI location, final String mimeType,
            final String charset, final InputStream source1)
            throws Parser.Failure, InterruptedException {


        final Document[] docs = new Document[]{new Document(
                location,
                mimeType,
                charset,
                this,
                null,

View Full Code Here

            log.logWarning(ex.getMessage());
        } catch (IOException ex) {
            log.logWarning(ex.getMessage());
        }


        return new Document[]{new Document(
            location,
            mimeType,
            "UTF-8",
            this,
            null,

View Full Code Here

            final InputStream sourceStream) throws Parser.Failure, InterruptedException {


        try {
            // first get a document from the parsed html
            final ContentScraper scraper = parseToScraper(location, documentCharset, sourceStream);
            final Document document = transformScraper(location, mimeType, documentCharset, scraper);


            return new Document[]{document};
        } catch (final IOException e) {
      throw new Parser.Failure("IOException in htmlParser: " + e.getMessage(), location);
    }

View Full Code Here

        for (int i = 1; i <= 6; i++) {
            for (final String headline : scraper.getHeadlines(i)) {
                sections[p++] = headline;
            }
        }
        final Document ppd = new Document(
                location,
                mimeType,
                charSet,
                scraper,
                scraper.getContentLanguages(),
                scraper.getKeywords(),
                scraper.getTitle(),
                scraper.getAuthor(),
                scraper.getPublisher(),
                sections,
                scraper.getDescription(),
                scraper.getLon(), scraper.getLat(),
                scraper.getText(),
                scraper.getAnchors(),
                scraper.getRSS(),
                scraper.getImages(),
                scraper.indexingDenied());
        //scraper.close();
        ppd.setFavicon(scraper.getFavicon());


        return ppd;
    }

View Full Code Here

            if (title.length() == l) break;
            l = title.length();
        }


        Document[] docs;
        docs = new Document[]{new Document(
                  location,
                  mimeType,
                  "UTF-8",
                  this,
                  null,

View Full Code Here

            throw new Parser.Failure("Load error:" + e.getMessage(), url);
        }
        
        final List<Document> docs = new ArrayList<Document>();
        MultiProtocolURI uri;
        Document doc;
        for (final URLEntry item: sitemap) try {
            uri = new MultiProtocolURI(item.loc);
            doc = new Document(
                    uri,
                    TextParser.mimeOf(url),
                    charset,
                    this,
                    null,

View Full Code Here

0 1 2 3 4 5 6

TOP

Related Classes of net.yacy.document.Document

Bookmarks

de.anomic.data.ymark.YMarkAutoTagger

de.anomic.data.ymark.YMarkTables

de.anomic.search.DocumentIndex

de.anomic.search.MediaSnippet

de.anomic.search.Segment

de.anomic.search.Switchboard

de.anomic.search.TextSnippet

get_metadata

get_treeview

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.