Package net.yacy.document.parser.html

Examples of net.yacy.document.parser.html.ImageEntry


                i += putMediaInfo(prop, wordArray, i, document.getAudiolinks(), "audio", (i % 2 == 0));
                dark = (i % 2 == 0);

                final Map<MultiProtocolURI, ImageEntry> ts = document.getImages();
                final Iterator<ImageEntry> tsi = ts.values().iterator();
                ImageEntry entry;
                while (tsi.hasNext()) {
                    entry = tsi.next();
                    prop.put("viewMode_links_" + i + "_nr", i);
                    prop.put("viewMode_links_" + i + "_dark", dark ? "1" : "0");
                    prop.put("viewMode_links_" + i + "_type", "image");
                    prop.put("viewMode_links_" + i + "_text", (entry.alt().isEmpty()) ? "&nbsp;" : markup(wordArray, entry.alt()));
                    prop.put("viewMode_links_" + i + "_url", entry.url().toNormalform(false, true));
                    prop.put("viewMode_links_" + i + "_link", markup(wordArray, entry.url().toNormalform(false, true)));
                    if (entry.width() > 0 && entry.height() > 0)
                        prop.put("viewMode_links_" + i + "_attr", entry.width() + "x" + entry.height() + " Pixel");
                    else
                        prop.put("viewMode_links_" + i + "_attr", "unknown");
                    dark = !dark;
                    i++;
                }
View Full Code Here


        final SortedSet<ImageEntry> images = new TreeSet<ImageEntry>();
        images.addAll(document.getImages().values()); // iterates images in descending size order!
        // a measurement for the size of the images can be retrieved using the htmlFilterImageEntry.hashCode()

        final Iterator<ImageEntry> i = images.iterator();
        ImageEntry ientry;
        DigestURI url;
        String desc;
        final List<MediaSnippet> result = new ArrayList<MediaSnippet>();
        while (i.hasNext()) {
            ientry = i.next();
            url = new DigestURI(ientry.url());
            final String u = url.toString();
            if (u.indexOf(".ico") >= 0 || u.indexOf("favicon") >= 0) continue;
            if (ientry.height() > 0 && ientry.height() < 32) continue;
            if (ientry.width() > 0 && ientry.width() < 32) continue;
            desc = ientry.alt();
            final int appcount = queryhashes.size()  * 2 -
                           removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() -
                           removeAppearanceHashes(desc, queryhashes).size();
            final long ranking = Long.MAX_VALUE - (ientry.height() + 1) * (ientry.width() + 1) * (appcount + 1);
            result.add(new MediaSnippet(ContentDomain.IMAGE, url, MimeTable.url2mime(url), desc, ientry.fileSize(), ientry.width(), ientry.height(), ranking, source));
        }
        return result;
    }
View Full Code Here

                insertTextToWords(entry.getValue(), 99, flag_cat_hasapp, RESULT_FLAGS, true, meaningLib);
            }

            // images
            final Iterator<ImageEntry> j = document.getImages().values().iterator();
            ImageEntry ientry;
            MultiProtocolURI url;
            while (j.hasNext()) {
                ientry = j.next();
                url = ientry.url();
                if (url == null) continue;
                insertTextToWords(url.toNormalform(false, false), 99, flag_cat_hasimage, RESULT_FLAGS, false, meaningLib);
                insertTextToWords(ientry.alt(), 99, flag_cat_hasimage, RESULT_FLAGS, true, meaningLib);
            }
       
            // finally check all words for missing flag entry
            final Iterator<Map.Entry<String, Word>> k = words.entrySet().iterator();
            Word wprop;
View Full Code Here

                            ext = u.substring(extpos + 1).toLowerCase();
                        }
                        if (Classification.isMediaExtension(ext)) {
                            // this is not a normal anchor, its a media link
                            if (Classification.isImageExtension(ext)) {
                                ContentScraper.addImage(collectedImages, new ImageEntry(url, name, -1, -1, -1));
                            }
                            else if (Classification.isAudioExtension(ext)) this.audiolinks.put(url, name);
                            else if (Classification.isVideoExtension(ext)) this.videolinks.put(url, name);
                            else if (Classification.isApplicationExtension(ext)) this.applinks.put(url, name);
                        }
View Full Code Here

        final HashSet<String> languages = new HashSet<String>();
        final HashMap<MultiProtocolURI, Properties> anchors = new HashMap<MultiProtocolURI, Properties>();
        final HashMap<MultiProtocolURI, ImageEntry> images  = new HashMap<MultiProtocolURI, ImageEntry>();
        // add this image to the map of images
        String infoString = ii.info.toString();
        images.put(ii.location, new ImageEntry(location, "", ii.width, ii.height, -1));
       
        if (title == null || title.length() == 0) title = MultiProtocolURI.unescape(location.getFileName());
       
        return new Document[]{new Document(
             location,
View Full Code Here

                i += putMediaInfo(prop, wordArray, i, document.getAudiolinks(), "audio", (i % 2 == 0), document.getAnchors());
                dark = (i % 2 == 0);

                final Map<MultiProtocolURI, ImageEntry> ts = document.getImages();
                final Iterator<ImageEntry> tsi = ts.values().iterator();
                ImageEntry entry;
                while (tsi.hasNext()) {
                    entry = tsi.next();
                    prop.put("viewMode_links_" + i + "_nr", i);
                    prop.put("viewMode_links_" + i + "_dark", dark ? "1" : "0");
                    prop.put("viewMode_links_" + i + "_type", "image");
                    prop.put("viewMode_links_" + i + "_text", (entry.alt().isEmpty()) ? "&nbsp;" : markup(wordArray, entry.alt()));
                    prop.put("viewMode_links_" + i + "_url", entry.url().toNormalform(false, true));
                    prop.put("viewMode_links_" + i + "_link", markup(wordArray, entry.url().toNormalform(false, true)));
                    if (entry.width() > 0 && entry.height() > 0) {
                        prop.put("viewMode_links_" + i + "_rel", entry.width() + "x" + entry.height() + " Pixel");
                    } else {
                        prop.put("viewMode_links_" + i + "_rel", "");
                    }
                    prop.put("viewMode_links_" + i + "_name", "");
                    dark = !dark;
View Full Code Here

        final HashSet<String> languages = new HashSet<String>();
        final HashMap<MultiProtocolURI, Properties> anchors = new HashMap<MultiProtocolURI, Properties>();
        final HashMap<MultiProtocolURI, ImageEntry> images  = new HashMap<MultiProtocolURI, ImageEntry>();
        // add this image to the map of images
        final String infoString = ii.info.toString();
        images.put(ii.location, new ImageEntry(location, "", ii.width, ii.height, -1));

        if (title == null || title.length() == 0) title = MultiProtocolURI.unescape(location.getFileName());

        return new Document[]{new Document(
             location,
View Full Code Here

                            ext = u.substring(extpos + 1).toLowerCase();
                        }
                        if (Classification.isMediaExtension(ext)) {
                            // this is not a normal anchor, its a media link
                            if (Classification.isImageExtension(ext)) {
                                ContentScraper.addImage(collectedImages, new ImageEntry(url, name, -1, -1, -1));
                            }
                            else if (Classification.isAudioExtension(ext)) this.audiolinks.put(url, name);
                            else if (Classification.isVideoExtension(ext)) this.videolinks.put(url, name);
                            else if (Classification.isApplicationExtension(ext)) this.applinks.put(url, name);
                        }
View Full Code Here

                insertTextToWords(entry.getValue(), 99, flag_cat_hasapp, this.RESULT_FLAGS, true, meaningLib);
            }

            // images
            final Iterator<ImageEntry> j = document.getImages().values().iterator();
            ImageEntry ientry;
            MultiProtocolURI url;
            while (j.hasNext()) {
                ientry = j.next();
                url = ientry.url();
                if (url == null) continue;
                insertTextToWords(url.toNormalform(false, false), 99, flag_cat_hasimage, this.RESULT_FLAGS, false, meaningLib);
                insertTextToWords(ientry.alt(), 99, flag_cat_hasimage, this.RESULT_FLAGS, true, meaningLib);
            }

            // finally check all words for missing flag entry
            final Iterator<Map.Entry<String, Word>> k = this.words.entrySet().iterator();
            Word wprop;
View Full Code Here

        final SortedSet<ImageEntry> images = new TreeSet<ImageEntry>();
        images.addAll(document.getImages().values()); // iterates images in descending size order!
        // a measurement for the size of the images can be retrieved using the htmlFilterImageEntry.hashCode()

        final Iterator<ImageEntry> i = images.iterator();
        ImageEntry ientry;
        DigestURI url;
        String desc;
        final List<MediaSnippet> result = new ArrayList<MediaSnippet>();
        while (i.hasNext()) {
            ientry = i.next();
            url = new DigestURI(ientry.url());
            final String u = url.toString();
            if (u.indexOf(".ico",0) >= 0 || u.indexOf("favicon",0) >= 0) continue;
            if (ientry.height() > 0 && ientry.height() < 32) continue;
            if (ientry.width() > 0 && ientry.width() < 32) continue;
            desc = ientry.alt();
            final int appcount = queryhashes.size()  * 2 -
                           removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() -
                           removeAppearanceHashes(desc, queryhashes).size();
            final long ranking = Long.MAX_VALUE - (ientry.height() + 1) * (ientry.width() + 1) * (appcount + 1);
            result.add(new MediaSnippet(ContentDomain.IMAGE, url, Classification.url2mime(url), desc, ientry.fileSize(), ientry.width(), ientry.height(), ranking, source));
        }
        return result;
    }
View Full Code Here

TOP

Related Classes of net.yacy.document.parser.html.ImageEntry

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.