Package cosc561.searchengine.ejb

Examples of cosc561.searchengine.ejb.SearchEngineTimerService


    /**
     * Entity manager bound to the "SearchEnginePU" persistence unit.
     * {@code runUrlScanner()} uses it to look up the next {@code Url} to scan,
     * to flush status changes, and to persist newly discovered links.
     */
    @PersistenceContext(unitName = "SearchEnginePU")
    private EntityManager entityManager;

    public void runUrlScanner()
    {
        Url url = entityManager.createNamedQuery("Url.findByStatus", Url.class).setParameter("status", UrlStatus.NEW).setMaxResults(1).getSingleResult();
        Document document = null;

        url.setStatus(UrlStatus.SCANNING);
        entityManager.flush();

        // Test url if valid
        try
        {
            document = Jsoup.connect(url.getId()).get();
        }
        catch (IOException ex)
        {
            logger.log(Level.SEVERE, null, ex);
            url.setStatus(UrlStatus.ERROR);
            return;
        }

        // Parse and add new urls
        Elements links = document.select("a[href]");
        Set<Url> urls = new HashSet<>();

        System.out.println("URL: " + url);
        System.out.println("Links: " + links.size());
        for (Element link : links)
        {
            System.out.println("\t" + link.attr("abs:href") + " (" + link.text() + ")");
            urls.add(new Url(link.attr("abs:href"), UrlStatus.NEW));
        }

        for (Url currentUrl : urls)
        {
            try
            {
                entityManager.persist(currentUrl);
            }
            catch(Exception e)
            {
                logger.log(Level.SEVERE, "Exception: " + url + " already exists", e);
            }
        }

        url.setDocument(document.html());
        url.setStatus(UrlStatus.SCANNED);
        url.setScannedOn(new Date());
    }
View Full Code Here

TOP

Related Classes of cosc561.searchengine.ejb.SearchEngineTimerService

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.