Package org.dbpedia.spotlight.evaluation.external

Source Code of org.dbpedia.spotlight.evaluation.external.WikiMachineClient

/*
* *
*  * Copyright 2011 Pablo Mendes, Max Jakob
*  *
*  * Licensed under the Apache License, Version 2.0 (the "License");
*  * you may not use this file except in compliance with the License.
*  * You may obtain a copy of the License at
*  *
*  * http://www.apache.org/licenses/LICENSE-2.0
*  *
*  * Unless required by applicable law or agreed to in writing, software
*  * distributed under the License is distributed on an "AS IS" BASIS,
*  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*  * See the License for the specific language governing permissions and
*  * limitations under the License.
*
*/

package org.dbpedia.spotlight.evaluation.external;

import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.PostMethod;

import net.htmlparser.jericho.Source;
import net.htmlparser.jericho.Element;
import org.dbpedia.spotlight.exceptions.AnnotationException;
import org.dbpedia.spotlight.model.DBpediaResource;
import org.dbpedia.spotlight.model.Text;

import java.util.List;

import java.io.*;
import java.util.ArrayList;

/**
* The External Clients were translated to Scala but this class was not.
* Because The WikiMachine service and website (http://thewikimachine.fbk.eu/) are unavailable.
* As result of that, this client is no more working.
*
* Last Tested: 08/27th/2013 by Alexandre Cançado Cardoso
*/

/**
* @author pablomendes (first incomplete version, final touches to Andres' code.)
* @author Andres Garcia-Silva (main implementation)
*/

public class WikiMachineClient extends AnnotationClient {

    String wikiPrefix = "http://en.wikipedia.org/wiki/";

    @Override
    public List<DBpediaResource> extract(Text text) throws AnnotationException {
        String response = process(text.text());
        //System.out.println(response);
        return parse(response);
    }

    protected String process(String text) throws AnnotationException {
        String url = "http://thewikimachine.fbk.eu/gui/basic";
        PostMethod method = new PostMethod(url);
        method.setRequestHeader("Content-type","application/x-www-form-urlencoded");

        NameValuePair[] params = {
                new NameValuePair("context",text),
                new NameValuePair("type","link"),
                new NameValuePair("si","10"),
        };
        method.setRequestBody(params);
        LOG.debug("Sending request to WikiMachine: "+params);

        String response = request(method);
        return response;
    }

    public List<DBpediaResource> parse(String html) throws AnnotationException {

        Source parser;
        String wikiUrl;
        String surfaceForm;
        List<DBpediaResource> entities = new ArrayList<DBpediaResource>();

        try {
            InputStream is = new ByteArrayInputStream(html.getBytes("UTF-8"));
            parser = new Source(is);
            parser.fullSequentialParse();
            parser.getElementById("div");
        } catch (IOException e) {
            throw new AnnotationException("Error reading output from WikiMachine ",e);
        }
        List<Element>KeywordElements=parser.getAllElementsByClass("keywords");

        if (KeywordElements!=null && !KeywordElements.isEmpty()){
            Element keywordElement= KeywordElements.get(0);
            for (Element linkElement : keywordElement.getAllElements()) {
                wikiUrl="";
                surfaceForm="";
                wikiUrl=linkElement.getAttributeValue("href");
                if (wikiUrl!=null)
                    if (wikiUrl.startsWith(wikiPrefix)) {
                        surfaceForm = linkElement.getContent().getTextExtractor().toString();
                        entities.add(new DBpediaResource(wikiUrl.replaceAll(wikiPrefix,"")));
                        //System.out.println(surfaceForm+" "+wikiUrl);
                    }
            }
        }
        LOG.trace(entities);
        return entities;
    }

    public static void main(String[] args) throws Exception {

        WikiMachineClient client = new WikiMachineClient();

        //File manualEvalOutput = new File("/home/pablo/eval/manual/systems/WikiMachine.list");
        //File manualEvalInput = new File("/home/pablo/eval/manual/AnnotationText.txt");
        //client.evaluate(manualEvalInput, manualEvalOutput);

        //File cucerzanEvalInput = new File("/home/pablo/eval/cucerzan/cucerzan.txt");
        //File cucerzanEvalOutput = new File("/home/pablo/eval/cucerzan/systems/WikiMachine.list");
//        client.evaluate(cucerzanEvalInput, cucerzanEvalOutput);

//        File wikifyEvalInput = new File("/home/pablo/eval/wikify/gold/WikifyAllInOne.txt");
//        File wikifyEvalOutput = new File("/home/pablo/eval/wikify/systems/WikiMachine.list");
//        client.evaluate(wikifyEvalInput, wikifyEvalOutput);

        File csawEvalInput = new File("/home/pablo/eval/csaw/gold/paragraphs.txt");
        File csawEvalOutput = new File("/home/pablo/eval/csaw/systems/WikiMachine.list");
        client.evaluate(csawEvalInput, csawEvalOutput);

    }

}
TOP

Related Classes of org.dbpedia.spotlight.evaluation.external.WikiMachineClient

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.