Examples of org.dbpedia.spotlight.model.Text

org.dbpedia.spotlight.model.Text

        String keamodelfile = "/data/spotlight/3.7/kea/keaModel-1-3-1";
    KeaSpotter ks = new KeaSpotter(keamodelfile);


    String intext = "The last few decades have seen fundamental changes in food consumption patterns around the world. These changes were characterized not only by an increase in overall calorie intakes but also by a shift in the composition of the diet towards more meat, eggs, dairy products as well as more fats and oils, i.e. a shift towards high calorie diets that are also much richer in saturated fats and cholesterol. The main drivers of this transition include factors such as: (i) rapidly falling real prices for food; (ii) urbanization with the development of new marketing channels and the spread of supermarkets into developing countries; (iii) and freer trade and globalization with the emergence of large, trans-nationally operating food companies. This diet transition also brought about a rapid increase in the prevalence of overweight, obesity and related non-communicable diseases (NCDs). Initially, these problems were limited to developed countries, but more recently, there are growing concerns that the adverse effects of a rapid nutrition transition could even be more severe in developing countries. The growing health concerns have also given rise to a intense debate about possible remedies to stop and reverse the obesity epidemic in developed countries, and, perhaps even more importantly, to prevent similar developments in developing countries. Some of these policy options are being examined in this paper. The instruments analysed include price interventions, both at the level of primary commodities and final consumer goods (tax on fat food), direct incentives to reduce and disincentives to maintain an excess body weight; finally the paper also presents some experience gathered with a combination of various measures in integrated nutrition programmes. ";


    List<SurfaceFormOccurrence> sflst = ks.extract(new Text(intext));
    System.out.println("\n\nPrinting SurfaceOccurrences");
    for (SurfaceFormOccurrence so: sflst) {
      System.out.println(so.surfaceForm().name() + " : offset " + so.textOffset());
    }
  }

View Full Code Here

    //remove special chars from the input text, and keep their positions in a list.
    //start/end offsets need to be adjusted after extracting spots from cleaned text.
    String orgText = text.text();
    List<Integer> chars2removeLst = OpenNLPUtil.chars2remove(orgText);
    String cleanText = OpenNLPUtil.cleanText(orgText, chars2removeLst);
    Text cleanTextStr = new Text(cleanText);
    //extracting NounPhrase nGrams
    List<SurfaceFormOccurrence> npNgrams = extractNPNGrams(cleanTextStr);
    /*
    System.out.println("\n\nAll NGrams of sentence:");
    System.out.println(intext + "\n");

View Full Code Here


    @GET
    @Produces( MediaType.APPLICATION_JSON )
    public Response getJSON(@DefaultValue(SpotlightConfiguration.DEFAULT_TEXT) @QueryParam("text") String text) {
        TopicalClassifier classifier = TopicalServer.getClassifier();
        Text textObj = new Text(text);
        Tuple2<Topic,Object>[] result = classifier.getPredictions(textObj);


        return ok(TopicalOutputSerializer.topicTagsAsJson(textObj, result).toString());
    }

View Full Code Here

    public Response postJSON(
            @DefaultValue(SpotlightConfiguration.DEFAULT_TEXT) @FormParam("text") String text,
            @Context HttpServletRequest request
    ) {
        TopicalClassifier classifier = TopicalServer.getClassifier();
        Text textObj = new Text(text);
        Tuple2<Topic,Object>[] result = classifier.getPredictions(textObj);


        return ok(TopicalOutputSerializer.topicTagsAsJson(textObj, result).toString());
    }

View Full Code Here


    @GET
    @Produces( MediaType.APPLICATION_XML )
    public Response getXML(@DefaultValue(SpotlightConfiguration.DEFAULT_TEXT) @QueryParam("text") String text) {
        TopicalClassifier classifier = TopicalServer.getClassifier();
        Text textObj = new Text(text);
        Tuple2<Topic,Object>[] result = classifier.getPredictions(textObj);


        return ok(TopicalOutputSerializer.topicTagsAsXml(textObj, result).toString());
    }

View Full Code Here

    public Response postXML(
            @DefaultValue(SpotlightConfiguration.DEFAULT_TEXT) @FormParam("text") String text,
            @Context HttpServletRequest request
    ) {
        TopicalClassifier classifier = TopicalServer.getClassifier();
        Text textObj = new Text(text);
        Tuple2<Topic,Object>[] result = classifier.getPredictions(textObj);


        return ok(TopicalOutputSerializer.topicTagsAsXml(textObj, result).toString());
    }

View Full Code Here

 */
public class ParseSurfaceFormText {


    public static List<SurfaceFormOccurrence> parse(String textWithMarkedSurfaceForms) {
        List<SurfaceFormOccurrence> sfOccs = new ArrayList<SurfaceFormOccurrence>();
        Text unMarkedUpText = new Text(textWithMarkedSurfaceForms.replaceAll("(\\[\\[|\\]\\])", ""));
        int i = 0;
        while (i < textWithMarkedSurfaceForms.length()) {
            int start = textWithMarkedSurfaceForms.indexOf("[[", i) + 2;
            if (start == 1)
                break;

View Full Code Here

                if (i<restartFrom) continue;


                List<DBpediaResource> entities = new ArrayList<DBpediaResource>();
                try {
                    final long startTime = System.nanoTime();
                    entities = extract(new Text(snippet.replaceAll("\\s+"," ")));
                    final long endTime = System.nanoTime();
                    sum += endTime - startTime;
                    LOG.info(String.format("(%s) Extraction ran in %s ns.", i, endTime - startTime));
                    correct++;
                } catch (AnnotationException e) {

View Full Code Here

     * @throws SpottingException
     */
  public List<SurfaceFormOccurrence> extract(Text text) throws SpottingException {
        LOG.debug(String.format("Spotting with spotter %s and selector %s.",spotter.getName(),spotSelector));


    Text textObject = buildText(text);


    List<SurfaceFormOccurrence> spottedSurfaceForms = spotter.extract(textObject);


    if(spotSelector != null) {
      List<SurfaceFormOccurrence> selectedSpots = spotSelector.select(spottedSurfaceForms);

View Full Code Here

    public long getPromiscuity(IndexReader reader, SurfaceForm sf) {
        long p = 0;
        Query q;
        try {
            // Use query parser to get analyzed terms
            q = mLuceneManager.getQuery(new Text(sf.name()));
            Set<Term> terms = new HashSet<Term>();
            q.extractTerms(terms);
            LOG.info(String.format("Terms: %s",terms));
            // Now get how many documents contain all of those terms
            p = cardinality(reader, terms);

View Full Code Here

0 1 2

TOP

Related Classes of org.dbpedia.spotlight.model.Text

org.dbpedia.spotlight.evaluation.external.AnnotationClient

org.dbpedia.spotlight.evaluation.external.WikiMachineClientTest

org.dbpedia.spotlight.evaluation.SpotterMemoryEvaluator

org.dbpedia.spotlight.evaluation.SpotterPerformanceEvaluator

org.dbpedia.spotlight.lucene.similarity.TermCache

org.dbpedia.spotlight.spot.KeaSpotter

org.dbpedia.spotlight.spot.OpenNLPNGramSpotter

org.dbpedia.spotlight.spot.SpotterWithSelector

org.dbpedia.spotlight.string.ParseSurfaceFormText

org.dbpedia.spotlight.web.rest.related.Extract

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.