Examples of org.apache.stanbol.entityhub.servicesapi.model.Text

Package org.apache.stanbol.entityhub.servicesapi.model

Examples of org.apache.stanbol.entityhub.servicesapi.model.Text

org.apache.stanbol.entityhub.servicesapi.model.Text
Defines a natural language text in a given language.
The text MUST NOT be null or an empty {@link String}. The language may be null (indicating the default language) or any other value. It is recommended to use ISO 639-1 codes (two-letter codes). By definition, an empty string is also allowed as the language; however, implementations of this interface are free to convert the empty language to null.
Implementations of this interface MUST be immutable. @author Rupert Westenthaler

                // Build the JSON result for entity r: its id plus the label of
                // NAME_FIELD that best matches the text of rQuery.
                JSONObject jResult = new JSONObject();
                jResult.put("id", r.getId());
                // best score seen so far; only labels scoring strictly higher replace it
                double similarity = 0.0;
                String name = null; //the name returned for the entity
                for(Iterator<Text> labels = r.getText(NAME_FIELD);labels.hasNext();){
                    Text label = labels.next();
                    // a case-insensitive exact match is taken immediately with
                    // the score 1.0 and ends the search
                    if(label.getText().equalsIgnoreCase(rQuery.getQuery())){
                        name = label.getText();
                        similarity = 1.0;
                        break;
                    }
                    // otherwise keep the label with the highest value returned by
                    // Utils.levenshtein (higher is treated as the better match here;
                    // presumably a normalized similarity -- TODO confirm the range)
                    double curSimilarity = Utils.levenshtein(rQuery.getQuery(), label.getText());
                    if(similarity < curSimilarity){
                        name = label.getText();
                        similarity = curSimilarity;
                    }
                }
                //set the selected name
                // NOTE: name is still null if there were no labels or none of them
                // scored above 0.0
                jResult.put("name", name);

View Full Code Here

            List<URL> wikipediaLinks = new ArrayList<URL>();
            List<Text> shortNames = new ArrayList<Text>();
            List<Text> colloquialNames = new ArrayList<Text>();
            for(FeatureName name : alternateNames){
                if(name.isNaturalLanguageLabel()){
                    Text act = vf.createText(name.getName(),name.getLang());
                    if(name.isPreferred()){
                        officialList.add(act);
                    } else {
                        altList.add(act);
                    }

View Full Code Here

        //now test, that the Plain Literals are available as natural language
        //tests via the Representation Interface!
        //1) one without a language
        Iterator<Text> noLangaugeTexts = rep.get(field, (String)null);
        assertTrue(noLangaugeTexts.hasNext());
        Text noLanguageText = noLangaugeTexts.next();
        assertEquals(noLangLiteral.getLabel(), noLanguageText.getText());
        assertNull(noLanguageText.getLanguage());
        assertFalse(noLangaugeTexts.hasNext()); //only a single result
        //2) one with a language
        Iterator<Text> enLangaugeTexts = rep.get(field, "en");
        assertTrue(enLangaugeTexts.hasNext());
        Text enLangageText = enLangaugeTexts.next();
        assertEquals(enLiteral.getLabel(), enLangageText.getText());
        assertEquals(enLiteral.getLanguage().toString(), enLangageText.getLanguage());
        assertFalse(enLangaugeTexts.hasNext());//only a single result
        //3) test to get all natural language values
        Set<String> stringValues = new HashSet<String>();
        for(Literal plainLiteral : plainLiterals){
            stringValues.add(plainLiteral.getLabel());

View Full Code Here

        // Iterate over every field of the representation and check each natural
        // language value returned for it.
        for(Iterator<String> fields = rep.getFieldNames();fields.hasNext();){
            String field = fields.next();
            Iterator<Text> values = rep.getText(field);
//            assertTrue(values.hasNext());
            while(values.hasNext()){
                Text text = values.next();
                assertNotNull(text);
                String lang = text.getLanguage();
                //log.info(text.getText()+" | "+text.getLanguage()+" | "+text.getText().endsWith("@"+lang));
                //this tests that the text does not contain the @{lang} suffix as added by
                //the toString method of the RDF Literal java class
                //NOTE: if lang is null the suffix checked below is the literal "@null"
                assertFalse("Labels MUST NOT end with the Language! value="+text.getText(),
                    text.getText().endsWith("@"+lang));
            }
        }
    }

View Full Code Here


    @Override
    public Representation process(Representation rep) {
        Iterator<Text> aliases = rep.getText(FB_ALIAS);
        while(aliases.hasNext()){
            Text alias = aliases.next();
            if(StringUtils.isAllUpperCase(alias.getText())){
                rep.add(FB_NAME, alias);
                rep.add(RDFS_LABEL,alias);
            }
        }
        return rep;

View Full Code Here

    public Representation process(Representation rep) {
        //wikipedia
        if(dbpediaState){
            //we try to link only a single page. So get the English label and
            //search for the according dbpedia key 
            Text enLabel = rep.getFirst(RDFS_LABEL, "en");
            String mainKey = enLabel != null ? decodeKey(enLabel.getText()).replace(' ', '_') : null;
            Iterator<Text> wpEnKeys = rep.getText(WP_EN);
            Collection<String> keys = new ArrayList<String>();
            boolean foundMain = false;
            if(wpEnKeys.hasNext()){ //link to the English dbpedia
                while(!foundMain & wpEnKeys.hasNext()){

View Full Code Here

            //now the EntityAnnotations for the Suggestions
            for(Suggestion suggestion : linkedEntity.getSuggestions()){
                UriRef entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
                //should we use the label used for the match, or search the
                //representation for the best label ... currently its the matched one
                Text label = suggestion.getBestLabel(linkerConfig.getNameField(),language);
                metadata.add(new TripleImpl(entityAnnotation, 
                    Properties.ENHANCER_ENTITY_LABEL, 
                    label.getLanguage() == null ?
                            new PlainLiteralImpl(label.getText()) :
                                new PlainLiteralImpl(label.getText(),
                                    new Language(label.getLanguage()))));
                metadata.add(new TripleImpl(entityAnnotation, 
                    Properties.ENHANCER_ENTITY_REFERENCE, 
                    new UriRef(suggestion.getRepresentation().getId())));
                Iterator<Reference> suggestionTypes = suggestion.getRepresentation().getReferences(linkerConfig.getTypeField());
                while(suggestionTypes.hasNext()){

View Full Code Here

            if (maxScore == null) {
                maxScore = score;
            }
            Iterator<Text> labels = rep.getText(nameField);
            while (labels.hasNext() && match.getLevenshtein() < 1.0) {
                Text label = labels.next();
                if (language == null || // if the content language is unknown ->
                                        // accept all labels
                    label.getLanguage() == null || // accept labels with no
                                                   // language
                    // and labels in the same language as the content
                    (language != null && label.getLanguage().startsWith(language))) {
                    double actMatch = levenshtein(
                        casesensitive ? label.getText() : label.getText().toLowerCase(), namedEntityLabel);
                    if (actMatch > match.getLevenshtein()) {
                        match.setLevenshtein(actMatch);
                        match.setMatchedLabel(label);
                    }
                }

View Full Code Here

        // Adding a natural text with a null language makes the field show up in
        // the field names; removing the same text/language pair makes it
        // disappear again.
        rep.addNaturalText(field, strTextNoLang, (String) null);
        assertTrue(asCollection(rep.getFieldNames()).contains(field));
        rep.removeNaturalText(field, strTextNoLang, (String) null);
        assertFalse(asCollection(rep.getFieldNames()).contains(field));


        // Same add/remove round trip, this time with a Text instance (German
        // text, language "de") passed to the generic add/remove methods.
        Text text = vf.createText("Das ist ein Text zum testen des Text Objektes", "de");
        rep.add(field, text);
        assertTrue(asCollection(rep.getFieldNames()).contains(field));
        rep.remove(field, text);
        assertFalse(asCollection(rep.getFieldNames()).contains(field));

View Full Code Here

        // test conversion of String[] with language as second element:
        // element 0 is expected as the text, element 1 as the language
        String[] textWithLang = new String[] {"Test text with language", "en"};
        rep.add(field, textWithLang);
        // read the values back with a null language array (presumably meaning
        // "no language restriction" -- TODO confirm); exactly one Text is expected
        Iterator<Text> refs = rep.get(field, (String[]) null);
        assertTrue(refs.hasNext());
        Text test = refs.next();
        assertEquals(textWithLang[1], test.getLanguage());
        assertEquals(textWithLang[0], test.getText());
        assertFalse(refs.hasNext());
        // test multiple adds do not generate duplicate Texts
        rep.add(field, textWithLang);
        assertTrue(asCollection(rep.get(field)).size() == 1);
        // test adding an equivalent Text (created via vf.createText) does not
        // generate a duplicate either
        rep.add(field, vf.createText(textWithLang[0], textWithLang[1]));
        assertTrue(asCollection(rep.get(field)).size() == 1);
        // test removing: the same String[] removes the value again
        rep.remove(field, textWithLang);
        assertFalse(rep.get(field).hasNext());


        // test conversion of String[] with only one element (default language):
        // the resulting Text is expected to have a null language
        String[] textWithoutLang = new String[] {"Test text without language"};
        rep.add(field, textWithoutLang);
        // read the values back with a null language array; exactly one Text is expected
        refs = rep.get(field, (String[]) null);
        assertTrue(refs.hasNext());
        test = refs.next();
        assertNull(test.getLanguage());
        assertEquals(textWithoutLang[0], test.getText());
        assertFalse(refs.hasNext());
        // test multiple adds do not generate duplicate Texts
        rep.add(field, textWithoutLang);
        assertTrue(asCollection(rep.get(field)).size() == 1);
        // test adding an equivalent Text (created via vf.createText without a
        // language) does not generate a duplicate either
        rep.add(field, vf.createText(textWithoutLang[0]));
        assertTrue(asCollection(rep.get(field)).size() == 1);
        // test removing: the same String[] removes the value again
        rep.remove(field, textWithoutLang);
        assertFalse(rep.get(field).hasNext());


        // test conversion of String[] with null as second element (default language):
        // the resulting Text is expected to have a null language
        String[] textWithDefaultLang = new String[] {"Test text with default language", null};
        rep.add(field, textWithDefaultLang);
        // read the values back with a null language array; exactly one Text is expected
        refs = rep.get(field, (String[]) null);
        assertTrue(refs.hasNext());
        test = refs.next();
        assertNull(test.getLanguage());
        assertEquals(textWithDefaultLang[0], test.getText());
        assertFalse(refs.hasNext());
        // test multiple adds do not generate duplicate Texts
        rep.add(field, textWithDefaultLang);
        assertTrue(asCollection(rep.get(field)).size() == 1);
        // test adding an equivalent Text (created via vf.createText with a null
        // language) does not generate a duplicate either
        rep.add(field, vf.createText(textWithDefaultLang[0], null));
        assertTrue(asCollection(rep.get(field)).size() == 1);
        // test removing: the same String[] removes the value again
        rep.remove(field, textWithDefaultLang);
        assertFalse(rep.get(field).hasNext());


        // finally test if additional Elements are correctly ignored:
        // only element 0 (text) and element 1 (language) are expected to be used
        String[] ignoreAdditionalElements = new String[] {"Test if additional elements are ignored", "en",
                                                          "ignored1", "ignored2", null, "ignored4"};
        // the same text/language pair without the trailing elements
        String[] sameText = new String[] {"Test if additional elements are ignored", "en"};
        rep.add(field, ignoreAdditionalElements);
        // read the values back with a null language array; exactly one Text is expected
        refs = rep.get(field, (String[]) null);
        assertTrue(refs.hasNext());
        test = refs.next();
        assertEquals(ignoreAdditionalElements[1], test.getLanguage());
        assertEquals(ignoreAdditionalElements[0], test.getText());
        assertFalse(refs.hasNext());
        // test multiple adds do not generate duplicate Texts
        rep.add(field, ignoreAdditionalElements);
        assertTrue(asCollection(rep.get(field)).size() == 1);
        // test if an Array with only the first two elements generate the same Text

View Full Code Here

0 1 2 3 4 5 6

TOP

Related Classes of org.apache.stanbol.entityhub.servicesapi.model.Text

org.apache.stanbol.enhancer.engine.topic.TopicClassificationEngine

org.apache.stanbol.enhancer.engines.entitytagging.impl.EnhancementRDFUtils

org.apache.stanbol.enhancer.engines.entitytagging.impl.NamedEntityTaggingEngine

org.apache.stanbol.enhancer.engines.keywordextraction.engine.KeywordLinkingEngine

org.apache.stanbol.enhancer.engines.keywordextraction.impl.EntityLinker

org.apache.stanbol.enhancer.engines.keywordextraction.impl.Suggestion

org.apache.stanbol.enhancer.engines.keywordextraction.impl.TestSearcherImpl

org.apache.stanbol.enhancer.engines.keywordextraction.linking.EntityLinker

org.apache.stanbol.enhancer.engines.keywordextraction.linking.Suggestion

org.apache.stanbol.enhancer.engines.taxonomy.impl.EnhancementRDFUtils

All source code is the property of the respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.