Examples of ReutersCorpusDocument


Examples of org.mediameter.cliff.test.reuters.ReutersCorpusDocument

        @Override
        public FileVisitResult visitFile(Path aFile, BasicFileAttributes aAttrs)
                throws IOException {
            logger.info("--------------------------------------------------------------------------------");
            if( aFile.getFileName().toString().endsWith(".xml") ) {
                ReutersCorpusDocument doc;
                try {
                   
                    doc = ReutersCorpusDocument.fromFile(aFile.toString(),substitutions);
                    if(doc.hasCodedCountries()){
                        ExtractedEntities entities =  ParseManager.extractAndResolve(doc.getCompiledText());
                       
                        logger.info("Checking file "+aFile);
                        articlesWithLocations++;
                        List<GeoName> countriesTheyCoded = new ArrayList<GeoName>();
                        for(CountryCode countryCode:doc.getCountryCodeObjects()){
                            countriesTheyCoded.add( CountryGeoNameLookup.lookup(countryCode.name()) );
                        }
                        logger.info(doc.getId()+": "+countriesTheyCoded);
                        List<GeoName> ourMentionedCountries = entities.getUniqueCountryGeoNames();

                        // check to make sure we found all the countries they coded
                        if(ourMentionedCountries.size()>0){
                            boolean allMatched = true;
                            for(GeoName countryTheyCoded:countriesTheyCoded){
                                if(!ourMentionedCountries.contains(countryTheyCoded)){
                                    allMatched = false;
                                }
                            }
                            if(allMatched){
                                mentionsArticlesWeGotRight++;
                            } else {
                                logger.warn(doc.getId()+": mentions "+ourMentionedCountries+" they coded "+countriesTheyCoded);
                            }
                        }

                        //also have a measure for making sure the main "about" country is included in their list of countries
                        FocusStrategy focus = ParseManager.getFocusStrategy();
                        List<FocusLocation> ourAboutnessCountries = focus.selectCountries(entities.getResolvedLocations());
                        List<GeoName> ourAboutnessGeoNames = new ArrayList<GeoName>();
                        for(FocusLocation aboutLocation: ourAboutnessCountries){
                            ourAboutnessGeoNames.add(aboutLocation.getGeoName());
                        }
                        if(ourAboutnessGeoNames.size()>0){
                            boolean allMatched = true;
                            for(GeoName focusGeoName:ourAboutnessGeoNames){
                                if(!countriesTheyCoded.contains(focusGeoName)){
                                    allMatched = false;
                                }
                            }
                            if(allMatched){
                                focusArticlesWeGotRight++;
                            } else {
                                logger.warn(doc.getId()+": about "+ourAboutnessGeoNames+" they found "+countriesTheyCoded);
                            }
                        }
                       
                    }
                } catch (Exception e) {
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.