Package org.apache.ctakes.dictionary.lookup2.dictionary.RareWordTermMapCreator

Examples of org.apache.ctakes.dictionary.lookup2.dictionary.RareWordTermMapCreator.CuiTuiTerm


      } catch ( IOException ioE ) {
         throw new AnnotatorContextException( "Could not parse " + descriptorFile.getPath(), new Object[0], ioE );
      }
      final Map<String, RareWordDictionary> dictionaries
            = parseDictionaries( uimaContext, doc.getRootElement().getChild( DICTIONARIES_KEY ) );
      final TermConsumer consumer = parseConsumerXml( uimaContext,
                                                              doc.getRootElement().getChild( CONSUMER_KEY ) );
      return new DictionarySpec( dictionaries.values(), consumer );
   }
View Full Code Here


      final List<RelatedCui> relatedCuis = new ArrayList<RelatedCui>();
      try {
         initMetaDataStatement( cui );
         final ResultSet resultSet = _metadataStatement.executeQuery();
         while ( resultSet.next() ) {
            final RelatedCui relatedCui = new RelatedCui( resultSet.getString( FIELD_INDEX.CUI.__index),
                                                          resultSet.getString( FIELD_INDEX.RELATION_TYPE.__index ) );
            relatedCuis.add( relatedCui );
         }
         // Though the ResultSet interface documentation states that there are automatic closures,
         // it is up to the driver to implement this behavior ...  historically some drivers have not done so
View Full Code Here

      try {
         final BufferedReader reader = new BufferedReader( new FileReader( bsvFile ) );
         String line = reader.readLine();
         while ( line != null ) {
            final String[] columns = LookupUtil.fastSplit( line, '|' );
            final CuiTuiTerm cuiTuiTerm = createCuiTuiTerm( columns, entityId );
            if ( cuiTuiTerm != null ) {
               // Add to the dictionary
               cuiTuiTerms.add( cuiTuiTerm );
            } else {
               LOGGER.warn( "Bad BSV line " + line + " in " + bsvFile.getPath() );
View Full Code Here

         return null;
      }
      final String cui = columns[ cuiIndex ].trim();
      final String tui = (tuiIndex < 0 || columns[tuiIndex].trim().isEmpty()) ? entityId : columns[ tuiIndex ].trim();
      final String term = columns[ termIndex ].trim().toLowerCase();
      return new CuiTuiTerm( cui, tui, term );
   }
View Full Code Here

         Collection<RareWordTerm> rareWordTerms = rareWordTermMap.get( rareWord );
         if ( rareWordTerms == null ) {
            rareWordTerms = new ArrayList<RareWordTerm>();
            rareWordTermMap.put( rareWord, rareWordTerms );
         }
         rareWordTerms.add( new RareWordTerm( cuiTuiTerm.getTerm(), cuiTuiTerm.__cui, cuiTuiTerm.__tui,
                                              rareWord, wordIndex, tokenCount ) );
      }
      return rareWordTermMap;
   }
View Full Code Here

      final List<RareWordTerm> rareWordTerms = new ArrayList<RareWordTerm>();
      try {
         initMetaDataStatement( rareWordText );
         final ResultSet resultSet = _metadataStatement.executeQuery();
         while ( resultSet.next() ) {
            final RareWordTerm rareWordTerm = new RareWordTerm( resultSet.getString( FIELD_INDEX.TEXT.__index),
                                                                resultSet.getString( FIELD_INDEX.CUI.__index ),
                                                                resultSet.getString( FIELD_INDEX.TUI.__index ),
                                                                resultSet.getString( FIELD_INDEX.RWORD.__index ),
                                                                resultSet.getInt( FIELD_INDEX.RINDEX.__index ),
                                                                resultSet.getInt( FIELD_INDEX.TCOUNT.__index ) );
View Full Code Here

            continue;
         }
         for ( RareWordTerm rareWordHit : rareWordHits ) {
            if ( rareWordHit.getTokenCount() == 1 ) {
               // Single word term, add and move on
               termsFromDictionary.add( new SpannedRareWordTerm( rareWordHit, lookupToken.getTextSpan() ) );
               continue;
            }
            final int termStartIndex = lookupTokenIndex - rareWordHit.getRareWordIndex();
            if ( termStartIndex < 0 || termStartIndex + rareWordHit.getTokenCount() > allTokens.size() ) {
               // term will extend beyond window
               continue;
            }
            final int termEndIndex = termStartIndex + rareWordHit.getTokenCount() - 1;
            if ( TokenMatchUtil.isTermMatch( rareWordHit, allTokens, termStartIndex, termEndIndex ) ) {
               final int spanStart = allTokens.get( termStartIndex ).getStart();
               final int spanEnd = allTokens.get( termEndIndex ).getEnd();
               termsFromDictionary.add( new SpannedRareWordTerm( rareWordHit, spanStart, spanEnd ) );
            }
         }
      }
   }
View Full Code Here

            continue;
         }
         for ( RareWordTerm rareWordHit : rareWordHits ) {
            if ( rareWordHit.getTokenCount() == 1 ) {
               // Single word term, add and move on
               termsFromDictionary.add( new SpannedRareWordTerm( rareWordHit, lookupToken.getTextSpan() ) );
               continue;
            }
            final int termStartIndex = lookupTokenIndex - rareWordHit.getRareWordIndex();
            if ( termStartIndex < 0 || termStartIndex + rareWordHit.getTokenCount() > allTokens.size() ) {
               // term will extend beyond window
               continue;
            }
            final SpannedRareWordTerm overlapTerm = getOverlapTerm( allTokens, lookupTokenIndex, rareWordHit,
                                                                    _consecutiveSkipMax, _totalSkipMax );
            if ( overlapTerm != null ) {
               termsFromDictionary.add( overlapTerm );
            }
         }
View Full Code Here

         if ( lastWordIndex == -1 ) {
            return null;
         }
      }
      if ( missingSpanKeys.isEmpty() ) {
         return new SpannedRareWordTerm( rareWordHit,
                                         allTokens.get( firstWordIndex ).getStart(),
                                         allTokens.get( lastWordIndex ).getEnd() );
      }
      final TextSpan discontiguousSpanKey = new MultiTextSpan( allTokens.get( firstWordIndex ).getStart(),
                                                                     allTokens.get( lastWordIndex ).getEnd(),
                                                                     missingSpanKeys );
      return new SpannedRareWordTerm( rareWordHit, discontiguousSpanKey );
   }
View Full Code Here

    * @param rareWordTerm contains a term from a {@link org.apache.ctakes.dictionary.lookup2.dictionary.RareWordDictionary}
    * @param startOffset the start index of the term
    * @param endOffset the end index of the term
    */
   public SpannedRareWordTerm( final RareWordTerm rareWordTerm, final int startOffset, final int endOffset ) {
      // Convenience overload: wrap the two offsets in a DefaultTextSpan and delegate to the
      // ( RareWordTerm, text span ) constructor so that all construction goes through one path.
      this( rareWordTerm, new DefaultTextSpan( startOffset, endOffset ) );
   }
View Full Code Here

TOP

Related Classes of org.apache.ctakes.dictionary.lookup2.dictionary.RareWordTermMapCreator.CuiTuiTerm

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.