Examples of LookupToken

org.apache.ctakes.dictionary.lookup.vo.LookupToken
This object represents the SMALLEST span of text that could potentially be a lookup hit. In some cases this may be a single word; in others it may be a group of words, such as a noun phrase. Author: Mayo Clinic.

Examples of org.apache.ctakes.dictionary.lookup.vo.LookupToken

               || annotation instanceof SymbolToken;
         if ( isNonLookup ) {
            continue;
         }
         final BaseToken bta = (BaseToken) annotation;
         final LookupToken lt = new LookupAnnotationToJCasAdapter( bta );
         // POS exclusion logic for first word lookup
         if ( isTagExcluded( bta.getPartOfSpeech() ) ) {
            lt.addStringAttribute( FirstTokenPermutationImpl.LT_KEY_USE_FOR_LOOKUP, FALSE_STRING );
         } else {
            lt.addStringAttribute( FirstTokenPermutationImpl.LT_KEY_USE_FOR_LOOKUP, TRUE_STRING );
         }
         if ( bta instanceof WordToken ) {
            final WordToken wta = (WordToken) bta;
            final String canonicalForm = wta.getCanonicalForm();
            if ( canonicalForm != null ) {
               lt.addStringAttribute( CANONICAL_VARIANT_ATTR, canonicalForm );
            }
         }
         ltList.add( lt );
      }
      return ltList.iterator();

View Full Code Here

Examples of org.apache.ctakes.dictionary.lookup.vo.LookupToken

               || bta instanceof ContractionToken
               || bta instanceof SymbolToken;
         if ( isNonLookup ) {
            continue;
         }
         final LookupToken lt = new LookupAnnotationToJCasAdapter( bta );
         // POS exclusion logic for first word lookup
         if ( isTagExcluded( bta.getPartOfSpeech() ) ) {
            lt.addStringAttribute( FirstTokenPermutationImpl.LT_KEY_USE_FOR_LOOKUP, FALSE_STRING );
         } else {
            lt.addStringAttribute( FirstTokenPermutationImpl.LT_KEY_USE_FOR_LOOKUP, TRUE_STRING );
         }
         if ( bta instanceof WordToken ) {
            final WordToken wta = (WordToken) bta;
            final String canonicalForm = wta.getCanonicalForm();
            if ( canonicalForm != null ) {
               lt.addStringAttribute( CANONICAL_VARIANT_ATTR, canonicalForm );
            }
         }
         ltList.add( lt );
      }
      return ltList;

View Full Code Here

Examples of org.apache.ctakes.dictionary.lookup.vo.LookupToken

               || annotation instanceof SymbolToken;
         if ( isNonLookup ) {
            continue;
         }
         final BaseToken bta = (BaseToken) annotation;
         final LookupToken lt = new LookupAnnotationToJCasAdapter( bta );
         // POS exclusion logic for first word lookup
         if ( isTagExcluded( bta.getPartOfSpeech() ) ) {
            lt.addStringAttribute( FirstTokenPermutationImpl.LT_KEY_USE_FOR_LOOKUP, FALSE_STRING );
         } else {
            lt.addStringAttribute( FirstTokenPermutationImpl.LT_KEY_USE_FOR_LOOKUP, TRUE_STRING );
         }
         if ( bta instanceof WordToken ) {
            final WordToken wta = (WordToken) bta;
            final String canonicalForm = wta.getCanonicalForm();
            if ( canonicalForm != null ) {
               lt.addStringAttribute( CANONICAL_VARIANT_ATTR, canonicalForm );
            }
         }
         ltList.add( lt );
      }
      return ltList.iterator();

View Full Code Here

Examples of org.apache.ctakes.dictionary.lookup.vo.LookupToken

               || bta instanceof ContractionToken
               || bta instanceof SymbolToken;
         if ( isNonLookup ) {
            continue;
         }
         final LookupToken lt = new LookupAnnotationToJCasAdapter( bta );
         // POS exclusion logic for first word lookup
         if ( isTagExcluded( bta.getPartOfSpeech() ) ) {
            lt.addStringAttribute( FirstTokenPermutationImpl.LT_KEY_USE_FOR_LOOKUP, FALSE_STRING );
         } else {
            lt.addStringAttribute( FirstTokenPermutationImpl.LT_KEY_USE_FOR_LOOKUP, TRUE_STRING );
         }
         if ( bta instanceof WordToken ) {
            final WordToken wta = (WordToken) bta;
            final String canonicalForm = wta.getCanonicalForm();
            if ( canonicalForm != null ) {
               lt.addStringAttribute( CANONICAL_VARIANT_ATTR, canonicalForm );
            }
         }
         ltList.add( lt );
      }
      return ltList;

View Full Code Here

Examples of org.apache.ctakes.dictionary.lookup.vo.LookupToken

                                        final Map<String,List<LookupAnnotation>> contextMap) throws Exception
    {
        List<LookupHit> lhList = new ArrayList<>();
        for (int tokenIdx = 0; tokenIdx < lookupTokenList.size(); tokenIdx++)
        {
            LookupToken lt = lookupTokenList.get(tokenIdx);


            List<LookupToken> singleLtList = new ArrayList<>();
            singleLtList.add(lt);


            String[] strArr = iv_phrBuilder.getPhrases(singleLtList);
            Collection<MetaDataHit> mdhCol = getHits(strArr);


            if ((mdhCol != null) && (mdhCol.size() > 0))
            {
                Iterator<MetaDataHit> mdhMatchItr = mdhCol.iterator();
                while (mdhMatchItr.hasNext())
                {
                    MetaDataHit mdh = mdhMatchItr.next();
                    LookupHit lh = new LookupHit(mdh, lt.getStartOffset(), lt
                            .getEndOffset());
                    lhList.add(lh);
                }
            }
        }

View Full Code Here

Examples of org.apache.ctakes.dictionary.lookup.vo.LookupToken


   public String[] getPhrases( final List<LookupToken> lookupTokenList ) {
      final Set<String> phraseSet = new HashSet<>();
      for ( TextExtractor extractor : iv_textExtractorList ) {
         final StringBuilder sb = new StringBuilder();
         LookupToken previousLt = null;
         for ( LookupToken lt : lookupTokenList ) {
            String variant = extractor.getText( lt );
            if ( variant == null ) {
               variant = lt.getText();
            }
            if ( previousLt != null && previousLt.getEndOffset() != lt.getStartOffset() ) {
               // check delta between previous token and current token
               // this delta represents whitespace between tokens
               // insert whitespace
               sb.append( ' ' );
            }

View Full Code Here

Examples of org.apache.ctakes.dictionary.lookup.vo.LookupToken

      // map of all the token end indices as keys and the tokens with those indices as values
      final Map<Integer, List<LookupToken>> ltEndOffsetMap = getMultipleEndOffsetMap( lookupTokenList );


      final List<LookupHit> lookupHits = new ArrayList<>();
      for ( int currentIndex = 0; currentIndex < lookupTokenList.size(); currentIndex++ ) {
         final LookupToken lookupToken = lookupTokenList.get( currentIndex );
         final String useForLookupString = lookupToken.getStringAttribute( LT_KEY_USE_FOR_LOOKUP );
         final boolean useForLookup = Boolean.valueOf( useForLookupString );
         if ( !useForLookup ) {
            continue;
         }
         final Collection<MetaDataHit> firstTokenHits = getFirstTokenHits( lookupToken );
         if ( firstTokenHits == null || firstTokenHits.isEmpty() ) {
            continue;
         }
         int wEndOffset = -1;
         if ( useWindowAnnots ) {
            // get the largest overlapping window annotation
            final LookupAnnotation windowAnnotation = getLargestWindowAnnotation( currentIndex, lookupToken,
                                                                                 ltStartOffsetMap, ltEndOffsetMap,
                                                                                 ltListIndexMap,
                                                                                 wStartOffsetMap, wEndOffsetMap );
            if ( windowAnnotation != null ) {
               wEndOffset = windowAnnotation.getEndOffset();
            }
         }
         if ( wEndOffset == -1 ) {
            iv_logger.debug( "Window size set to max perm level." );
            wEndOffset = getFixedWindowEndOffset( currentIndex, lookupToken, lookupTokenList );
         }
         final List<LookupToken> endLookupTokenList = getLookupTokenList( wEndOffset, ltEndOffsetMap, false );
         if ( endLookupTokenList.isEmpty() ) {
            iv_logger.debug( "Invalid window:" + currentIndex + "," + wEndOffset );
            continue;
         }
         final LookupToken endLookupToken = endLookupTokenList.get( endLookupTokenList.size() - 1 );
         final int startTokenIndex = currentIndex;
         final int endTokenIndex = ltListIndexMap.get( endLookupToken );
         // list of LookupToken objects bound by the window
         final List<LookupToken> wLookupTokenList = lookupTokenList.subList( startTokenIndex, endTokenIndex + 1 );
         // use permutation algorithm to find any hits inside the window

View Full Code Here

Examples of org.apache.ctakes.dictionary.lookup.vo.LookupToken

         return Collections.emptyList();
      }
      final Map<String,Set<MetaDataHit>> namedMetaDataHits = getNamedMetaDataHits( firstTokenHits );


      final List<LookupHit> lookupHits = new ArrayList<>();
      final LookupToken firstWordLookupToken = wLookupTokenList.get( firstTokenIndex );
      final int firstWordStartOffset = firstWordLookupToken.getStartOffset();
      final int firstWordEndOffset = firstWordLookupToken.getEndOffset();
      final List<LookupToken> singleTokenList = Arrays.asList( firstWordLookupToken );
      final String[] firstWordPhrases = iv_phrBuilder.getPhrases( singleTokenList );
      for ( int i=0; i<firstWordPhrases.length; i++ ) {
         // perform toLowerCase() here instead of in the iterations below 2-21-13 spf
         firstWordPhrases[i] = firstWordPhrases[i].toLowerCase();
      }
      int permutationIndex = wLookupTokenList.size();
      if ( firstTokenIndex < wLookupTokenList.size() && permutationIndex > 0 ) {
         permutationIndex--;
      }
      final List<List<Integer>> permutationList = iv_permCacheMap.get( permutationIndex );
      for ( List<Integer> permutations : permutationList ) {
         // Moved sort and offset calculation from inner (per MetaDataHit) iteration 2-21-2013 spf
         Collections.sort( permutations );
         int startOffset = firstWordStartOffset;
         int endOffset = firstWordEndOffset;
         if ( !permutations.isEmpty() ) {
            int firstIdx = permutations.get( 0 );
            if ( firstIdx <= firstTokenIndex ) {
               firstIdx--;
            }
            final LookupToken firstToken = wLookupTokenList.get( firstIdx );
            if ( firstToken.getStartOffset() < firstWordStartOffset ) {
               startOffset = firstToken.getStartOffset();
            }
            int lastIdx = permutations.get( permutations.size() - 1 );
            if ( lastIdx <= firstTokenIndex ) {
               lastIdx--;
            }
            final LookupToken lastToken = wLookupTokenList.get( lastIdx );
            if ( lastToken.getEndOffset() > firstWordEndOffset ) {
               endOffset = lastToken.getEndOffset();
            }
         }
         // convert permutation idx back into LookupTokens
         final List<LookupToken> tempLookupTokens = new ArrayList<>();
         for ( Integer idx : permutations ) {
            if ( idx <= firstTokenIndex ) {
               idx--;
            }
            final LookupToken lookupToken = wLookupTokenList.get( idx );
            tempLookupTokens.add( lookupToken );
         }
         final String[] lookupTokenPhrases = iv_phrBuilder.getPhrases( tempLookupTokens );
         for ( String lookupTokenPhrase : lookupTokenPhrases ) {
            // perform toLowerCase() here instead of repeating in each inner loop

View Full Code Here

Examples of org.apache.ctakes.dictionary.lookup.vo.LookupToken


      if ( startLookupTokenList.isEmpty() || endLookupTokenList.isEmpty() ) {
         iv_logger.debug( "Invalid window:" + startOffset + "," + endOffset );
         return -1;
      }
      final LookupToken startLookupToken = startLookupTokenList.get( 0 );
      final Integer startIdx = ltListIndexMap.get( startLookupToken );


      final LookupToken endLookupToken = endLookupTokenList.get( endLookupTokenList.size() - 1 );
      final Integer endIdx = ltListIndexMap.get( endLookupToken );


      return endIdx - startIdx + 1;
   }

View Full Code Here

Examples of org.apache.ctakes.dictionary.lookup.vo.LookupToken

      final int count = Math.min( tokenIdx + iv_maxPermutationLevel, ltList.size() );
      if ( count <= 0 ) {
         return 0;
      }
      for ( int i = count - 1; i >= 0; i-- ) {
         final LookupToken tempLookupToken = ltList.get( i );
         if ( tempLookupToken != null ) {
            return tempLookupToken.getEndOffset();
         }
      }
      return 0;
   }

View Full Code Here

0 1 2 3

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.