Package org.apache.ctakes.dictionary.lookup2.dictionary.RareWordTermMapCreator

Examples of org.apache.ctakes.dictionary.lookup2.dictionary.RareWordTermMapCreator.CuiTuiTerm


   // Character span of the wrapped annotation; the constructor builds it from the annotation's begin and end offsets.
   final private TextSpan _textSpan;
   // Covered text of the wrapped annotation, lower-cased by the constructor.
   final private String _text;
   // Alternate form of the token for lookup. NOTE(review): presumably set to the WordToken canonical form
   // when that is non-null and differs from _text -- the assignment is outside this excerpt, confirm.
   private String _variant;

   public FastLookupToken( final Annotation jcasAnnotation ) {
      _textSpan = new DefaultTextSpan( jcasAnnotation.getBegin(), jcasAnnotation.getEnd() );
      _text = jcasAnnotation.getCoveredText().toLowerCase();
      if ( jcasAnnotation instanceof WordToken ) {
         final String canonicalForm = ((WordToken)jcasAnnotation).getCanonicalForm();
         // If canonical is not null AND not the same as the plain text then it is a valid variant for lookup
         if ( canonicalForm != null && !canonicalForm.equals( _text ) ) {
View Full Code Here


      if ( missingSpanKeys.isEmpty() ) {
         return new SpannedRareWordTerm( rareWordHit,
                                         allTokens.get( firstWordIndex ).getStart(),
                                         allTokens.get( lastWordIndex ).getEnd() );
      }
      final TextSpan discontiguousSpanKey = new MultiTextSpan( allTokens.get( firstWordIndex ).getStart(),
                                                                     allTokens.get( lastWordIndex ).getEnd(),
                                                                     missingSpanKeys );
      return new SpannedRareWordTerm( rareWordHit, discontiguousSpanKey );
   }
View Full Code Here

      if ( missingSpanKeys.isEmpty() ) {
         return new SpannedRareWordTerm( rareWordHit,
                                         allTokens.get( firstWordIndex ).getStart(),
                                         allTokens.get( lastWordIndex ).getEnd() );
      }
      final TextSpan discontiguousSpanKey = new MultiTextSpan( allTokens.get( firstWordIndex ).getStart(),
                                                                     allTokens.get( lastWordIndex ).getEnd(),
                                                                     missingSpanKeys );
      return new SpannedRareWordTerm( rareWordHit, discontiguousSpanKey );
   }
View Full Code Here

         final Map<TextSpan, Collection<RareWordTerm>> lookupHitMap ) {
      final Collection<TextSpan> discardSpans = new HashSet<TextSpan>();
      final List<TextSpan> textSpans = new ArrayList<TextSpan>( lookupHitMap.keySet() );
      final int count = textSpans.size();
      for ( int i=0; i<count; i++ ) {
         final TextSpan spanKeyI = textSpans.get( i );
         for ( int j=i+1; j<count; j++ ) {
            final TextSpan spanKeyJ = textSpans.get( j );
            if ( (spanKeyJ.getStart() <= spanKeyI.getStart() && spanKeyJ.getEnd() > spanKeyI.getEnd())
                  || (spanKeyJ.getStart() < spanKeyI.getStart() && spanKeyJ.getEnd() >= spanKeyI.getEnd()) ) {
               // J contains I, discard less precise concepts for span I and move on to next span I
               if ( spanKeyJ instanceof MultiTextSpan ) {
                  boolean spanIok = false;
                  for ( TextSpan missingSpanKey : ((MultiTextSpan)spanKeyJ).getMissingSpans() ) {
                     if ( (missingSpanKey.getStart() >= spanKeyI.getStart() && missingSpanKey.getStart() < spanKeyI.getEnd())
                           || (missingSpanKey.getEnd() > spanKeyI.getStart() && missingSpanKey.getEnd() <= spanKeyI.getEnd()) ) {
                        // I overlaps a missing span, so it is actually ok
                        spanIok = true;
                        break;
                     }
                  }
                  if ( !spanIok ) {
                     discardSpans.add( spanKeyI );
                     break;
                  }
               } else {
                  discardSpans.add( spanKeyI );
                  break;
               }
            }
            if ( ( (spanKeyI.getStart() <= spanKeyJ.getStart() && spanKeyI.getEnd() > spanKeyJ.getEnd() )
                  || (spanKeyI.getStart() < spanKeyJ.getStart() && spanKeyI.getEnd() >= spanKeyJ.getEnd()) ) ) {
               // I contains J, discard less precise concepts for span J and move on to next span J
               if ( spanKeyI instanceof MultiTextSpan ) {
                  boolean spanJok = false;
                  for ( TextSpan missingSpanKey : ((MultiTextSpan)spanKeyI).getMissingSpans() ) {
                     if ( (missingSpanKey.getStart() >= spanKeyJ.getStart() && missingSpanKey.getStart() < spanKeyJ.getEnd())
                           || (missingSpanKey.getEnd() > spanKeyJ.getStart() && missingSpanKey.getEnd() <= spanKeyJ.getEnd()) ) {
                        // J overlaps a missing span, so it is actually ok
                        spanJok = true;
                        break;
                     }
                  }
View Full Code Here

      }
      final Map<String, RareWordDictionary> dictionaries
            = parseDictionaries( uimaContext, doc.getRootElement().getChild( DICTIONARIES_KEY ) );
      final TermConsumer consumer = parseConsumerXml( uimaContext,
                                                              doc.getRootElement().getChild( CONSUMER_KEY ) );
      return new DictionarySpec( dictionaries.values(), consumer );
   }
View Full Code Here

            final String partOfSpeech = baseToken.getPartOfSpeech();
            if ( partOfSpeech == null || !_exclusionPartsOfSpeech.contains( partOfSpeech ) ) {
               lookupTokenIndices.add( allTokens.size() );
            }
         }
         final FastLookupToken lookupToken = new FastLookupToken( baseToken );
         allTokens.add( lookupToken );
      }
   }
View Full Code Here

   public void findTerms( final RareWordDictionary dictionary,
                           final List<FastLookupToken> allTokens, final List<Integer> lookupTokenIndices,
                           final Collection<SpannedRareWordTerm> termsFromDictionary ) {
      Collection<RareWordTerm> rareWordHits;
      for ( Integer lookupTokenIndex : lookupTokenIndices ) {
         final FastLookupToken lookupToken = allTokens.get( lookupTokenIndex );
         rareWordHits = dictionary.getRareWordHits( lookupToken );
         if ( rareWordHits == null || rareWordHits.isEmpty() ) {
            continue;
         }
         for ( RareWordTerm rareWordHit : rareWordHits ) {
            if ( rareWordHit.getTokenCount() == 1 ) {
               // Single word term, add and move on
               termsFromDictionary.add( new SpannedRareWordTerm( rareWordHit, lookupToken.getTextSpan() ) );
               continue;
            }
            final int termStartIndex = lookupTokenIndex - rareWordHit.getRareWordIndex();
            if ( termStartIndex < 0 || termStartIndex + rareWordHit.getTokenCount() > allTokens.size() ) {
               // term will extend beyond window
View Full Code Here

   public void findTerms( final RareWordDictionary dictionary,
                          final List<FastLookupToken> allTokens, final List<Integer> lookupTokenIndices,
                          final Collection<SpannedRareWordTerm> termsFromDictionary ) {
      Collection<RareWordTerm> rareWordHits;
      for ( Integer lookupTokenIndex : lookupTokenIndices ) {
         final FastLookupToken lookupToken = allTokens.get( lookupTokenIndex );
         rareWordHits = dictionary.getRareWordHits( lookupToken );
         if ( rareWordHits == null || rareWordHits.isEmpty() ) {
            continue;
         }
         for ( RareWordTerm rareWordHit : rareWordHits ) {
            if ( rareWordHit.getTokenCount() == 1 ) {
               // Single word term, add and move on
               termsFromDictionary.add( new SpannedRareWordTerm( rareWordHit, lookupToken.getTextSpan() ) );
               continue;
            }
            final int termStartIndex = lookupTokenIndex - rareWordHit.getRareWordIndex();
            if ( termStartIndex < 0 || termStartIndex + rareWordHit.getTokenCount() > allTokens.size() ) {
               // term will extend beyond window
View Full Code Here

TOP

Related Classes of org.apache.ctakes.dictionary.lookup2.dictionary.RareWordTermMapCreator.CuiTuiTerm

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Corporation. Contact coftware#gmail.com.