Package: org.apache.lucene.analysis

Usage examples of the org.apache.lucene.analysis.PerFieldAnalyzerWrapper class


    doc.add(createFieldLastUpdated(bookmark.getLastUpdated()));   
    return doc;
  }

  public static Analyzer createDocumentAnalyzer(){
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer());
    wrapper.addAnalyzer(DocumentCreator.FIELD_BOOKMARK_ID,new KeywordAnalyzer());
    wrapper.addAnalyzer(DocumentCreator.FIELD_URL_MD5,new KeywordAnalyzer());
    wrapper.addAnalyzer(DocumentCreator.FIELD_TAG, new KeywordAnalyzer());
    wrapper.addAnalyzer(DocumentCreator.FIELD_USER,new KeywordAnalyzer());
    return wrapper;
  }
View Full Code Here


   *
   * @return Returns the <code>Analyzer</code> that is used for all
   * analyzation of Fields and Queries.
   */
  Analyzer createAnalyzer() {
    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new DefaultAnalyzer());

    /* Standard (Lowercase, Letter, Stop,...) */
    StandardAnalyzer stdAnalyzer = new StandardAnalyzer();
    analyzer.addAnalyzer(String.valueOf(INews.TITLE), stdAnalyzer);
    analyzer.addAnalyzer(String.valueOf(INews.DESCRIPTION), stdAnalyzer);
    analyzer.addAnalyzer(String.valueOf(INews.ATTACHMENTS_CONTENT), stdAnalyzer);

    /* Simple (Lowercase, Whitespace Tokzenizer) */
    LowercaseWhitespaceAnalyzer simpleAnalyzer = new LowercaseWhitespaceAnalyzer();
    analyzer.addAnalyzer(String.valueOf(INews.AUTHOR), simpleAnalyzer);
    analyzer.addAnalyzer(String.valueOf(INews.LABEL), simpleAnalyzer);

    /* Simple (Lowercase, Delim Tokenizer) */
    analyzer.addAnalyzer(String.valueOf(INews.CATEGORIES), new LowercaseDelimiterAnalyzer('\n'));

    return analyzer;
  }
View Full Code Here

   *
   * @return Returns the <code>Analyzer</code> that is used for all analyzation
   * of Fields and Queries.
   */
  public static Analyzer createAnalyzer() {
    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new DefaultAnalyzer());

    /* Standard (Lowercase, Letter, Stop,...) */
    StandardAnalyzer stdAnalyzer;
    if (DISABLE_STOP_WORDS)
      stdAnalyzer = new StandardAnalyzer(Collections.EMPTY_SET);
    else
      stdAnalyzer = new StandardAnalyzer();

    analyzer.addAnalyzer(String.valueOf(INews.TITLE), stdAnalyzer);
    analyzer.addAnalyzer(String.valueOf(INews.DESCRIPTION), stdAnalyzer);
    analyzer.addAnalyzer(String.valueOf(INews.ATTACHMENTS_CONTENT), stdAnalyzer);

    /* Simple (Lowercase, Whitespace Tokzenizer) */
    LowercaseWhitespaceAnalyzer simpleAnalyzer = new LowercaseWhitespaceAnalyzer();
    analyzer.addAnalyzer(String.valueOf(INews.AUTHOR), simpleAnalyzer);
    analyzer.addAnalyzer(String.valueOf(INews.LABEL), simpleAnalyzer);

    /* Simple (Lowercase, Delim Tokenizer) */
    analyzer.addAnalyzer(String.valueOf(INews.CATEGORIES), new LowercaseDelimiterAnalyzer('\n'));

    return analyzer;
  }
View Full Code Here

                                   Analyzer franceAnalyzer,
                                   Analyzer swedenAnalyzer,
                                   Analyzer denmarkAnalyzer,
                                   String usResult) throws Exception {
    RAMDirectory indexStore = new RAMDirectory();
    PerFieldAnalyzerWrapper analyzer
      = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer());
    analyzer.addAnalyzer("US", usAnalyzer);
    analyzer.addAnalyzer("France", franceAnalyzer);
    analyzer.addAnalyzer("Sweden", swedenAnalyzer);
    analyzer.addAnalyzer("Denmark", denmarkAnalyzer);
    IndexWriter writer = new IndexWriter
      (indexStore, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

    // document data:
    // the tracer field is used to determine which document was hit
View Full Code Here

                                   String usResult,
                                   String frResult,
                                   String svResult,
                                   String dkResult) throws Exception {
    RAMDirectory indexStore = new RAMDirectory();
    PerFieldAnalyzerWrapper analyzer
      = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
    analyzer.addAnalyzer("US", usAnalyzer);
    analyzer.addAnalyzer("France", franceAnalyzer);
    analyzer.addAnalyzer("Sweden", swedenAnalyzer);
    analyzer.addAnalyzer("Denmark", denmarkAnalyzer);
    IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(
        TEST_VERSION_CURRENT, analyzer));

    // document data:
    // the tracer field is used to determine which document was hit
View Full Code Here

   */
  public static Analyzer createAnalyzer(final GenericConfiguration config) {
    // Caching the analyzer instances is not possible as those do not implement Serializable
    // TODO: cache the config (imho caching should be implemented in the config itself)

    PerFieldAnalyzerWrapper analyzerWrapper = new PerFieldAnalyzerWrapper(createDefaultAnalyzer(config));
    configuredAnalyzerMap.clear();

    //Load analyzer config
    GenericConfiguration analyzerConfig = loadAnalyzerConfig(config);
    if (analyzerConfig != null) {
      ArrayList<String> addedReverseAttributes = new ArrayList<String>();
      List<String> reverseAttributes = getReverseAttributes(config);
      Map<String, GenericConfiguration> subconfigs = analyzerConfig.getSortedSubconfigs();
      if (subconfigs != null) {
        for (Map.Entry<String, GenericConfiguration> entry : subconfigs.entrySet()) {
          GenericConfiguration analyzerconfig = entry.getValue();
          String fieldname = analyzerconfig.getString(FIELD_NAME_KEY);
          String analyzerclass = analyzerconfig.getString(ANALYZER_CLASS_KEY);

          Analyzer analyzerInstance = createAnalyzer(analyzerclass, analyzerconfig);
          analyzerWrapper.addAnalyzer(fieldname, analyzerInstance);
          configuredAnalyzerMap.put(fieldname, analyzerInstance.getClass().getCanonicalName());

          //ADD REVERSE ANALYZERS
          if (reverseAttributes != null && reverseAttributes.contains(fieldname)) {
            addedReverseAttributes.add(fieldname);

            ReverseAnalyzer reverseAnalyzer = new ReverseAnalyzer(analyzerInstance);
            analyzerWrapper.addAnalyzer(fieldname + REVERSE_ATTRIBUTE_SUFFIX, reverseAnalyzer);

            configuredAnalyzerMap.put(fieldname + REVERSE_ATTRIBUTE_SUFFIX, reverseAnalyzer.getClass()
                .getCanonicalName());
          }
        }
      }
      //ADD ALL NON CONFIGURED REVERSE ANALYZERS
      if (reverseAttributes != null && reverseAttributes.size() > 0) {
        for (String att : reverseAttributes) {
          if (!addedReverseAttributes.contains(att)) {
            ReverseAnalyzer reverseAnalyzer = new ReverseAnalyzer(null);
            analyzerWrapper.addAnalyzer(att + REVERSE_ATTRIBUTE_SUFFIX, reverseAnalyzer);
            configuredAnalyzerMap.put(att + REVERSE_ATTRIBUTE_SUFFIX, reverseAnalyzer.getClass()
                .getCanonicalName());
          }
        }
      }
View Full Code Here

  * only the fields to be tokenized.
  */
  private Analyzer createAnalyzer() {
        final String[] stopwords = commonConfig.getStringArray("stopwords");
        StandardAnalyzer stdAnalyzer = new LatinStandardAnalyzer(stopwords);
    PerFieldAnalyzerWrapper retval = new PerFieldAnalyzerWrapper(stdAnalyzer);
    String[] nonTokenizedField = searcherConfig.getStringArray("QueryParser.nonTokenizedFields");
        String[] synonymFields = searcherConfig.getStringArray("QueryParser.synonymFields");
        String synonymFile = searcherConfig.getString("QueryParser.synonymFile");

      KeywordAnalyzer keywordAnalyzer = new KeywordAnalyzer();
    for (String field : nonTokenizedField) {
        logger.info("createAnalyzer: adding field to be skipped during tokenization: " + field);
      retval.addAnalyzer(field, keywordAnalyzer);
    }
 
        if (synonymFields.length > 0 ) {
            SynonymAnalyzer synonymAnalyzer = new SynonymAnalyzer(stdAnalyzer,synonymFile);
            for (String field : synonymFields) {
                logger.info("createAnalyzer: adding field to be expanded with synonyms: " + field);
                retval.addAnalyzer(field, synonymAnalyzer);
            }
        }
        return retval;

    }
View Full Code Here

                                   String usResult,
                                   String frResult,
                                   String svResult,
                                   String dkResult) throws Exception {
    RAMDirectory indexStore = new RAMDirectory();
    PerFieldAnalyzerWrapper analyzer
      = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
    analyzer.addAnalyzer("US", usAnalyzer);
    analyzer.addAnalyzer("France", franceAnalyzer);
    analyzer.addAnalyzer("Sweden", swedenAnalyzer);
    analyzer.addAnalyzer("Denmark", denmarkAnalyzer);
    IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(
        TEST_VERSION_CURRENT, analyzer));

    // document data:
    // the tracer field is used to determine which document was hit
View Full Code Here

    };

    static PerFieldAnalyzerWrapper analyzerForField (String fieldName, Analyzer defaultAnalyzer)
    {
        if (defaultAnalyzer == null) defaultAnalyzer = new StandardAnalyzer();
        PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer);
        analyzer.addAnalyzer(fieldName, new SourceCodeAnalyzer());
        return analyzer;
    }
View Full Code Here

    private void buildPerFieldAnalyzerWrapper(Analyzer anazlyer, String field) {
        if (anazlyer == null || field == null || field.length() == 0)
            return;
        if (this.perFieldAnalyzer == null)
            this.perFieldAnalyzer = new PerFieldAnalyzerWrapper(
                    this.serviceAnalyzer);
        this.perFieldAnalyzer.addAnalyzer(field, anazlyer);
    }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.PerFieldAnalyzerWrapper

Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.