Package org.apache.uima.cas.test

Source Code of org.apache.uima.cas.test.CASTestSetup

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.uima.cas.test;

import junit.framework.TestCase;

import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.FSIndex;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.admin.CASAdminException;
import org.apache.uima.cas.admin.FSIndexComparator;
import org.apache.uima.cas.admin.FSIndexRepositoryMgr;
import org.apache.uima.cas.admin.LinearTypeOrderBuilder;
import org.apache.uima.cas.admin.TypeSystemMgr;

public class CASTestSetup  implements AnnotatorInitializer {

  // Type system constants.
  public static final String TOKEN_TYPE = "Token";

  public static final String TOKEN_TYPE_FEAT = "type";

  public static final String TOKEN_TYPE_FEAT_Q = TOKEN_TYPE + TypeSystem.FEATURE_SEPARATOR
          + TOKEN_TYPE_FEAT;

  public static final String TOKEN_TYPE_TYPE = "TokenType";

  public static final String WORD_TYPE = "Word";
 
  public static final String ARRAYFSWITHSUBTYPE_TYPE = "ArrayFsWithSubtype";
 
  public static final String ARRAYFSWITHSUBTYPE_TYPE_FEAT = "subArrayOfAnnot";
 
  public static final String ARRAYFSWITHSUBTYPE_TYPE_FEAT_Q = ARRAYFSWITHSUBTYPE_TYPE + TypeSystem.FEATURE_SEPARATOR
          + ARRAYFSWITHSUBTYPE_TYPE_FEAT;

  public static final String SEP_TYPE = "Separator";

  public static final String EOS_TYPE = "EndOfSentence";

  public static final String SENT_TYPE = "Sentence";

  // public static final String INT_ARRAY_SUB = "IntArraySub";
  public static final String INT_SUB_NAME = "intArrayName";

  public static final String LEMMA_FEAT = "lemma";

  public static final String LEMMA_FEAT_Q = TOKEN_TYPE + TypeSystem.FEATURE_SEPARATOR + LEMMA_FEAT;

  public static final String SENT_LEN_FEAT = "sentenceLength";

  public static final String SENT_LEN_FEAT_Q = SENT_TYPE + TypeSystem.FEATURE_SEPARATOR
          + SENT_LEN_FEAT;

  public static final String TOKEN_FLOAT_FEAT = "tokenFloatFeat";

  public static final String TOKEN_FLOAT_FEAT_Q = TOKEN_TYPE + TypeSystem.FEATURE_SEPARATOR
          + TOKEN_FLOAT_FEAT;

  public static final String LEMMA_LIST_FEAT = "lemmaList";

  public static final String LEMMA_LIST_FEAT_Q = TOKEN_TYPE + TypeSystem.FEATURE_SEPARATOR
          + LEMMA_LIST_FEAT;

  public static final String LANG_PAIR = "org.apache.lang.LanguagePair";

  public static final String LANG1 = "lang1";

  public static final String LANG2 = "lang2";

  public static final String DESCR_FEAT = "description";

  public static final String GROUP_1 = "org.apache.lang.Group1";

  public static final String GROUP_2 = "org.apache.lang.Group2";

  public static final String[] GROUP_1_LANGUAGES = { "Chinese", "Japanese", "Korean", "English",
      "French", "German", "Italian", "Spanish", "Portuguese" };

  public static final String[] GROUP_2_LANGUAGES = { "Arabic", "Czech", "Danish", "Dutch",
      "Finnish", "Greek", "Hebrew", "Hungarian", "Norwegian", "Polish", "Portuguese", "Russian",
      "Turkish" };

  // Index name constants.
  public static final String ANNOT_SET_INDEX = "Annotation Set Index";

  public static final String ANNOT_BAG_INDEX = "Annotation Bag Index";

  public static final String ANNOT_SORT_INDEX = "Annotation Sort Index";

  /**
   * Constructor for CASTestSetup.
   */
  public CASTestSetup() {
    super();
  }

  /**
   * @see org.apache.uima.cas.test.AnnotatorInitializer#initTypeSystem(TypeSystemMgr)
   */
 
  /* Types:
   * TOP
   *   Token  TOKEN_TYPE
   *     Word
   *     Separator
   *     EndOfSentence
   *   ArrayFSwithSubtype
   *   Annotation
   *     Sentence
   *  
   */
  public void initTypeSystem(TypeSystemMgr tsm) {
    // Add new types and features.
    Type topType = tsm.getTopType();
    Type annotType = tsm.getType(CAS.TYPE_NAME_ANNOTATION);
    // assert(annotType != null);
    tsm.addType(SENT_TYPE, annotType);
    Type tokenType = tsm.addType(TOKEN_TYPE, annotType);
    Type tokenTypeType = tsm.addType(TOKEN_TYPE_TYPE, topType);
    tsm.addType(WORD_TYPE, tokenTypeType);
    Type arrayFsWithSubtypeType = tsm.addType(ARRAYFSWITHSUBTYPE_TYPE, topType);
    Type arrayOfAnnot = tsm.getArrayType(annotType);
    tsm.addFeature(ARRAYFSWITHSUBTYPE_TYPE_FEAT, arrayFsWithSubtypeType, arrayOfAnnot);
    tsm.addType(SEP_TYPE, tokenTypeType);
    tsm.addType(EOS_TYPE, tokenTypeType);
    tsm.addFeature(TOKEN_TYPE_FEAT, tokenType, tokenTypeType);
    tsm.addFeature(TOKEN_FLOAT_FEAT, tokenType, tsm.getType(CAS.TYPE_NAME_FLOAT));
    // Add a type that inherits from IntArray.
    // tsm.addType(INT_ARRAY_SUB, tsm.getType(CAS.TYPE_NAME_INTEGER_ARRAY));
    // tsm.addFeature(
    // INT_SUB_NAME,
    // tsm.getType(INT_ARRAY_SUB),
    // tsm.getType(CAS.TYPE_NAME_STRING));
    tsm.addFeature(LEMMA_FEAT, tokenType, tsm.getType(CAS.TYPE_NAME_STRING));
    tsm.addFeature(SENT_LEN_FEAT, tsm.getType(SENT_TYPE), tsm.getType(CAS.TYPE_NAME_INTEGER));
    tsm.addFeature(LEMMA_LIST_FEAT, tsm.getType(TOKEN_TYPE), tsm
            .getType(CAS.TYPE_NAME_STRING_ARRAY));
    Type group1 = tsm.addStringSubtype(GROUP_1, GROUP_1_LANGUAGES);
    Type group2 = tsm.addStringSubtype(GROUP_2, GROUP_2_LANGUAGES);
    Type langPair = tsm.addType(LANG_PAIR, topType);
    tsm.addFeature(LANG1, langPair, group1);
    tsm.addFeature(LANG2, langPair, group2);
    Type stringType = tsm.getType(CAS.TYPE_NAME_STRING);
    tsm.addFeature(DESCR_FEAT, langPair, stringType);
    boolean exc = false;
    try {
      tsm.addType("some.new.Name", group1);
    } catch (CASAdminException e) {
      TestCase.assertTrue(e.getError() == CASAdminException.TYPE_IS_INH_FINAL);
      exc = true;
    }
    TestCase.assertTrue(exc);
    exc = false;
    try {
      tsm.addFeature("some.new.Name", group1, stringType);
    } catch (CASAdminException e) {
      TestCase.assertTrue(e.getError() == CASAdminException.TYPE_IS_FEATURE_FINAL);
      exc = true;
    }
    TestCase.assertTrue(exc);
  }

  public void initIndexes(FSIndexRepositoryMgr irm, TypeSystem ts) {
    FSIndexComparator comp = irm.createComparator();
    Type annotation = ts.getType(CAS.TYPE_NAME_ANNOTATION);
    comp.setType(annotation);
    comp.addKey(annotation.getFeatureByBaseName(CAS.FEATURE_BASE_NAME_BEGIN),
            FSIndexComparator.STANDARD_COMPARE);
    comp.addKey(annotation.getFeatureByBaseName(CAS.FEATURE_BASE_NAME_END),
            FSIndexComparator.REVERSE_STANDARD_COMPARE);
    LinearTypeOrderBuilder tob = irm.createTypeSortOrder();
    try {
      tob.add(new String[] { CAS.TYPE_NAME_ANNOTATION, SENT_TYPE, TOKEN_TYPE });
      comp.addKey(tob.getOrder(), FSIndexComparator.STANDARD_COMPARE);
    } catch (CASException e) {
      TestCase.assertTrue(false);
    }
    irm.createIndex(comp, ANNOT_BAG_INDEX, FSIndex.BAG_INDEX);
    irm.createIndex(comp, ANNOT_SET_INDEX, FSIndex.SET_INDEX);
    irm.createIndex(comp, ANNOT_SORT_INDEX, FSIndex.SORTED_INDEX);

  }
}
TOP

Related Classes of org.apache.uima.cas.test.CASTestSetup

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.