Package org.apache.incubator.uima.regex

Examples of org.apache.incubator.uima.regex.ConceptSetDocument


   public Concept[] parseConceptFile(String conceptFilePathName,
         InputStream conceptFileStream) throws ResourceInitializationException {
      ArrayList<Concept> conceptList = new ArrayList<Concept>();

      // parse regex concept file and extract content to local objects
      ConceptSetDocument conceptSetDoc;
      try {
         conceptSetDoc = ConceptSetDocument.Factory.parse(conceptFileStream);
      } catch (Exception ex) {
         throw new RegexAnnotatorConfigException(
               "regex_annotator_error_parsing_rule_set_file",
               new Object[] { conceptFilePathName }, ex);
      }

      // validate input file
      ArrayList<XmlError> validationErrors = new ArrayList<XmlError>();
      XmlOptions validationOptions = new XmlOptions();
      validationOptions.setErrorListener(validationErrors);

      boolean isValid = conceptSetDoc.validate(validationOptions);

      // output the errors if the XML is invalid.
      if (!isValid) {
         Iterator<XmlError> iter = validationErrors.iterator();
         StringBuffer errorMessages = new StringBuffer();
         while (iter.hasNext()) {
            errorMessages.append("\n>> ");
            errorMessages.append(iter.next());
         }
         throw new RegexAnnotatorConfigException(
               "regex_annotator_error_xml_validation", new Object[] {
                     conceptFilePathName, errorMessages.toString() });
      }

      // get concept file regex variables and store them all to the variables
      // object
      VariablesDocument.Variables variablesDoc = conceptSetDoc.getConceptSet()
            .getVariables();
      RegexVariables variables = null;
      if (variablesDoc != null) {
         VariableDocument.Variable[] varArray = variablesDoc.getVariableArray();
         if (varArray.length > 0) {
            variables = new RegexVariables_impl();
            for (int i = 0; i < varArray.length; i++) {
               String value = varArray[i].getValue().replaceAll("\\\\", "\\\\\\\\");
               variables.addVariable(varArray[i].getName(), value);
            }
         }
      }

      // ***************************************************
      // get the concepts from the concept file document
      // ***************************************************
      ConceptSetDocument.ConceptSet conceptSet = conceptSetDoc.getConceptSet();
      ConceptDocument.Concept[] concepts = conceptSet.getConceptArray();
      for (int i = 0; i < concepts.length; i++) {
         // get concept meta data
         String conceptName = concepts[i].getName();
         boolean processAllRules = concepts[i].getProcessAllRules();
View Full Code Here

TOP

Related Classes of org.apache.incubator.uima.regex.ConceptSetDocument

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.