Package org.apache.uima.flow.impl

Source Code of org.apache.uima.flow.impl.CapabilityLanguageFlowController

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.uima.flow.impl;

import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.uima.UIMAFramework;
import org.apache.uima.UIMARuntimeException;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.analysis_engine.ResultSpecification;
import org.apache.uima.analysis_engine.TypeOrFeature;
import org.apache.uima.analysis_engine.metadata.AnalysisEngineMetaData;
import org.apache.uima.analysis_engine.metadata.CapabilityLanguageFlow;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.text.Language;
import org.apache.uima.flow.CasFlowController_ImplBase;
import org.apache.uima.flow.Flow;
import org.apache.uima.flow.FlowControllerContext;
import org.apache.uima.flow.FlowControllerDescription;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.metadata.Capability;
import org.apache.uima.util.InvalidXMLException;
import org.apache.uima.util.XMLInputSource;

/**
* FlowController for the CapabilityLanguageFlow, which uses a linear flow but may skip some of the
* AEs in the flow if they do not handle the language of the current document or if their outputs
* have already been produced by a previous AE in the flow.
*/
public class CapabilityLanguageFlowController extends CasFlowController_ImplBase {
  private List<AnalysisSequenceCapabilityNode> mStaticSequence;

  private Map<String, AnalysisEngineMetaData> mComponentMetaDataMap;

  private Map<String, List<AnalysisSequenceCapabilityNode>> mFlowTable;
 
  private final Map<String, ResultSpecification> lastResultSpecForComponent =
    new HashMap<String, ResultSpecification>();

  /**
   * main language separator e.g 'en' and 'en-US'
   */
  private static final char LANGUAGE_SEPARATOR = '-';

  /*
   * (non-Javadoc)
   *
   * @see org.apache.uima.flow.FlowController#initialize(FlowControllerContext)
   */
  public void initialize(FlowControllerContext aContext) throws ResourceInitializationException {
    super.initialize(aContext);
    mComponentMetaDataMap = (Map<String, AnalysisEngineMetaData>)aContext.getAnalysisEngineMetaDataMap();

    // build a list of AnalysisSequenceNodes from the capabilityLanguageFlow
    mStaticSequence = new ArrayList<AnalysisSequenceCapabilityNode>();
    CapabilityLanguageFlow flowConstraints = (CapabilityLanguageFlow) aContext
            .getAggregateMetadata().getFlowConstraints();
    for (String aeKey : flowConstraints.getCapabilityLanguageFlow()) {
      mStaticSequence.add(
          new AnalysisSequenceCapabilityNode(
              aeKey,
              mComponentMetaDataMap.get(aeKey).getCapabilities(),
              null));
    }

    // compute flow table with the specified capabilities
    mFlowTable = computeFlowTable(aContext.getAggregateMetadata().getCapabilities());
  }

  /*
   * (non-Javadoc)
   *
   * @see org.apache.uima.flow.CasFlowController_ImplBase#computeFlow(org.apache.uima.cas.CAS)
   */
  public Flow computeFlow(CAS aCAS) throws AnalysisEngineProcessException {
    CapabilityLanguageFlowObject flow = new CapabilityLanguageFlowObject(mFlowTable, this);
    flow.setCas(aCAS);
    return flow;
  }

  /**
   * method computeFlowTable create the flow table for faster processing. The flow table includes
   * the corresponding flow sequence for all languages in the capabilities
   *
   * @param aCapabilities
   *          aggregate engine capabilities
   * @return Map - flow table includes all sequences for all languages
   */
  protected Map<String, List<AnalysisSequenceCapabilityNode>> computeFlowTable(Capability[] aCapabilities) {
    // create flowTable
    Map<String, List<AnalysisSequenceCapabilityNode>> flowTable =
      new HashMap<String, List<AnalysisSequenceCapabilityNode>>();

    // get all languages from the capabilities
    Set<String> languages = new HashSet<String>();
    for (Capability capability : aCapabilities) {
      for (String capabilityLanguage : capability.getLanguagesSupported()) {
        languages.add(capabilityLanguage);
      }
    }

    // create flow table with sequences for all languages
    for (String capabilityLanguage : languages) {
      flowTable.put(capabilityLanguage, computeSequence(capabilityLanguage, aCapabilities));
    }
   
    return flowTable;
  }

  /**
   * method computeSequence creates a capabilityLanguageAnalysisSequence for the given language
   *
   * @param language
   *          current language
   * @param aCapabilities
   *          output capabilities of the aggregate engine
   *
   * @return List - capabilityLanguageAnalysisSequence for the current language
   */
  protected List<AnalysisSequenceCapabilityNode> computeSequence(String language, Capability[] aCapabilities) {
    language = Language.normalize(language)// lower-cases, replaces _ with -, changes null to x-unspecified

    // create resultSpec from the current aggregate capabilities
    ResultSpecification aggrResultsToProduce = UIMAFramework.getResourceSpecifierFactory()
            .createResultSpecification();

    if (aCapabilities != null) {
      aggrResultsToProduce.addCapabilities(aCapabilities);
    } else {
      return null;
    }

    // create array list for the current sequence
    List<AnalysisSequenceCapabilityNode> newSequence = new ArrayList<AnalysisSequenceCapabilityNode>();

    // loop over all annotators that should be called
    // In this loop we will gradually reduce the set of output capabilities
    for (int sequenceIndex = 0; sequenceIndex < mStaticSequence.size(); sequenceIndex++) {
      // get array of output capabilities for the current language from the current result spec
      TypeOrFeature[] tofsNeeded = aggrResultsToProduce.getResultTypesAndFeatures(language);

      // Augment these outputCapabilities if the language-spec is for a country, to
      // include the outputCapabilities for the language without the country-spec.
     
      // strip language extension if available
      int index = language.indexOf(LANGUAGE_SEPARATOR);

      // if country extension is available
      if (index >= 0) {
        // create Set for outputSpecs, so we can eliminate duplicates
        Set<TypeOrFeature> outputSpec = new HashSet<TypeOrFeature>();

        // add language with country extension removed,
        // to the existing output capabilities (or if non exist, just use
        // the capabilities for the language without the country extension)
        if (tofsNeeded.length > 0) {
          // copy all existing capabilities to the Set
          for (TypeOrFeature outputCapability : tofsNeeded) {
            outputSpec.add(outputCapability);
          }

          // get array of output capabilities only for the language without country extension
          tofsNeeded = aggrResultsToProduce.getResultTypesAndFeatures(language.substring(0, index));

          // add language output capabilities to the Set
          for (TypeOrFeature outputCapability : tofsNeeded) {
            outputSpec.add(outputCapability);
          }

          // convert all output capabilities to a outputCapabilities array
          tofsNeeded = new TypeOrFeature[outputSpec.size()];
          outputSpec.toArray(tofsNeeded);
        } else { // for language with country extension was noting found       
          // get array of output capabilities with the new main language without country extension
          tofsNeeded = aggrResultsToProduce.getResultTypesAndFeatures(language.substring(0, index));
        }
      }

      // current analysis node which contains the current analysis engine
      AnalysisSequenceCapabilityNode node;

      // result spec for the current analysis engine
      ResultSpecification currentAnalysisResultSpec = null;

      // flag if current analysis engine should be called or not
      boolean shouldEngineBeCalled = false;

      // check output capabilities from the current result spec

      // get next analysis engine from the sequence node
      node = mStaticSequence.get(sequenceIndex);

      // get capability container from the current analysis engine
      ResultSpecification delegateProduces = node.getCapabilityContainer();

      // create current analysis result spec without any language information
      currentAnalysisResultSpec = UIMAFramework.getResourceSpecifierFactory()
              .createResultSpecification();

      // check if engine should be called -
      //   loop over all remaining output capabilities of the aggregate's result spec
      //     to see if this component of the aggregate produces that type or feature,
      //     for this language
      for (TypeOrFeature tof : tofsNeeded) {
        if ((tof.isType() && delegateProduces.containsType(tof.getName(), language)) ||
            (!tof.isType() && delegateProduces.containsFeature(tof.getName(), language))) {
//        if (capabilityContainer.hasOutputTypeOrFeature(tof, language, true)) {
          currentAnalysisResultSpec.addResultTypeOrFeature(tof);
          shouldEngineBeCalled = true;
          // remove current ToF from the result spec
          aggrResultsToProduce.removeTypeOrFeature(tof);
        }
      }
     
      // skip engine if not output capability match

      // should be called is false if this engine produces none of the
      //   needed outputs of the aggregate
      if (shouldEngineBeCalled == true) {
        // tell this component which output types/features need to be produced
        //   note: As an exception to the way normal result-specifications are produced,
        //         here we *don't* add the types/features which are input to
        //         other delegates need to be produced.
        //         This is for backward compatibility.
        node.setResultSpec(currentAnalysisResultSpec);

        // add note to the current sequence
        newSequence.add((AnalysisSequenceCapabilityNode)node.clone());
      } else {
      // engine should not be called, but add null to the sequence to track that
      // engine should not be called
        newSequence.add(null);
      }
    } // loop over all delegates in the flow sequence

    return newSequence;
  }

  public static FlowControllerDescription getDescription() {
    URL descUrl = FixedFlowController.class
            .getResource("/org/apache/uima/flow/CapabilityLanguageFlowController.xml");
    FlowControllerDescription desc;
    try {
      desc = (FlowControllerDescription) UIMAFramework.getXMLParser().parse(
              new XMLInputSource(descUrl));
    } catch (InvalidXMLException e) {
      throw new UIMARuntimeException(e);
    } catch (IOException e) {
      throw new UIMARuntimeException(e);
    }
    return desc;
  }

  public Map<String, ResultSpecification> getLastResultSpecForComponent() {
    return lastResultSpecForComponent;
  }
}
TOP

Related Classes of org.apache.uima.flow.impl.CapabilityLanguageFlowController

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.