Package org.apache.ctakes.ytex.kernel

Source Code of org.apache.ctakes.ytex.kernel.IntrinsicInfoContentEvaluatorImpl$IntrinsicICInfo

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.ytex.kernel;

import gnu.trove.iterator.TIntIterator;
import gnu.trove.set.TIntSet;
import gnu.trove.set.hash.TIntHashSet;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.lang.ref.SoftReference;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.WeakHashMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.ctakes.ytex.kernel.dao.ClassifierEvaluationDao;
import org.apache.ctakes.ytex.kernel.dao.ConceptDao;
import org.apache.ctakes.ytex.kernel.model.ConcRel;
import org.apache.ctakes.ytex.kernel.model.ConceptGraph;
import org.apache.ctakes.ytex.kernel.model.FeatureEvaluation;
import org.apache.ctakes.ytex.kernel.model.FeatureRank;


public class IntrinsicInfoContentEvaluatorImpl implements
    IntrinsicInfoContentEvaluator {
  public static class IntrinsicICInfo {
    private ConcRel concept;

    private int leafCount = 0;

    private int subsumerCount = 0;

    public IntrinsicICInfo(ConcRel concept) {
      this.concept = concept;
    }

    public ConcRel getConcept() {
      return concept;
    }

    public int getLeafCount() {
      return leafCount;
    }

    public int getSubsumerCount() {
      return subsumerCount;
    }

    public void setConcept(ConcRel concept) {
      this.concept = concept;
    }

    public void setLeafCount(int leafCount) {
      this.leafCount = leafCount;
    }

    public void setSubsumerCount(int subsumerCount) {
      this.subsumerCount = subsumerCount;
    }
  }

  // commons-logging logger shared by all instances of this evaluator
  private static final Log log = LogFactory
      .getLog(IntrinsicInfoContentEvaluatorImpl.class);
  // multiplying a natural logarithm by this factor converts it to base 2
  private static final double log2adjust = 1d / Math.log(2);

  /**
   * @param args
   * @throws IOException
   */
  public static void main(String[] args) throws IOException {
    Properties props = (Properties) KernelContextHolder
        .getApplicationContext().getBean("ytexProperties");
    props.putAll(System.getProperties());
    if (!props.containsKey("org.apache.ctakes.ytex.conceptGraphName")) {
      System.err.println("error: org.apache.ctakes.ytex.conceptGraphName not specified");
      System.exit(1);
    } else {
      IntrinsicInfoContentEvaluator corpusEvaluator = KernelContextHolder
          .getApplicationContext().getBean(
              IntrinsicInfoContentEvaluator.class);
      corpusEvaluator.evaluateIntrinsicInfoContent(props);
      System.exit(0);
    }
  }

  // DAO used by storeIntrinsicIC to delete and save feature evaluations
  private ClassifierEvaluationDao classifierEvaluationDao;

  // DAO used to load the concept graph by name
  private ConceptDao conceptDao;

  private double computeIC(IntrinsicICInfo icInfo, int maxLeaves) {
    // |leaves(c)|/|subsumers(c)| + 1
    double denom = log2adjust
        * Math.log((double) icInfo.getLeafCount()
            / (double) icInfo.getSubsumerCount() + 1d);
    // max_leaves + 1
    double num = log2adjust * Math.log((double) maxLeaves + 1d);
    if (denom == Double.NaN || num == Double.NaN) {
      log.error("IC = NaN for " + icInfo.getConcept().getConceptID()
          + ", leafCount=" + icInfo.getLeafCount()
          + ", subsumerCount = " + icInfo.getSubsumerCount());
      return -1d;
    } else
      return num - denom;
  }

  /**
   * recursively compute the number of leaves. fill in the icInfoMap as we go
   * along
   *
   * @param concept
   *            concept for which we should get the leaves
   * @param leafCache
   *            cache of concept's leaves
   * @param icInfoMap
   *            to be updated with leaf counts
   * @param cg
   * @param w
   * @param visitedNodes
   *            list of nodes that have already been visited - we don't need
   *            to revisit them when getting the leaves
   * @return
   * @throws IOException
   */
  private TIntSet getLeaves(ConcRel concept,
      SoftReference<TIntSet>[] leafCache,
      Map<String, IntrinsicICInfo> icInfoMap, ConceptGraph cg,
      BufferedWriter w, TIntSet visitedNodes) throws IOException {
    // look in cache
    SoftReference<TIntSet> refLeaves = leafCache[concept.getNodeIndex()];
    if (refLeaves != null && refLeaves.get() != null) {
      return refLeaves.get();
    }
    // not in cache - compute recursively
    TIntSet leaves = new TIntHashSet();
    leafCache[concept.getNodeIndex()] = new SoftReference<TIntSet>(leaves);
    if (concept.isLeaf()) {
      // for leaves, just add the concept id
      leaves.add(concept.getNodeIndex());
    } else {
      IntrinsicICInfo icInfo = icInfoMap.get(concept.getConceptID());
      // have we already computed the leaf count for this node?
      // if yes, then we can ignore previously visited nodes
      // if no, then compute it now and revisit previously visited nodes
      // if we have to
      boolean needLeaves = (icInfo != null && icInfo.getLeafCount() == 0);
      TIntSet visitedNodesLocal = visitedNodes;
      if (needLeaves || visitedNodesLocal == null) {
        // allocate a set to keep track of nodes we've already visited
        // so that we don't revisit them. if we have already computed
        // this node's leaf count then we reuse whatever the caller gave
        // us if non null, else allocate a new one.
        // if we haven't already computed this node's leaf count,
        // allocate a new set to avoid duplications in the traversal for
        // this node
        visitedNodesLocal = new TIntHashSet();
      }
      // for inner nodes, recurse
      for (ConcRel child : concept.getChildren()) {
        // if we've already visited a node, then don't bother adding
        // that node's leaves - we already have them
        if (!visitedNodesLocal.contains(child.getNodeIndex())) {
          leaves.addAll(getLeaves(child, leafCache, icInfoMap, cg, w,
              visitedNodesLocal));
        }
      }
      // add this node to the set of visited nodes so we know not to
      // revisit. This is only of importance if the caller gave us
      // a non-empty set.
      if (visitedNodes != null && visitedNodes != visitedNodesLocal) {
        visitedNodes.add(concept.getNodeIndex());
        visitedNodes.addAll(visitedNodesLocal);
      }
      // update the leaf count if we haven't done so already
      if (needLeaves) {
        icInfo.setLeafCount(leaves.size());
        // output leaves if desired
        if (w != null) {
          w.write(concept.getConceptID());
          w.write("\t");
          w.write(Integer.toString(leaves.size()));
          w.write("\t");
          TIntIterator iter = leaves.iterator();
          while (iter.hasNext()) {
            w.write(cg.getConceptList().get(iter.next())
                .getConceptID());
            w.write(" ");
          }
          w.newLine();
        }
      }
    }
    return leaves;
  }

  // /**
  // * add/update icInfoMap entry for concept with the concept's leaf count
  // *
  // * @param concept
  // * @param icInfoMap
  // * @param w
  // * @param subsumerMap
  // * @throws IOException
  // */
  // private void computeLeafCount(ConcRel concept,
  // Map<String, IntrinsicICInfo> icInfoMap,
  // SoftReference<TIntSet>[] leafCache, ConceptGraph cg,
  // BufferedWriter w) throws IOException {
  // // see if we already computed this
  // IntrinsicICInfo icInfo = icInfoMap.get(concept.getConceptID());
  // if (icInfo != null && icInfo.getLeafCount() > 0) {
  // return;
  // }
  // // if not, figure it out
  // if (icInfo == null) {
  // icInfo = new IntrinsicICInfo(concept);
  // icInfoMap.put(concept.getConceptID(), icInfo);
  // }
  // // for leaves the default (0) is correct
  // if (!concept.isLeaf()) {
  // TIntSet leaves = this.getLeaves(concept, leafCache);
  // icInfo.setLeafCount(leaves.size());
  // if (w != null) {
  // w.write(concept.getConceptID());
  // w.write("\t");
  // w.write(Integer.toString(leaves.size()));
  // w.write("\t");
  // TIntIterator iter = leaves.iterator();
  // while (iter.hasNext()) {
  // w.write(cg.getConceptList().get(iter.next()).getConceptID());
  // w.write(" ");
  // }
  // w.newLine();
  // }
  // }
  // // recurse to parents
  // for (ConcRel parent : concept.getParents()) {
  // computeLeafCount(parent, icInfoMap, leafCache, cg, w);
  // }
  // }

  /**
   * add/update icInfoMap entry for concept with the concept's subsumer count
   *
   * @param concept
   * @param icInfoMap
   * @param subsumerMap
   * @param w
   * @throws IOException
   */
  private void computeSubsumerCount(ConcRel concept,
      Map<String, IntrinsicICInfo> icInfoMap,
      Map<String, Set<String>> subsumerMap, short[] depthArray,
      BufferedWriter w) throws IOException {
    // see if we already computed this
    IntrinsicICInfo icInfo = icInfoMap.get(concept.getConceptID());
    if (icInfo != null && icInfo.getSubsumerCount() > 0) {
      return;
    }
    // if not, figure it out
    if (icInfo == null) {
      icInfo = new IntrinsicICInfo(concept);
      icInfoMap.put(concept.getConceptID(), icInfo);
    }
    Set<String> subsumers = this.getSubsumers(concept, subsumerMap,
        depthArray);
    if (w != null) {
      w.write(concept.getConceptID());
      w.write("\t");
      w.write(Integer.toString(subsumers.size()));
      w.write("\t");
      w.write(subsumers.toString());
      w.newLine();
    }
    icInfo.setSubsumerCount(subsumers.size());
    // recursively compute the children's subsumer counts
    for (ConcRel child : concept.getChildren()) {
      computeSubsumerCount(child, icInfoMap, subsumerMap, depthArray, w);
    }
  }

  /*
   * (non-Javadoc)
   *
   * @see
   * org.apache.ctakes.ytex.kernel.IntrinsicInfoContentEvaluator#evaluateIntrinsicInfoContent
   * (java.lang.String)
   */
  @Override
  public void evaluateIntrinsicInfoContent(final Properties props)
      throws IOException {
    String conceptGraphName = props.getProperty("org.apache.ctakes.ytex.conceptGraphName");
    String conceptGraphDir = props.getProperty("org.apache.ctakes.ytex.conceptGraphDir",
        System.getProperty("java.io.tmpdir"));
    ConceptGraph cg = this.conceptDao.getConceptGraph(conceptGraphName);
    evaluateIntrinsicInfoContent(conceptGraphName, conceptGraphDir, cg);
  }

  @Override
  public void evaluateIntrinsicInfoContent(String conceptGraphName,
      String conceptGraphDir, ConceptGraph cg) throws IOException {
    log.info("computing subsumer counts");
    // compute the subsumer count
    Map<String, IntrinsicICInfo> icInfoMap = new HashMap<String, IntrinsicICInfo>();
    Map<String, Set<String>> subsumerMap = new WeakHashMap<String, Set<String>>();
    short[] depthArray = new short[cg.getConceptList().size()];
    BufferedWriter w = null;
    try {
      w = this.getOutputFile(conceptGraphName, conceptGraphDir,
          "subsumer");
      computeSubsumerCount(cg.getConceptMap().get(cg.getRoot()),
          icInfoMap, subsumerMap, depthArray, w);
    } finally {
      if (w != null) {
        try {
          w.close();
        } catch (IOException e) {
        }
      }
    }
    subsumerMap = null;
    log.info("computing max leaves");
    // get the leaves in this concept graph
    Set<String> leafSet = null;
    try {
      w = this.getOutputFile(conceptGraphName, conceptGraphDir, "allleaf");
      leafSet = this.getAllLeaves(cg, w);
    } finally {
      if (w != null) {
        try {
          w.close();
        } catch (IOException e) {
        }
      }
    }
    log.info("computing leaf counts");
    @SuppressWarnings("unchecked")
    SoftReference<TIntSet>[] leafCache = (SoftReference<TIntSet>[]) Array
        .newInstance((new SoftReference<TIntSet>(new TIntHashSet()))
            .getClass(), cg.getConceptList().size());
    // compute leaf count of all concepts in this graph
    try {
      w = this.getOutputFile(conceptGraphName, conceptGraphDir, "leaf");
      // for (String leaf : leafSet) {
      // computeLeafCount(cg.getConceptMap().get(leaf), icInfoMap,
      // leafCache, cg, w);
      // }
      this.getLeaves(cg.getConceptMap().get(cg.getRoot()), leafCache,
          icInfoMap, cg, w, null);
    } finally {
      if (w != null) {
        try {
          w.close();
        } catch (IOException e) {
        }
      }
    }
    leafCache = null;
    log.info("storing intrinsic ic");
    storeIntrinsicIC(conceptGraphName, leafSet.size(), icInfoMap,
        depthArray, cg);
    log.info("finished computing intrinsic ic");
  }

  private BufferedWriter getOutputFile(final String conceptGraphName,
      final String conceptGraphDir, String type) throws IOException {
    if ("true".equalsIgnoreCase(System
        .getProperty("org.apache.ctakes.ytex.ic.debug", "false"))) {
      return new BufferedWriter(new FileWriter(FileUtil.addFilenameToDir(
          conceptGraphDir, conceptGraphName + "-" + type + ".txt")));
    } else
      return null;
  }

  public Set<String> getAllLeaves(ConceptGraph cg, BufferedWriter w)
      throws IOException {
    Set<String> leafSet = new HashSet<String>();
    for (Map.Entry<String, ConcRel> con : cg.getConceptMap().entrySet()) {
      if (con.getValue().isLeaf()) {
        leafSet.add(con.getValue().getConceptID());
      }
    }
    if (w != null) {
      w.write(Integer.toString(leafSet.size()));
      w.write("\t");
      w.write(leafSet.toString());
      w.newLine();
    }
    return leafSet;
  }

  public ClassifierEvaluationDao getClassifierEvaluationDao() {
    return classifierEvaluationDao;
  }

  public ConceptDao getConceptDao() {
    return conceptDao;
  }

  // private TIntSet getLeaves(ConcRel concept,
  // SoftReference<TIntSet>[] leafCache) {
  // // look in cache
  // SoftReference<TIntSet> refLeaves = leafCache[concept.getNodeIndex()];
  // if (refLeaves != null && refLeaves.get() != null) {
  // return refLeaves.get();
  // }
  // // not in cache - compute recursively
  // TIntSet leaves = new TIntHashSet();
  // leafCache[concept.getNodeIndex()] = new SoftReference<TIntSet>(leaves);
  // if (concept.isLeaf()) {
  // // for leaves, just add the concept id
  // leaves.add(concept.getNodeIndex());
  // } else {
  // // for inner nodes, recurse
  // for (ConcRel child : concept.getChildren()) {
  // leaves.addAll(getLeaves(child, leafCache));
  // }
  // }
  // return leaves;
  // }

  /**
   * recursively compute the subsumers of a concept
   *
   * @param concept
   * @param subsumerMap
   * @return
   */
  private Set<String> getSubsumers(ConcRel concept,
      Map<String, Set<String>> subsumerMap, short depthArray[]) {
    // look in cache
    if (subsumerMap.containsKey(concept.getConceptID()))
      return subsumerMap.get(concept.getConceptID());
    // not in cache - compute recursively
    Set<String> subsumers = new HashSet<String>();
    boolean calcDepth = depthArray[concept.getNodeIndex()] == 0;
    short parentMaxDepth = 0;
    if (concept.getParents() != null && !concept.getParents().isEmpty()) {
      // parents - recurse
      for (ConcRel parent : concept.getParents()) {
        subsumers.addAll(getSubsumers(parent, subsumerMap, depthArray));
        // get the deepest parent
        if (calcDepth) {
          short parentDepth = depthArray[parent.getNodeIndex()];
          if (parentDepth > parentMaxDepth)
            parentMaxDepth = parentDepth;
        }
      }
    }
    if (calcDepth)
      depthArray[concept.getNodeIndex()] = (short) (parentMaxDepth + 1);
    // add the concept itself to the set of subsumers
    subsumers.add(concept.getConceptID());
    // add this to the cache - copy the key so that this can be gc'ed as
    // needed
    subsumerMap.put(new String(concept.getConceptID()), subsumers);
    return subsumers;
  }

  public void setClassifierEvaluationDao(
      ClassifierEvaluationDao classifierEvaluationDao) {
    this.classifierEvaluationDao = classifierEvaluationDao;
  }

  public void setConceptDao(ConceptDao conceptDao) {
    this.conceptDao = conceptDao;
  }

  private void storeIntrinsicIC(String conceptGraphName, int maxLeaves,
      Map<String, IntrinsicICInfo> icInfoMap, short depthArray[],
      ConceptGraph cg) {
    FeatureEvaluation fe = new FeatureEvaluation();
    fe.setEvaluationType("intrinsic-infocontent");
    fe.setParam2(conceptGraphName);
    List<FeatureRank> listFeatureRank = new ArrayList<FeatureRank>(
        icInfoMap.size());
    double maxIC = 0d;
    short maxDepth = 0;
    for (IntrinsicICInfo icInfo : icInfoMap.values()) {
      ConcRel cr = icInfo.getConcept();
      short depth = depthArray[cr.getNodeIndex()];
      cr.setDepth(depth);
      if (depth > maxDepth)
        maxDepth = depth;
      double ic = computeIC(icInfo, maxLeaves);
      cr.setIntrinsicInfoContent(ic);
      if (ic > maxIC)
        maxIC = ic;
      if (log.isDebugEnabled())
        log.debug(icInfo.getConcept().getConceptID() + "=" + ic);
      listFeatureRank.add(new FeatureRank(fe, icInfo.getConcept()
          .getConceptID(), ic, depthArray[icInfo.getConcept()
          .getNodeIndex()]));
    }
    cg.setDepthMax(maxDepth);
    cg.setIntrinsicICMax(maxIC);
    if ("true".equalsIgnoreCase(System
        .getProperty("org.apache.ctakes.ytex.ic.debug", "false"))) {
      this.classifierEvaluationDao.deleteFeatureEvaluation(null, null,
          null, fe.getEvaluationType(), null, 0d, conceptGraphName);
      this.classifierEvaluationDao.saveFeatureEvaluation(fe,
          listFeatureRank);
    }
  }
}
TOP

Related Classes of org.apache.ctakes.ytex.kernel.IntrinsicInfoContentEvaluatorImpl$IntrinsicICInfo

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.