Package edu.ucla.sspace.wordsi

Source Code of edu.ucla.sspace.wordsi.RandomIndexingDependencyContextGenerator

/*
* Copyright (c) 2010, Lawrence Livermore National Security, LLC. Produced at
* the Lawrence Livermore National Laboratory. Written by Keith Stevens,
* kstevens@cs.ucla.edu OCEC-10-073 All rights reserved.
*
* This file is part of the C-Cat package and is covered under the terms and
* conditions therein.
*
* The S-Space package is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation and distributed hereunder to you.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
* EXPRESS OR IMPLIED ARE MADE.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
* NO REPRESENTATIONS OR WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY
* PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
* WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
* RIGHTS.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package edu.ucla.sspace.wordsi;

import edu.ucla.sspace.dependency.DependencyPath;
import edu.ucla.sspace.dependency.DependencyPathAcceptor;
import edu.ucla.sspace.dependency.DependencyPermutationFunction;
import edu.ucla.sspace.dependency.DependencyTreeNode;
import edu.ucla.sspace.dependency.FilteredDependencyIterator;

import edu.ucla.sspace.vector.CompactSparseVector;
import edu.ucla.sspace.vector.SparseDoubleVector;
import edu.ucla.sspace.vector.TernaryVector;

import java.util.Iterator;
import java.util.Map;

/**
* A {@link DependencyContextGenerator} that forms context vectors using a
* summation of index vectors.  Each index vector is fixed {@link TernaryVector}
* corresponding to each term in the corpus.  Only index vectors for terms
* reachable by a valid {@link DependencyPath} starting from the focus wordwill
* be applied to a particular context.
*
* @author Keith Stevens
*/
public class RandomIndexingDependencyContextGenerator
        implements DependencyContextGenerator {

    /**
     * The {@link DependencyPermutationFunction} that permutes {@link
     * TernaryVector}.
     */
    private final DependencyPermutationFunction<TernaryVector> permFunc;

    /**
     * A mapping from word forms to their index vectors.
     */
    private final Map<String, TernaryVector> indexMap;

    /**
     * The length of each index vector.
     */
    private final int indexVectorLength;

    /**
     * The maximum valid length.
     */
    private final int pathLength;

    /**
     * The filter that accepts only dependency paths that match predefined
     * criteria.
     */
    private final DependencyPathAcceptor acceptor;

    /**
     * Set to true when new words should not be auto assigned an index vector.
     */
    private boolean readOnly;

    /**
     * Creates a new {@link RandomIndexingDependencyContextGenerator}.
     *
     * @param permFunc the {@link DependencyPermutationFunction} responsible for
     *        permuting {@link TernaryVector}, which serve as index vectors.
     * @param acceptor The {@link DependencyPathAcceptor} used to validate
     *        {@link DependencyPath}s stemming from focus word.
     * @param indexMap A mapping from word forms to {@link TernaryVector}s.
     * @param indexVectorLength The length of each index vector.
     * @param pathLength The maximum acceptable length of any {@link
     *        DependencyPath}.
     */
    public RandomIndexingDependencyContextGenerator(
                    DependencyPermutationFunction<TernaryVector> permFunc,
                    DependencyPathAcceptor acceptor,
                    Map<String, TernaryVector> indexMap,
                    int indexVectorLength,
                    int pathLength) {
            this.permFunc = permFunc;
            this.acceptor = acceptor;
            this.indexMap = indexMap;
            this.indexVectorLength = indexVectorLength;
            this.pathLength = pathLength;
    }

    /**
     * {@inheritDoc}
     */
    public SparseDoubleVector generateContext(DependencyTreeNode[] tree,
                                              int focusIndex) {
        DependencyTreeNode focusNode = tree[focusIndex];

        SparseDoubleVector meaning = new CompactSparseVector(indexVectorLength);

        Iterator<DependencyPath> paths = new FilteredDependencyIterator(
                focusNode, acceptor, pathLength);

        while (paths.hasNext()) {
            DependencyPath path = paths.next();
            if (readOnly && !indexMap.containsKey(path.last().word()))
                continue;

            TernaryVector termVector = indexMap.get(path.last().word());
            if (permFunc != null)
                termVector = permFunc.permute(termVector, path);
            add(meaning, termVector);
        }
        return meaning;
    }

    /**
     * {@inheritDoc}
     */
    public int getVectorLength() {
        return indexVectorLength;
    }

    /**
     * {@inheritDoc}
     */
    public void setReadOnly(boolean readOnly) {
        this.readOnly = readOnly;
    }

    /**
     * Adds a {@link TernaryVector} to a {@link IntegerVector}
     */
    private void add(SparseDoubleVector dest, TernaryVector src) {
        for (int p : src.positiveDimensions())
            dest.add(p, 1);
        for (int n : src.negativeDimensions())
            dest.add(n, -1);
    }
}
TOP

Related Classes of edu.ucla.sspace.wordsi.RandomIndexingDependencyContextGenerator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.