Package edu.ucla.sspace.dv

Source Code of edu.ucla.sspace.dv.MediumMiniparTemplateAcceptor

/*
* Copyright 2010 David Jurgens
*
* This file is part of the S-Space package and is covered under the terms and
* conditions therein.
*
* The S-Space package is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation and distributed hereunder to you.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
* EXPRESS OR IMPLIED ARE MADE.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
* NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
* PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
* WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
* RIGHTS.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package edu.ucla.sspace.dv;

import edu.ucla.sspace.dependency.DependencyPath;
import edu.ucla.sspace.dependency.DependencyPathAcceptor;
import edu.ucla.sspace.dependency.DependencyTreeNode;

import edu.ucla.sspace.text.IteratorFactory;

import java.util.HashSet;
import java.util.Set;


/**     
* A {@code DependencyPathAcceptor} that accepts the minimum set of path
* templates specified by <a
* href="http://www.nlpado.de/~sebastian/pub/papers/cl07_pado.pdf">Padó and
* Lapata (2007)</a>. This acceptor is designed to
* be used with the <a
* link="http://webdocs.cs.ualberta.ca/~lindek/minipar.htm">Minipar</a> parser
* and its associated part of speech tag set.
*
* @see MinimumMiniparTemplateAcceptor
* @see MaximumMiniparTemplateAcceptor
*/
public class MediumMiniparTemplateAcceptor implements DependencyPathAcceptor {

    static final Set<String> MEDIUM_TEMPLATES = new HashSet<String>();

    static {
        MEDIUM_TEMPLATES.add("A:mod:N,N:lex-mod:(null)");
        MEDIUM_TEMPLATES.add("A:mod:N,N:nn:N");
        MEDIUM_TEMPLATES.add("A:subj:N,N:lex-mod:(null)");
        MEDIUM_TEMPLATES.add("A:subj:N,N:nn:N");
        MEDIUM_TEMPLATES.add("N:conj:N,N:lex-mod:(null)");
        MEDIUM_TEMPLATES.add("N:conj:N,N:nn:N");
        MEDIUM_TEMPLATES.add("N:gen:N,N:lex-mod:(null)");
        MEDIUM_TEMPLATES.add("N:gen:N,N:nn:N");
        MEDIUM_TEMPLATES.add("N:nn:N,N:conj:N");
        MEDIUM_TEMPLATES.add("N:nn:N,N:conj:N,N:nn:N");
        MEDIUM_TEMPLATES.add("N:nn:N,N:gen:N");
        MEDIUM_TEMPLATES.add("N:nn:N,N:gen:N,N:nn:N");
        MEDIUM_TEMPLATES.add("N:nn:N,N:mod:A");
        MEDIUM_TEMPLATES.add("N:nn:N,N:mod:Pred");
        MEDIUM_TEMPLATES.add("N:nn:N,N:obj:V");
        MEDIUM_TEMPLATES.add("N:nn:N,N:subj:A");
        MEDIUM_TEMPLATES.add("N:nn:N,N:subj:V");
        MEDIUM_TEMPLATES.add("(null):lex-mod:N,N:conj:N");
        MEDIUM_TEMPLATES.add("(null):lex-mod:N,N:conj:N,N:lex-mod:(null)");
        MEDIUM_TEMPLATES.add("(null):lex-mod:N,N:gen:N");
        MEDIUM_TEMPLATES.add("(null):lex-mod:N,N:gen:N,N:lex-mod:(null)");
        MEDIUM_TEMPLATES.add("(null):lex-mod:N,N:mod:A");
        MEDIUM_TEMPLATES.add("(null):lex-mod:N,N:mod:Pred");
        MEDIUM_TEMPLATES.add("(null):lex-mod:N,N:obj:V");
        MEDIUM_TEMPLATES.add("(null):lex-mod:N,N:subj:A");
        MEDIUM_TEMPLATES.add("(null):lex-mod:N,N:subj:V");
        MEDIUM_TEMPLATES.add("Prep:mod:N,N:lex-mod:(null)");
        MEDIUM_TEMPLATES.add("Prep:mod:N,N:nn:N");
        MEDIUM_TEMPLATES.add("V:obj:N,N:lex-mod:(null)");
        MEDIUM_TEMPLATES.add("V:obj:N,N:nn:N");
        MEDIUM_TEMPLATES.add("V:subj:N,N:lex-mod:(null)");
        MEDIUM_TEMPLATES.add("V:subj:N,N:nn:N");
    }
   
    /**
     * Creates the acceptor with its standard templates
     */
    public MediumMiniparTemplateAcceptor() { }
  
    /**
     * Returns {@code true} if the path matches one of the predefined templates
     *
     * @param path a dependency path
     *
     * @return {@code true} if the path matches a template
     */
    public boolean accepts(DependencyPath path) {
        return acceptsInternal(path);
    }

    /**
     * A package-private method that checks whether the path matches any of the
     * predefined templates.  This method is provided so other template classes
     * have access to the accept logic used by this class.
     *
     * @param path a dependency path
     *
     * @return {@code true} if the path matches a template
     */
    static boolean acceptsInternal(DependencyPath path) {
        // First check whether the minimum template acceptor would allow this
        // path
        if (MinimumMiniparTemplateAcceptor.acceptsInternal(path))
            return true;

        // Filter out paths that can't match the template due to length
        if (path.length() > 3)
            return false;

        int pathLength = path.length();

        // The medium set of templates contains "null" matches which are wild
        // cards against any part of speech.  We handle these by generating
        // three possible patterns that could represent the provided path.  If
        // any of these patterns are found in the medium set, the path is valid.
        StringBuilder nullStart = new StringBuilder(pathLength * 16);
        StringBuilder nullEnd = new StringBuilder(pathLength * 16);
        StringBuilder noNulls = new StringBuilder(pathLength * 16);

        // Iterate over each pair in the path and create the pattern string that
        // represents this path.  The pattern string is pos:rel:pos[,...] .
        DependencyTreeNode first = path.first();
        for (int i = 1; i < pathLength; ++i) {
            DependencyTreeNode second = path.getNode(i);
            // Check that the nodes weren't filtered out.  If so reject the path
            // even if the part of speech and relation text may have matched a
            // template.
            if (first.word().equals(IteratorFactory.EMPTY_TOKEN))
                return false;

            // Get the relation between the two nodes
            String rel = path.getRelation(i - 1);
            String firstPos = first.pos();
            String secPos = second.pos();
           
            nullStart.append((i == 0) ? "(null)" : firstPos);
            nullStart.append(":").append(rel).append(":").append(secPos);

            nullEnd.append(firstPos).append(":").append(rel).append(":");
            nullEnd.append((i + 1 == pathLength) ? "(null)" : secPos);

            noNulls.append(firstPos).append(":").append(rel)
                .append(":").append(secPos);

            // Check whether more elements existing, and if so, add the ','
            if (i + 1 < pathLength) {
                nullStart.append(",");
                nullEnd.append(",");
                noNulls.append(",");
            }

            // Last, shift over the node
            first = second;
        }
       
        // Extra case for the last token in the path
        if (first.word().equals(IteratorFactory.EMPTY_TOKEN))
            return false;

        return MEDIUM_TEMPLATES.contains(noNulls.toString())
            || MEDIUM_TEMPLATES.contains(nullStart.toString())
            || MEDIUM_TEMPLATES.contains(nullEnd.toString());
    }

    /**
     * {@inheritDoc}
     */
    public int maxPathLength() {
        return 4;
    }
}
TOP

Related Classes of edu.ucla.sspace.dv.MediumMiniparTemplateAcceptor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.