Package org.openbel.framework.compiler

Source Code of org.openbel.framework.compiler.PhaseThreeImpl

/**
* Copyright (C) 2012-2013 Selventa, Inc.
*
* This file is part of the OpenBEL Framework.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The OpenBEL Framework is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with the OpenBEL Framework. If not, see <http://www.gnu.org/licenses/>.
*
* Additional Terms under LGPL v3:
*
* This license does not authorize you and you are prohibited from using the
* name, trademarks, service marks, logos or similar indicia of Selventa, Inc.,
* or, in the discretion of other licensors or authors of the program, the
* name, trademarks, service marks, logos or similar indicia of such authors or
* licensors, in any marketing or advertising materials relating to your
* distribution of the program or any covered product. This restriction does
* not waive or limit your obligation to keep intact all copyright notices set
* forth in the program as delivered to you.
*
* If you distribute the program in whole or in part, or any modified version
* of the program, and you assume contractual liability to the recipient with
* respect to the program or modified version, then you will indemnify the
* authors and licensors of the program for any liabilities that these
* contractual assumptions directly impose on those licensors and authors.
*/
package org.openbel.framework.compiler;

import static java.util.Collections.emptySet;
import static org.openbel.framework.common.BELUtilities.constrainedHashSet;
import static org.openbel.framework.common.BELUtilities.hasItems;
import static org.openbel.framework.common.BELUtilities.noItems;
import static org.openbel.framework.common.BELUtilities.sizedArrayList;
import static org.openbel.framework.common.BELUtilities.sizedHashMap;
import static org.openbel.framework.common.enums.FunctionEnum.PROTEIN_ABUNDANCE;
import static org.openbel.framework.common.enums.RelationshipType.IS_A;
import static org.openbel.framework.common.protonetwork.model.ProtoNetwork.NAMESPACE_INDEX;
import static org.openbel.framework.common.protonetwork.model.ProtoNetwork.PARAM_INDEX;
import static org.openbel.framework.common.protonetwork.model.ProtoNetwork.TERM_INDEX;
import static org.openbel.framework.common.protonetwork.model.TermTable.PARAMETER_SUBSTITUTION;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.openbel.framework.common.enums.FunctionEnum;
import org.openbel.framework.common.enums.RelationshipType;
import org.openbel.framework.common.lang.ComplexAbundance;
import org.openbel.framework.common.lang.ProteinAbundance;
import org.openbel.framework.common.model.DataFileIndex;
import org.openbel.framework.common.model.Document;
import org.openbel.framework.common.model.EquivalenceDataIndex;
import org.openbel.framework.common.model.Namespace;
import org.openbel.framework.common.model.Parameter;
import org.openbel.framework.common.model.Statement;
import org.openbel.framework.common.model.Statement.Object;
import org.openbel.framework.common.model.StatementGroup;
import org.openbel.framework.common.model.Term;
import org.openbel.framework.common.protonetwork.model.NamespaceTable;
import org.openbel.framework.common.protonetwork.model.NamespaceTable.TableNamespace;
import org.openbel.framework.common.protonetwork.model.ParameterTable;
import org.openbel.framework.common.protonetwork.model.ParameterTable.TableParameter;
import org.openbel.framework.common.protonetwork.model.ProtoNetwork;
import org.openbel.framework.common.protonetwork.model.ProtoNetworkError;
import org.openbel.framework.common.protonetwork.model.SkinnyUUID;
import org.openbel.framework.common.protonetwork.model.TermParameterMapTable;
import org.openbel.framework.common.protonetwork.model.TermTable;
import org.openbel.framework.core.equivalence.EquivalenceMapResolutionFailure;
import org.openbel.framework.core.indexer.JDBMEquivalenceLookup;
import org.openbel.framework.core.protonetwork.ProtoNetworkDescriptor;
import org.openbel.framework.core.protonetwork.ProtoNetworkService;

/**
* BEL compiler phase three implementation.
*/
public class PhaseThreeImpl implements DefaultPhaseThree {
    private final ProtoNetworkService protoNetworkService;
    private final DefaultPhaseTwo p2;

    private final static String PF_SHORT;
    private final static String PF_LONG;
    private final static String NC_SHORT;
    private final static String NC_LONG;
    static {
        String suffix = "(" + PARAMETER_SUBSTITUTION + ")";
        // proteinAbundance(#) - the p() form w/in the proto-network
        PF_SHORT = ProteinAbundance.ABBREVIATION.concat(suffix);
        PF_LONG = ProteinAbundance.NAME.concat(suffix);
        // complexAbundance(#) - the complex() form w/in the proto-network
        NC_SHORT = ComplexAbundance.ABBREVIATION.concat(suffix);
        NC_LONG = ComplexAbundance.NAME.concat(suffix);
    }

    /**
     * Creates the phase three implementation with the associated
     * {@link ProtoNetworkService}.
     *
     * @param protoNetSvc Proto-network service
     * @param p2 Selventa phase two
     */
    public PhaseThreeImpl(final ProtoNetworkService protoNetSvc,
            final DefaultPhaseTwo p2) {
        this.protoNetworkService = protoNetSvc;
        this.p2 = p2;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public DocumentModificationResult pruneFamilies(boolean expand,
            final Document families,
            final ProtoNetwork network) {

        DocumentModificationResult pr = new DocumentModificationResult();

        // Search for all p(#) parameters within the network.
        Set<TableEntry> networkLiterals = search(network, PF_SHORT, PF_LONG);

        // Load the equivalences
        Set<EquivalenceDataIndex> equivs;
        try {
            equivs = p2.stage2LoadNamespaceEquivalences();
        } catch (EquivalenceMapResolutionFailure f) {
            // Unrecoverable error
            pr.addError(f.getUserFacingMessage());
            pr.setSuccess(false);
            return pr;
        }

        // Map namespace to lookup
        Map<String, JDBMEquivalenceLookup> lookups =
                sizedHashMap(equivs.size());
        for (final EquivalenceDataIndex edi : equivs) {
            String rl = edi.getNamespaceResourceLocation();
            DataFileIndex dfi = edi.getEquivalenceIndex();
            lookups.put(rl, new JDBMEquivalenceLookup(dfi.getIndexPath()));
        }

        // Open the indices
        for (final JDBMEquivalenceLookup jl : lookups.values()) {
            try {
                jl.open();
            } catch (IOException e) {
                pr.addError(e.getMessage());
                pr.setSuccess(false);
                return pr;
            }
        }

        // It's helpful to reference the contents of the protein families
        // BEL document in understanding this section.

        // Establish set of UUIDs relevant for the document's member parameters
        // (this only matters if the expand flag is set)
        Set<SkinnyUUID> uuids = new HashSet<SkinnyUUID>();
        final List<Statement> familyStmts = families.getAllStatements();
        if (expand) {
            uuids = findUUIDs(network, lookups, familyStmts);
        }
        boolean haveUUIDs = !uuids.isEmpty();

        int pruned = 0, total = familyStmts.size();
        List<Parameter> parameters;

        // Pruning is simply a matter of iterating all statements in families...
        FAMILIES: for (final Statement stmt : familyStmts) {

            // ... all protein family parameters
            parameters = stmt.getSubject().getAllParameters();
            for (final Parameter p : parameters) {
                if (!validParameter(p)) {
                    continue;
                }
                Namespace ns = p.getNamespace();
                String val = p.getValue();
                String rl = ns.getResourceLocation();

                // ... do we have the family?
                if (networkLiterals.contains(new TableEntry(val, rl))) {
                    continue FAMILIES;
                }
            }

            Object stmtObj = stmt.getObject();
            if (haveUUIDs && stmtObj != null && stmtObj.getTerm() != null) {
                // ... all family members
                parameters = stmtObj.getTerm().getAllParameters();
                for (final Parameter p : parameters) {
                    if (!validParameter(p)) {
                        continue;
                    }
                    Namespace ns = p.getNamespace();
                    String val = p.getValue();
                    String rl = ns.getResourceLocation();

                    // ... do we have set membership?
                    JDBMEquivalenceLookup jdbmLookup = lookups.get(rl);
                    if (jdbmLookup != null) {
                        SkinnyUUID uuid = jdbmLookup.lookup(val);
                        // Check for set membership, uuid ∊ uuids
                        if (uuids.contains(uuid)) {

                            // This indicates equivalent parameters are
                            // used by both document and network. This
                            // statement will remain (i.e., not be pruned).
                            continue FAMILIES;

                        }
                    }
                }
            }

            // Map statement group to statement set
            Map<StatementGroup, Set<Statement>> groupMap =
                    families.mapStatements();

            // Failed to match both either family or family member.
            // Prune the statement.
            Set<Entry<StatementGroup, Set<Statement>>> entries =
                    groupMap.entrySet();
            for (final Entry<StatementGroup, Set<Statement>> entry : entries) {
                StatementGroup group = entry.getKey();
                Set<Statement> stmts = entry.getValue();
                if (stmts.contains(stmt)) {
                    group.getStatements().remove(stmt);
                    pruned++;
                    break;
                }
            }
        }

        // represent pruned statements as negative
        pr.setDeltaStatements(-(pruned));
        pr.setTotalStatements(total);
        pr.setSuccess(true);

        for (final JDBMEquivalenceLookup jl : lookups.values()) {
            try {
                jl.close();
            } catch (IOException e) {
                pr.addWarning(e.getMessage());
            }
        }

        return pr;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public DocumentModificationResult inferFamilies(Document d, ProtoNetwork p) {
        final DocumentModificationResult pr = new DocumentModificationResult();

        // Load the equivalences
        Set<EquivalenceDataIndex> equivs;
        try {
            equivs = p2.stage2LoadNamespaceEquivalences();
        } catch (EquivalenceMapResolutionFailure f) {
            // Unrecoverable error
            pr.addError(f.getUserFacingMessage());
            pr.setSuccess(false);
            return pr;
        }

        // Map namespace to lookup
        Map<String, JDBMEquivalenceLookup> lookups =
                sizedHashMap(equivs.size());
        for (final EquivalenceDataIndex edi : equivs) {
            String rl = edi.getNamespaceResourceLocation();
            DataFileIndex dfi = edi.getEquivalenceIndex();
            lookups.put(rl, new JDBMEquivalenceLookup(dfi.getIndexPath()));
        }

        // Open the indices
        for (final JDBMEquivalenceLookup jl : lookups.values()) {
            try {
                jl.open();
            } catch (IOException e) {
                pr.addError(e.getMessage());
                pr.setSuccess(false);
                return pr;
            }
        }

        // get all statements for protein family document
        final List<Statement> stmts = d.getAllStatements();

        // the number of inferred statements
        int inferred = 0;

        // find molecular activity functions referencing protein family param
        TableEntry entry;
        // iterate all activity functions of a proteinAbundance
        final List<Statement> isaStmts = new ArrayList<Statement>();
        final Set<FunctionEnum> actFunctions = FunctionEnum.getActivities();
        for (final FunctionEnum f : actFunctions) {
            final String actShort = f.getShortestForm().concat("(")
                    .concat(PF_SHORT).concat(")");
            final String actLong = f.getDisplayValue().concat("(")
                    .concat(PF_LONG).concat(")");

            // find parameters for this activity function in the proto network
            final Set<TableEntry> entries = search(p, actShort, actLong);

            // if no results for activity function, continue to next one
            if (entries.isEmpty()) {
                continue;
            }

            Set<SkinnyUUID> eu = null;

            // entries found, iterate protein family statements
            for (final Statement stmt : stmts) {
                // inspect protein family parameter
                final List<Parameter> sp = stmt.getSubject().getAllParameters();
                for (final Parameter fp : sp) {
                    if (!validParameter(fp)) {
                        continue;
                    }
                    Namespace ns = fp.getNamespace();
                    String val = fp.getValue();
                    String rl = ns.getResourceLocation();

                    // have we seen a molecular activity function for this
                    // protein family parameter
                    entry = new TableEntry(val, rl);
                    if (entries.contains(entry)) {
                        // construct family term once, in case we find matches
                        final Term fpt = new Term(PROTEIN_ABUNDANCE);
                        fpt.addFunctionArgument(new Parameter(ns, val));
                        final Term fat = new Term(f);
                        fat.addFunctionArgument(fpt);

                        // since we found a molecular activity function for a
                        // protein family look up UUIDs for all entries
                        // ... loading uuids on demand to save resources
                        if (eu == null) {
                            eu = findUUIDs(entries, lookups);
                        }

                        // search protein family member parameters for same
                        // molecular activity function
                        final List<Parameter> op = stmt.getObject().getTerm()
                                .getAllParameters();
                        for (final Parameter mp : op) {
                            Namespace mns = mp.getNamespace();
                            String mval = mp.getValue();
                            String mrl = mns.getResourceLocation();

                            // lookup UUID for protein family member
                            JDBMEquivalenceLookup jdbmLookup = lookups.get(mrl);
                            if (jdbmLookup == null) {
                                continue;
                            }
                            SkinnyUUID uuid = jdbmLookup.lookup(mval);

                            // do we have protein family member parameter uuid
                            // in molecular activity function entries?
                            if (eu.contains(uuid)) {
                                // construct member term
                                final Term mpt = new Term(PROTEIN_ABUNDANCE);
                                mpt.addFunctionArgument(new Parameter(mns, mval));
                                final Term mat = new Term(f);
                                mat.addFunctionArgument(mpt);

                                // construct isA statement between member and family
                                isaStmts.add(new Statement(mat, null,
                                        null, new Object(fat), IS_A));
                                inferred++;
                            }
                        }
                    }
                }
            }
        }

        // add isA relationships to protein family document, if necessary
        if (!isaStmts.isEmpty()) {
            final StatementGroup isaStmtGroup = new StatementGroup();
            isaStmtGroup.setStatements(isaStmts);
            d.addStatementGroup(isaStmtGroup);
        }

        pr.setDeltaStatements(inferred);
        pr.setTotalStatements(d.getStatementCount());
        pr.setSuccess(true);

        // Close the indices
        for (final JDBMEquivalenceLookup jl : lookups.values()) {
            try {
                jl.close();
            } catch (IOException e) {
                pr.addWarning(e.getMessage());
            }
        }

        return pr;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public DocumentModificationResult pruneComplexes(boolean expand,
            final Document complexes,
            final ProtoNetwork network) {

        DocumentModificationResult pr = new DocumentModificationResult();

        // Search for all complex(#) parameters within the network.
        Set<TableEntry> networkLiterals = search(network, NC_SHORT, NC_LONG);

        // Load the equivalences
        Set<EquivalenceDataIndex> equivs;
        try {
            equivs = p2.stage2LoadNamespaceEquivalences();
        } catch (EquivalenceMapResolutionFailure f) {
            // Unrecoverable error
            pr.addError(f.getUserFacingMessage());
            pr.setSuccess(false);
            return pr;
        }

        // Map namespace to lookup
        Map<String, JDBMEquivalenceLookup> lookups =
                sizedHashMap(equivs.size());
        for (final EquivalenceDataIndex edi : equivs) {
            String rl = edi.getNamespaceResourceLocation();
            DataFileIndex dfi = edi.getEquivalenceIndex();
            lookups.put(rl, new JDBMEquivalenceLookup(dfi.getIndexPath()));
        }

        // Open the indices
        for (final JDBMEquivalenceLookup jl : lookups.values()) {
            try {
                jl.open();
            } catch (IOException e) {
                pr.addError(e.getMessage());
                pr.setSuccess(false);
                return pr;
            }
        }

        // It's helpful to reference the contents of the protein families
        // BEL document in understanding this section.

        // Establish set of UUIDs relevant for the document's member parameters
        // (this only matters if the expand flag is set)
        Set<SkinnyUUID> uuids = new HashSet<SkinnyUUID>();
        final List<Statement> complexStmts = complexes.getAllStatements();
        if (expand) {
            uuids = findUUIDs(network, lookups, complexStmts);
        }
        boolean haveUUIDs = !uuids.isEmpty();

        int pruned = 0, total = complexStmts.size();
        List<Parameter> parameters;

        // Pruning is simply a matter of iterating all statements in document...
        COMPLEXES: for (final Statement stmt : complexStmts) {

            // ... all named complex parameters
            parameters = stmt.getSubject().getAllParameters();
            for (final Parameter p : parameters) {
                if (!validParameter(p)) {
                    continue;
                }
                Namespace ns = p.getNamespace();
                String val = p.getValue();
                String rl = ns.getResourceLocation();

                // ... do we have the complex?
                if (networkLiterals.contains(new TableEntry(val, rl))) {
                    continue COMPLEXES;
                }
            }

            Object stmtObj = stmt.getObject();
            if (haveUUIDs && stmtObj != null && stmtObj.getTerm() != null) {
                // ... all complex components
                parameters = stmtObj.getTerm().getAllParameters();
                for (final Parameter p : parameters) {
                    if (!validParameter(p)) {
                        continue;
                    }
                    Namespace ns = p.getNamespace();
                    String val = p.getValue();
                    String rl = ns.getResourceLocation();

                    // ... do we have set membership?
                    JDBMEquivalenceLookup jdbmLookup = lookups.get(rl);
                    if (jdbmLookup != null) {
                        SkinnyUUID uuid = jdbmLookup.lookup(val);
                        // Check for set membership, uuid ∊ uuids
                        if (uuids.contains(uuid)) {

                            // This indicates equivalent parameters are being
                            // used by both document and network. This
                            // statement will remain (i.e., not be pruned).
                            continue COMPLEXES;

                        }
                    }
                }
            }

            // Map statement group to statement set
            Map<StatementGroup, Set<Statement>> groupMap =
                    complexes.mapStatements();

            // Failed to match both either complex or component.
            // Prune the statement.
            Set<Entry<StatementGroup, Set<Statement>>> entries =
                    groupMap.entrySet();
            for (final Entry<StatementGroup, Set<Statement>> entry : entries) {
                StatementGroup group = entry.getKey();
                Set<Statement> stmts = entry.getValue();
                if (entry.getValue().contains(stmt)) {
                    stmts.remove(stmt);
                    group.getStatements().remove(stmt);
                    pruned++;
                    break;
                }
            }
        }

        pr.setDeltaStatements(-(pruned));
        pr.setTotalStatements(total);
        pr.setSuccess(true);

        for (final JDBMEquivalenceLookup jl : lookups.values()) {
            try {
                jl.close();
            } catch (IOException e) {
                pr.addWarning(e.getMessage());
            }
        }

        return pr;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public DocumentModificationResult pruneGene(final Document genes,
            final ProtoNetwork network) {

        DocumentModificationResult pr = new DocumentModificationResult();

        // Load the equivalences
        Set<EquivalenceDataIndex> equivs;
        try {
            equivs = p2.stage2LoadNamespaceEquivalences();
        } catch (EquivalenceMapResolutionFailure f) {
            // Unrecoverable error
            pr.addError(f.getUserFacingMessage());
            pr.setSuccess(false);
            return pr;
        }

        // Map namespace to lookup
        Map<String, JDBMEquivalenceLookup> lookups =
                sizedHashMap(equivs.size());
        for (final EquivalenceDataIndex edi : equivs) {
            String rl = edi.getNamespaceResourceLocation();
            DataFileIndex dfi = edi.getEquivalenceIndex();
            lookups.put(rl, new JDBMEquivalenceLookup(dfi.getIndexPath()));
        }

        // Open the indices
        for (final JDBMEquivalenceLookup jl : lookups.values()) {
            try {
                jl.open();
            } catch (IOException e) {
                pr.addError(e.getMessage());
                pr.setSuccess(false);
                return pr;
            }
        }

        // It's helpful to reference the contents of the gene scaffolding
        // BEL document in understanding this section.

        // Establish set of UUIDs relevant for the document's member parameters
        Set<SkinnyUUID> uuids = new HashSet<SkinnyUUID>();
        pr.setTotalStatements(genes.getNumberOfStatements());

        for (final Statement stmt : genes) {
            // Add all the parameters
            final List<Parameter> parameters = stmt.getAllParameters();
            for (final Parameter p : parameters) {
                if (!validParameter(p)) {
                    continue;
                }
                Namespace ns = p.getNamespace();
                String val = p.getValue();
                String rl = ns.getResourceLocation();

                JDBMEquivalenceLookup jdbmLookup = lookups.get(rl);
                if (jdbmLookup == null) {
                    continue;
                }

                SkinnyUUID uuid = jdbmLookup.lookup(val);
                if (uuid != null) {
                    uuids.add(uuid);
                }
            }
        }

        // Establish set of UUIDs relevant for the proto-network
        Set<SkinnyUUID> pnUUIDs = new HashSet<SkinnyUUID>();
        ParameterTable paramTbl = network.getParameterTable();
        TableParameter[] paramArr = paramTbl.getTableParameterArray();
        for (final TableParameter tp : paramArr) {
            if (!validParameter(tp)) {
                continue;
            }
            TableNamespace namespace = tp.getNamespace();
            String value = tp.getValue();
            String rl = namespace.getResourceLocation();
            JDBMEquivalenceLookup jdbmLookup = lookups.get(rl);
            if (jdbmLookup == null) {
                continue;
            }

            SkinnyUUID uuid = jdbmLookup.lookup(value);
            if (uuid != null) {
                pnUUIDs.add(uuid);
            }
        }

        // Two sets of UUIDs have been established at this point.
        // The first set, uuids, is the set based on the genes.
        // The second set, pnUUIDs, is the set based on the proto-network.

        // Make uuids the intersection of the two: (uuids ∩ pnUUIDs)
        uuids.retainAll(pnUUIDs);

        final Set<Statement> toPrune = new HashSet<Statement>();
        int pruned = 0;

        // Pruning is simply a matter of iterating all statements in genes...
        GENES: for (final Statement stmt : genes) {
            List<Parameter> parameters = stmt.getAllParameters();

            // ... and all parameters in each statement...
            for (final Parameter p : parameters) {
                if (!validParameter(p)) {
                    continue;
                }

                Namespace ns = p.getNamespace();
                String val = p.getValue();
                String rl = ns.getResourceLocation();

                // ... do we have set membership?
                JDBMEquivalenceLookup jdbmLookup = lookups.get(rl);
                if (jdbmLookup == null) {
                    continue;
                }
                SkinnyUUID uuid = jdbmLookup.lookup(val);
                // Check for set membership, uuid ∊ uuids
                if (uuids.contains(uuid)) {

                    // This indicates equivalent parameters are being
                    // used by both document and network. This
                    // statement will remain (i.e., not be pruned).
                    continue GENES;

                }
            }
            toPrune.add(stmt);
        }

        for (final StatementGroup sg : genes.getStatementGroups()) {
            List<Statement> curstmts = sg.getStatements();
            List<Statement> newstmts = sizedArrayList(curstmts.size());
            for (final Statement stmt : curstmts) {
                if (!toPrune.contains(stmt)) {
                    newstmts.add(stmt);
                } else {
                    pruned++;
                }
            }
            sg.setStatements(newstmts);
        }

        pr.setDeltaStatements(-(pruned));
        pr.setSuccess(true);

        for (final JDBMEquivalenceLookup jl : lookups.values()) {
            try {
                jl.close();
            } catch (IOException e) {
                pr.addWarning(e.getMessage());
            }
        }

        return pr;
    }

    /**
     * Finds the {@link SkinnyUUID} for every {@link Parameter parameter} in
     * the {@link ProtoNetwork proto network} that is also referenced in the
     * {@link Term object term} of each {@link Statement statement}.
     *
     * @param network {@link ProtoNetwork}, the proto network containing
     * {@link Parameter parameters} that we'll get {@link String uuids} for
     * @param lookups {@link Map} of {@link JDBMEquivalenceLookup} keyed by
     * resource location and assumed to be open / ready for lookups
     * @param stmts {@link List} of {@link Statement}, the statements
     * that contain object-term {@link Parameter parameters} that we'll get
     * {@link SkinnyUUID}s for
     * @return the {@link SkinnyUUID} intersection between the statement's
     * object term {@link Parameter parameters} and the proto network's
     * {@link Parameter parameters}
     */
    private Set<SkinnyUUID> findUUIDs(final ProtoNetwork network,
            Map<String, JDBMEquivalenceLookup> lookups,
            final List<Statement> stmts) {
        final Set<SkinnyUUID> uuids = new HashSet<SkinnyUUID>();

        for (final Statement stmt : stmts) {
            // Add all the object's parameters
            Object stmtObj = stmt.getObject();
            if (stmtObj == null || stmtObj.getTerm() == null) {
                continue;
            }
            for (final Parameter p : stmtObj.getTerm()) {
                if (!validParameter(p)) {
                    continue;
                }
                Namespace ns = p.getNamespace();
                String val = p.getValue();
                String rl = ns.getResourceLocation();

                JDBMEquivalenceLookup jdbmLookup = lookups.get(rl);
                if (jdbmLookup == null) {
                    continue;
                }

                SkinnyUUID uuid = jdbmLookup.lookup(val);
                if (uuid != null) {
                    uuids.add(uuid);
                }
            }
        }

        // Establish set of UUIDs relevant for the proto-network
        Set<SkinnyUUID> pnUUIDs = new HashSet<SkinnyUUID>();
        ParameterTable paramTbl = network.getParameterTable();
        TableParameter[] paramArr = paramTbl.getTableParameterArray();
        for (final TableParameter tp : paramArr) {
            if (!validParameter(tp)) {
                continue;
            }
            TableNamespace namespace = tp.getNamespace();
            String value = tp.getValue();
            String rl = namespace.getResourceLocation();
            JDBMEquivalenceLookup jdbmLookup = lookups.get(rl);
            if (jdbmLookup == null) {
                continue;
            }

            SkinnyUUID uuid = jdbmLookup.lookup(value);
            if (uuid != null) {
                pnUUIDs.add(uuid);
            }
        }

        // Two sets of UUIDs have been established at this point.
        // The first set, uuids, is the set based on the statements.
        // The second set, pnUUIDs, is the set based on the proto-network.
        // Make uuids the intersection of the two: (uuids ∩ pnUUIDs)
        uuids.retainAll(pnUUIDs);

        return uuids;
    }

    private Set<SkinnyUUID> findUUIDs(final Set<TableEntry> entries,
            Map<String, JDBMEquivalenceLookup> lookups) {
        final Set<SkinnyUUID> uuids = new HashSet<SkinnyUUID>();

        for (final TableEntry e : entries) {
            String val = e.value;
            String rl = e.resourceLocation;

            JDBMEquivalenceLookup jdbmLookup = lookups.get(rl);
            if (jdbmLookup == null) {
                continue;
            }

            SkinnyUUID uuid = jdbmLookup.lookup(val);
            if (uuid != null) {
                uuids.add(uuid);
            }
        }

        return uuids;
    }

    /**
     * Aggregate the set of all table entries for a proto-network that match at
     * least one of the provided term literals.
     *
     * @param literals Literals, e.g., {@code proteinAbundance(#)}
     * @return Table entry set
     */
    private Set<TableEntry> search(final ProtoNetwork pn, String... literals) {
        Set<TableEntry> ret;
        if (noItems(literals)) {
            return emptySet();
        }

        // An initial capacity would be ideal for this set, but we don't
        // have any reasonable estimations.
        ret = new HashSet<TableEntry>();

        // Establish access to the relevant table objects
        final ParameterTable paramTbl = pn.getParameterTable();
        final TermTable termTbl = pn.getTermTable();
        final List<String> terms = termTbl.getTermValues();
        final NamespaceTable nsTbl = pn.getNamespaceTable();

        final int[][] indata = pn.getTermIndices();

        final Set<String> literalSet = constrainedHashSet(literals.length);
        for (final String s : literals) {
            literalSet.add(s);
        }

        // Search indata for elements of the term literal set
        for (int i = 0; i < indata.length; i++) {
            int tid = indata[i][TERM_INDEX];
            String string = terms.get(tid);
            if (!literalSet.contains(string)) {
                continue;
            }

            int pid = indata[i][PARAM_INDEX];
            final TableParameter tp = paramTbl.getTableParameter(pid);

            final String inval = tp.getValue();
            int nid = indata[i][NAMESPACE_INDEX];

            final TableNamespace tns = nsTbl.getTableNamespace(nid);
            String inrl = null;
            if (tns != null) inrl = tns.getResourceLocation();

            final TableEntry te = new TableEntry(inval, inrl);
            ret.add(te);
        }

        return ret;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public DocumentModificationResult pruneOrthologyDocument(final Document d,
            final ProtoNetwork pn) {

        DocumentModificationResult result = new DocumentModificationResult();

        // Load the equivalences
        Set<EquivalenceDataIndex> equivs;
        try {
            equivs = p2.stage2LoadNamespaceEquivalences();
        } catch (EquivalenceMapResolutionFailure f) {
            // Unrecoverable error
            result.addError(f.getUserFacingMessage());
            result.setSuccess(false);
            return result;
        }

        // Map namespace to lookup
        Map<String, JDBMEquivalenceLookup> lookups =
                sizedHashMap(equivs.size());
        for (final EquivalenceDataIndex edi : equivs) {
            String rl = edi.getNamespaceResourceLocation();
            DataFileIndex dfi = edi.getEquivalenceIndex();
            lookups.put(rl, new JDBMEquivalenceLookup(dfi.getIndexPath()));
        }

        // Open the indices
        for (final JDBMEquivalenceLookup jl : lookups.values()) {
            try {
                jl.open();
            } catch (IOException e) {
                result.addError(e.getMessage());
                result.setSuccess(false);
                return result;
            }
        }

        // Approach
        // 1.   map UUID to list of terms for proto network
        // 2.   iterate ortho statements
        //   a.   if valid orthologous statement
        //      -   lookup exact match of subject term in proto network, if found
        //          continue (orthologous statement intersects the proto network)
        //      -   find subject parameter's uuid in a proto network term, if found
        //          continue (orthologous statement intersects the proto network)
        //      -   lookup exact match of object term in proto network, if found
        //          continue (orthologous statement intersects the proto network)
        //      -   find object parameter's uuid in a proto network term, if found
        //          continue (orthologous statement intersects the proto network)

        // 1. Map parameter UUIDs to containing term ids in the proto network
        final TermTable tt = pn.getTermTable();
        final TermParameterMapTable tpmt = pn.getTermParameterMapTable();
        final ParameterTable pt = pn.getParameterTable();
        final Map<Integer, Integer> pglob = pt.getGlobalIndex();
        final Map<Integer, SkinnyUUID> puuid = pt.getGlobalUUIDs();
        final Set<Integer> tidset = tt.getIndexedTerms().keySet();
        final Map<SkinnyUUID, Set<Integer>> uuidterms = sizedHashMap(puuid
                .size());
        final Set<Term> pnterms = new HashSet<Term>(tt.getVisitedTerms().keySet());
        // for each term
        for (final Integer tid : tidset) {
            // get its parameters
            final List<Integer> pids = tpmt.getParameterIndexes(tid);
            for (final Integer pid : pids) {
                // find global parameter index for pid
                Integer globalpid = pglob.get(pid);

                // find UUID for global parameter index
                final SkinnyUUID pu = puuid.get(globalpid);

                // save this term to this UUID
                Set<Integer> terms = uuidterms.get(pu);
                if (terms == null) {
                    terms = new HashSet<Integer>();
                    uuidterms.put(pu, terms);
                }
                terms.add(tid);
            }
        }

        // get all statement in orthology document
        final List<Statement> orthoStmts = d.getAllStatements();
        // map them to statement groups for efficient pruning
        Map<StatementGroup, Set<Statement>> orthomap = d.mapStatements();

        // set up pruning result
        int total = orthoStmts.size();
        int pruned = 0;

        // establish cache to skinny uuids to avoid superfluous jdbm lookups
        final Map<Parameter, SkinnyUUID> paramcache = sizedHashMap(total * 2);

        // iterate all statements in the orthology document
        ORTHO_STATEMENT: for (final Statement orthoStmt : orthoStmts) {

            // rule out invalid or non-orthologous statements
            if (validOrthologousStatement(orthoStmt)) {
                // break down subject
                final Term sub = orthoStmt.getSubject();
                final FunctionEnum subf = sub.getFunctionEnum();
                final List<Parameter> subp = sub.getParameters();
                final Parameter subjectParam = subp.get(0);

                // break down object
                final Term obj = orthoStmt.getObject().getTerm();
                final FunctionEnum objf = obj.getFunctionEnum();
                final List<Parameter> objp = obj.getParameters();
                final Parameter objectParam = objp.get(0);

                // lookup exact match of subject term
                if (pnterms.contains(sub)) {
                    pnterms.add(obj);

                    continue;
                }

                // find UUID for subject parameter
                SkinnyUUID uuid = paramcache.get(subjectParam);
                if (uuid == null) {
                    final Namespace ns = subjectParam.getNamespace();
                    final JDBMEquivalenceLookup lookup = lookups.get(ns
                            .getResourceLocation());
                    if (lookup == null) {
                        continue;
                    }

                    uuid = lookup.lookup(subjectParam.getValue());
                    paramcache.put(subjectParam, uuid);
                }

                // if there is a proto network term with this UUID contained, then
                // this orthologous statement intersects the proto network, continue
                if (uuid != null) {
                    Set<Integer> tids = uuidterms.get(uuid);
                    if (hasItems(tids)) {
                        for (final Integer tid : tids) {
                            final Term t = tt.getIndexedTerms().get(tid);
                            if (t.getFunctionEnum() == subf) {
                                pnterms.add(sub);
                                pnterms.add(t);
                                pnterms.add(obj);

                                continue ORTHO_STATEMENT;
                            }
                        }
                    }
                }

                // lookup exact match of object term
                if (pnterms.contains(obj)) {
                    pnterms.add(sub);
                    continue;
                }

                // find UUID for object parameter
                uuid = paramcache.get(objectParam);
                if (uuid == null) {
                    final Namespace ns = objectParam.getNamespace();
                    final JDBMEquivalenceLookup lookup = lookups.get(ns
                            .getResourceLocation());
                    if (lookup == null) {
                        continue;
                    }

                    uuid = lookup.lookup(objectParam.getValue());
                    paramcache.put(objectParam, uuid);
                }

                // if there is a proto network term with this UUID contained, then
                // this orthologous statement intersects the proto network, continue
                if (uuid != null) {
                    Set<Integer> tids = uuidterms.get(uuid);
                    if (hasItems(tids)) {
                        for (final Integer tid : tids) {
                            final Term t = tt.getIndexedTerms().get(tid);
                            if (t.getFunctionEnum() == objf) {
                                pnterms.add(obj);
                                pnterms.add(t);
                                pnterms.add(sub);

                                continue ORTHO_STATEMENT;
                            }
                        }
                    }
                }

                // proto network does not contain either equivalent term so prune
                // from its parent statement group
                Set<Entry<StatementGroup, Set<Statement>>> entries = orthomap
                        .entrySet();
                for (final Entry<StatementGroup, Set<Statement>> e : entries) {
                    StatementGroup group = e.getKey();
                    Set<Statement> stmts = e.getValue();
                    if (stmts.contains(orthoStmt)) {
                        group.getStatements().remove(orthoStmt);
                        pruned++;
                        break;
                    }
                }
            }
        }

        // close equivalences
        for (final JDBMEquivalenceLookup jl : lookups.values()) {
            try {
                jl.close();
            } catch (IOException e) {
                result.addWarning(e.getMessage());
            }
        }

        // set result data
        result.setDeltaStatements(-pruned);
        result.setTotalStatements(total);
        result.setSuccess(true);

        return result;
    }

    /**
     * Determines whether the {@link Statement statement} is a valid
     * {@link RelationshipType#ORTHOLOGOUS orthologous} statement.
     *
     * @param stmt {@link Statement} to evaluate
     * @return {@code true} if it is valid, {@code false} otherwise
     */
    private boolean validOrthologousStatement(final Statement stmt) {
        // test statement is well-formed
        if (stmt.getRelationshipType() == RelationshipType.ORTHOLOGOUS
                && stmt.getObject() != null
                && stmt.getObject().getTerm() != null) {

            // test subject has only one namespace parameter
            final Term subject = stmt.getSubject();
            List<Parameter> subparams = subject.getParameters();
            if (subparams == null || subparams.size() != 1
                    || subparams.get(0).getNamespace() == null) {
                return false;
            }

            // test object has only one namespace parameter
            final Term object = stmt.getObject().getTerm();
            List<Parameter> objparams = object.getParameters();
            if (objparams == null || objparams.size() != 1
                    || objparams.get(0).getNamespace() == null) {
                return false;
            }

            return true;
        }

        return false;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public ProtoNetwork compile(final Document d) {
        return protoNetworkService.compile(d);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void merge(
            final ProtoNetwork pnDestination,
            final ProtoNetwork pndSource)
            throws ProtoNetworkError {

        protoNetworkService.merge(pnDestination, pndSource);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public ProtoNetworkDescriptor write(final String path,
            final ProtoNetwork network) throws ProtoNetworkError {
        return protoNetworkService.write(path, network);
    }

    /**
     * Returns true if the parameter has a namespace and value, false if not.
     *
     * @param p Parameter
     */
    private static boolean validParameter(final Parameter p) {
        if (p.getNamespace() == null) {
            return false;
        }
        if (p.getValue() == null) {
            return false;
        }
        return true;
    }

    /**
     * Returns true if the table parameter has a namespace and value, false if
     * not.
     *
     * @param tp Table parameter
     */
    private static boolean validParameter(final TableParameter tp) {
        if (tp.getNamespace() == null) {
            return false;
        }
        if (tp.getValue() == null) {
            return false;
        }
        return true;
    }

    /**
     * Table entry.
     */
    private static class TableEntry {
        /** Parameter value. */
        private final String value;
        /** Namespace containing this parameter value. */
        private final String resourceLocation;
        private final int hash;

        /**
         * Creates a table entry; establishes a hash code.
         *
         * @param val String value
         * @param rl String resource location
         */
        private TableEntry(final String val, final String rl) {
            this.value = val;
            this.resourceLocation = rl;

            int prime = 31;
            int result = prime;

            result *= prime;
            result += value.hashCode();

            if (rl != null) {
                result *= prime;
                result += rl.hashCode();
            }

            hash = result;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public int hashCode() {
            return hash;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public boolean equals(final java.lang.Object o) {
            if (this == o) {
                return true;
            }
            if (!(o instanceof TableEntry)) {
                return false;
            }

            final TableEntry te = (TableEntry) o;
            if (!value.equals(te.value)) {
                return false;
            }

            if (resourceLocation == null) {
                if (te.resourceLocation != null) {
                    return false;
                }
            } else if (!resourceLocation.equals(te.resourceLocation)) {
                return false;
            }

            return true;
        }
    }
}
TOP

Related Classes of org.openbel.framework.compiler.PhaseThreeImpl

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.