Package org.openbel.framework.core.indexer

Examples of org.openbel.framework.core.indexer.JDBMEquivalenceLookup


    }

    @Test
    public void testLookupSuccess() {
        try {
            JDBMEquivalenceLookup jdbmLookup =
                    new JDBMEquivalenceLookup(indexPath.getAbsolutePath());
            jdbmLookup.open();

            for (int i = 0; i < numToTest; i++) {
                SkinnyUUID uuid = uuids.get(i);
                assertEquals("Failed for " + i, uuid,
                        jdbmLookup.lookup(String.valueOf(i)));
            }

            jdbmLookup.close();
        } catch (Exception e) {
            e.printStackTrace();
            fail(e.getMessage());
        }
    }
View Full Code Here


        Map<String, JDBMEquivalenceLookup> lookups =
                sizedHashMap(equivs.size());
        for (final EquivalenceDataIndex edi : equivs) {
            String rl = edi.getNamespaceResourceLocation();
            DataFileIndex dfi = edi.getEquivalenceIndex();
            lookups.put(rl, new JDBMEquivalenceLookup(dfi.getIndexPath()));
        }

        // Open the indices
        for (final JDBMEquivalenceLookup jl : lookups.values()) {
            try {
                jl.open();
            } catch (IOException e) {
                pr.addError(e.getMessage());
                pr.setSuccess(false);
                return pr;
            }
        }

        // It's helpful to reference the contents of the protein families
        // BEL document in understanding this section.

        // Establish set of UUIDs relevant for the document's member parameters
        // (this only matters if the expand flag is set)
        Set<SkinnyUUID> uuids = new HashSet<SkinnyUUID>();
        final List<Statement> familyStmts = families.getAllStatements();
        if (expand) {
            uuids = findUUIDs(network, lookups, familyStmts);
        }
        boolean haveUUIDs = !uuids.isEmpty();

        int pruned = 0, total = familyStmts.size();
        List<Parameter> parameters;

        // Pruning is simply a matter of iterating all statements in families...
        FAMILIES: for (final Statement stmt : familyStmts) {

            // ... all protein family parameters
            parameters = stmt.getSubject().getAllParameters();
            for (final Parameter p : parameters) {
                if (!validParameter(p)) {
                    continue;
                }
                Namespace ns = p.getNamespace();
                String val = p.getValue();
                String rl = ns.getResourceLocation();

                // ... do we have the family?
                if (networkLiterals.contains(new TableEntry(val, rl))) {
                    continue FAMILIES;
                }
            }

            Object stmtObj = stmt.getObject();
            if (haveUUIDs && stmtObj != null && stmtObj.getTerm() != null) {
                // ... all family members
                parameters = stmtObj.getTerm().getAllParameters();
                for (final Parameter p : parameters) {
                    if (!validParameter(p)) {
                        continue;
                    }
                    Namespace ns = p.getNamespace();
                    String val = p.getValue();
                    String rl = ns.getResourceLocation();

                    // ... do we have set membership?
                    JDBMEquivalenceLookup jdbmLookup = lookups.get(rl);
                    if (jdbmLookup != null) {
                        SkinnyUUID uuid = jdbmLookup.lookup(val);
                        // Check for set membership, uuid ∊ uuids
                        if (uuids.contains(uuid)) {

                            // This indicates equivalent parameters are
                            // used by both document and network. This
View Full Code Here

        Map<String, JDBMEquivalenceLookup> lookups =
                sizedHashMap(equivs.size());
        for (final EquivalenceDataIndex edi : equivs) {
            String rl = edi.getNamespaceResourceLocation();
            DataFileIndex dfi = edi.getEquivalenceIndex();
            lookups.put(rl, new JDBMEquivalenceLookup(dfi.getIndexPath()));
        }

        // Open the indices
        for (final JDBMEquivalenceLookup jl : lookups.values()) {
            try {
                jl.open();
            } catch (IOException e) {
                pr.addError(e.getMessage());
                pr.setSuccess(false);
                return pr;
            }
        }

        // get all statements for protein family document
        final List<Statement> stmts = d.getAllStatements();

        // the number of inferred statements
        int inferred = 0;

        // find molecular activity functions referencing protein family param
        TableEntry entry;
        // iterate all activity functions of a proteinAbundance
        final List<Statement> isaStmts = new ArrayList<Statement>();
        final Set<FunctionEnum> actFunctions = FunctionEnum.getActivities();
        for (final FunctionEnum f : actFunctions) {
            final String actShort = f.getShortestForm().concat("(")
                    .concat(PF_SHORT).concat(")");
            final String actLong = f.getDisplayValue().concat("(")
                    .concat(PF_LONG).concat(")");

            // find parameters for this activity function in the proto network
            final Set<TableEntry> entries = search(p, actShort, actLong);

            // if no results for activity function, continue to next one
            if (entries.isEmpty()) {
                continue;
            }

            Set<SkinnyUUID> eu = null;

            // entries found, iterate protein family statements
            for (final Statement stmt : stmts) {
                // inspect protein family parameter
                final List<Parameter> sp = stmt.getSubject().getAllParameters();
                for (final Parameter fp : sp) {
                    if (!validParameter(fp)) {
                        continue;
                    }
                    Namespace ns = fp.getNamespace();
                    String val = fp.getValue();
                    String rl = ns.getResourceLocation();

                    // have we seen a molecular activity function for this
                    // protein family parameter
                    entry = new TableEntry(val, rl);
                    if (entries.contains(entry)) {
                        // construct family term once, in case we find matches
                        final Term fpt = new Term(PROTEIN_ABUNDANCE);
                        fpt.addFunctionArgument(new Parameter(ns, val));
                        final Term fat = new Term(f);
                        fat.addFunctionArgument(fpt);

                        // since we found a molecular activity function for a
                        // protein family look up UUIDs for all entries
                        // ... loading uuids on demand to save resources
                        if (eu == null) {
                            eu = findUUIDs(entries, lookups);
                        }

                        // search protein family member parameters for same
                        // molecular activity function
                        final List<Parameter> op = stmt.getObject().getTerm()
                                .getAllParameters();
                        for (final Parameter mp : op) {
                            Namespace mns = mp.getNamespace();
                            String mval = mp.getValue();
                            String mrl = mns.getResourceLocation();

                            // lookup UUID for protein family member
                            JDBMEquivalenceLookup jdbmLookup = lookups.get(mrl);
                            if (jdbmLookup == null) {
                                continue;
                            }
                            SkinnyUUID uuid = jdbmLookup.lookup(mval);

                            // do we have protein family member parameter uuid
                            // in molecular activity function entries?
                            if (eu.contains(uuid)) {
                                // construct member term
View Full Code Here

        Map<String, JDBMEquivalenceLookup> lookups =
                sizedHashMap(equivs.size());
        for (final EquivalenceDataIndex edi : equivs) {
            String rl = edi.getNamespaceResourceLocation();
            DataFileIndex dfi = edi.getEquivalenceIndex();
            lookups.put(rl, new JDBMEquivalenceLookup(dfi.getIndexPath()));
        }

        // Open the indices
        for (final JDBMEquivalenceLookup jl : lookups.values()) {
            try {
                jl.open();
            } catch (IOException e) {
                pr.addError(e.getMessage());
                pr.setSuccess(false);
                return pr;
            }
        }

        // It's helpful to reference the contents of the protein families
        // BEL document in understanding this section.

        // Establish set of UUIDs relevant for the document's member parameters
        // (this only matters if the expand flag is set)
        Set<SkinnyUUID> uuids = new HashSet<SkinnyUUID>();
        final List<Statement> complexStmts = complexes.getAllStatements();
        if (expand) {
            uuids = findUUIDs(network, lookups, complexStmts);
        }
        boolean haveUUIDs = !uuids.isEmpty();

        int pruned = 0, total = complexStmts.size();
        List<Parameter> parameters;

        // Pruning is simply a matter of iterating all statements in document...
        COMPLEXES: for (final Statement stmt : complexStmts) {

            // ... all named complex parameters
            parameters = stmt.getSubject().getAllParameters();
            for (final Parameter p : parameters) {
                if (!validParameter(p)) {
                    continue;
                }
                Namespace ns = p.getNamespace();
                String val = p.getValue();
                String rl = ns.getResourceLocation();

                // ... do we have the complex?
                if (networkLiterals.contains(new TableEntry(val, rl))) {
                    continue COMPLEXES;
                }
            }

            Object stmtObj = stmt.getObject();
            if (haveUUIDs && stmtObj != null && stmtObj.getTerm() != null) {
                // ... all complex components
                parameters = stmtObj.getTerm().getAllParameters();
                for (final Parameter p : parameters) {
                    if (!validParameter(p)) {
                        continue;
                    }
                    Namespace ns = p.getNamespace();
                    String val = p.getValue();
                    String rl = ns.getResourceLocation();

                    // ... do we have set membership?
                    JDBMEquivalenceLookup jdbmLookup = lookups.get(rl);
                    if (jdbmLookup != null) {
                        SkinnyUUID uuid = jdbmLookup.lookup(val);
                        // Check for set membership, uuid ∊ uuids
                        if (uuids.contains(uuid)) {

                            // This indicates equivalent parameters are being
                            // used by both document and network. This
View Full Code Here

        Map<String, JDBMEquivalenceLookup> lookups =
                sizedHashMap(equivs.size());
        for (final EquivalenceDataIndex edi : equivs) {
            String rl = edi.getNamespaceResourceLocation();
            DataFileIndex dfi = edi.getEquivalenceIndex();
            lookups.put(rl, new JDBMEquivalenceLookup(dfi.getIndexPath()));
        }

        // Open the indices
        for (final JDBMEquivalenceLookup jl : lookups.values()) {
            try {
                jl.open();
            } catch (IOException e) {
                pr.addError(e.getMessage());
                pr.setSuccess(false);
                return pr;
            }
        }

        // It's helpful to reference the contents of the gene scaffolding
        // BEL document in understanding this section.

        // Establish set of UUIDs relevant for the document's member parameters
        Set<SkinnyUUID> uuids = new HashSet<SkinnyUUID>();
        pr.setTotalStatements(genes.getNumberOfStatements());

        for (final Statement stmt : genes) {
            // Add all the parameters
            final List<Parameter> parameters = stmt.getAllParameters();
            for (final Parameter p : parameters) {
                if (!validParameter(p)) {
                    continue;
                }
                Namespace ns = p.getNamespace();
                String val = p.getValue();
                String rl = ns.getResourceLocation();

                JDBMEquivalenceLookup jdbmLookup = lookups.get(rl);
                if (jdbmLookup == null) {
                    continue;
                }

                SkinnyUUID uuid = jdbmLookup.lookup(val);
                if (uuid != null) {
                    uuids.add(uuid);
                }
            }
        }

        // Establish set of UUIDs relevant for the proto-network
        Set<SkinnyUUID> pnUUIDs = new HashSet<SkinnyUUID>();
        ParameterTable paramTbl = network.getParameterTable();
        TableParameter[] paramArr = paramTbl.getTableParameterArray();
        for (final TableParameter tp : paramArr) {
            if (!validParameter(tp)) {
                continue;
            }
            TableNamespace namespace = tp.getNamespace();
            String value = tp.getValue();
            String rl = namespace.getResourceLocation();
            JDBMEquivalenceLookup jdbmLookup = lookups.get(rl);
            if (jdbmLookup == null) {
                continue;
            }

            SkinnyUUID uuid = jdbmLookup.lookup(value);
            if (uuid != null) {
                pnUUIDs.add(uuid);
            }
        }

        // Two sets of UUIDs have been established at this point.
        // The first set, uuids, is the set based on the genes.
        // The second set, pnUUIDs, is the set based on the proto-network.

        // Make uuids the intersection of the two: (uuids ∩ pnUUIDs)
        uuids.retainAll(pnUUIDs);

        final Set<Statement> toPrune = new HashSet<Statement>();
        int pruned = 0;

        // Pruning is simply a matter of iterating all statements in genes...
        GENES: for (final Statement stmt : genes) {
            List<Parameter> parameters = stmt.getAllParameters();

            // ... and all parameters in each statement...
            for (final Parameter p : parameters) {
                if (!validParameter(p)) {
                    continue;
                }

                Namespace ns = p.getNamespace();
                String val = p.getValue();
                String rl = ns.getResourceLocation();

                // ... do we have set membership?
                JDBMEquivalenceLookup jdbmLookup = lookups.get(rl);
                if (jdbmLookup == null) {
                    continue;
                }
                SkinnyUUID uuid = jdbmLookup.lookup(val);
                // Check for set membership, uuid ∊ uuids
                if (uuids.contains(uuid)) {

                    // This indicates equivalent parameters are being
                    // used by both document and network. This
View Full Code Here

                }
                Namespace ns = p.getNamespace();
                String val = p.getValue();
                String rl = ns.getResourceLocation();

                JDBMEquivalenceLookup jdbmLookup = lookups.get(rl);
                if (jdbmLookup == null) {
                    continue;
                }

                SkinnyUUID uuid = jdbmLookup.lookup(val);
                if (uuid != null) {
                    uuids.add(uuid);
                }
            }
        }

        // Establish set of UUIDs relevant for the proto-network
        Set<SkinnyUUID> pnUUIDs = new HashSet<SkinnyUUID>();
        ParameterTable paramTbl = network.getParameterTable();
        TableParameter[] paramArr = paramTbl.getTableParameterArray();
        for (final TableParameter tp : paramArr) {
            if (!validParameter(tp)) {
                continue;
            }
            TableNamespace namespace = tp.getNamespace();
            String value = tp.getValue();
            String rl = namespace.getResourceLocation();
            JDBMEquivalenceLookup jdbmLookup = lookups.get(rl);
            if (jdbmLookup == null) {
                continue;
            }

            SkinnyUUID uuid = jdbmLookup.lookup(value);
            if (uuid != null) {
                pnUUIDs.add(uuid);
            }
        }
View Full Code Here

        for (final TableEntry e : entries) {
            String val = e.value;
            String rl = e.resourceLocation;

            JDBMEquivalenceLookup jdbmLookup = lookups.get(rl);
            if (jdbmLookup == null) {
                continue;
            }

            SkinnyUUID uuid = jdbmLookup.lookup(val);
            if (uuid != null) {
                uuids.add(uuid);
            }
        }
View Full Code Here

        Map<String, JDBMEquivalenceLookup> lookups =
                sizedHashMap(equivs.size());
        for (final EquivalenceDataIndex edi : equivs) {
            String rl = edi.getNamespaceResourceLocation();
            DataFileIndex dfi = edi.getEquivalenceIndex();
            lookups.put(rl, new JDBMEquivalenceLookup(dfi.getIndexPath()));
        }

        // Open the indices
        for (final JDBMEquivalenceLookup jl : lookups.values()) {
            try {
                jl.open();
            } catch (IOException e) {
                result.addError(e.getMessage());
                result.setSuccess(false);
                return result;
            }
        }

        // Approach
        // 1.   map UUID to list of terms for proto network
        // 2.   iterate ortho statements
        //   a.   if valid orthologous statement
        //      -   lookup exact match of subject term in proto network, if found
        //          continue (orthologous statement intersects the proto network)
        //      -   find subject parameter's uuid in a proto network term, if found
        //          continue (orthologous statement intersects the proto network)
        //      -   lookup exact match of object term in proto network, if found
        //          continue (orthologous statement intersects the proto network)
        //      -   find object parameter's uuid in a proto network term, if found
        //          continue (orthologous statement intersects the proto network)

        // 1. Map parameter UUIDs to containing term ids in the proto network
        final TermTable tt = pn.getTermTable();
        final TermParameterMapTable tpmt = pn.getTermParameterMapTable();
        final ParameterTable pt = pn.getParameterTable();
        final Map<Integer, Integer> pglob = pt.getGlobalIndex();
        final Map<Integer, SkinnyUUID> puuid = pt.getGlobalUUIDs();
        final Set<Integer> tidset = tt.getIndexedTerms().keySet();
        final Map<SkinnyUUID, Set<Integer>> uuidterms = sizedHashMap(puuid
                .size());
        final Set<Term> pnterms = new HashSet<Term>(tt.getVisitedTerms().keySet());
        // for each term
        for (final Integer tid : tidset) {
            // get its parameters
            final List<Integer> pids = tpmt.getParameterIndexes(tid);
            for (final Integer pid : pids) {
                // find global parameter index for pid
                Integer globalpid = pglob.get(pid);

                // find UUID for global parameter index
                final SkinnyUUID pu = puuid.get(globalpid);

                // save this term to this UUID
                Set<Integer> terms = uuidterms.get(pu);
                if (terms == null) {
                    terms = new HashSet<Integer>();
                    uuidterms.put(pu, terms);
                }
                terms.add(tid);
            }
        }

        // get all statements in orthology document
        final List<Statement> orthoStmts = d.getAllStatements();
        // map them to statement groups for efficient pruning
        Map<StatementGroup, Set<Statement>> orthomap = d.mapStatements();

        // set up pruning result
        int total = orthoStmts.size();
        int pruned = 0;

        // establish cache to skinny uuids to avoid superfluous jdbm lookups
        final Map<Parameter, SkinnyUUID> paramcache = sizedHashMap(total * 2);

        // iterate all statements in the orthology document
        ORTHO_STATEMENT: for (final Statement orthoStmt : orthoStmts) {

            // rule out invalid or non-orthologous statements
            if (validOrthologousStatement(orthoStmt)) {
                // break down subject
                final Term sub = orthoStmt.getSubject();
                final FunctionEnum subf = sub.getFunctionEnum();
                final List<Parameter> subp = sub.getParameters();
                final Parameter subjectParam = subp.get(0);

                // break down object
                final Term obj = orthoStmt.getObject().getTerm();
                final FunctionEnum objf = obj.getFunctionEnum();
                final List<Parameter> objp = obj.getParameters();
                final Parameter objectParam = objp.get(0);

                // lookup exact match of subject term
                if (pnterms.contains(sub)) {
                    pnterms.add(obj);

                    continue;
                }

                // find UUID for subject parameter
                SkinnyUUID uuid = paramcache.get(subjectParam);
                if (uuid == null) {
                    final Namespace ns = subjectParam.getNamespace();
                    final JDBMEquivalenceLookup lookup = lookups.get(ns
                            .getResourceLocation());
                    if (lookup == null) {
                        continue;
                    }

                    uuid = lookup.lookup(subjectParam.getValue());
                    paramcache.put(subjectParam, uuid);
                }

                // if there is a proto network term with this UUID contained, then
                // this orthologous statement intersects the proto network, continue
                if (uuid != null) {
                    Set<Integer> tids = uuidterms.get(uuid);
                    if (hasItems(tids)) {
                        for (final Integer tid : tids) {
                            final Term t = tt.getIndexedTerms().get(tid);
                            if (t.getFunctionEnum() == subf) {
                                pnterms.add(sub);
                                pnterms.add(t);
                                pnterms.add(obj);

                                continue ORTHO_STATEMENT;
                            }
                        }
                    }
                }

                // lookup exact match of object term
                if (pnterms.contains(obj)) {
                    pnterms.add(sub);
                    continue;
                }

                // find UUID for object parameter
                uuid = paramcache.get(objectParam);
                if (uuid == null) {
                    final Namespace ns = objectParam.getNamespace();
                    final JDBMEquivalenceLookup lookup = lookups.get(ns
                            .getResourceLocation());
                    if (lookup == null) {
                        continue;
                    }

                    uuid = lookup.lookup(objectParam.getValue());
                    paramcache.put(objectParam, uuid);
                }

                // if there is a proto network term with this UUID contained, then
                // this orthologous statement intersects the proto network, continue
View Full Code Here

TOP

Related Classes of org.openbel.framework.core.indexer.JDBMEquivalenceLookup

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.