Package org.sleuthkit.autopsy.keywordsearch

Source Code of org.sleuthkit.autopsy.keywordsearch.TermComponentQuery

/*
* Autopsy Forensic Browser
*
* Copyright 2011-2014 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//
package org.sleuthkit.autopsy.keywordsearch;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import org.sleuthkit.autopsy.coreutils.Logger;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.response.TermsResponse;
import org.apache.solr.client.solrj.response.TermsResponse.Term;
import org.sleuthkit.autopsy.coreutils.Version;
import org.sleuthkit.datamodel.AbstractFile;
import org.sleuthkit.datamodel.BlackboardArtifact;
import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
import org.sleuthkit.datamodel.BlackboardAttribute;
import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
import org.sleuthkit.datamodel.TskException;

/**
* Performs a regular expression query to the SOLR/Lucene instance.
*/
class TermComponentQuery implements KeywordSearchQuery {

    private static final int TERMS_UNLIMITED = -1;
    //corresponds to field in Solr schema, analyzed with white-space tokenizer only
    private static final String TERMS_SEARCH_FIELD = Server.Schema.CONTENT_WS.toString();
    private static final String TERMS_HANDLER = "/terms"; //NON-NLS
    private static final int TERMS_TIMEOUT = 90 * 1000; //in ms
    private static final Logger logger = Logger.getLogger(TermComponentQuery.class.getName());
    private String queryEscaped;
    private final KeywordList keywordList;
    private final Keyword keyword;
    private boolean isEscaped;
    private List<Term> terms;
    private final List<KeywordQueryFilter> filters = new ArrayList<>();
    private String field;
    private static final int MAX_TERMS_RESULTS = 20000;
   
    private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);

    public TermComponentQuery(KeywordList keywordList, Keyword keyword) {
        this.field = null;
        this.keyword = keyword;
        this.keywordList = keywordList;
        this.queryEscaped = keyword.getQuery();
        isEscaped = false;
        terms = null;
    }

    @Override
    public void addFilter(KeywordQueryFilter filter) {
        this.filters.add(filter);
    }

    @Override
    public void setField(String field) {
        this.field = field;
    }

    @Override
    public void setSubstringQuery() {
        queryEscaped = ".*" + queryEscaped + ".*";
    }
   
    @Override
    public void escape() {
        queryEscaped = Pattern.quote(keyword.getQuery());
        isEscaped = true;
    }

    @Override
    public boolean validate() {
        if (queryEscaped.equals("")) {
            return false;
        }

        boolean valid = true;
        try {
            Pattern.compile(queryEscaped);
        } catch (PatternSyntaxException ex1) {
            valid = false;
        } catch (IllegalArgumentException ex2) {
            valid = false;
        }
        return valid;
    }

    @Override
    public boolean isEscaped() {
        return isEscaped;
    }

    @Override
    public boolean isLiteral() {
        return false;
    }

    /*
     * helper method to create a Solr terms component query
     */
    protected SolrQuery createQuery() {
        final SolrQuery q = new SolrQuery();
        q.setRequestHandler(TERMS_HANDLER);
        q.setTerms(true);
        q.setTermsLimit(TERMS_UNLIMITED);
        q.setTermsRegexFlag("case_insensitive"); //NON-NLS
        //q.setTermsLimit(200);
        //q.setTermsRegexFlag(regexFlag);
        //q.setTermsRaw(true);
        q.setTermsRegex(queryEscaped);
        q.addTermsField(TERMS_SEARCH_FIELD);
        q.setTimeAllowed(TERMS_TIMEOUT);

        return q;

    }

    /*
     * execute query and return terms, helper method
     */
    protected List<Term> executeQuery(SolrQuery q) throws NoOpenCoreException {
        try {
            Server solrServer = KeywordSearch.getServer();
            TermsResponse tr = solrServer.queryTerms(q);
            List<Term> termsCol = tr.getTerms(TERMS_SEARCH_FIELD);
            return termsCol;
        } catch (KeywordSearchModuleException ex) {
            logger.log(Level.WARNING, "Error executing the regex terms query: " + keyword.getQuery(), ex); //NON-NLS
            return null//no need to create result view, just display error dialog
        }
    }

    @Override
    public String getEscapedQueryString() {
        return this.queryEscaped;
    }

    @Override
    public String getQueryString() {
        return keyword.getQuery();
    }

   

    @Override
    public KeywordCachedArtifact writeSingleFileHitsToBlackBoard(String termHit, AbstractFile newFsHit, String snippet, String listName) {
        final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();

        //there is match actually in this file, create artifact only then
        BlackboardArtifact bba;
        KeywordCachedArtifact writeResult;
        Collection<BlackboardAttribute> attributes = new ArrayList<>();
        try {
            bba = newFsHit.newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
            writeResult = new KeywordCachedArtifact(bba);
        } catch (Exception e) {
            logger.log(Level.WARNING, "Error adding bb artifact for keyword hit", e); //NON-NLS
            return null;
        }

        //regex match
        attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID(), MODULE_NAME, termHit));
       
        if ((listName != null) && (listName.equals("") == false)) {
            attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME.getTypeID(), MODULE_NAME, listName));
        }
       
        //preview
        if (snippet != null) {
            attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID(), MODULE_NAME, snippet));
        }
        //regex keyword
        attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID(), MODULE_NAME, keyword.getQuery()));

        try {
            bba.addAttributes(attributes);
            writeResult.add(attributes);
            return writeResult;
        } catch (TskException e) {
            logger.log(Level.WARNING, "Error adding bb attributes for terms search artifact", e); //NON-NLS
        }

        return null;
    }

    @Override
    public QueryResults performQuery() throws NoOpenCoreException {
       
        final SolrQuery q = createQuery();
        q.setShowDebugInfo(DEBUG);
        q.setTermsLimit(MAX_TERMS_RESULTS);
        logger.log(Level.INFO, "Query: {0}", q.toString()); //NON-NLS
        terms = executeQuery(q);

        QueryResults results = new QueryResults(this, keywordList);
        int resultSize = 0;
       
        for (Term term : terms) {
            final String termStr = KeywordSearchUtil.escapeLuceneQuery(term.getTerm());
           
            LuceneQuery filesQuery = new LuceneQuery(keywordList, new Keyword(termStr, true));
           
            //filesQuery.setField(TERMS_SEARCH_FIELD);
            for (KeywordQueryFilter filter : filters) {
                //set filter
                //note: we can't set filter query on terms query
                //but setting filter query on terms results query will yield the same result
                filesQuery.addFilter(filter);
            }
            try {
                QueryResults subResults = filesQuery.performQuery();
                Set<ContentHit> filesResults = new HashSet<>();
                for (Keyword key : subResults.getKeywords()) {
                    List<ContentHit> keyRes = subResults.getResults(key);
                    resultSize += keyRes.size();
                    filesResults.addAll(keyRes);
                }
                results.addResult(new Keyword(term.getTerm(), false), new ArrayList<>(filesResults));
            } catch (NoOpenCoreException e) {
                logger.log(Level.WARNING, "Error executing Solr query,", e); //NON-NLS
                throw e;
            } catch (RuntimeException e) {
                logger.log(Level.WARNING, "Error executing Solr query,", e); //NON-NLS
            }

        }
       
        //TODO limit how many results we store, not to hit memory limits
        logger.log(Level.INFO, "Regex # results: {0}", resultSize); //NON-NLS

        return results;
    }
   
    @Override
    public KeywordList getKeywordList() {
        return keywordList;
    }
}
TOP

Related Classes of org.sleuthkit.autopsy.keywordsearch.TermComponentQuery

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.