Package eu.isas.peptideshaker.fileimport

Source Code of eu.isas.peptideshaker.fileimport.FileImporter

package eu.isas.peptideshaker.fileimport;

import com.compomics.util.preferences.IdFilter;
import eu.isas.peptideshaker.gui.MgfFilesNotFoundDialog;
import com.compomics.util.experiment.ProteomicAnalysis;
import com.compomics.util.experiment.identification.*;
import com.compomics.util.experiment.identification.matches.SpectrumMatch;
import com.compomics.util.experiment.io.identifications.IdfileReader;
import com.compomics.util.experiment.io.identifications.IdfileReaderFactory;
import com.compomics.software.CompomicsWrapper;
import com.compomics.util.Util;
import com.compomics.util.exceptions.ExceptionHandler;
import com.compomics.util.exceptions.exception_handlers.CommandLineExceptionHandler;
import com.compomics.util.experiment.identification.protein_inference.proteintree.ProteinTree;
import com.compomics.util.experiment.identification.protein_inference.proteintree.ProteinTreeComponentsFactory;
import com.compomics.util.experiment.massspectrometry.Spectrum;
import com.compomics.util.experiment.massspectrometry.SpectrumFactory;
import com.compomics.util.exceptions.exception_handlers.FrameExceptionHandler;
import com.compomics.util.exceptions.exception_handlers.WaitingDialogExceptionHandler;
import com.compomics.util.gui.JOptionEditorPane;
import eu.isas.peptideshaker.PeptideShaker;
import com.compomics.util.waiting.WaitingHandler;
import com.compomics.util.gui.waiting.waitinghandlers.WaitingDialog;
import com.compomics.util.memory.MemoryConsumptionStatus;
import com.compomics.util.preferences.AnnotationPreferences;
import com.compomics.util.preferences.PTMScoringPreferences;
import com.compomics.util.preferences.ProcessingPreferences;
import com.compomics.util.preferences.SequenceMatchingPreferences;
import com.compomics.util.preferences.UtilitiesUserPreferences;
import eu.isas.peptideshaker.preferences.ProjectDetails;
import eu.isas.peptideshaker.preferences.SpectrumCountingPreferences;
import eu.isas.peptideshaker.protein_inference.PeptideMapper;
import eu.isas.peptideshaker.protein_inference.TagMapper;
import eu.isas.peptideshaker.scoring.InputMap;
import eu.isas.peptideshaker.utils.Metrics;
import org.xml.sax.SAXException;
import uk.ac.ebi.jmzml.xml.io.MzMLUnmarshallerException;

import javax.swing.*;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.sql.SQLException;
import java.util.*;

/**
* This class is responsible for the import of identifications.
*
* @author Marc Vaudel
* @author Harald Barsnes
*/
public class FileImporter {

    /**
     * The class which will load the information into the various maps and do
     * the associated calculations.
     */
    private PeptideShaker peptideShaker;
    /**
     * The current proteomicAnalysis.
     */
    private ProteomicAnalysis proteomicAnalysis;
    /**
     * The identification filter to use.
     */
    private IdFilter idFilter;
    /**
     * A dialog to display feedback to the user.
     */
    private WaitingHandler waitingHandler;
    /**
     * An exception handler to handle exceptions.
     */
    private ExceptionHandler exceptionHandler;
    /**
     * The spectrum factory.
     */
    private SpectrumFactory spectrumFactory = SpectrumFactory.getInstance(100);
    /**
     * The sequence factory.
     */
    private SequenceFactory sequenceFactory = SequenceFactory.getInstance(30000);
    /**
     * If a Mascot dat file is bigger than this size, an indexed parsing will be
     * used.
     */
    public static final double mascotMaxSize = 400;
    /**
     * Metrics of the dataset picked-up while loading the data.
     */
    private Metrics metrics;
    /**
     * The mass tolerance used when matching the PTMs reported by the search
     * engines against the expected PTMs. Set to 0.01, which the original
     * author recalls being the mass resolution of X!Tandem result files.
     */
    public static final double ptmMassTolerance = 0.01;
    /**
     * The protein tree used to map peptides on protein sequences.
     */
    private ProteinTree proteinTree;
    /**
     * Suffix for folders where the content of zip files should be extracted.
     */
    public final static String tempFolderName = "PeptideShaker_temp";

    /**
     * Constructor for the importer.
     *
     * @param identificationShaker the identification shaker which will load the
     * data into the maps and do the preliminary calculations
     * @param waitingHandler The handler displaying feedback to the user
     * @param proteomicAnalysis The current proteomic analysis
     * @param idFilter The identification filter to use
     * @param metrics metrics of the dataset to be saved for the GUI
     */
    public FileImporter(PeptideShaker identificationShaker, WaitingHandler waitingHandler, ProteomicAnalysis proteomicAnalysis, IdFilter idFilter, Metrics metrics) {
        this.peptideShaker = identificationShaker;
        this.waitingHandler = waitingHandler;
        this.proteomicAnalysis = proteomicAnalysis;
        this.idFilter = idFilter;
        this.metrics = metrics;
        if (waitingHandler instanceof WaitingDialog) {
            exceptionHandler = new WaitingDialogExceptionHandler((WaitingDialog) waitingHandler, "http://code.google.com/p/peptide-shaker/issues/list");
        } else {
            exceptionHandler = new CommandLineExceptionHandler();
        }
    }

    /**
     * Imports the identification from files.
     *
     * @param idFiles the identification files to import the Ids from
     * @param spectrumFiles the files where the corresponding spectra can be
     * imported
     * @param searchParameters the search parameters
     * @param annotationPreferences the annotation preferences to use for PTM
     * scoring
     * @param processingPreferences the processing preferences
     * @param ptmScoringPreferences the PTM scoring preferences
     * @param spectrumCountingPreferences the spectrum counting preferences
     * @param sequenceMatchingPreferences the sequence matching preferences
     * @param projectDetails the project details
     * @param backgroundThread boolean indicating whether the import should be
     * done in a background thread (GUI mode) or in the current thread (command
     * line mode).
     */
    public void importFiles(ArrayList<File> idFiles, ArrayList<File> spectrumFiles, SearchParameters searchParameters,
            AnnotationPreferences annotationPreferences, ProcessingPreferences processingPreferences, PTMScoringPreferences ptmScoringPreferences,
            SpectrumCountingPreferences spectrumCountingPreferences, SequenceMatchingPreferences sequenceMatchingPreferences, ProjectDetails projectDetails, boolean backgroundThread) {

        IdProcessorFromFile idProcessor = new IdProcessorFromFile(idFiles, spectrumFiles, idFilter, searchParameters, annotationPreferences,
                processingPreferences, ptmScoringPreferences, spectrumCountingPreferences, sequenceMatchingPreferences, projectDetails);

        if (backgroundThread) {
            idProcessor.execute();
        } else {
            idProcessor.importFiles();
        }
    }

    /**
     * Imports sequences from a FASTA file.
     *
     * @param waitingHandler the handler displaying feedback to the user
     * @param fastaFile FASTA file to process
     */
    public void importSequences(WaitingHandler waitingHandler, File fastaFile) {

        try {
            waitingHandler.appendReport("Importing sequences from " + fastaFile.getName() + ".", true, true);
            waitingHandler.setSecondaryProgressCounterIndeterminate(false);
            sequenceFactory.loadFastaFile(fastaFile, waitingHandler);

            if (waitingHandler.isRunCanceled()) {
                return;
            }

            waitingHandler.resetSecondaryProgressCounter();
            waitingHandler.setSecondaryProgressCounterIndeterminate(true);

            UtilitiesUserPreferences userPreferences = UtilitiesUserPreferences.loadUserPreferences();
            int memoryPreference = userPreferences.getMemoryPreference();
            long fileSize = fastaFile.length();
            int fileSizeInMb = Math.max((int) fileSize / 1048576, 1);
            long nSequences;
            if (!sequenceFactory.isDefaultReversed() || fileSizeInMb < memoryPreference / 4) {
                nSequences = sequenceFactory.getNSequences();
                sequenceFactory.setDecoyInMemory(true);
            } else {
                nSequences = sequenceFactory.getNTargetSequences();
                sequenceFactory.setDecoyInMemory(false);
            }
            long sequencesPerMb = nSequences / fileSizeInMb;
            long availableCachSize = 3 * memoryPreference * sequencesPerMb / 4;
            if (availableCachSize > nSequences) {
                availableCachSize = nSequences;
            } else {
                waitingHandler.appendReport("Warning: PeptideShaker cannot load your FASTA file into memory. This will slow down the processing. "
                        + "Note that using large large databases also reduces the search engine efficiency. "
                        + "Try to either (i) use a smaller database, (ii) increase the memory provided to PeptideShaker, or (iii) improve the reading speed by using an SSD disc. "
                        + "(See also http://code.google.com/p/compomics-utilities/wiki/ProteinInference.)", true, true);

            }
            int cacheSize = (int) availableCachSize;
            sequenceFactory.setnCache(cacheSize);

            try {
                proteinTree = sequenceFactory.getDefaultProteinTree(waitingHandler);
            } catch (SQLException e) {
                waitingHandler.appendReport("Database " + sequenceFactory.getCurrentFastaFile().getName() + " could not be accessed, make sure that the file is not used by another program.", true, true);
                e.printStackTrace();
                waitingHandler.setRunCanceled();
            }

            if (!waitingHandler.isRunCanceled()) {
                waitingHandler.appendReport("FASTA file import completed.", true, true);
                waitingHandler.increasePrimaryProgressCounter();
            } else {
                sequenceFactory.clearFactory();
            }

        } catch (FileNotFoundException e) {
            System.err.println("File " + fastaFile + " was not found. Please select a different FASTA file.");
            e.printStackTrace();
            waitingHandler.setRunCanceled();
            waitingHandler.appendReport("File " + fastaFile + " was not found. Please select a different FASTA file.", true, true);
        } catch (IOException e) {
            System.err.println("An error occurred while indexing " + fastaFile + ".");
            e.printStackTrace();
            waitingHandler.setRunCanceled();
            waitingHandler.appendReport("An error occurred while indexing " + fastaFile + ": " + e.getMessage(), true, true);
        } catch (SQLException e) {
            System.err.println("An error occurred while indexing " + fastaFile + ".");
            e.printStackTrace();
            waitingHandler.setRunCanceled();
            waitingHandler.appendReport("An error occurred while indexing " + fastaFile + ": " + e.getMessage(), true, true);
        } catch (InterruptedException e) {
            System.err.println("An error occurred while loading " + fastaFile + ".");
            e.printStackTrace();
            waitingHandler.setRunCanceled();
            waitingHandler.appendReport("An error occurred while loading " + fastaFile + ": " + e.getMessage(), true, true);
        } catch (IllegalArgumentException e) {
            System.err.println("An error occurred while loading " + fastaFile + ".");
            e.printStackTrace();
            waitingHandler.setRunCanceled();
            waitingHandler.appendReport(e.getLocalizedMessage() + " Please refer to http://code.google.com/p/peptide-shaker/#Troubleshooting", true, true);
        } catch (ClassNotFoundException e) {
            System.err.println("An error occurred while loading " + fastaFile + ".");
            e.printStackTrace();
            waitingHandler.setRunCanceled();
            waitingHandler.appendReport("Serialization issue while processing the FASTA file. Please delete the .fasta.cui file and retry. "
                    + "If the error occurs again please report bug using our issue tracker: http://code.google.com/p/peptide-shaker/issues/list.", true, true);
        } catch (NullPointerException e) {
            System.err.println("An error occurred while loading " + fastaFile + ".");
            e.printStackTrace();
            waitingHandler.setRunCanceled();
            waitingHandler.appendReport("An error occurred when importing the sequences. "
                    + "Please check the Search Parameters. See the log file for details. "
                    + "If the error persists please let us know using our issue tracker: http://code.google.com/p/peptide-shaker/issues/list.", true, true);
        }
    }

    /**
     * Worker which loads identification from a file and processes them while
     * giving feedback to the user.
     */
    private class IdProcessorFromFile extends SwingWorker {

        /**
         * The identification file reader factory of compomics utilities.
         */
        private IdfileReaderFactory readerFactory = IdfileReaderFactory.getInstance();
        /**
         * The list of identification files.
         */
        private ArrayList<File> idFiles;
        /**
         * The FASTA file.
         */
        private File fastaFile;
        /**
         * A list of spectrum files (can be empty, no spectrum will be
         * imported).
         */
        private HashMap<String, File> spectrumFiles;
        /**
         * The identification filter.
         */
        private IdFilter idFilter;
        /**
         * The search parameters.
         */
        private SearchParameters searchParameters;
        /**
         * The annotation preferences to use for PTM scoring.
         */
        private AnnotationPreferences annotationPreferences;
        /**
         * The processing preferences.
         */
        private ProcessingPreferences processingPreferences;
        /**
         * The PTM scoring preferences.
         */
        private PTMScoringPreferences ptmScoringPreferences;
        /**
         * The project details.
         */
        private ProjectDetails projectDetails;
        /**
         * The spectrum counting preferences.
         */
        private SpectrumCountingPreferences spectrumCountingPreferences;
        /**
         * The sequence matching preferences.
         */
        private SequenceMatchingPreferences sequenceMatchingPreferences;
        /**
         * The number of retained first hits.
         */
        private long nRetained = 0;
        /**
         * The number of spectra.
         */
        private long nSpectra = 0;
        /**
         * List of the mgf files used.
         */
        private ArrayList<String> mgfUsed = new ArrayList<String>();
        /**
         * Map of the missing mgf files indexed by identification file.
         */
        private HashMap<File, String> missingMgfFiles = new HashMap<File, String>();
        /**
         * The input map.
         */
        private InputMap inputMap = new InputMap();
        /**
         * Boolean indicating whether we can display GUI stuff.
         */
        private boolean hasGUI = false;
        /**
         * The database connection.
         */
        private Identification identification;
        /**
         * A peptide to protein mapper.
         */
        private PeptideMapper peptideMapper;
        /**
         * A tag to protein mapper.
         */
        private TagMapper tagMapper = null;
        /**
         * List of one hit wonders.
         */
        private HashSet<String> singleProteinList = new HashSet<String>();
        /**
         * Map of proteins found several times with the number of times they
         * appeared as first hit.
         */
        private HashMap<String, Integer> proteinCount = new HashMap<String, Integer>();
        /**
         * The number of first hits.
         */
        private long nPSMs = 0;
        /**
         * The number of secondary hits.
         */
        private long nSecondary = 0;

        /**
         * Constructor for a worker importing matches from a list of files.
         *
         * @param idFiles list of identification files from where matches should
         * be imported
         * @param spectrumFiles list of spectrum files where the searched
         * spectra can be found
         * @param idFilter the matches filter to use
         * @param searchParameters the identification parameters
         * @param annotationPreferences the annotation preferences
         * @param processingPreferences the processing preferences
         * @param ptmScoringPreferences the PTM localization scoring preferences
         * @param spectrumCountingPreferences the spectrum counting preferences
         * @param sequenceMatchingPreferences the sequence matching preferences
         * @param projectDetails the project details
         */
        public IdProcessorFromFile(ArrayList<File> idFiles, ArrayList<File> spectrumFiles, IdFilter idFilter,
                SearchParameters searchParameters, AnnotationPreferences annotationPreferences, ProcessingPreferences processingPreferences,
                PTMScoringPreferences ptmScoringPreferences, SpectrumCountingPreferences spectrumCountingPreferences, SequenceMatchingPreferences sequenceMatchingPreferences, ProjectDetails projectDetails) {

            this.idFiles = new ArrayList<File>();
            HashMap<String, File> filesMap = new HashMap<String, File>();

            for (File file : idFiles) {
                filesMap.put(file.getName(), file);
            }

            ArrayList<String> names = new ArrayList<String>(filesMap.keySet());
            Collections.sort(names);

            // Process sequencing files first, they need much more memory. TODO: make something more generic?
            for (String name : names) {
                if (name.endsWith("tags")) {
                    this.idFiles.add(filesMap.get(name));
                }
            }
            for (String name : names) {
                File file = filesMap.get(name);
                if (!this.idFiles.contains(file)) {
                    this.idFiles.add(file);
                }
            }

            this.spectrumFiles = new HashMap<String, File>();
            this.fastaFile = searchParameters.getFastaFile();
            this.idFilter = idFilter;
            this.searchParameters = searchParameters;
            this.annotationPreferences = annotationPreferences;
            this.processingPreferences = processingPreferences;
            this.ptmScoringPreferences = ptmScoringPreferences;
            this.spectrumCountingPreferences = spectrumCountingPreferences;
            this.projectDetails = projectDetails;
            this.sequenceMatchingPreferences = sequenceMatchingPreferences;

            for (File file : spectrumFiles) {
                this.spectrumFiles.put(file.getName(), file);
            }

            UtilitiesUserPreferences userPreferences = UtilitiesUserPreferences.loadUserPreferences();
            if (userPreferences.getMemoryPreference() > 2000) {
                peptideMapper = new PeptideMapper(sequenceMatchingPreferences, idFilter, waitingHandler, exceptionHandler);
            }
        }

        @Override
        protected Object doInBackground() throws Exception {
            hasGUI = true;
            return importFiles();
        }

        /**
         * Imports the identifications from the files given to the worker.
         * <p>
         * The FASTA file is loaded first, then the connection to the
         * identification database is set up and every identification file is
         * parsed via {@code importPsms}. While identification files are listed
         * in {@code missingMgfFiles}, they are either reprocessed after the
         * missing files dialog was shown (GUI mode) or the import is aborted (no
         * GUI). If at least one first hit was retained, the results are handed
         * over to {@code peptideShaker.processIdentifications}.
         * <p>
         * Exceptions and out of memory errors are caught here: they are
         * reported to the waiting handler, the run is canceled and 1 is
         * returned.
         *
         * @return 0 if success, 1 if not
         */
        public int importFiles() {

            try {
                // load the protein sequences; failures cancel the run instead of throwing
                importSequences(waitingHandler, fastaFile);

                if (waitingHandler.isRunCanceled()) {
                    return 1;
                }

                waitingHandler.setSecondaryProgressCounterIndeterminate(true);
                waitingHandler.appendReport("Establishing local database connection.", true, true);

                identification = proteomicAnalysis.getIdentification(IdentificationMethod.MS2_IDENTIFICATION);
                identification.setIsDB(true);

                // on an SQLException connectToIdDb() reverts the flag with setIsDB(false)
                connectToIdDb(identification);

                waitingHandler.increasePrimaryProgressCounter();

                // NOTE(review): if the run is already canceled at this point the whole block is
                // skipped and the method returns 0 - confirm that this is intended
                if (!waitingHandler.isRunCanceled()) {

                    waitingHandler.appendReport("Reading identification files.", true, true);

                    // first pass over all identification files, in the order set by the constructor
                    for (File idFile : idFiles) {
                        importPsms(idFile);

                        // NOTE(review): unlike the cancellation paths below, this one does not call
                        // sequenceFactory.clearFactory() - confirm whether that is intended
                        if (waitingHandler.isRunCanceled()) {
                            identification.close();
                            return 1;
                        }
                    }

                    // NOTE(review): missingMgfFiles is not filled in the visible part of the class,
                    // presumably this is done while importing the spectra - confirm
                    while (!missingMgfFiles.isEmpty()) {
                        if (hasGUI) {
                            // NOTE(review): the dialog presumably blocks until the user has provided the
                            // files or canceled - confirm against MgfFilesNotFoundDialog
                            new MgfFilesNotFoundDialog((WaitingDialog) waitingHandler, missingMgfFiles);
                            if (waitingHandler.isRunCanceled()) {
                                identification.close();
                                sequenceFactory.clearFactory();
                                return 1;
                            }
                        } else {
                            // no GUI to ask the user: report the comma separated names of the map keys and abort
                            String missingFiles = "";
                            boolean first = true;
                            for (File mgfFile : missingMgfFiles.keySet()) {
                                if (first) {
                                    first = false;
                                } else {
                                    missingFiles += ", ";
                                }
                                missingFiles += mgfFile.getName();
                            }
                            waitingHandler.appendReport("MGF files missing: " + missingFiles, true, true);
                            identification.close();
                            sequenceFactory.clearFactory();
                            return 1;
                        }
                        waitingHandler.appendReport("Processing files with the new input.", true, true);
                        // copy the keys before the map is cleared below
                        ArrayList<File> filesToProcess = new ArrayList<File>(missingMgfFiles.keySet());

                        // register the spectrum files resolved by the spectrum factory
                        for (String mgfName : missingMgfFiles.values()) {
                            File newFile = spectrumFactory.getSpectrumFileFromIdName(mgfName);
                            spectrumFiles.put(newFile.getName(), newFile);
                            projectDetails.addSpectrumFile(newFile);
                        }
                        // cleared before reprocessing so that the loop only repeats for files reported missing again
                        missingMgfFiles.clear();
                        for (File idFile : filesToProcess) {
                            importPsms(idFile);
                        }
                        // cancellation is only checked once all files of this round were reprocessed
                        if (waitingHandler.isRunCanceled()) {
                            identification.close();
                            sequenceFactory.clearFactory();
                            return 1;
                        }
                    }

                    // clear the objects not needed anymore
                    singleProteinList.clear();
                    sequenceFactory.emptyCache();

                    // nothing retained: report it and stop the run
                    if (nRetained == 0) {
                        waitingHandler.appendReport("No identifications retained.", true, true);
                        waitingHandler.setRunCanceled();
                        identification.close();
                        sequenceFactory.clearFactory();
                        return 1;
                    }

                    waitingHandler.appendReport("File import completed. "
                            + nPSMs + " first hits imported (" + nSecondary + " secondary) from " + nSpectra + " spectra.", true, true);
                    waitingHandler.appendReport("[" + nRetained + " first hits passed the initial filtering]", true, true);
                    // advance the progress by the number of spectrum files not listed in mgfUsed
                    waitingHandler.increaseSecondaryProgressCounter(spectrumFiles.size() - mgfUsed.size());
                    peptideShaker.setProteinCountMap(proteinCount);
                    // hand the imported matches over to the shaker for processing
                    peptideShaker.processIdentifications(inputMap, waitingHandler, searchParameters, annotationPreferences,
                            idFilter, processingPreferences, ptmScoringPreferences, spectrumCountingPreferences, projectDetails, sequenceMatchingPreferences);
                }
            } catch (OutOfMemoryError error) {

                // the tagged line on the standard output is presumably parsed by wrapping tools - confirm
                System.out.println("<CompomicsError>PeptideShaker ran out of memory! See the PeptideShaker log for details.</CompomicsError>");
                System.err.println("Ran out of memory!");
                System.err.println("Memory given to the Java virtual machine: " + Runtime.getRuntime().maxMemory() + ".");
                System.err.println("Memory used by the Java virtual machine: " + Runtime.getRuntime().totalMemory() + ".");
                System.err.println("Free memory in the Java virtual machine: " + Runtime.getRuntime().freeMemory() + ".");

                // request a garbage collection before reporting to the user
                Runtime.getRuntime().gc();
                waitingHandler.appendReportEndLine();
                waitingHandler.appendReport("Ran out of memory!", true, true);
                waitingHandler.setRunCanceled();

                // only show the dialog when a waiting dialog is available as parent
                if (waitingHandler instanceof WaitingDialog) {
                    JOptionPane.showMessageDialog((WaitingDialog) waitingHandler, JOptionEditorPane.getJOptionEditorPane(
                            "PeptideShaker used up all the available memory and had to be stopped.<br>"
                            + "Memory boundaries are changed in the the Welcome Dialog (Settings<br>"
                            + "& Help > Settings > Java Memory Settings) or in the Edit menu (Edit<br>"
                            + "Java Options). See also <a href=\"http://code.google.com/p/compomics-utilities/wiki/JavaTroubleShooting\">JavaTroubleShooting</a>."),
                            "Out Of Memory", JOptionPane.ERROR_MESSAGE);
                }

                error.printStackTrace();
                // identification is null when the error occurred before it was retrieved
                if (identification != null) {
                    try {
                        identification.close();
                        sequenceFactory.clearFactory();
                    } catch (SQLException ex) {
                        ex.printStackTrace();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
                return 1;

            } catch (Exception e) {
                waitingHandler.setRunCanceled();

                System.out.println("<CompomicsError>PeptideShaker processing failed. See the PeptideShaker log for details.</CompomicsError>");

                // select the user feedback according to the type of exception
                if (e instanceof NullPointerException) {
                    waitingHandler.appendReport("An error occurred while loading the identification files.", true, true);
                    waitingHandler.appendReport("Please see the error log (Help Menu > Bug Report) for details.", true, true);
                } else if (FrameExceptionHandler.getExceptionType(e).equalsIgnoreCase("Protein not found")) {
                    waitingHandler.appendReport("An error occurred while loading the identification files:", true, true);
                    waitingHandler.appendReport(e.getLocalizedMessage(), true, true);
                    waitingHandler.appendReport("Please see http://code.google.com/p/searchgui/wiki/DatabaseHelp.", true, true);
                } else {
                    waitingHandler.appendReport("An error occurred while loading the identification files:", true, true);
                    waitingHandler.appendReport(e.getLocalizedMessage(), true, true);
                }

                e.printStackTrace();
                System.err.println("Free memory: " + Runtime.getRuntime().freeMemory());

                // best effort clean-up, failures are only printed
                if (identification != null) {
                    try {
                        identification.close();
                        sequenceFactory.clearFactory();
                    } catch (SQLException ex) {
                        ex.printStackTrace();
                    } catch (IOException ex) {
                        ex.printStackTrace();
                    }
                }

                return 1;
            }

            return 0;
        }

        /**
         * Establishes a connection to the identification database.
         *
         * @param identification
         */
        private void connectToIdDb(Identification identification) {
            try {
                String dbFolder = PeptideShaker.getSerializationDirectory(getJarFilePath()).getAbsolutePath();
                identification.establishConnection(dbFolder, true, peptideShaker.getCache());
            } catch (SQLException e) {
                e.printStackTrace();
                waitingHandler.appendReport("The match database could not be created, serialized matches will be used instead. "
                        + "Please let us know using our issue tracker: http://code.google.com/p/peptide-shaker/issues/list.", true, true);
                identification.setIsDB(false);
            }
        }

        /**
         * Imports the PSMs from an identification file.
         *
         * @param idFile the identification file
         * @throws FileNotFoundException exception thrown whenever a file was
         * not found
         * @throws IOException exception thrown whenever an error occurred while
         * reading or writing a file
         * @throws SAXException exception thrown whenever an error occurred
         * while parsing an XML file
         * @throws MzMLUnmarshallerException exception thrown whenever an error
         * occurred while reading an mzML file
         * @throws OutOfMemoryError thrown if the parser of the id files runs
         * out of memory
         */
        public void importPsms(File idFile) throws FileNotFoundException, IOException, SAXException, MzMLUnmarshallerException, IllegalArgumentException, Exception, OutOfMemoryError {

            identification = proteomicAnalysis.getIdentification(IdentificationMethod.MS2_IDENTIFICATION);
            waitingHandler.setSecondaryProgressCounterIndeterminate(true);
            waitingHandler.appendReport("Parsing " + idFile.getName() + ".", true, true);

            IdfileReader fileReader = null;
            try {
                fileReader = readerFactory.getFileReader(idFile);
            } catch (OutOfMemoryError error) {
                waitingHandler.appendReport("Ran out of memory when parsing \'" + Util.getFileName(idFile) + "\'.", true, true);
                throw new OutOfMemoryError("Ran out of memory when parsing \'" + Util.getFileName(idFile) + "\'.");
            }

            if (fileReader == null) {
                waitingHandler.appendReport("Identification result file \'" + Util.getFileName(idFile) + "\' not recognized.", true, true);
                waitingHandler.setRunCanceled();
                return;
            }

            // Clear cache for sequencing files. TODO: make something more generic?
            if (idFile.getName().endsWith("tags")) {
                if (tagMapper == null) {
                    tagMapper = new TagMapper(proteinTree, searchParameters, sequenceMatchingPreferences, annotationPreferences, exceptionHandler);
                }
                if (!peptideShaker.getCache().isEmpty()) {
                    peptideShaker.getCache().reduceMemoryConsumption(0.9, waitingHandler);
                }
            }

            waitingHandler.setSecondaryProgressCounterIndeterminate(false);

            LinkedList<SpectrumMatch> idFileSpectrumMatches = null;
            try {
                if (peptideMapper != null && !peptideMapper.isCanceled()) {
                    idFileSpectrumMatches = fileReader.getAllSpectrumMatches(waitingHandler, sequenceMatchingPreferences, true);
                } else {
                    idFileSpectrumMatches = fileReader.getAllSpectrumMatches(waitingHandler, null, true);
                }
            } catch (Exception e) {
                waitingHandler.appendReport("An error occurred while loading spectrum matches from \'"
                        + Util.getFileName(idFile)
                        + "\'. This file will be ignored. Error: " + e.getMessage()
                        + " See resources/PeptideShaker.log for details.", true, true);
                e.printStackTrace();
            }

            // set the search engine name and version for this file
            HashMap<String, ArrayList<String>> software = fileReader.getSoftwareVersions();
            projectDetails.setIdentificationAlgorithmsForFile(Util.getFileName(idFile), software);

            fileReader.close();

            if (idFileSpectrumMatches != null) {

                boolean allLoaded = true;
                int numberOfMatches = idFileSpectrumMatches.size();
                waitingHandler.setMaxSecondaryProgressCounter(numberOfMatches);
                waitingHandler.appendReport("Loading spectra for " + idFile.getName() + ".", true, true);
                for (SpectrumMatch spectrumMatch : idFileSpectrumMatches) {
                    if (!importSpectrum(idFile, spectrumMatch, numberOfMatches)) {
                        allLoaded = false;
                        String fileName = Spectrum.getSpectrumFile(spectrumMatch.getKey());
                        waitingHandler.appendReport(fileName + " missing.", true, true);
                    }
                    waitingHandler.increaseSecondaryProgressCounter();
                }

                if (allLoaded) {

                    // Map spectrum sequencing matches on protein sequences
                    if (tagMapper != null) {
                        tagMapper.mapTags(fileReader, waitingHandler, processingPreferences.getnThreads());
                    }

                    // Map the peptides on protein sequences
                    if (peptideMapper != null) {
                        try {
                            if (!peptideMapper.isCanceled()) {
                                peptideMapper.mapPeptides(fileReader.getPeptidesMap(), sequenceMatchingPreferences, idFilter, processingPreferences.getnThreads(), waitingHandler);
                            }
                            if (peptideMapper.isCanceled()) {
                                fileReader.clearPeptidesMap();
                            }
                        } catch (OutOfMemoryError e) {
                            e.printStackTrace();
                            fileReader.clearPeptidesMap();
                        }
                    }
                    // empty protein caches
                    if (MemoryConsumptionStatus.memoryUsed() > 0.8) {
                        ProteinTreeComponentsFactory.getInstance().getCache().reduceMemoryConsumption(1, null);
                        sequenceFactory.reduceNodeCacheSize(1);
                    }

                    waitingHandler.setMaxSecondaryProgressCounter(numberOfMatches);
                    waitingHandler.appendReport("Importing PSMs from " + idFile.getName(), true, true);

                    PsmImporter psmImporter = new PsmImporter(peptideShaker.getCache(), idFilter, sequenceMatchingPreferences, searchParameters, processingPreferences, fileReader, idFile, identification,
                            inputMap, proteinCount, singleProteinList, exceptionHandler);
                    psmImporter.importPsms(idFileSpectrumMatches, processingPreferences.getnThreads(), waitingHandler);

                    nPSMs += psmImporter.getnPSMs();
                    nSecondary += psmImporter.getnSecondary();
                    nRetained += psmImporter.getnRetained();

                    metrics.addFoundCharges(psmImporter.getCharges());
                    if (psmImporter.getMaxPeptideErrorDa() > metrics.getMaxPeptidePrecursorErrorDa()) {
                        metrics.setMaxPeptidePrecursorErrorDa(psmImporter.getMaxPeptideErrorDa());
                    }
                    if (psmImporter.getMaxPeptideErrorPpm() > metrics.getMaxPeptidePrecursorErrorPpm()) {
                        metrics.setMaxPeptidePrecursorErrorPpm(psmImporter.getMaxPeptideErrorPpm());
                    }
                    if (psmImporter.getMaxTagErrorDa() > metrics.getMaxTagPrecursorErrorDa()) {
                        metrics.setMaxTagPrecursorErrorDa(psmImporter.getMaxTagErrorDa());
                    }
                    if (psmImporter.getMaxTagErrorPpm() > metrics.getMaxTagPrecursorErrorPpm()) {
                        metrics.setMaxTagPrecursorErrorPpm(psmImporter.getMaxTagErrorPpm());
                    }

                    // Free at least 0.5GB for the next parser if not anymore available
                    if (!MemoryConsumptionStatus.halfGbFree() && !peptideShaker.getCache().isEmpty()) {
                        waitingHandler.appendReport("PeptideShaker is encountering memory issues! "
                                + "See http://peptide-shaker.googlecode.com for help.", true, true);
                        waitingHandler.appendReport("Reducing Memory Consumption.", true, true);
                        waitingHandler.setSecondaryProgressCounterIndeterminate(false);
                        double share = ((double) 1073741824) / (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory());
                        share = Math.min(share, 1);
                        peptideShaker.getCache().reduceMemoryConsumption(share, waitingHandler);
                        waitingHandler.setSecondaryProgressCounterIndeterminate(true);
                    }
                    if (!MemoryConsumptionStatus.halfGbFree() && sequenceFactory.getNodesInCache() > 0) {
                        sequenceFactory.reduceNodeCacheSize(0.5);
                    }
                    projectDetails.addIdentificationFiles(idFile);

                    int psmsRejected = psmImporter.getPsmsRejected();
                    int proteinIssue = psmImporter.getProteinIssue();
                    int peptideIssue = psmImporter.getPeptideIssue();
                    int precursorIssue = psmImporter.getPrecursorIssue();
                    int ptmIssue = psmImporter.getPtmIssue();
                    int totalAssumptionsRejected = proteinIssue + peptideIssue + precursorIssue + ptmIssue;

                    double sharePsmsRejected = 100.0 * psmsRejected / numberOfMatches;

                    if (psmsRejected > 0) {
                        waitingHandler.appendReport(psmsRejected + " PSMs (" + Util.roundDouble(sharePsmsRejected, 1) + "%) excluded by the import filters:", true, true);

                        String padding = "    ";

                        double share = 100 * ((double) proteinIssue) / totalAssumptionsRejected;
                        if (share >= 1) {
                            waitingHandler.appendReport(padding + "- " + Util.roundDouble(share, 1)
                                    + "% peptide mapping to both target and decoy.", true, true);
                        }
                        share = 100 * ((double) peptideIssue) / totalAssumptionsRejected;
                        if (share >= 1) {
                            waitingHandler.appendReport(padding + "- " + Util.roundDouble(share, 1)
                                    + "% peptide length less than " + idFilter.getMinPepLength() + " or greater than " + idFilter.getMaxPepLength() + ".", true, true);
                        }
                        share = 100 * ((double) precursorIssue) / totalAssumptionsRejected;
                        if (share >= 1) {
                            String unit;
                            if (searchParameters.isPrecursorAccuracyTypePpm()) {
                                unit = "ppm";
                            } else {
                                unit = "Da";
                            }
                            waitingHandler.appendReport(padding + "- " + Util.roundDouble(share, 1)
                                    + "% peptide mass deviation bigger than " + idFilter.getMaxMzDeviation() + " " + unit + ".", true, true);
                        }
                        share = 100 * ((double) ptmIssue) / totalAssumptionsRejected;
                        if (share >= 1) {
                            waitingHandler.appendReport(padding + "- " + Util.roundDouble(share, 1) + "% unrecognized modifications.", true, true);
                        }
                    }
                    // inform the user in case more than 75% of the hits were rejected by the filters
                    if (sharePsmsRejected > 75) {
                        String report = "Warning: More than 75% of the PSMs were rejected by the loading filters when importing the matches.";
                        double meanRejected = sharePsmsRejected / 4;
                        if (proteinIssue > meanRejected) {
                            report += " Apparently your database contains a high share of shared peptides between the target and decoy sequences. Please verify your database";
                            if (software.keySet().contains(Advocate.mascot.getName())) {
                                report += " and make sure that you use Mascot with the 'decoy' option disabled.";
                            }
                            report += ".";
                        }
                        if (peptideIssue > meanRejected) {
                            report += " Please verify that your peptide selection criteria are not too restrictive.";
                        }
                        if (precursorIssue > meanRejected) {
                            report += " Please verify that your precursor selection criteria are not too restrictive.";
                        }
                        if (ptmIssue > meanRejected) {
                            report += " Apparently your data contains modifications which are not recognized by PeptideShaker. Please verify the search parameters provided when creating the project.";
                            if (software.keySet().contains(Advocate.mascot.getName())) {
                                report += " When using Mascot alone, you need to specify the search parameters manually when creating the project. We recommend the complementary use of SearchGUI when possible.";
                            }
                        }
                        waitingHandler.appendReport(report, true, true);
                    }
                }
            }

            waitingHandler.increasePrimaryProgressCounter();
        }

        /**
         * Checks whether the spectrum file needed for the given spectrum match
         * is loaded and if the spectrum is present. Try to load it from the
         * factory otherwise.
         *
         * @param idFile the identification file
         * @param spectrumMatch the spectrum match
         * @param numberOfMatches the number of matches expected for this
         * identification file
         *
         * @return indicates whether the spectrum is imported, false if the file
         * was not found
         */
        private boolean importSpectrum(File idFile, SpectrumMatch spectrumMatch, int numberOfMatches) {

            String spectrumKey = spectrumMatch.getKey();
            String fileName = Spectrum.getSpectrumFile(spectrumKey);
            String spectrumTitle = Spectrum.getSpectrumTitle(spectrumKey);

            // remap wrong spectrum file names
            if (spectrumFactory.getSpectrumFileFromIdName(fileName) != null) {
                fileName = spectrumFactory.getSpectrumFileFromIdName(fileName).getName();
                spectrumMatch.setKey(Spectrum.getSpectrumKey(fileName, spectrumTitle));
                spectrumKey = spectrumMatch.getKey();
            }

            // import the mgf file needed if not done already
            if (!mgfUsed.contains(fileName)) {
                File spectrumFile = spectrumFiles.get(fileName);
                if (spectrumFile != null && spectrumFile.exists()) {
                    importSpectra(fileName);
                    waitingHandler.setSecondaryProgressCounterIndeterminate(false);
                    waitingHandler.setMaxSecondaryProgressCounter(numberOfMatches);
                    mgfUsed.add(fileName);
                    projectDetails.addSpectrumFile(spectrumFile);
                    nSpectra += spectrumFactory.getNSpectra(fileName);
                } else {
                    missingMgfFiles.put(idFile, fileName);
                    waitingHandler.appendReport(fileName + " not found.", true, true);
                    return false;
                }
            }

            // remap missing spectrum titles
            if (spectrumFactory.fileLoaded(fileName) && !spectrumFactory.spectrumLoaded(spectrumKey)) {
                String oldTitle = Spectrum.getSpectrumTitle(spectrumKey);
                Integer spectrumNumber = spectrumMatch.getSpectrumNumber();
                if (spectrumNumber == null) {
                    try {
                        spectrumNumber = new Integer(oldTitle);
                    } catch (Exception e) {
                        // ignore
                    }
                }
                if (spectrumNumber == null) {
                    String errorMessage = "Spectrum \'" + oldTitle + "\' not found in file " + fileName + ".";
                    waitingHandler.appendReport(errorMessage, true, true);
                    waitingHandler.setRunCanceled();
                    throw new IllegalArgumentException(errorMessage);
                }
                spectrumTitle = spectrumFactory.getSpectrumTitle(fileName, spectrumNumber);
                spectrumKey = Spectrum.getSpectrumKey(fileName, spectrumTitle);
                spectrumMatch.setKey(spectrumKey);
                if (!spectrumFactory.spectrumLoaded(spectrumKey)) {
                    spectrumTitle = spectrumNumber + "";
                    spectrumKey = Spectrum.getSpectrumKey(fileName, spectrumTitle);
                    spectrumMatch.setKey(spectrumKey);
                    if (spectrumFactory.fileLoaded(fileName) && !spectrumFactory.spectrumLoaded(spectrumKey)) {
                        String errorMessage = "Spectrum \'" + oldTitle + "\' number " + spectrumTitle + " not found in file " + fileName + ".";
                        waitingHandler.appendReport(errorMessage, true, true);
                        waitingHandler.setRunCanceled();
                        throw new IllegalArgumentException(errorMessage);
                    }
                }
            }
            return true;
        }

        /**
         * Verify that the spectra are imported and imports spectra from the
         * desired spectrum file if necessary.
         *
         * @param targetFileName the spectrum file
         */
        public void importSpectra(String targetFileName) {

            File spectrumFile = spectrumFiles.get(targetFileName);

            try {
                waitingHandler.appendReport("Importing " + targetFileName, true, true);
                waitingHandler.setSecondaryProgressCounterIndeterminate(false);
                waitingHandler.resetSecondaryProgressCounter();
                spectrumFactory.addSpectra(spectrumFile, waitingHandler);

                // @TODO: check for duplicate spectrum titles and show the warning in the lower right corner of the main frame
                if (waitingHandler.isRunCanceled()) {
                    return;
                }
                waitingHandler.resetSecondaryProgressCounter();
                waitingHandler.increasePrimaryProgressCounter();
                waitingHandler.appendReport(targetFileName + " imported.", true, true);
            } catch (Exception e) {
                waitingHandler.appendReport("Spectrum files import failed when trying to import " + targetFileName + ".", true, true);
                e.printStackTrace();
            }
        }
    }

    /**
     * Returns the path to the jar file.
     *
     * @return the path to the jar file
     */
    public String getJarFilePath() {
        return CompomicsWrapper.getJarFilePath(this.getClass().getResource("FileImporter.class").getPath(), "PeptideShaker");
    }

    /**
     * Returns the temp folder name to use when unzipping a zip file.
     *
     * @param fileName the name of the zip file
     *
     * @return the folder name associated to the zip file
     */
    public static String getTempFolderName(String fileName) {
        return Util.removeExtension(fileName) + "_" + FileImporter.tempFolderName;
    }
}
TOP

Related Classes of eu.isas.peptideshaker.fileimport.FileImporter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.
m.parentNode.insertBefore(a,m) })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); ga('create', 'UA-20639858-1', 'auto'); ga('send', 'pageview');