// Package eu.isas.peptideshaker.followup
//
// Source Code of eu.isas.peptideshaker.followup.FastaExport

package eu.isas.peptideshaker.followup;

import com.compomics.util.experiment.identification.Identification;
import com.compomics.util.experiment.identification.SequenceFactory;
import com.compomics.util.experiment.identification.matches.ProteinMatch;
import com.compomics.util.waiting.WaitingHandler;
import eu.isas.peptideshaker.myparameters.PSParameter;
import eu.isas.peptideshaker.preferences.FilterPreferences;
import eu.isas.peptideshaker.utils.IdentificationFeaturesGenerator;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

/**
* Export proteins in the FASTA format.
*
* @author Marc Vaudel
*/
public class FastaExport {

    /**
     * Exports the proteins of interest in a text file of the FASTA format. Non
     * validated protein mode iterates all proteins in the original FASTA file
     * (size in the sequence factory). Validated protein mode iterates only
     * validated proteins (size in the identification features generator).
     *
     * @param destinationFile the file where to write
     * @param identification the identification
     * @param identificationFeaturesGenerator the identification features
     * generator
     * @param exportType the export type (see enum below)
     * @param waitingHandler waiting handler used to display progress and cancel
     * the process
     *
     * @throws IOException
     * @throws SQLException
     * @throws ClassNotFoundException
     * @throws InterruptedException
     */
    public static void exportFasta(File destinationFile, Identification identification,
            IdentificationFeaturesGenerator identificationFeaturesGenerator, ExportType exportType, WaitingHandler waitingHandler, FilterPreferences filterPreferences)
            throws IOException, SQLException, ClassNotFoundException, InterruptedException {
        export(destinationFile, identification, identificationFeaturesGenerator, exportType, waitingHandler, filterPreferences, false);
    }

    /**
     * Exports the accessions proteins of interest in a text file. Non validated
     * protein mode iterates all proteins in the original FASTA file (size in
     * the sequence factory). Validated protein mode iterates only validated
     * proteins (size in the identification features generator).
     *
     * @param destinationFile the file where to write
     * @param identification the identification
     * @param identificationFeaturesGenerator the identification features
     * generator
     * @param exportType the export type (see enum below)
     * @param waitingHandler waiting handler used to display progress and cancel
     * the process
     * @param filterPreferences the filter preferences
     *
     * @throws IOException
     * @throws SQLException
     * @throws ClassNotFoundException
     * @throws InterruptedException
     */
    public static void exportAccessions(File destinationFile, Identification identification,
            IdentificationFeaturesGenerator identificationFeaturesGenerator, ExportType exportType, WaitingHandler waitingHandler, FilterPreferences filterPreferences)
            throws IOException, SQLException, ClassNotFoundException, InterruptedException {
        export(destinationFile, identification, identificationFeaturesGenerator, exportType, waitingHandler, filterPreferences, true);
    }

    /**
     * Exports the proteins of interest in a text file of the given format. Non
     * validated protein mode iterates all proteins in the original FASTA file
     * (size in the sequence factory). Validated protein mode iterates only
     * validated proteins (size in the identification features generator).
     *
     * @param destinationFile the file where to write
     * @param identification the identification
     * @param identificationFeaturesGenerator the identification features
     * generator
     * @param exportType the export type (see enum below)
     * @param waitingHandler waiting handler used to display progress and cancel
     * the process
     * @param filterPreferences the filter preferences
     * @param accessionOnly if true only the accession of the protein will be
     * exported, if false the entire information in FASTA format
     *
     * @throws IOException
     * @throws SQLException
     * @throws ClassNotFoundException
     * @throws InterruptedException
     */
    public static void export(File destinationFile, Identification identification, IdentificationFeaturesGenerator identificationFeaturesGenerator,
            ExportType exportType, WaitingHandler waitingHandler, FilterPreferences filterPreferences, boolean accessionOnly) throws IOException, SQLException, ClassNotFoundException, InterruptedException {

        SequenceFactory sequenceFactory = SequenceFactory.getInstance();
        FileWriter f = new FileWriter(destinationFile);

        try {

            BufferedWriter b = new BufferedWriter(f);

            try {
                if (exportType == ExportType.non_validated) {

                    PSParameter psParameter = new PSParameter();
                    identification.loadProteinMatchParameters(psParameter, waitingHandler);

                    for (String accession : sequenceFactory.getAccessions()) {

                        if (!sequenceFactory.isDecoyAccession(accession)) {

                            ArrayList<String> matches = identification.getProteinMap().get(accession);

                            if (matches != null) {
                                boolean validated = false;
                                for (String match : matches) {
                                    psParameter = (PSParameter) identification.getProteinMatchParameter(match, psParameter);
                                    if (psParameter.getMatchValidationLevel().isValidated()) {
                                        validated = true;
                                        break;
                                    }
                                }
                                if (!validated) {
                                    writeAccession(b, accession, sequenceFactory, accessionOnly);
                                }
                            }
                        }
                        if (waitingHandler != null) {
                            if (waitingHandler.isRunCanceled()) {
                                break;
                            }
                            waitingHandler.increaseSecondaryProgressCounter();
                        }
                    }
                } else {

                    if (exportType == ExportType.validated_main_accession) {
                        identification.loadProteinMatches(identificationFeaturesGenerator.getValidatedProteins(waitingHandler, filterPreferences), waitingHandler);
                    }

                    ArrayList<String> exported = new ArrayList<String>();

                    for (String matchKey : identificationFeaturesGenerator.getValidatedProteins(waitingHandler, filterPreferences)) {
                        ArrayList<String> accessions = new ArrayList<String>();
                        if (exportType == ExportType.validated_main_accession) {
                            ProteinMatch proteinMatch = identification.getProteinMatch(matchKey);
                            accessions.add(proteinMatch.getMainMatch());
                        } else if (exportType == ExportType.validated_all_accessions) {
                            accessions.addAll(Arrays.asList(ProteinMatch.getAccessions(matchKey)));
                        }
                        for (String accession : accessions) {
                            if (!exported.contains(accession)) {
                                writeAccession(b, accession, sequenceFactory, accessionOnly);
                                exported.add(accession);
                            }
                        }
                        if (waitingHandler != null) {
                            if (waitingHandler.isRunCanceled()) {
                                break;
                            }
                            waitingHandler.increaseSecondaryProgressCounter();
                        }
                    }
                }
            } finally {
                b.close();
            }
        } finally {
            f.close();
        }

    }

    /**
     * Writes the desired information about a given accession.
     *
     * @param b the stream where to write
     * @param accession the accession of interest
     * @param sequenceFactory the sequence factory
     * @param accessionOnly indicate whether only the accession shall be written
     * or the entire protein details in FASTA format
     *
     * @throws IOException
     * @throws IllegalArgumentException
     * @throws InterruptedException
     * @throws FileNotFoundException
     * @throws ClassNotFoundException
     */
    private static void writeAccession(BufferedWriter b, String accession, SequenceFactory sequenceFactory, boolean accessionOnly)
            throws IOException, IllegalArgumentException, InterruptedException, FileNotFoundException, ClassNotFoundException {

        if (accessionOnly) {
            b.write(accession);
            b.newLine();
        } else {
            b.write(sequenceFactory.getHeader(accession).getRawHeader());
            b.newLine();
            b.write(sequenceFactory.getProtein(accession).getSequence());
            b.newLine();
        }
    }

    /**
     * Enum of the different types of export implemented.
     */
    public enum ExportType {

        /**
         * Exports the main accession of validated protein groups.
         */
        validated_main_accession(0, "Main Accession of Validated Protein Groups"),
        /**
         * Exports all accessions of validated protein groups.
         */
        validated_all_accessions(1, "All Accessions of Validated Protein Groups"),
        /**
         * Exports accessions which cannot be mapped to a protein group.
         */
        non_validated(2, "Non-Validated Accessions");
        /**
         * Index for the export type.
         */
        public int index;
        /**
         * Description of the export.
         */
        public String description;

        /**
         * Constructor.
         *
         * @param index
         */
        private ExportType(int index, String description) {
            this.index = index;
            this.description = description;
        }

        /**
         * Returns the export type corresponding to a given index.
         *
         * @param index the index of interest
         * @return the export type
         */
        public static ExportType getTypeFromIndex(int index) {
            if (index == validated_main_accession.index) {
                return validated_main_accession;
            } else if (index == validated_all_accessions.index) {
                return validated_all_accessions;
            } else if (index == non_validated.index) {
                return non_validated;
            } else {
                throw new IllegalArgumentException("Export type " + index + " not implemented.");
            }
        }

        /**
         * Returns all possibilities descriptions in an array of string. Tip:
         * the position in the array corresponds to the type index.
         *
         * @return all possibilities descriptions in an array of string
         */
        public static String[] getPossibilities() {
            return new String[]{validated_main_accession.description, validated_all_accessions.description, non_validated.description};
        }

        /**
         * Returns a description of the command line arguments.
         *
         * @return a description of the command line arguments
         */
        public static String getCommandLineOptions() {
            return validated_main_accession.index + ": " + validated_main_accession.description + " (default), "
                    + validated_all_accessions.index + ": " + validated_all_accessions.description + ", "
                    + non_validated.index + ": " + non_validated.description + ".";
        }
    }
}
// TOP
//
// Related Classes of eu.isas.peptideshaker.followup.FastaExport
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.