Package eu.isas.peptideshaker.followup

Source Code of eu.isas.peptideshaker.followup.TrainingExport

package eu.isas.peptideshaker.followup;

import com.compomics.util.Util;
import com.compomics.util.experiment.identification.Identification;
import com.compomics.util.experiment.identification.matches.SpectrumMatch;
import com.compomics.util.experiment.massspectrometry.MSnSpectrum;
import com.compomics.util.experiment.massspectrometry.Spectrum;
import com.compomics.util.experiment.massspectrometry.SpectrumFactory;
import com.compomics.util.waiting.WaitingHandler;
import com.compomics.util.preferences.AnnotationPreferences;
import static eu.isas.peptideshaker.followup.RecalibrationExporter.getRecalibratedFileName;
import eu.isas.peptideshaker.myparameters.PSMaps;
import eu.isas.peptideshaker.myparameters.PSParameter;
import eu.isas.peptideshaker.recalibration.SpectrumRecalibrator;
import eu.isas.peptideshaker.scoring.PsmSpecificMap;
import eu.isas.peptideshaker.scoring.targetdecoy.TargetDecoyResults;
import eu.isas.peptideshaker.scoring.targetdecoy.TargetDecoySeries;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import uk.ac.ebi.jmzml.xml.io.MzMLUnmarshallerException;

/**
* This class exports de novo training files.
*
* @author Marc Vaudel
*/
public class TrainingExport {

    /**
     * Suffix for the mgf file containing annotated spectra making the "good
     * training" set.
     */
    public static final String goodTrainingSetSuffix = "_good_training";
    /**
     * Suffix for the mgf file containing spectra making the "bad training" set.
     */
    public static final String badTrainingSetSuffix = "_bad_training";

    /**
     * Exports the PepNovo training files using the validation settings,
     * eventually recalibrated with the recalibrated mgf.
     *
     * @param destinationFolder the folder where to write the output files
     * @param identification the identification
     * @param annotationPreferences the annotation preferences. Only necessary
     * in recalibration mode.
     * @param recalibrate boolean indicating whether the files shall be
     * recalibrated
     * @param waitingHandler waiting handler displaying progress to the user and
     * allowing canceling the process
     * @throws IOException
     * @throws MzMLUnmarshallerException
     * @throws SQLException
     * @throws ClassNotFoundException
     * @throws InterruptedException
     */
    public static void exportPepnovoTrainingFiles(File destinationFolder, Identification identification, AnnotationPreferences annotationPreferences,
            boolean recalibrate, WaitingHandler waitingHandler) throws IOException, MzMLUnmarshallerException, SQLException, ClassNotFoundException, InterruptedException {
        exportPepnovoTrainingFiles(destinationFolder, identification, annotationPreferences, null, null, recalibrate, waitingHandler);
    }

    /**
     * Exports the PepNovo training files, eventually recalibrated with the
     * recalibrated mgf.
     *
     * @param destinationFolder the folder where to write the output files
     * @param identification the identification
     * @param annotationPreferences the annotation preferences. Only necessary
     * in recalibration mode.
     * @param fdr the false discovery rate to use for the selection of "good"
     * spectra. Can be null, then the value used for validation is used.
     * @param fnr the false negative rate to use for the selection fo "bad"
     * spectra. Can be null, then the same value as for the fdr parameter is
     * used.
     * @param recalibrate boolean indicating whether the files shall be
     * recalibrated
     * @param waitingHandler waiting handler displaying progress to the user and
     * allowing canceling the process
     * @throws IOException
     * @throws MzMLUnmarshallerException
     * @throws SQLException
     * @throws ClassNotFoundException
     * @throws InterruptedException
     */
    public static void exportPepnovoTrainingFiles(File destinationFolder, Identification identification, AnnotationPreferences annotationPreferences, Double fdr, Double fnr,
            boolean recalibrate, WaitingHandler waitingHandler) throws IOException, MzMLUnmarshallerException, SQLException, ClassNotFoundException, InterruptedException {

        SpectrumFactory spectrumFactory = SpectrumFactory.getInstance();
        SpectrumRecalibrator spectrumRecalibrator = new SpectrumRecalibrator();

        PSMaps psMaps = new PSMaps();
        psMaps = (PSMaps) identification.getUrParam(psMaps);
        PsmSpecificMap psmTargetDecoyMap = psMaps.getPsmSpecificMap();

        int progress = 1;

        for (String fileName : spectrumFactory.getMgfFileNames()) {

            if (recalibrate) {
                if (waitingHandler != null) {
                    if (waitingHandler.isRunCanceled()) {
                        break;
                    }

                    waitingHandler.setWaitingText("Recalibrating Spectra. Please Wait... (" + progress + "/" + spectrumFactory.getMgfFileNames().size() + ")");
                    waitingHandler.setSecondaryProgressCounter(0);
                    waitingHandler.setSecondaryProgressCounterIndeterminate(false);
                    waitingHandler.setMaxSecondaryProgressCounter(2 * spectrumFactory.getNSpectra(fileName));
                }

                spectrumRecalibrator.estimateErrors(fileName, identification, annotationPreferences, waitingHandler);
            }

            PSParameter psParameter = new PSParameter();
            identification.loadSpectrumMatchParameters(fileName, psParameter, waitingHandler);

            if (waitingHandler != null) {
                if (waitingHandler.isRunCanceled()) {
                    return;
                }
                waitingHandler.setWaitingText("Selecting Good PSMs. Please Wait... (" + progress + "/" + spectrumFactory.getMgfFileNames().size() + ")");
                // reset the progress bar
                waitingHandler.resetSecondaryProgressCounter();
                waitingHandler.setMaxSecondaryProgressCounter(spectrumFactory.getSpectrumTitles(fileName).size());
            }

            ArrayList<String> keys = new ArrayList<String>();
            for (String spectrumTitle : spectrumFactory.getSpectrumTitles(fileName)) {

                String spectrumKey = Spectrum.getSpectrumKey(fileName, spectrumTitle);
                if (identification.matchExists(spectrumKey)) {
                    psParameter = (PSParameter) identification.getSpectrumMatchParameter(spectrumKey, psParameter);
                    Integer charge = new Integer(psParameter.getSpecificMapKey());
                    String spectrumFile = Spectrum.getSpectrumFile(spectrumKey);
                    Double fdrThreshold = getFdrThreshold(psmTargetDecoyMap, charge, spectrumTitle, fdr);
                    double confidenceLevel = getHighConfidenceThreshold(psmTargetDecoyMap, charge, spectrumFile, fdrThreshold);
                    if (psParameter.getPsmConfidence() >= confidenceLevel) {
                        keys.add(spectrumKey);
                    }
                }
                if (waitingHandler != null) {
                    if (waitingHandler.isRunCanceled()) {
                        return;
                    }
                    waitingHandler.increaseSecondaryProgressCounter();
                }
            }
            if (waitingHandler != null) {
                if (waitingHandler.isRunCanceled()) {
                    return;
                }
                waitingHandler.setWaitingText("Loading PSMs. Please Wait... (" + progress + "/"
                        + spectrumFactory.getMgfFileNames().size() + ")");
            }
            identification.loadSpectrumMatches(keys, waitingHandler);

            if (waitingHandler != null) {
                if (waitingHandler.isRunCanceled()) {
                    return;
                }
                waitingHandler.setWaitingText("Exporting PepNovo Training Files. Please Wait... (" + progress + "/" + spectrumFactory.getMgfFileNames().size() + ").");
                // reset the progress bar
                waitingHandler.resetSecondaryProgressCounter();
                waitingHandler.setMaxSecondaryProgressCounter(spectrumFactory.getSpectrumTitles(fileName).size());
            }

            File file = new File(destinationFolder, getGoodSetFileName(fileName));
            BufferedWriter writerGood = new BufferedWriter(new FileWriter(file));
            file = new File(destinationFolder, getBadSetFileName(fileName));
            BufferedWriter writerBad = new BufferedWriter(new FileWriter(file));
            BufferedWriter writerRecalibration = null;
            if (recalibrate) {
                file = new File(destinationFolder, getRecalibratedFileName(fileName));
                writerRecalibration = new BufferedWriter(new FileWriter(file));
            }

            try {
                for (String spectrumTitle : spectrumFactory.getSpectrumTitles(fileName)) {

                    String spectrumKey = Spectrum.getSpectrumKey(fileName, spectrumTitle);

                    MSnSpectrum spectrum = null;
                    if (recalibrate) {
                        spectrum = spectrumRecalibrator.recalibrateSpectrum(fileName, spectrumTitle, true, true);
                        spectrum.writeMgf(writerRecalibration);
                    }
                    if (identification.matchExists(spectrumKey)) {
                        psParameter = (PSParameter) identification.getSpectrumMatchParameter(spectrumKey, psParameter);
                        Integer charge = new Integer(psParameter.getSpecificMapKey());
                        String spectrumFile = Spectrum.getSpectrumFile(spectrumKey);
                        Double fdrThreshold = getFdrThreshold(psmTargetDecoyMap, charge, spectrumTitle, fdr);
                        double confidenceLevel = getHighConfidenceThreshold(psmTargetDecoyMap, charge, spectrumFile, fdrThreshold);
                        if (psParameter.getPsmConfidence() >= confidenceLevel) {
                            if (spectrum == null) {
                                spectrum = (MSnSpectrum) spectrumFactory.getSpectrum(spectrumKey);
                            }
                            SpectrumMatch spectrumMatch = identification.getSpectrumMatch(spectrumKey);
                            if (spectrumMatch.getBestPeptideAssumption() != null) {
                                String sequence = spectrumMatch.getBestPeptideAssumption().getPeptide().getSequence();
                                HashMap<String, String> tags = new HashMap<String, String>();
                                tags.put("SEQ", sequence);
                                spectrum.writeMgf(writerGood, tags);
                            }
                        }
                        confidenceLevel = getLowConfidenceThreshold(psmTargetDecoyMap, charge, spectrumFile, fnr, fdrThreshold);
                        if (psParameter.getPsmConfidence() <= confidenceLevel) {
                            if (spectrum == null) {
                                spectrum = (MSnSpectrum) spectrumFactory.getSpectrum(spectrumKey);
                            }
                            spectrum.writeMgf(writerBad);
                        }
                    }

                    if (waitingHandler != null) {
                        if (waitingHandler.isRunCanceled()) {
                            return;
                        }
                        waitingHandler.increaseSecondaryProgressCounter();
                    }
                }
            } finally {
                writerBad.close();
                writerGood.close();
                if (writerRecalibration != null) {
                    writerRecalibration.close();
                }
            }

            spectrumRecalibrator.clearErrors(fileName);
            progress++;
        }
    }

    /**
     * Returns the fdr threshold to be used
     *
     * @param psmSpecificMap the psm target/decoy scoring map
     * @param charge the charge of the inspected PSM
     * @param spectrumFileName the spectrum file name
     * @param fdr a user defined FDR, can be null
     *
     * @return the fdr threshold to be used
     */
    private static double getFdrThreshold(PsmSpecificMap psmSpecificMap, int charge, String spectrumFileName, Double fdr) {
        double fdrThreshold;
        TargetDecoyResults currentResults = psmSpecificMap.getTargetDecoyMap(charge, spectrumFileName).getTargetDecoyResults();
        if (fdr == null) {
            if (currentResults.getInputType() == 1) {
                fdrThreshold = currentResults.getUserInput();
            } else {
                fdrThreshold = currentResults.getFdrLimit();
            }
        } else {
            fdrThreshold = fdr;
        }
        return fdrThreshold;
    }

    /**
     * Returns the high confidence threshold
     *
     * @param psmSpecificMap the psm target/decoy scoring map
     * @param charge the charge of the inspected PSM
     * @param spectrumFileName the spectrum file name
     * @param fdrThreshold the fdr threshold to be used
     *
     * @return the confidence threshold corresponding to this match at the
     * desired FDR
     */
    private static double getHighConfidenceThreshold(PsmSpecificMap psmSpecificMap, int charge, String spectrumFileName, Double fdrThreshold) {
        TargetDecoyResults trainingResults = new TargetDecoyResults();
        trainingResults.setClassicalEstimators(true);
        trainingResults.setClassicalValidation(true);
        trainingResults.setFdrLimit(fdrThreshold);
        TargetDecoySeries currentSeries = psmSpecificMap.getTargetDecoyMap(charge, spectrumFileName).getTargetDecoySeries();
        currentSeries.getFDRResults(trainingResults);
        return trainingResults.getConfidenceLimit();
    }

    /**
     * Returns the low confidence threshold
     *
     * @param psmSpecificMap the psm target/decoy scoring map
     * @param charge the charge of the inspected PSM
     * @param spectrumFileName the spectrum file name
     * @param fnr a user defined FNR threshold, can be null
     * @param fdrThreshold the fdr threshold used
     *
     * @return the confidence threshold corresponding to this match at the
     * desired FNR
     */
    private static double getLowConfidenceThreshold(PsmSpecificMap psmSpecificMap, int charge, String spectrumFileName, Double fnr, double fdrThreshold) {
        double fnrThreshold;
        if (fnr == null) {
            fnrThreshold = fdrThreshold;
        } else {
            fnrThreshold = fnr;
        }
        TargetDecoyResults trainingResults = new TargetDecoyResults();
        trainingResults.setClassicalEstimators(true);
        trainingResults.setClassicalValidation(true);
        trainingResults.setFnrLimit(fnrThreshold);
        TargetDecoySeries currentSeries = psmSpecificMap.getTargetDecoyMap(charge, spectrumFileName).getTargetDecoySeries();
        currentSeries.getFNRResults(trainingResults);
        return trainingResults.getConfidenceLimit();
    }

    /**
     * Returns the name of the good training set file.
     *
     * @param fileName the original file name
     * @return the name of the good training set file
     */
    public static String getGoodSetFileName(String fileName) {
        return Util.appendSuffix(fileName, goodTrainingSetSuffix);
    }

    /**
     * Returns the name of the bad training set file.
     *
     * @param fileName the original file name
     * @return the name of the bad training set file
     */
    public static String getBadSetFileName(String fileName) {
        return Util.appendSuffix(fileName, badTrainingSetSuffix);
    }
}
TOP

Related Classes of eu.isas.peptideshaker.followup.TrainingExport

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.