Package org.molgenis.util.plink.drivers

Source Code of org.molgenis.util.plink.drivers.BedFileDriver

package org.molgenis.util.plink.drivers;

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;

/**
* Driver to query BED (binary Plink genotype) files. See:
* http://pngu.mgh.harvard.edu/~purcell/plink/binary.shtml
*/
public class BedFileDriver
{
  private int mode;
  private long nrOfElements;
  private File bedFile;

  /**
   * Get the mode: mode 1 = SNP-major, mode 0 = individual-major
   *
   * @return
   */
  public int getMode()
  {
    return mode;
  }

  /**
   * Get the number of retrievable genotype elements of this BED file. Does
   * not account for trailing null elements because they are indistinguishable
   * from null genotypes in this format alone.
   *
   * @return
   */
  public long getNrOfElements()
  {
    return nrOfElements;
  }

  /**
   * Construct new convertGenoCoding on this file
   *
   * @param bedFile
   * @throws Exception
   */
  public BedFileDriver(File bedFile) throws Exception
  {
    RandomAccessFile raf = new RandomAccessFile(bedFile, "r");
    try
    {
      this.bedFile = bedFile;

      byte mn1 = raf.readByte();
      byte mn2 = raf.readByte();

      if (mn1 == 108 && mn2 == 27) // tested, bit code 01101100 00011011
      {
        // System.out.println("Plink magic number valid");
      }
      else
      {
        throw new Exception("Invalid Plink magic number");
      }

      byte bmode = raf.readByte();

      if (bmode == 1) // tested, bit code 00000001
      {
        // System.out.println("mode 1: SNP-major");
      }
      else if (bmode == 0) // assumed... bit code 00000000
      {
        // System.out.println("mode 0: individual-major");
      }
      else
      {
        throw new Exception("Mode not recognized: " + bmode);
      }

      this.mode = bmode;
      this.nrOfElements = (raf.length() - 3) * 4;
    }
    finally
    {
      raf.close();
    }
  }

  /**
   * Convert bit coding in custom genotype coding.
   *
   * @param in
   * @param hom1
   * @param hom2
   * @param het
   * @param _null
   * @return
   * @throws Exception
   */
  public String convertGenoCoding(String in, String hom1, String hom2, String het, String _null) throws Exception
  {
    if (in.equals("00"))
    {
      return hom1;
    }
    if (in.equals("01"))
    {
      return het;
    }
    if (in.equals("11"))
    {
      return hom2;
    }
    if (in.equals("10"))
    {
      return _null;
    }
    throw new Exception("Input '" + in + "' not recognized");
  }

  /**
   * Convert bit coding in common genotype signs: A & B for homozygotes, H for
   * heterozygote, N for null.
   *
   * @param in
   * @return
   * @throws Exception
   */
  public String genoCodingCommon(String in) throws Exception
  {
    return convertGenoCoding(in, "A", "B", "H", "N");
  }

  /**
   * Get a single element from the BED file
   *
   * @param index
   * @return
   * @throws Exception
   */
  public String getElement(long index) throws Exception
  {
    // throw new Exception("fixme!");
    RandomAccessFile raf = new RandomAccessFile(bedFile, "r");
    raf.seek((index / 4) + 3);
    String byteString = reverse(bits(raf.readByte()));
    raf.close();
    int bitpair = (int) (index % 4) * 2;
    return byteString.substring(bitpair, bitpair + 2);
  }

  /**
   * Get a String[] of elements from the BED file.
   *
   * from = inclusive to = exclusive
   *
   * @param from
   * @param to
   * @return
   * @throws IOException
   */
  public String[] getElements(long from, long to, int paddingBitpairs, int pass) throws IOException
  {
    double paddingFraction = paddingBitpairs / 4.0;
    // Start byte = byte position of start individual, corrected for padding
    // 0's that get added at every SNP:
    long start = (long) ((from / 4.0) - (pass * paddingFraction) + pass + 3);
    // Stop byte = byte position after last individual, corrected for
    // padding 0's that get added at every SNP:
    long stop = (long) ((to / 4.0) - ((pass + 1) * paddingFraction) + (pass + 1) + 3);
    byte[] res = new byte[(int) (stop - start)];
    int res_index = 0;
    String[] result = new String[(int) (to - from)]; // to - from = nr. of
                              // individuals

    RandomAccessFile raf = new RandomAccessFile(bedFile, "r");
    raf.seek(start);
    raf.read(res);
    raf.close();

    for (int i = 0; i < res.length; i++)
    {
      byte b = res[i];
      String byteString = reverse(bits(b));

      int toBit = 8; // normally we take the whole byte
      if (i == res.length - 1) // except at the end, when we correct for
                    // padding 0's
      {
        // At the end, the string is padded with 0's -> check
        for (int j = paddingBitpairs * 2; j < 8; j++)
        {
          if (byteString.charAt(j) != '0')
          {
            throw new IOException("Fatal error: padding 0's not present where expected!");
          }
        }
        toBit -= (paddingBitpairs * 2);
      }

      for (int pair = 0; pair < toBit; pair += 2)
      {
        // System.out.print(res_index + " ");
        result[res_index++] = byteString.substring(pair, pair + 2);
      }
    }
    // System.out.println("");
    return result;
  }

  /**
   * Helper function to get the bit values
   *
   * @param b
   * @return
   */
  private String bits(byte b)
  {
    StringBuilder bitsBuilder = new StringBuilder();
    for (int bit = 7; bit >= 0; --bit)
    {
      bitsBuilder.append(((b >>> bit) & 1));
    }
    return bitsBuilder.toString();
  }

  /**
   * Helper function to reverse a string
   *
   * @param string
   * @return
   */
  private String reverse(String string)
  {
    return new StringBuffer(string).reverse().toString();
  }

}
TOP

Related Classes of org.molgenis.util.plink.drivers.BedFileDriver

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.