Package net.sf.cram

Source Code of net.sf.cram.ReadTag

/*******************************************************************************
* Copyright 2012 EMBL-EBI, Hinxton outstation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package net.sf.cram;

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.Charset;
import java.util.Arrays;

import net.sf.samtools.SAMBinaryTagAndUnsignedArrayValue;
import net.sf.samtools.SAMBinaryTagAndValue;
import net.sf.samtools.SAMException;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMFormatException;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMRecord.SAMTagAndValue;
import net.sf.samtools.SAMTagUtil;
import net.sf.samtools.TagValueAndUnsignedArrayFlag;
import net.sf.samtools.util.StringUtil;

public class ReadTag implements Comparable<ReadTag> {
  private static final long MAX_INT = Integer.MAX_VALUE;
  private static final long MAX_UINT = MAX_INT * 2 + 1;
  private static final long MAX_SHORT = Short.MAX_VALUE;
  private static final long MAX_USHORT = MAX_SHORT * 2 + 1;
  private static final long MAX_BYTE = Byte.MAX_VALUE;
  private static final long MAX_UBYTE = MAX_BYTE * 2 + 1;
 
  public static final int OQZ = (('O' << 16) | ('Q' << 8)) | 'Z'
  public static final int BQZ = (('B' << 16) | ('Q' << 8)) | 'Z'

  // Two-character tag name; marked non-null by the original author.
  private String key;
  // Key and type joined by ':' (for example "OQ:Z"); exposed through
  // getKeyAndType().
  private String keyAndType;
  // Key immediately followed by the type character (key + type).
  public String keyType3Bytes;
  // keyType3Bytes packed into an int: key chars in bits 16-23 and 8-15,
  // type char in bits 0-7.
  public int keyType3BytesAsInt;
  // Single-character value type code, e.g. 'Z', 'i' or 'B'.
  private char type;
  // Decoded tag value.
  private Object value;
  // Result of SAMTagUtil.getSingleton().makeBinaryTag(key).
  public short code;
  // Caller-assigned index; only touched by setIndex/getIndex in this class.
  private byte index;

  public ReadTag(int id, byte[] dataAsByteArray) {
    this.type = (char) (0xFF & id);
    key = new String(new char[] { (char) ((id >> 16) & 0xFF),
        (char) ((id >> 8) & 0xFF) });
    value = restoreValueFromByteArray(type, dataAsByteArray);
    keyType3Bytes = this.key + this.type;

    keyType3BytesAsInt = id;

    code = SAMTagUtil.getSingleton().makeBinaryTag(this.key);
  }

  public ReadTag(String key, char type, Object value) {
    if (key == null)
      throw new NullPointerException("Tag key cannot be null.");
    if (value == null)
      throw new NullPointerException("Tag value cannot be null.");

    this.value = value;

    if (key.length() == 2) {
      this.key = key;
      this.type = type;
      // this.type = getTagValueType(value);
      keyAndType = key + ":" + getType();
    } else if (key.length() == 4) {
      this.key = key.substring(0, 2);
      this.type = key.charAt(3);
    }

    keyType3Bytes = this.key + this.type;
    keyType3BytesAsInt = nameType3BytesToInt(this.key, this.type);

    code = SAMTagUtil.getSingleton().makeBinaryTag(this.key);
  }
 
  public static int name3BytesToInt(byte[] name) {
    int value = 0xFF & name[0];
    value <<= 8;
    value |= 0xFF & name[1];
    value <<= 8;
    value |= 0xFF & name[2];

    return value;
  }

  public static int nameType3BytesToInt(String name, char type) {
    int value = 0xFF & name.charAt(0);
    value <<= 8;
    value |= 0xFF & name.charAt(1);
    value <<= 8;
    value |= 0xFF & type;

    return value;
  }

  public static String intToNameType3Bytes(int value) {
    byte b3 = (byte) (0xFF & value);
    byte b2 = (byte) (0xFF & (value >> 8));
    byte b1 = (byte) (0xFF & (value >> 16));

    return new String(new byte[] { b1, b2, b3 });
  }

  public static String intToNameType4Bytes(int value) {
    byte b3 = (byte) (0xFF & value);
    byte b2 = (byte) (0xFF & (value >> 8));
    byte b1 = (byte) (0xFF & (value >> 16));

    return new String(new byte[] { b1, b2, ':', b3 });
  }

  public SAMTagAndValue createSAMTag() {
    return new SAMTagAndValue(key, value);
  }

  public static ReadTag deriveTypeFromKeyAndType(String keyAndType,
      Object value) {
    if (keyAndType.length() != 4)
      throw new RuntimeException("Tag key and type must be 4 char long: "
          + keyAndType);

    return new ReadTag(keyAndType.substring(0, 2), keyAndType.charAt(3),
        value);
  }

  public static ReadTag deriveTypeFromValue(String key, Object value) {
    if (key.length() != 2)
      throw new RuntimeException("Tag key must be 2 char long: " + key);

    return new ReadTag(key, getTagValueType(value), value);
  }

  public String getKey() {
    return key;
  }

  @Override
  public int compareTo(ReadTag o) {
    return key.compareTo(o.key);
  }

  @Override
  public boolean equals(Object obj) {
    if (!(obj instanceof ReadTag))
      return false;

    ReadTag foe = (ReadTag) obj;
    if (!key.equals(foe.key))
      return false;
    if (value == null && foe.value == null)
      return true;
    if (value != null && value.equals(foe.value))
      return true;

    return false;
  }

  @Override
  public int hashCode() {
    return key.hashCode();
  }

  public Object getValue() {
    return value;
  }

  public char getType() {
    return type;
  }

  public String getKeyAndType() {
    return keyAndType;
  }

  public byte[] getValueAsByteArray() {
    return writeSingleValue((byte)type, value, false) ;
  }

  public static Object restoreValueFromByteArray(char type, byte[] array) {
    ByteBuffer buf = ByteBuffer.wrap(array) ;
    buf.order(ByteOrder.LITTLE_ENDIAN) ;
    return readSingleValue((byte) type, buf, null) ;
  }

  // copied from net.sf.samtools.BinaryTagCodec 1.62:
  public static char getTagValueType(final Object value) {
    if (value instanceof String) {
      return 'Z';
    } else if (value instanceof Character) {
      return 'A';
    } else if (value instanceof Float) {
      return 'f';
    } else if (value instanceof Number) {
      if (!(value instanceof Byte || value instanceof Short
          || value instanceof Integer || value instanceof Long)) {
        throw new IllegalArgumentException("Unrecognized tag type "
            + value.getClass().getName());
      }
      return getIntegerType(((Number) value).longValue());
    } /*
     * Note that H tag type is never written anymore, because B style is
     * more compact. else if (value instanceof byte[]) { return 'H'; }
     */
    else if (value instanceof byte[] || value instanceof short[]
        || value instanceof int[] || value instanceof float[]) {
      return 'B';
    } else {
      throw new IllegalArgumentException(
          "When writing BAM, unrecognized tag type "
              + value.getClass().getName());
    }
  }

  // copied from net.sf.samtools.BinaryTagCodec:
  static private char getIntegerType(final long val) {
    if (val > MAX_UINT) {
      throw new IllegalArgumentException(
          "Integer attribute value too large to be encoded in BAM");
    }
    if (val > MAX_INT) {
      return 'I';
    }
    if (val > MAX_USHORT) {
      return 'i';
    }
    if (val > MAX_SHORT) {
      return 'S';
    }
    if (val > MAX_UBYTE) {
      return 's';
    }
    if (val > MAX_BYTE) {
      return 'C';
    }
    if (val >= Byte.MIN_VALUE) {
      return 'c';
    }
    if (val >= Short.MIN_VALUE) {
      return 's';
    }
    if (val >= Integer.MIN_VALUE) {
      return 'i';
    }
    throw new IllegalArgumentException(
        "Integer attribute value too negative to be encoded in BAM");
  }

  public void setIndex(byte i) {
    this.index = i;
  }

  public byte getIndex() {
    return index;
  }

  // Single 1 MiB direct scratch buffer shared by every call to
  // writeSingleValue, which clears it on entry (original author's remark:
  // "yeah, I'm that risky").
  // NOTE(review): nothing visible here synchronizes access, so concurrent
  // callers would presumably overwrite each other's data - confirm that
  // writeSingleValue is only used from one thread at a time.
  // NOTE(review): an encoded value larger than the buffer capacity cannot
  // fit; confirm callers never pass values that large.
  private static final ByteBuffer buf = ByteBuffer
      .allocateDirect(1024 * 1024);
  static {
    // All multi-byte values are written little-endian.
    buf.order(ByteOrder.LITTLE_ENDIAN);
  }
  // Charset used to encode 'Z' and 'H' string values into bytes.
  private static final Charset charset = Charset.forName("US-ASCII");

  public static byte[] writeSingleValue(byte tagType, Object value,
      boolean isUnsignedArray) {

    buf.clear();
    switch (tagType) {
    case 'Z':
      String s = (String) value;
      buf.put(s.getBytes(charset));
      buf.put((byte) 0);
      break;
    case 'A':
      buf.put((byte) ((Character) value).charValue());
      break;
    case 'I':
      // this is tricky:
      buf.putLong((Long) value);
      buf.position(buf.position() - 4);
      break;
    case 'i':
      buf.putInt((Integer) value);
      break;
    case 's':
      buf.putShort(((Number) value).shortValue());
      break;
    case 'S':
      // Convert to unsigned short stored in an int
      buf.putInt(((Number) value).intValue());
      buf.position(buf.position() - 2);
      break;
    case 'c':
      buf.put(((Number) value).byteValue());
      break;
    case 'C':
      // Convert to unsigned byte stored in an int
      buf.putShort(((Integer) value).shortValue());
      buf.position(buf.position() - 1);
      break;
    case 'f':
      buf.putFloat((Float) value);
      break;
    case 'H':
      s = StringUtil.bytesToHexString((byte[]) value);
      buf.put(s.getBytes(charset));
      buf.put((byte) 0);
      break;
    case 'B':
      writeArray(value, isUnsignedArray, buf);
      break;
    default:
      throw new SAMFormatException("Unrecognized tag type: "
          + (char) tagType);
    }

    buf.flip();
    byte[] bytes = new byte[buf.limit()];
    buf.get(bytes);

    return bytes;
  }

  private static void writeArray(final Object value,
      final boolean isUnsignedArray, ByteBuffer buf) {
    if (value instanceof byte[]) {
      buf.put((byte) (isUnsignedArray ? 'C' : 'c'));
      final byte[] array = (byte[]) value;
      buf.putInt(array.length);
      for (final byte element : array)
        buf.put(element);

    } else if (value instanceof short[]) {
      buf.put((byte) (isUnsignedArray ? 'S' : 's'));
      final short[] array = (short[]) value;
      buf.putInt(array.length);
      for (final short element : array)
        buf.putShort(element);

    } else if (value instanceof int[]) {
      buf.put((byte) (isUnsignedArray ? 'I' : 'i'));
      final int[] array = (int[]) value;
      buf.putInt(array.length);
      for (final int element : array)
        buf.putInt(element);

    } else if (value instanceof float[]) {
      buf.put((byte) 'f');
      final float[] array = (float[]) value;
      buf.putInt(array.length);
      for (final float element : array)
        buf.putFloat(element);

    } else
      throw new SAMException("Unrecognized array value type: "
          + value.getClass());
  }

  public static Object readSingleValue(final byte tagType,
      final ByteBuffer byteBuffer,
      final SAMFileReader.ValidationStringency validationStringency) {
    switch (tagType) {
    case 'Z':
      return readNullTerminatedString(byteBuffer);
    case 'A':
      return (char) byteBuffer.get();
    case 'I':
      final long val = byteBuffer.getInt() & 0xffffffffL;
      if (val <= Integer.MAX_VALUE) {
        return (int) val;
      }
      throw new RuntimeException(
          "Tag value is too large to store as signed integer.");
    case 'i':
      return byteBuffer.getInt();
    case 's':
      return (int) byteBuffer.getShort();
    case 'S':
      // Convert to unsigned short stored in an int
      return byteBuffer.getShort() & 0xffff;
    case 'c':
      return (int) byteBuffer.get();
    case 'C':
      // Convert to unsigned byte stored in an int
      return (int) byteBuffer.get() & 0xff;
    case 'f':
      return byteBuffer.getFloat();
    case 'H':
      final String hexRep = readNullTerminatedString(byteBuffer);
      return StringUtil.hexStringToBytes(hexRep);
    case 'B':
      final TagValueAndUnsignedArrayFlag valueAndFlag = readArray(
          byteBuffer, validationStringency);
      return valueAndFlag.value ;
    default:
      throw new SAMFormatException("Unrecognized tag type: "
          + (char) tagType);
    }
  }

  /**
   * Read value of specified type.
   *
   * @param byteBuffer
   *            Little-ending byte buffer to read value from.
   * @return CVO containing the value in in-memory Object form, and a flag
   *         indicating whether it is unsigned or not.
   */
  private static TagValueAndUnsignedArrayFlag readArray(
      final ByteBuffer byteBuffer,
      final SAMFileReader.ValidationStringency validationStringency) {
    final byte arrayType = byteBuffer.get();
    final boolean isUnsigned = Character.isUpperCase(arrayType);
    final int length = byteBuffer.getInt();
    final Object value;
    switch (Character.toLowerCase(arrayType)) {
    case 'c': {
      final byte[] array = new byte[length];
      value = array;
      byteBuffer.get(array);
      break;
    }
    case 's': {
      final short[] array = new short[length];
      value = array;
      for (int i = 0; i < length; ++i) {
        array[i] = byteBuffer.getShort();
      }
      break;
    }

    case 'i': {
      final int[] array = new int[length];
      value = array;
      for (int i = 0; i < length; ++i) {
        array[i] = byteBuffer.getInt();
      }
      break;
    }

    case 'f': {
      final float[] array = new float[length];
      value = array;
      for (int i = 0; i < length; ++i) {
        array[i] = byteBuffer.getFloat();
      }
      break;
    }

    default:
      throw new SAMFormatException("Unrecognized tag array type: "
          + (char) arrayType);
    }
    return new TagValueAndUnsignedArrayFlag(value, isUnsigned);
  }

  private static String readNullTerminatedString(final ByteBuffer byteBuffer) {
    // Count the number of bytes in the string
    byteBuffer.mark();
    final int startPosition = byteBuffer.position();
    while (byteBuffer.get() != 0) {
    }
    final int endPosition = byteBuffer.position();

    // Don't count null terminator
    final byte[] buf = new byte[endPosition - startPosition - 1];
    // Go back to the start of the string and read out the bytes
    byteBuffer.reset();
    byteBuffer.get(buf);
    // Skip over the null terminator
    byteBuffer.get();
    return StringUtil.bytesToString(buf);
  }

  public static void main(String[] args) {
    SAMFileHeader h = new SAMFileHeader();
    SAMRecord r = new SAMRecord(h);
    r.setAttribute("OQ", "A:LKAS:LKASDJKL".getBytes());
    r.setAttribute("XA", 1333123);
    r.setAttribute("XB", (byte) 31);
    r.setAttribute("XB", 'Q');
    r.setAttribute("XC", "A STRING");

    int intValue = 1123123123;
    byte[] data = writeSingleValue((byte) 'i', intValue, false);
    ByteBuffer byteBuffer = ByteBuffer.wrap(data);
    byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
    Object value = readSingleValue((byte) 'i', byteBuffer, null);
    if (intValue != ((Integer) value).intValue())
      throw new RuntimeException("Failed for " + intValue);

    String sValue = "qwe";
    data = writeSingleValue((byte) 'Z', sValue, false);
    byteBuffer = ByteBuffer.wrap(data);
    byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
    value = readSingleValue((byte) 'Z', byteBuffer, null);
    if (!sValue.equals(value))
      throw new RuntimeException("Failed for " + sValue);

    byte[] baValue = "qwe".getBytes();
    data = writeSingleValue((byte) 'B', baValue, false);
    byteBuffer = ByteBuffer.wrap(data);
    byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
    value = readSingleValue((byte) 'B', byteBuffer, null);
    if (!Arrays.equals(baValue, (byte[]) value))
      throw new RuntimeException("Failed for " + baValue);
  }

}
TOP

Related Classes of net.sf.cram.ReadTag

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by Oracle Inc. Contact coftware#gmail.com.