Package uk.ac.cam.ch.wwmm.ptclib.xml

Examples of uk.ac.cam.ch.wwmm.ptclib.xml.StandoffTable


   * @param oscarSafDoc The Oscar3 SAF document.
   * @throws Exception
   */
  public static void processGeniaAndOscarSafs(Document sourceXML, Document geniaSafDoc, Document oscarSafDoc) throws Exception {
    XMLSpanTagger.tagUpDocument(sourceXML.getRootElement(), "a");
    StandoffTable st = new StandoffTable(sourceXML.getRootElement());
   
    String chunkStart = null;
    String chunkType = null;
    String neStart = null;
    String neType = null;
    String lastEnd = null;
    String docStr = sourceXML.getValue();
   
    List<Element> chunks = new ArrayList<Element>();
    List<Element> nes = new ArrayList<Element>();
   
    Map<String, Element> toToElem = new HashMap<String, Element>();
   
    for(int i=0;i<geniaSafDoc.getRootElement().getChildCount();i++) {
      Element e = (Element)geniaSafDoc.getRootElement().getChild(i);
      if(e.getAttributeValue("type").equals("sentence")) continue;
      toToElem.put(e.getAttributeValue("to"), e);
      String chunk = e.query("slot[@name='chunk']").get(0).getValue();
      String ne = e.query("slot[@name='geniane']").get(0).getValue();
      if(chunk.startsWith("B")) {
        if(chunkStart != null) {
          chunks.add(SafTools.makeAnnot(chunkStart, lastEnd, "chunk", chunkType, docStr.substring(st.getOffsetAtXPoint(chunkStart), st.getOffsetAtXPoint(lastEnd))));
          chunkStart = null;
        }
        chunkStart = e.getAttributeValue("from");
        chunkType = chunk.substring(2);
      } else if(chunk.startsWith("O")) {
        if(chunkStart != null) {
          chunks.add(SafTools.makeAnnot(chunkStart, lastEnd, "chunk", chunkType, docStr.substring(st.getOffsetAtXPoint(chunkStart), st.getOffsetAtXPoint(lastEnd))));
          chunkStart = null;
        }       
      }
      if(ne.startsWith("B")) {
        if(neStart != null) {
          nes.add(SafTools.makeAnnot(neStart, lastEnd, "ne", neType, docStr.substring(st.getOffsetAtXPoint(neStart), st.getOffsetAtXPoint(lastEnd))));
          neStart = null;
        }
        neStart = e.getAttributeValue("from");
        neType = ne.substring(2);
       
       
      } else if(ne.startsWith("O")) {
        if(neStart != null) {
          nes.add(SafTools.makeAnnot(neStart, lastEnd, "ne", neType, docStr.substring(st.getOffsetAtXPoint(neStart), st.getOffsetAtXPoint(lastEnd))));
          neStart = null;
        }       
      }
     
      lastEnd = e.getAttributeValue("to");
    }
    if(chunkStart != null) {
      chunks.add(SafTools.makeAnnot(chunkStart, lastEnd, "chunk", chunkType, docStr.substring(st.getOffsetAtXPoint(chunkStart), st.getOffsetAtXPoint(lastEnd))));
      chunkStart = null;
    }
    if(neStart != null) {
      nes.add(SafTools.makeAnnot(neStart, lastEnd, "ne", neType, docStr.substring(st.getOffsetAtXPoint(neStart), st.getOffsetAtXPoint(lastEnd))));
      neStart = null;
    }

    for(Element e : chunks) geniaSafDoc.getRootElement().appendChild(e);
    for(Element e : nes) geniaSafDoc.getRootElement().appendChild(e);
View Full Code Here


      charPosToCharPos.add(-1);
    }
    if(verbose) System.out.println("Initialised charmap at " + (System.currentTimeMillis() - startTime));
    XMLSpanTagger.tagUpDocument(xml1.getRootElement(), "a");
    XMLSpanTagger.tagUpDocument(xml2.getRootElement(), "a");
    st1 = new StandoffTable(xml1.getRootElement());
    st2 = new StandoffTable(xml2.getRootElement());
    if(verbose) System.out.println("Tags and tables at " + (System.currentTimeMillis() - startTime));
   
    List<Element> alignable = new ArrayList<Element>(elemToElem.keySet());
    Collections.sort(alignable, Collections.reverseOrder(new Comparator<Element>() {
      public int compare(Element o1, Element o2) {
View Full Code Here

public class ValueCharPoint2XMLCharPoint {

  public static Map<Integer,Integer> valueCharPoint2XMLCharPoint(File f) throws Exception {
    Map<String,Integer> pm = Xpoint2Charpoint.parseFile(f);
    Document doc = new Builder().build(f);
    StandoffTable st = new StandoffTable(doc.getRootElement());
   
    int size = st.getSize();
    Map<Integer,Integer> m = new HashMap<Integer,Integer>();
    for(int i=0;i<=size;i++) {
      if(i < size) {
        int cpoint = pm.get(st.getLeftPointAtOffset(i));
        if(m.containsKey(cpoint) && m.get(cpoint) != i) System.out.println(i + "\t" + cpoint + "\t" + m.get(cpoint));
        m.put(cpoint, i);
      }
      if(i > 0) {
        int cpoint = pm.get(st.getRightPointAtOffset(i));
        if(m.containsKey(cpoint) && m.get(cpoint) != i) System.out.println(i + "\t" + cpoint + "\t" + m.get(cpoint));
        m.put(cpoint, i);
      }
    }
    return m;
View Full Code Here

    InlineToSAF nets = new InlineToSAF(neDoc, refDoc, name);
    return nets.getSAF();
  }
 
  private InlineToSAF(Document neDoc, Document refDoc, String name) throws Exception {   
    StandoffTable st = new StandoffTable(refDoc.getRootElement());
    new XMLSpanTagger(neDoc.getRootElement(), "n");

    Element saf = new Element("saf");
    safDoc = new Document(saf);
    saf.addAttribute(new Attribute("document", name));
   
    Nodes nes = neDoc.query("//ne");
    for(int i=0;i<nes.size();i++) {
      Element e = (Element)nes.get(i);
      String xps = st.getLeftPointAtOffset(Integer.parseInt(e.getAttributeValue("xtspanstart")));
      String xpe = st.getRightPointAtOffset(Integer.parseInt(e.getAttributeValue("xtspanend")));

      //Element safElem = new Element("annot");
      Element safElem = SafTools.makeAnnot(xps, xpe, "oscar");
      saf.appendChild(safElem);
      //safElem.addAttribute(new Attribute("from", xps));
View Full Code Here

  public void testStandoffTable() throws Exception {
    Element testXML = XMLBuilder.build("<a> <b /><c /> <c /><b /> <b><c /></b> <c><b /></c> \n" +
        " <b><c>_</c></b> <b /><c>_</c> <c><b />_</c> <b>_<c /></b> <b>_</b><c /> \n" +
        " +++***+++ +++*<d>*</d>*+++ +++<d>***</d>+++ +++<d>**</d>*+++ +++*<d>**</d>+++ \n" +
        " +++*<d />**+++ +++*<d><e>*</e></d>*+++ </a>", "/localhost").getRootElement();
    StandoffTable st = new StandoffTable(testXML);
    st.getElemAtOffset(0);
    assertTrue("No exception thown yet!", true);
  }
View Full Code Here

  public void testGetElemAtOffset() throws Exception {
    Element testXML = XMLBuilder.build("<a>012345678<b>9" +
        "0</b>1234<c>5</c>6789" +
        "<d>0<e>1</e>2</d>3<d><e>45</e>6</d>789" +
        "01<f><g>23</g></f>456<i/>789</a>", "/localhost").getRootElement();
    StandoffTable st = new StandoffTable(testXML);
    assertTrue("No exception thown yet!", true);
    assertEquals("Root element at pos 0", "a", st.getElemAtOffset(0).getLocalName());
    assertEquals("b element at pos 10", "b", st.getElemAtOffset(10).getLocalName());
    assertEquals("Root element at pos 14", "a", st.getElemAtOffset(14).getLocalName());
    assertEquals("c element at pos 15", "c", st.getElemAtOffset(15).getLocalName());
    assertEquals("Root element at pos 16", "a", st.getElemAtOffset(16).getLocalName());
    assertEquals("d element at pos 20", "d", st.getElemAtOffset(20).getLocalName());
    assertEquals("e element at pos 21", "e", st.getElemAtOffset(21).getLocalName());
    assertEquals("d element at pos 22", "d", st.getElemAtOffset(22).getLocalName());
    assertEquals("Root element at pos 23", "a", st.getElemAtOffset(23).getLocalName());   
    assertEquals("e element at pos 25", "e", st.getElemAtOffset(25).getLocalName());
    assertEquals("d element at pos 26", "d", st.getElemAtOffset(26).getLocalName());
    assertEquals("Root element at pos 31", "a", st.getElemAtOffset(31).getLocalName());
    assertEquals("g element at pos 32", "g", st.getElemAtOffset(32).getLocalName());
    assertEquals("g element at pos 33", "g", st.getElemAtOffset(33).getLocalName());
    assertEquals("Root element at pos 34", "a", st.getElemAtOffset(34).getLocalName());
    assertEquals("Root element at pos 36", "a", st.getElemAtOffset(36).getLocalName());
    assertEquals("Root element at pos 37", "a", st.getElemAtOffset(37).getLocalName());
    assertEquals("Root element at pos 39", "a", st.getElemAtOffset(39).getLocalName());
    try {
      st.getElemAtOffset(40);
      assertTrue("Should have caught an Exception by now", false);
    } catch (Exception e) {
      assertTrue("Caught an exception: no position 40", true);
    }
  }
View Full Code Here

  public void testGetLeftPointAtOffset() throws Exception {
    Element testXML = XMLBuilder.build("<a>012345678<b>9" +
        "0</b>1234<c>5</c>6789" +
        "<d>0<e>1</e>2</d>3<d><e>45</e>6</d>789" +
        "01<f><g>23</g></f>456<i/>789</a>", "/localhost").getRootElement();
    StandoffTable st = new StandoffTable(testXML);
    assertTrue("No exception thown yet!", true);
    assertEquals("At pos 0", "/1/1.0", st.getLeftPointAtOffset(0));
    assertEquals("At pos 1", "/1/1.1", st.getLeftPointAtOffset(1));
    assertEquals("At pos 8", "/1/1.8", st.getLeftPointAtOffset(8));
    assertEquals("At pos 9", "/1/2/1.0", st.getLeftPointAtOffset(9));
    assertEquals("At pos 10", "/1/2/1.1", st.getLeftPointAtOffset(10));
    assertEquals("At pos 11", "/1/3.0", st.getLeftPointAtOffset(11));
    assertEquals("At pos 19", "/1/5.3", st.getLeftPointAtOffset(19));
    assertEquals("At pos 20", "/1/6/1.0", st.getLeftPointAtOffset(20));
    assertEquals("At pos 21", "/1/6/2/1.0", st.getLeftPointAtOffset(21));
    assertEquals("At pos 22", "/1/6/3.0", st.getLeftPointAtOffset(22));
    assertEquals("At pos 23", "/1/7.0", st.getLeftPointAtOffset(23));
    assertEquals("At pos 24", "/1/8/1/1.0", st.getLeftPointAtOffset(24));
    assertEquals("At pos 25", "/1/8/1/1.1", st.getLeftPointAtOffset(25));
    assertEquals("At pos 26", "/1/8/2.0", st.getLeftPointAtOffset(26));

  }
View Full Code Here

  public void testGetRightPointAtOffset() throws Exception {
    Element testXML = XMLBuilder.build("<a>012345678<b>9" +
        "0</b>1234<c>5</c>6789" +
        "<d>0<e>1</e>2</d>3<d><e>45</e>6</d>789" +
        "01<f><g>23</g></f>456<i/>789</a>", "/localhost").getRootElement();
    StandoffTable st = new StandoffTable(testXML);
    assertTrue("No exception thown yet!", true);
    try {
      assertEquals("At pos 0", "/1.0", st.getRightPointAtOffset(0));
      assertTrue("This should throw!", false);
    } catch (Exception e) {
      assertTrue("Throws exception OK!", true);
    }
    assertEquals("At pos 1", "/1/1.1", st.getRightPointAtOffset(1));
    assertEquals("At pos 8", "/1/1.8", st.getRightPointAtOffset(8));
    assertEquals("At pos 9", "/1/1.9", st.getRightPointAtOffset(9));
    assertEquals("At pos 10", "/1/2/1.1", st.getRightPointAtOffset(10));
    assertEquals("At pos 11", "/1/2/1.2", st.getRightPointAtOffset(11));
    assertEquals("At pos 19", "/1/5.3", st.getRightPointAtOffset(19));
    assertEquals("At pos 20", "/1/5.4", st.getRightPointAtOffset(20));
    assertEquals("At pos 21", "/1/6/1.1", st.getRightPointAtOffset(21));
    assertEquals("At pos 22", "/1/6/2/1.1", st.getRightPointAtOffset(22));
    assertEquals("At pos 23", "/1/6/3.1", st.getRightPointAtOffset(23));
    assertEquals("At pos 24", "/1/7.1", st.getRightPointAtOffset(24));
    assertEquals("At pos 25", "/1/8/1/1.1", st.getRightPointAtOffset(25));
    assertEquals("At pos 26", "/1/8/1/1.2", st.getRightPointAtOffset(26));
  }
View Full Code Here

   */
  public static Document safToInline(Document safDoc, Document plainDoc, Document ontoDoc, boolean brittle) throws Exception {
    Document resultsDoc = new Document((Element)XOMTools.safeCopy(ontoDoc.getRootElement()));
    plainDoc = new Document((Element)XOMTools.safeCopy(plainDoc.getRootElement()));
   
    StandoffTable st = new StandoffTable(plainDoc.getRootElement());
    XMLInserter xi = new XMLInserter(resultsDoc.getRootElement(), "a", "c");
    new XMLSpanTagger(plainDoc.getRootElement(), "b");
   
    Nodes annots = safDoc.query("//annot");
    for(int i=0;i<annots.size();i++) {
      //if(true) continue;
      Element annot = (Element)annots.get(i);
     
      String blocked = SafTools.getSlotValue(annot, "blocked");
      if("true".equals(blocked)) continue;
     
      Element ne = new Element("ne");
     
      String type = SafTools.getSlotValue(annot, "type");
      if(null == type) continue;
      if("dataSection".equals(type)) ne.setLocalName("datasection");
     
      if(annot.getAttribute("id") != null) ne.addAttribute(new Attribute("id", annot.getAttributeValue("id")));
     
      Elements slots = annot.getChildElements("slot");
      for(int j=0;j<slots.size();j++) {
        String slotName = slots.get(j).getAttributeValue("name");
        //if(slotName.equals("surface")) continue;
        ne.addAttribute(new Attribute(slotName, slots.get(j).getValue()));
      }
      int startOffset; int endOffset;
      try {
        startOffset = st.getOffsetAtXPoint(annot.getAttributeValue("from"));
        endOffset = st.getOffsetAtXPoint(annot.getAttributeValue("to"));
      } catch (Exception e) {
        if(brittle)  {
          e.printStackTrace();
          System.err.println(annot.getAttributeValue("from"));
          System.err.println(annot.getAttributeValue("to"));
View Full Code Here

    Element testXML = XMLBuilder.build("<a>012345678<b>9" +
        "0</b>1234<c>5</c>6789" +
        "<d>0<e>1</e>2</d>3<d><e>45</e>6</d>789" +
        "01<f><g>23</g></f>456<i/>789</a>", "/localhost").getRootElement();
    XMLSpanTagger.tagUpDocument(testXML, "a");
    StandoffTable st = new StandoffTable(testXML);
        assertEquals("At /1/1.1", 1, st.getOffsetAtXPoint("/1/1.1"));
        assertEquals("At /1/1.8", 8, st.getOffsetAtXPoint("/1/1.8"));
        assertEquals("At /1/1.9", 9, st.getOffsetAtXPoint("/1/1.9"));
        assertEquals("At /1/2/1.1", 10, st.getOffsetAtXPoint("/1/2/1.1"));
        assertEquals("At /1/2/1.2", 11, st.getOffsetAtXPoint("/1/2/1.2"));
        assertEquals("At /1/5.3", 19, st.getOffsetAtXPoint("/1/5.3"));
        assertEquals("At /1/5.4", 20, st.getOffsetAtXPoint("/1/5.4"));
        assertEquals("At /1/6/1.1", 21, st.getOffsetAtXPoint("/1/6/1.1"));
        assertEquals("At /1/6/2/1.1", 22, st.getOffsetAtXPoint("/1/6/2/1.1"));
        assertEquals("At /1/6/3.1", 23, st.getOffsetAtXPoint("/1/6/3.1"));
        assertEquals("At /1/7.1", 24, st.getOffsetAtXPoint("/1/7.1"));
        assertEquals("At /1/8/1/1.1", 25, st.getOffsetAtXPoint("/1/8/1/1.1"));
        assertEquals("At /1/8/1/1.2", 26, st.getOffsetAtXPoint("/1/8/1/1.2"));
               
        assertEquals("At /1/1.0", 0, st.getOffsetAtXPoint("/1/1.0"));
        assertEquals("At /1/1.1", 1, st.getOffsetAtXPoint("/1/1.1"));
        assertEquals("At /1/1.8", 8, st.getOffsetAtXPoint("/1/1.8"));
        assertEquals("At /1/2/1.0", 9, st.getOffsetAtXPoint("/1/2/1.0"));
        assertEquals("At /1/2/1.1", 10, st.getOffsetAtXPoint("/1/2/1.1"));
        assertEquals("At /1/3.0", 11, st.getOffsetAtXPoint("/1/3.0"));
        assertEquals("At /1/5.3", 19, st.getOffsetAtXPoint("/1/5.3"));
        assertEquals("At /1/6/1.0", 20, st.getOffsetAtXPoint("/1/6/1.0"));
        assertEquals("At /1/6/2/1.0", 21, st.getOffsetAtXPoint("/1/6/2/1.0"));
        assertEquals("At /1/6/3.0", 22, st.getOffsetAtXPoint("/1/6/3.0"));
        assertEquals("At /1/7.0", 23, st.getOffsetAtXPoint("/1/7.0"));
        assertEquals("At /1/8/1/1.0", 24, st.getOffsetAtXPoint("/1/8/1/1.0"));
        assertEquals("At /1/8/1/1.1", 25, st.getOffsetAtXPoint("/1/8/1/1.1"));
        assertEquals("At /1/8/2.0", 26, st.getOffsetAtXPoint("/1/8/2.0"));
  }
View Full Code Here

TOP

Related Classes of uk.ac.cam.ch.wwmm.ptclib.xml.StandoffTable

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.