Package uk.ac.ucl.panda.utility.structure

Examples of uk.ac.ucl.panda.utility.structure.Field


  // use only part of the body, modify it to keep the rest (or use all if size==0).
  // reset the docdata properties so they are not added more than once.
  private Document createDocument(DocData docData, int size, int cnt) throws UnsupportedEncodingException {
    int docid = incrNumDocsCreated();
    Document doc = new Document();
    doc.add(new Field(ID_FIELD, docid+"", storeVal, indexVal, termVecVal));
    if (docData.getName()!=null) {
      String name = (cnt<0 ? docData.getName() : docData.getName()+"_"+cnt);
     ///////////////////
      doc.add(new Field(NAME_FIELD, name, Field.Store.YES,Field.Index.UN_TOKENIZED, termVecVal));
    }
    if (docData.getDate()!=null) {
      String dateStr = DateTools.dateToString(docData.getDate(), DateTools.Resolution.SECOND);
      doc.add(new Field(DATE_FIELD, dateStr, storeVal, indexVal, termVecVal));
    }
    if (docData.getTitle()!=null) {
      doc.add(new Field(TITLE_FIELD, docData.getTitle(), storeVal, indexVal, termVecVal));
    }
    if (docData.getBody()!=null && docData.getBody().length()>0) {
      String bdy;
      if (size<=0 || size>=docData.getBody().length()) {
        bdy = docData.getBody(); // use all
        docData.setBody("")// nothing left
      } else {
        // attempt not to break words - if whitespace found within next 20 chars...
        for (int n=size-1; n<size+20 && n<docData.getBody().length(); n++) {
          if (Character.isWhitespace(docData.getBody().charAt(n))) {
            size = n;
            break;
          }
        }
        bdy = docData.getBody().substring(0,size); // use part
        docData.setBody(docData.getBody().substring(size)); // some left
      }
      doc.add(new Field(BODY_FIELD, bdy, storeVal, indexVal, Field.TermVector.YES));
      if (storeBytes == true) {
        doc.add(new Field(BYTES_FIELD, bdy.getBytes("UTF-8"), Field.Store.YES));
      }
    }

    if (docData.getProps()!=null) {
      for (Iterator it = docData.getProps().keySet().iterator(); it.hasNext(); ) {
        String key = (String) it.next();
        String val = (String) docData.getProps().get(key);
        doc.add(new Field(key, val, storeVal, indexVal, termVecVal));
      }
      docData.setProps(null);
    }
    //System.out.println("============== Created doc "+numDocsCreated+" :\n"+doc+"\n==========");
    return doc;
View Full Code Here


    if (binary) {
      int toRead = fieldsStream.readVInt();
      final byte[] b = new byte[toRead];
      fieldsStream.readBytes(b, 0, b.length);
      if (compressed)
        doc.add(new Field(fi.getName(), uncompress(b), Field.Store.COMPRESS));
      else
        doc.add(new Field(fi.getName(), b, Field.Store.YES));

    } else {
      Field.Store store = Field.Store.YES;
      Field.Index index = getIndexType(fi, tokenize);
      Field.TermVector termVector = getTermVectorType(fi);

      Fieldable f;
      if (compressed) {
        store = Field.Store.COMPRESS;
        int toRead = fieldsStream.readVInt();

        final byte[] b = new byte[toRead];
        fieldsStream.readBytes(b, 0, b.length);
        f = new Field(fi.getName(),      // field name
                new String(uncompress(b), "UTF-8"), // uncompress the value and add as string
                store,
                index,
                termVector);
        f.setOmitNorms(fi.getOmitNorms());
      } else {
        f = new Field(fi.getName(),     // name
                fieldsStream.readString(), // read value
                store,
                index,
                termVector);
        f.setOmitNorms(fi.getOmitNorms());
View Full Code Here

    byte[] sizebytes = new byte[4];
    sizebytes[0] = (byte) (bytesize>>>24);
    sizebytes[1] = (byte) (bytesize>>>16);
    sizebytes[2] = (byte) (bytesize>>> 8);
    sizebytes[3] = (bytebytesize      ;
    doc.add(new Field(fi.getName(), sizebytes, Field.Store.YES));
    return size;
  }
View Full Code Here

TOP

Related Classes of uk.ac.ucl.panda.utility.structure.Field

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.