Examples of net.sf.clairv.index.document.Document

net.sf.clairv.index.document.Document
A document is an indexable record. @author qiuyin

      for (int i = 1; i <= count; i++) {
        String colName = rsm.getColumnName(i);
        fieldNames.add(colName);
      }
      while (rs.next()) {
        Document doc = docFactory.createDocument();
        for (Iterator itr = fieldNames.iterator(); itr.hasNext(); ) {
          String name = (String)itr.next();
          // XXX could do some tweak by eliminating the lookup
          Object obj = rs.getObject(name);
          String content = null;
          if (obj instanceof Date) {
            content = format.format((Date) obj);
          } else {
            content = rs.getObject(name).toString();
          }
          Pair type = (Pair) fieldTypeMappings.get(name);
          log.debug("Adding a field of name \"" + name
              + "\" with content: " + content);
          if (type != null) {
            doc.addField(name, content, (StoreOption)(type.t), (IndexOption)(type.v));
          } else {
            log.warn("Type of filed " + name + " is not mapped; "
                + "regarding as stored and untokenized");
            doc.addField(name, content, StoreOption.YES, IndexOption.UN_TOKENIZED);
          }
        }
        holder.addDocument(doc);
        docCount++;
      }

View Full Code Here

  protected List readDocumentsFromFeed(SyndFeed feed, DocumentFactory docFactory) {
    List entries = feed.getEntries();
    List docs = new ArrayList(entries.size());
    for (Iterator itr = entries.iterator(); itr.hasNext(); ) {
      SyndEntry entry = (SyndEntry)itr.next();
      Document doc = docFactory.createDocument();
      doc.addField("title", entry.getTitle(), StoreOption.YES,
          IndexOption.TOKENIZED);
      SyndContent descContent = entry.getDescription();
      String contentType = descContent.getType();
      String description = null;
      if (contentType != null && contentType.indexOf("text/html") != -1) {
        // HTML description; removes the tags
        description = descContent.getValue().replaceAll("<[^>]+>", "");
      } else {
        description = descContent.getValue();
      }
      doc.addField("description", description, StoreOption.YES,
          IndexOption.TOKENIZED);
      String author = entry.getAuthor();
      if (author == null)
        author = "";
      doc.addField("author", author, StoreOption.YES,
          IndexOption.TOKENIZED);
      doc.addField("url", entry.getLink(), StoreOption.YES,
          IndexOption.NO);
      doc.addField("publishedDate", DateTools.dateToString(entry
          .getPublishedDate(), DateTools.Resolution.MINUTE),
          StoreOption.YES, IndexOption.UN_TOKENIZED);
      docs.add(doc);
    }
    return docs;

View Full Code Here

      FTPFile child = children[i];
      if (child.isDirectory()) {
        // delay processing the directory
        directories.add(child.getName());
      } else if (child.isFile()) {
        Document doc = docFactory.createDocument();
        doc.addField("fileName", child.getName(), StoreOption.YES,
            IndexOption.TOKENIZED);
        doc.addField("fileSize", (child.getSize() / 1024) + "KB",
            StoreOption.YES, IndexOption.NO);
        doc.addField("filePath", prefix + "/" + child.getName(),
            StoreOption.YES, IndexOption.NO);
        fileCount++;
        holder.addDocument(doc);
      } else {
        log.warn("Discarded file: " + child.getName());

View Full Code Here


    private void action(Iterator itr) {
      if (indexWriter != null) {
        try {
          while (itr.hasNext()) {
            Document doc = (Document)itr.next();
            indexWriter.addDocument(LuceneUtils.convert(doc));
          }
          indexWriter.flush();
        } catch (CorruptIndexException e) {
          log.warn("The original index might have been corrupted.");

View Full Code Here

  }


  protected void visitFileRecursively(File file, DocumentHolder holder, DocumentFactory docFactory) {
    if (file.isFile()) {
      if (isIncluded(file) && !isExcluded(file)) {
        Document doc = processFile(file, docFactory);
        if (doc != null) {
          doc.addField("fileName", file.getName(), StoreOption.YES,
              IndexOption.TOKENIZED);
          doc.addField("filePath", file.getAbsolutePath(),
              StoreOption.YES, IndexOption.NO);
          doc.addField("fileSize", file.length() / 1024 + "KB",
              StoreOption.YES, IndexOption.NO);
          doc.addField("modificationTime", DateTools.dateToString(
              (new Date(file.lastModified())),
              DateTools.Resolution.SECOND), StoreOption.YES,
              IndexOption.NO);
          fileCount++;
          holder.addDocument(doc);

View Full Code Here

      }
      try {
        FileInputStream fis = new FileInputStream(file);
        log.debug("Building document from file: "
            + file.getAbsolutePath());
        Document doc = docFactory.createDocument();
        builder.buildDocument(fis, doc);
        return doc;
      } catch (FileNotFoundException e) {
        log.error("File in use or not found");
        return null;

View Full Code Here

TOP

Related Classes of net.sf.clairv.index.document.Document

net.sf.clairv.index.processor.LuceneResourceProcessor$BufferAction

net.sf.clairv.index.resource.DefaultDatabaseResource

net.sf.clairv.index.resource.DefaultFileSystemResource

net.sf.clairv.index.resource.FtpResource

net.sf.clairv.index.resource.SyndicationFeedsResource

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by ORACLE Inc. Contact: coftware#gmail.com.