Package net.bpiwowar.mg4j.extensions.query

Examples of net.bpiwowar.mg4j.extensions.query.DefaultQuerySet


   * @throws IOException
   */
  static public DefaultQuerySet readTopics(BufferedReader reader,
                                             final boolean quoteCommas) throws IOException {
    logger.debug("Reading a topic file");
    final DefaultQuerySet querySet = new DefaultQuerySet();

    BulletParser bulletParser = new BulletParser(
        TRECParsingFactory.INSTANCE);

    bulletParser.setCallback(new DefaultCallback() {
      TRECTopic topic = null;
      MutableString curText = new MutableString();
      Element curElement;

      @Override
      public boolean characters(char[] text, int offset, int length,
          boolean flowBroken) {
        curText.append(text, offset, length);
        return true;
      }

      @Override
      public boolean startElement(Element element,
          Map<Attribute, MutableString> attrMapUnused) {

        // --- New tag
        if (topic != null)
          process();

        // ---
        if (element == TRECParsingFactory.ELEMENT_TOP) {
          topic = new TRECTopic();
        }
        curElement = element;
        return true;
      }

      void removePrefix(String prefix, MutableString text) {
        if (text.startsWith(prefix))
          text.delete(0, prefix.length());

      }

      private void process() {
        curText.trim();
        curText.replace('\n', ' ');
        curText.squeezeSpaces(false);

        if (curElement == TRECParsingFactory.ELEMENT_TITLE) {
          removePrefix("Topic: ", curText);
          if (quoteCommas) {
            StringBuilder builder = new StringBuilder();
            boolean first = true;
            for (String part : curText.toString()
                .split("\\s*,\\s*")) {
              if (first)
                first = false;
              else
                builder.append(' ');

              if (part.indexOf(' ') >= 0) {
                builder.append('"');
                builder.append(part);
                builder.append('"');
              } else
                builder.append(part);
            }

            topic.title = builder.toString();
          } else
            topic.title = curText.toString();
        } else if (curElement == TRECParsingFactory.ELEMENT_NUM) {
          removePrefix("Number: ", curText);
          // Normalise the number
          topic.id = new Integer(curText.toString()).toString();
        } else if (curElement == TRECParsingFactory.ELEMENT_DESC) {
          removePrefix("Description: ", curText);
          topic.description = curText.toString();
        } else if (curElement == TRECParsingFactory.ELEMENT_NARR) {
          // TREC
          removePrefix("Narrative: ", curText);
          topic.narrative = curText.toString();
        } else if (curElement == TRECParsingFactory.ELEMENT_SMRY) {
          removePrefix("Summary: ", curText);
          topic.summary = curText.toString();
        } else if (curElement == TRECParsingFactory.ELEMENT_CON) {
          // TREC 1
          removePrefix("Concepts: ", curText);
          topic.concepts = curText.toString();
        } else if (curElement == TRECParsingFactory.ELEMENT_DEF) {
          // TREC 1
          removePrefix("Definition(s): ", curText);
          removePrefix("Definition: ", curText);
          topic.definitions = curText.toString();
        }
        curElement = null;
        curText.delete(0, curText.length());
      }

      @Override
      public boolean endElement(Element element) {
        if (topic != null)
          process();

        if (element == TRECParsingFactory.ELEMENT_TOP) {
          if (topic.id == null) {
            logger.warn("Topic had no identifier - skipping");
          } else {
            logger.debug(new LazyString("Adding topic %s with title [%s]",
                topic.id, topic.title));

            querySet.put(topic.id, topic);
          }
          topic = null;
        }
        return true;
      }
View Full Code Here

TOP

Related Classes of net.bpiwowar.mg4j.extensions.query.DefaultQuerySet

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.