Package net.fp.rp.search.back.struct

Examples of net.fp.rp.search.back.struct.DocumStruct


                            double sumGroup = 0;

                            //sum the direct scores of all the nodes found in the same set
                            for (int j = 0; j < result.getDocuments().size();
                                    j++) {
                                DocumStruct doc = (DocumStruct) result.getDocuments()
                                                                      .get(j);

                                sumGroup = sumGroup +
                                    PluginManager.getCategoryStores()
                                                 .getDirectScore(doc.getCategoryName(),
                                        doc.getId());
                            }

                            //calculate the average
                            double average = 0.0;

                            if (result.getDocuments().size() > 0) {
                                average = sumGroup / result.getDocuments().size();
                            }

                            //case of "term AND categoryname:X" and set of term < set of categoryname:X (1 category in the system)
                            //store the new average score for each document in the set
                            for (int j = 0; j < result.getDocuments().size();
                                    j++) {
                                DocumStruct doc = (DocumStruct) result.getDocuments()
                                                                      .get(j);

                                //create a new document set and add it to the list
                                listLinks.add(new DocumSetStruct(
                                        doc.getCategoryName(), doc.getId(),
                                        average * weight));
                            }
                        }
                    }
                }
View Full Code Here


              (String) model.get(ModelView.DOCUMENTSNO))
              .intValue();
          List list = (List) model.get(ModelView.DOCUMENTS);

          for (int i = 0; i < nodocums; i++) {
            DocumStruct docum = (DocumStruct) list.get(i);

            text.append(Config.NEW_LINE);
            text.append("Pos. " + i + " " + docum.getTitle() + " "
                + docum.getTitle());
            text.append(Config.NEW_LINE);
            text.append(docum.getPath() + " "
                + docum.getLastUpdate());
            text.append(Config.NEW_LINE);
          }

          log.info("Search completed: " + nodocums
              + " documents found it ");
View Full Code Here

                    //set the summary field according to the default settings
                    if (summary.length() > getMaxLengthSummary()) {
                        summary = summary.substring(0, getMaxLengthSummary());
                    }

                    DocumStruct doc = new DocumStruct();
                    doc.setTitle(title);
                    doc.setPath(info.getUri());
                    doc.setDescription(summary);
                    doc.setContent(node);
                    doc.setCategoryName(info.getCategoryName());
                    doc.setCategoryLocation(info.getCategoryLocation());

                    //set the pdf -author
                    doc.setAuthor(author);

                    //store and reindex document
                    PluginManager.storeAndAddDocument(doc);
                } catch (IOException e) {
                    logger.debug("Exception in reading the document text" +
View Full Code Here

        InputStream in = UtilExtract.getStream(info.getUri());

        final LinkedList links = new LinkedList();

        //define an DocNode
        final DocumStruct doc = new DocumStruct();

        //use SAX-Parser instead of DOM-Parser, Performance issue
        //get a reader to the data using sax
        try {
            // Create a JAXP "parser factory" for creating SAX parsers
            javax.xml.parsers.SAXParserFactory saxFactory = SAXParserFactory.newInstance();

            // Configure the parser factory for the type of parsers we require
            saxFactory.setValidating(false); // No validation required

            // Now use the parser factory to create a SAXParser object
            // Note that SAXParser is a JAXP class, not a SAX class
            javax.xml.parsers.SAXParser saxParser = saxFactory.newSAXParser();

            // Create a SAX input source for the file argument
            org.xml.sax.InputSource input = new InputSource(in);

            //create the stack
            final Stack nodeStack = new Stack();

            //final NodeStruct actual = new NodeStruct();
          //  final DefaultMutableTreeNode tree = new DefaultMutableTreeNode();

            //define an internal stack
            IXMLController controller = new IXMLController() {
                    // (non-Javadoc)
                    // @see net.fp.rp.back.extractor.xml.IXMLController#handleContent(java.lang.String)
                    //
                    public void handleContent(final String name,
                        final String content) throws SAXException {
                        //split and add the content
                        logger.debug("XMLProcessing - handle the content " +
                            content + " for the name " + name);

                        NodeStruct actual = (NodeStruct) nodeStack.pop();
                        boolean isTupleValueEmpty = false;

                        //if the node has only one tuple (special case)
                        if (actual.getTuples().size() == 1) {
                            TupleStruct tuple = (TupleStruct) actual.getTuples()
                                                                    .get(0);

                            //last element with empty value ??
                            if ((tuple.getKeyword().equals(name)) &&
                                    ("".equals(tuple.getValue()))) {
                                //update the value
                                logger.debug("XMLProcessing - handle the node " +
                                    name + "which has before empty value");
                                isTupleValueEmpty = true;
                            }
                        }

                        //ignore the empty contents
                        if (content.length() > 0) {
                            if (isTupleValueEmpty) {
                                //update the value
                                ((TupleStruct) actual.getTuples().get(0)).setValue(content);
                            } else {
                                actual.addTuple(name, content);
                            }

                            logger.debug("Tag name/value is " + name + "/" +
                                content);

                            //validate if the specified tuple is a link
                            if (linkRequired && (linkTags.indexOf(name) != -1)) {
                                //add the specified link to the list
                                logger.debug("Tag name is a link" + name + "/" +
                                    content);
                                links.add(content);
                            }
                        }

                        if (nodeStack.isEmpty()) {
                            logger.debug(
                                "XMLProcessing - set the content actual node as content for document");
                            doc.setContent(actual);
                        }
                    }

                    // (non-Javadoc)
                    // @see net.fp.rp.back.extractor.xml.IXMLController#handleElemAttributes(java.lang.String, org.xml.sax.AttributeList)
                    //
                    public void handleElemAttributes(String name,
                        AttributeList attributes) throws SAXException {
                        logger.debug(
                            "XMLProcessing - handle the element attributes for name " +
                            name);

                        //create the node struct
                        NodeStruct actual = new NodeStruct();

                        //if exists attributes handle as value for the element:name
                        if (attributes.getLength() > 0) {
                            //iterate on attributes and added as value
                           // StringBuffer buf = new StringBuffer();

                            for (int i = 0; i < attributes.getLength(); i++) {
                                //Encode the attrib. buffer (for the attributes maybe is not necessarilly)
                                actual.addTuple(attributes.getName(i),
                                    UtilExtract.encode(attributes.getValue(i)));
                            }
                        }

                        //add the actual node
                        actual.addTuple(name, "");

                        if (!nodeStack.isEmpty()) {
                            //get the parent
                            ((NodeStruct) nodeStack.get(nodeStack.size() - 1)).addChild(actual);
                        }

                        nodeStack.push(actual);
                    }
                };

            SaxXMLBuilder builder = new SaxXMLBuilder(controller);

            //parse the input and notify the handler
            saxParser.parse(input, builder);
        } catch (SAXException e) {
            logger.debug("SAXException in processing location" + info.getUri(),
                e);
            throw new RpException("extractor.xml.filenotvalid",
                new Object[] { info.getUri() });
        } catch (Throwable t) {
            logger.debug("Exception in processing the location" +
                info.getUri(), t);
            throw new RpException("app.extract.error",
                new Object[] { info.getUri() });
        } finally {
            try {
                if (in != null) {
                    in.close();
                }
            } catch (IOException e) {
            }
        }

        /*
        try
        {
            DOMParser parser = new DOMParser();
            parser.parse( new InputSource( in ) );
            Document xmldoc = parser.getDocument();


            //parse the document and generate the content nodes
            doc.setContent( Translator.translate( xmldoc.getDocumentElement() ) );
        }
        catch ( SAXException e )
        {
            e.printStackTrace(System.out);
        }
        catch ( IOException e )
        {
            e.printStackTrace(System.out);
        }
        */
        //add the document to the list
        doc.setPath(info.getUri());
        doc.setTitle(UtilExtract.getFilenameTitle(info.getUri()));

        //get the summary of the document
        StringBuffer summary = new StringBuffer("");
        boolean isMaxReached = false;

        NodeStruct node = doc.getContent();

        for (int i = 0; (i < node.getTuples().size()) && (!isMaxReached);
                i++) {
            TupleStruct tuple = (TupleStruct) node.getTuples().get(i);

            //add to the summary
            if (summary.length() <= getMaxLengthSummary()) {
                summary.append(tuple.getValue());
                summary.append(" ");
            }

            if (summary.length() > getMaxLengthSummary()) {
                isMaxReached = true;
            }
        }

        if (isMaxReached) {
            doc.setDescription(summary.toString().substring(0,
                    getMaxLengthSummary()));
        } else {
            doc.setDescription(summary.toString());
        }

        doc.setCategoryName(info.getCategoryName());
        doc.setCategoryLocation(info.getCategoryLocation());

        //store and reindex document
        PluginManager.storeAndAddDocument(doc);

        logger.debug("Level of the information is " + info.getLevel());
View Full Code Here

                                                        ModelView.DOCUMENTSNO)).intValue();
                                                List list = (List) model.get(ModelView.DOCUMENTS);

                                                for (int i = 0; i < nodocums;
                                                        i++) {
                                                    DocumStruct docum = (DocumStruct) list.get(i);

                                                    text.append(Config.NEW_LINE);
                                                    text.append("Pos. " + i +
                                                        " " + docum.getTitle() +
                                                        " " + docum.getTitle());
                                                    text.append(Config.NEW_LINE);
                                                    text.append(docum.getPath() +
                                                        " " +
                                                        docum.getLastUpdate());
                                                    text.append(Config.NEW_LINE);
                                                }

                                                resultDocuments.setText(text.toString());
                                                statusMessage.setText(
View Full Code Here

                node.addTuple(TupleStruct.KEYWORD_GENERIC,
                    (String) listWords.get(j));
        }

        //define an DocumentStruct object
        DocumStruct doc = new DocumStruct();
        doc.setTitle(spider.getTitle());
        doc.setPath(spider.getUri());
        doc.setDescription(spider.getDescription());
        doc.setContent(node);
        doc.setCategoryName(info.getCategoryName());
        doc.setCategoryLocation(info.getCategoryLocation());

        //store and reindex document
        PluginManager.storeAndAddDocument(doc);

        logger.debug("Level of the information is " + info.getLevel());
View Full Code Here

                    int nodocums = Integer.valueOf((String) model.get(
                            ModelView.DOCUMENTSNO)).intValue();
                    List list = (List) model.get(ModelView.DOCUMENTS);

                    for (int i = 0; i < nodocums; i++) {
                        DocumStruct docum = (DocumStruct) list.get(i);

                        text.append(Config.NEW_LINE);
                        text.append("Pos. " + i + " " + docum.getTitle() + " " +
                            docum.getTitle());
                        text.append(Config.NEW_LINE);
                        text.append(docum.getPath() + " " +
                            docum.getLastUpdate());
                        text.append(Config.NEW_LINE);
                    }

                   
                    log.info("Search completed: " + nodocums + " documents found it ");
View Full Code Here

                IndexWriter writer = new IndexWriter(root,
                        new StandardAnalyzer(), true);

                //add a document to the index in order to sort the results
                //link to red-piranha.com
                DocumStruct doc = new DocumStruct();
                doc.setTitle("");
                doc.setPath(DEFAULT_DOCUM_PATH);
                doc.setAuthor(DEFAULT_DOCUM_AUTHOR);
                doc.addContentTuple(TupleStruct.KEYWORD_GENERIC, "");

                //add the document to the index
                writer.addDocument(doc.getIndexDocument());
                writer.close();
            } catch (IOException ex) {
                logger.fatal(MessageUtil.getMessage(
                        "app.indexmanager.wrong.index", new Object[] { root }));
View Full Code Here

                //define an DocNode
                logger.debug("Title is " + title + "Path is :" + info.getUri() +
                    " Summary:" + summary.toString());

                //add the document to the list
                DocumStruct doc = new DocumStruct();
                doc.setTitle(title);
                doc.setPath(info.getUri());
                doc.setDescription(summary.toString());
                doc.setContent(node);
                doc.setCategoryName(info.getCategoryName());
                doc.setCategoryLocation(info.getCategoryLocation());

                //store and reindex document
                PluginManager.storeAndAddDocument(doc);
            } catch (IOException e) {
                throw new RpException("app.extract.error",
View Full Code Here

                        logger.debug("CategScore " +
                            doc.get(DocumStruct.FIELD_CATEGORY_SCORE));
                        logger.debug("GeneralScore " + doc.get(sortField));

                        //translate the Lucene document that was found into an RP document and add it to the returned documents
                        DocumStruct docStruct = new DocumStruct();
                        docStruct.setTitle(doc.get(DocumStruct.FIELD_TITLE));
                        docStruct.setPath(doc.get(DocumStruct.FIELD_PATH));
                        docStruct.setDescription(doc.get(
                                DocumStruct.FIELD_DESCRIPTION));
                        docStruct.setLastUpdate(doc.get(
                                DocumStruct.FIELD_LASTUPDATE));
                        docStruct.setId(doc.get(DocumStruct.FIELD_DOCUMENTID));

                        docStruct.setCategoryName(doc.get(
                                DocumStruct.FIELD_CATEGORY_NAME));

                        try {
                            docStruct.setCategoryScore(Double.valueOf(doc.get(
                                        DocumStruct.FIELD_CATEGORY_SCORE))
                                                             .doubleValue());
                        } catch (Throwable e) {
                            //set the default category score
                            docStruct.setCategoryScore(1.00);
                        }

                        docStruct.setCategoryLocation(doc.get(
                                DocumStruct.FIELD_CATEGORY_LOCATION));

                        listDocs.add(docStruct);
                    } catch (IOException e) {
                        countErrors++;
View Full Code Here

TOP

Related Classes of net.fp.rp.search.back.struct.DocumStruct

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.