Package net.fp.rp.search.back.struct

Examples of net.fp.rp.search.back.struct.NodeStruct


                //construct the patterns (to not ignore and replace)
                Pattern notIgnorePattern = Pattern.compile(getNotIgnoreChars());
                Pattern replacePattern = Pattern.compile(getReplaceChars());

                NodeStruct node = new NodeStruct();
                ByteArrayInputStream bin = null;

                try {
                    bin = new ByteArrayInputStream(bout.toByteArray());

                    byte[] buffer = new byte[1024];
                    int n = bin.read(buffer);

                    while (n > 0) {
                        String chars = new String(buffer, 0, n);

                        //generate the list of the words for the buffer
                        LinkedList listWords = UtilExtract.getValueList(chars,
                                getMinLengthWord(), notIgnorePattern,
                                replacePattern);

                        for (int j = 0; j < listWords.size(); j++)
                            node.addTuple(TupleStruct.KEYWORD_GENERIC,
                                (String) listWords.get(j));

                        n = bin.read(buffer);
                    }
View Full Code Here


        System.out.println("Translate the node" + node.getNodeName());
        if (node == null) {
            return null;
        }

        NodeStruct actual = null;
        int type = node.getNodeType();

        switch (type) {
        // print document
        case Node.DOCUMENT_NODE: {
            return translate(((Document) node).getDocumentElement() );
        }

        // print element with attributes
        case Node.ELEMENT_NODE: {
            System.out.println("Process attributes for node" + node.getNodeName());
           
            actual = new NodeStruct();

            StringBuffer buf = new StringBuffer();
            NamedNodeMap attrs = node.getAttributes();
            int lenAttrs = (attrs != null) ? attrs.getLength() : 0;

            for (int i = 0; i < lenAttrs; i++) {
                String strValue = attrs.item(i).getNodeValue();

                if ((strValue.indexOf('<') != -1) ||
                        (strValue.indexOf('>') != -1) ||
                        (strValue.indexOf('&') != -1) ||
                        (strValue.indexOf('"') != -1) ||
                        (strValue.indexOf('\'') != -1)) {
                    actual.addTuple(node.getNodeName(), encode(strValue));
                } else {
                    actual.addTuple(node.getNodeName(), strValue);
                }
            }
            System.out.println("Process the childs");
           
            NodeList children = node.getChildNodes();
            int len = children.getLength();

            for (int i = 0; i < len; i++) {
                System.out.println("Process the childs"  + children.item(i).getNodeName());
               
                NodeStruct childNode = translate(children.item(i) );

                if (childNode != null) {
                    System.out.println("Append the child "+children.item(i).getNodeName());
                    actual.addChild(childNode);
                }
            }
            break;
        }

        // handle entity reference nodes
        case Node.ENTITY_REFERENCE_NODE: {
            actual = new NodeStruct();
            actual.addTuple(node.getNodeName(), "");

            break;
        }

        // print cdata sections
        case Node.CDATA_SECTION_NODE: {
            //must to encode this values
            actual = new NodeStruct();
            actual.addTuple("GENERIC", encode(node.getNodeValue()));
            break;
        }

        // print text
        case Node.TEXT_NODE: {
            System.out.println("Process the text");
            String strValue = node.getNodeValue();
            System.out.println("Process the text value "  + node.getNodeName() +" /" + strValue );

            if ( !Util.isEmpty( strValue )) {
                actual = new NodeStruct();
 
              if ((strValue.indexOf('<') != -1) || (strValue.indexOf('>') != -1) ||
                      (strValue.indexOf('&') != -1) ||
                      (strValue.indexOf('"') != -1) ||
                      (strValue.indexOf('\'') != -1)) {
                  actual.addTuple(node.getNodeName(), encode(strValue));
              } else {
                  actual.addTuple(node.getNodeName(), strValue);
              }
            }

            break;
        }

        // print processing instruction
        case Node.PROCESSING_INSTRUCTION_NODE: {
            actual = new NodeStruct();
            String data = node.getNodeValue();
            if ((data != null) && (data.length() > 0)) {
                //data must to be encoded
                actual.addTuple(node.getNodeName(), encode(data));
            }
View Full Code Here

                        final String content) throws SAXException {
                        //split and add the content
                        logger.debug("XMLProcessing - handle the content " +
                            content + " for the name " + name);

                        NodeStruct actual = (NodeStruct) nodeStack.pop();
                        boolean isTupleValueEmpty = false;

                        //if the node has only one tuple (special case)
                        if (actual.getTuples().size() == 1) {
                            TupleStruct tuple = (TupleStruct) actual.getTuples()
                                                                    .get(0);

                            //last element with empty value ??
                            if ((tuple.getKeyword().equals(name)) &&
                                    ("".equals(tuple.getValue()))) {
                                //update the value
                                logger.debug("XMLProcessing - handle the node " +
                                    name + "which has before empty value");
                                isTupleValueEmpty = true;
                            }
                        }

                        //ignore the empty contents
                        if (content.length() > 0) {
                            if (isTupleValueEmpty) {
                                //update the value
                                ((TupleStruct) actual.getTuples().get(0)).setValue(content);
                            } else {
                                actual.addTuple(name, content);
                            }

                            logger.debug("Tag name/value is " + name + "/" +
                                content);

                            //validate if the specified tuple is a link
                            if (linkRequired && (linkTags.indexOf(name) != -1)) {
                                //add the specified link to the list
                                logger.debug("Tag name is a link" + name + "/" +
                                    content);
                                links.add(content);
                            }
                        }

                        if (nodeStack.isEmpty()) {
                            logger.debug(
                                "XMLProcessing - set the content actual node as content for document");
                            doc.setContent(actual);
                        }
                    }

                    // (non-Javadoc)
                    // @see net.fp.rp.back.extractor.xml.IXMLController#handleElemAttributes(java.lang.String, org.xml.sax.AttributeList)
                    //
                    public void handleElemAttributes(String name,
                        AttributeList attributes) throws SAXException {
                        logger.debug(
                            "XMLProcessing - handle the element attributes for name " +
                            name);

                        //create the node struct
                        NodeStruct actual = new NodeStruct();

                        //if exists attributes handle as value for the element:name
                        if (attributes.getLength() > 0) {
                            //iterate on attributes and added as value
                           // StringBuffer buf = new StringBuffer();

                            for (int i = 0; i < attributes.getLength(); i++) {
                                //Encode the attrib. buffer (for the attributes maybe is not necessarilly)
                                actual.addTuple(attributes.getName(i),
                                    UtilExtract.encode(attributes.getValue(i)));
                            }
                        }

                        //add the actual node
                        actual.addTuple(name, "");

                        if (!nodeStack.isEmpty()) {
                            //get the parent
                            ((NodeStruct) nodeStack.get(nodeStack.size() - 1)).addChild(actual);
                        }

                        nodeStack.push(actual);
                    }
                };

            SaxXMLBuilder builder = new SaxXMLBuilder(controller);

            //parse the input and notify the handler
            saxParser.parse(input, builder);
        } catch (SAXException e) {
            logger.debug("SAXException in processing location" + info.getUri(),
                e);
            throw new RpException("extractor.xml.filenotvalid",
                new Object[] { info.getUri() });
        } catch (Throwable t) {
            logger.debug("Exception in processing the location" +
                info.getUri(), t);
            throw new RpException("app.extract.error",
                new Object[] { info.getUri() });
        } finally {
            try {
                if (in != null) {
                    in.close();
                }
            } catch (IOException e) {
            }
        }

        /*
        try
        {
            DOMParser parser = new DOMParser();
            parser.parse( new InputSource( in ) );
            Document xmldoc = parser.getDocument();


            //parse the document and generate the conent nodes
            doc.setContent( Translator.translate( xmldoc.getDocumentElement() ) );
        }
        catch ( SAXException e )
        {
            e.printStackTrace(System.out);
        }
        catch ( IOException e )
        {
            e.printStackTrace(System.out);
        }
        */
        //add the document to the list
        doc.setPath(info.getUri());
        doc.setTitle(UtilExtract.getFilenameTitle(info.getUri()));

        //get the summary of the document
        StringBuffer summary = new StringBuffer("");
        boolean isMaxReached = false;

        NodeStruct node = doc.getContent();

        for (int i = 0; (i < node.getTuples().size()) && (!isMaxReached);
                i++) {
            TupleStruct tuple = (TupleStruct) node.getTuples().get(i);

            //add to the summary
            if (summary.length() <= getMaxLengthSummary()) {
                summary.append(tuple.getValue());
                summary.append(" ");
View Full Code Here

        Spider spider = new Spider(info.getUri(), getMaxLengthSummary());
        spider.start();

        //process the content from the actual document
        //iterate on the links
        NodeStruct node = new NodeStruct();

        for (int i = 0; (i < spider.getLinks().size()); i++) {
            String uri = ((URL) spider.getLinks().get(i)).toString();
            node.addTuple(TupleStruct.KEYWORD_NAME, uri);
        }

        Pattern notIgnorePattern = Pattern.compile(getNotIgnoreChars());
        Pattern replacePattern = Pattern.compile(getReplaceChars());

        for (int i = 0; (i < spider.getValues().size()); i++) {
            String value = ((String) spider.getValues().get(i));

            //generate the list of the words for the spidered values
            LinkedList listWords = UtilExtract.getValueList(value,
                    getMinLengthWord(), notIgnorePattern, replacePattern);

            for (int j = 0; j < listWords.size(); j++)
                node.addTuple(TupleStruct.KEYWORD_GENERIC,
                    (String) listWords.get(j));
        }

        //define an DocumentStruct object
        DocumStruct doc = new DocumStruct();
View Full Code Here

            //construct the patterns (to not ignore and replace)
            Pattern notIgnorePattern = Pattern.compile(notIgnoreChars);
            Pattern replacePattern = Pattern.compile(replaceChars);

            NodeStruct node = new NodeStruct();

            DataInputStream din = null;

            try {
                din = new DataInputStream(UtilExtract.getStream(info.getUri()));

                String buffer = din.readLine();

                while (buffer != null) {
                    //generate the list of the words for the buffer
                    LinkedList listWords = UtilExtract.getValueList(buffer,
                            minLengthWord, notIgnorePattern, replacePattern);

                    for (int j = 0; j < listWords.size(); j++)
                        node.addTuple(TupleStruct.KEYWORD_GENERIC,
                            (String) listWords.get(j));

                    buffer = din.readLine();
                }

                //get the title of the document
                String title = UtilExtract.getFilenameTitle(info.getUri());

                //get the summary of the document
                StringBuffer summary = new StringBuffer("");
                boolean isMaxReached = false;

                for (int i = 0;
                        (i < node.getTuples().size()) && (!isMaxReached);
                        i++) {
                    TupleStruct tuple = (TupleStruct) node.getTuples().get(i);

                    //add to the summary
                    if (summary.length() <= maxLengthSummary) {
                        summary.append(tuple.getValue());
                        summary.append(" ");
View Full Code Here

            }
        }

        Enumeration e = root.children();
        while (e.hasMoreElements()){
            NodeStruct node = (NodeStruct) e.nextElement();
            Resource crt = generateResource(node, model);
            actual.addProperty(RP.child, crt);
        }
        /*
        for (int i = 0; i < root.getChild().size(); i++) {
View Full Code Here

TOP

Related Classes of net.fp.rp.search.back.struct.NodeStruct

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.