Examples of org.cyberneko.html.parsers.DOMParser

org.cyberneko.html.parsers.DOMParser
A DOM parser for HTML documents. Author: Andy Clark. Version: $Id: DOMParser.java,v 1.5 2005/02/14 03:56:54 andyc Exp $

  /**
   * @see org.olat.core.util.filter.Filter#filter(java.lang.String)
   */
  public String filter(String original) {
    try {
      DOMParser parser = new DOMParser();
      parser.parse(new InputSource(new StringReader(original)));
      Document document = parser.getDocument();
      StringBuilder sb = new StringBuilder();
      scanNode(document, sb);
      return sb.toString();
    } catch (SAXException e) {
      logError("", e);

View Full Code Here

  public List<Contact> parseContacts() throws ContactsException {
    try {
      String sid = getSid(indexPage, "folder");
      String content = doGet(contactsUrl.replaceFirst("%sid", sid) + "&gid=all");
      List<Contact> contacts = new ArrayList<Contact>();
      DOMParser parser = new DOMParser();
      InputSource is = new InputSource(new ByteArrayInputStream(content
          .getBytes("GBK")));
      is.setEncoding("GBK");
      parser.parse(is);
      NodeList nodes = parser.getDocument().getElementsByTagName("td");
      for (int i = 0; i < nodes.getLength(); i++) {
        Node node = nodes.item(i);
        if (node.getFirstChild().getNodeName()
            .equalsIgnoreCase("input")) {
          i++;

View Full Code Here

  public List<Contact> parseContacts() throws ContactsException {
    try {
      String sid = getSid(indexPage, "folder");
      String content = doGet(contactsUrl.replaceFirst("%sid", sid));
      List<Contact> contacts = new ArrayList<Contact>();
      DOMParser parser = new DOMParser();
      InputSource is = new InputSource(new ByteArrayInputStream(content
          .getBytes("GBK")));
      is.setEncoding("GBK");
      parser.parse(is);
      NodeList nodes = parser.getDocument().getElementsByTagName("td");
      for (int i = 0; i < nodes.getLength(); i++) {
        Node node = nodes.item(i);
         if (node.getAttributes().getNamedItem("class") != null &&
                                        node.getAttributes().getNamedItem("class").getNodeValue().equals("Addr_Td_Name")) {
          String username = node.getTextContent().trim();

View Full Code Here

            List<Contact> contacts = new ArrayList<Contact>();
            while (true) {
                boolean empty = true;
                String contactsUrl = lastUrl.substring(0, lastUrl.indexOf("/webmail/")) + "/webmail/addressBookList.do?groupId=-1&page=" + page;
                String content = doGet(contactsUrl);
                DOMParser parser = new DOMParser();
                InputSource is = new InputSource(new ByteArrayInputStream(content.getBytes()));
                parser.parse(is);
                NodeList nodes = parser.getDocument().getElementsByTagName("td");
                for (int i = 0; i < nodes.getLength(); i++) {
                    Node node = nodes.item(i);
                    if (node.getAttributes().getNamedItem("class") != null) {
                        if (node.getAttributes().getNamedItem("class").getNodeValue().equals("mtb1-td2")) {
                            String username_text = node.getTextContent();

View Full Code Here

    public List<Contact> parseContacts() throws ContactsException {
        try {
            String sid = getSid(indexPage, "folder");
            String content = doGet(contactsUrl.replaceFirst("%sid", sid) + "&gid=all");
            List<Contact> contacts = new ArrayList<Contact>();
            DOMParser parser = new DOMParser();
            InputSource is = new InputSource(new ByteArrayInputStream(content.getBytes("GBK")));
            is.setEncoding("GBK");
            parser.parse(is);
            NodeList nodes = parser.getDocument().getElementsByTagName("td");
            for (int i = 0; i < nodes.getLength(); i++) {
                Node node = nodes.item(i);
                if (node.getFirstChild().getNodeName().equalsIgnoreCase("input")) {
                    i++;
                    String username = "";

View Full Code Here

    public List<Contact> parseContacts() throws ContactsException {
        try {
            String contactsUrl = lastUrl.replace("main", "address/addrlist") + "&gid=all";
            String content = doGet(contactsUrl);
            List<Contact> contacts = new ArrayList<Contact>();
            DOMParser parser = new DOMParser();
            InputSource is = new InputSource(new ByteArrayInputStream(content.getBytes("GBK")));
            is.setEncoding("GBK");
            parser.parse(is);
            NodeList nodes = parser.getDocument().getElementsByTagName("td");
            for (int i = 0; i < nodes.getLength(); i++) {
                Node node = nodes.item(i);
                if (node.getAttributes().getNamedItem("class").getNodeValue().equals("Ibx_Td_addrName")) {
                    String username = node.getFirstChild().getTextContent().trim();
                    i++;

View Full Code Here

   */
  public List<Contact> parseContacts() throws ContactsException {
    try {
      String content = doGet(contactsUrl);
      List<Contact> contacts = new ArrayList<Contact>();
      DOMParser parser = new DOMParser();
      parser.parse(new InputSource(new ByteArrayInputStream(content
          .getBytes())));
      NodeList nodes = parser.getDocument().getElementsByTagName("td");
      for (int i = 0; i < nodes.getLength(); i++) {
        Node node = nodes.item(i);
                                if (node.getAttributes().getNamedItem("class") != null &&
                                        node.getAttributes().getNamedItem("class").getNodeValue().equals("nobottom")) {
                                    String username = node.getChildNodes().item(1).getTextContent().trim();

View Full Code Here

        final Resource r = request.getResource();
        final Node n = r.adaptTo(Node.class);
        final String pageTitle = getTitle(r, n);


        // Parse script using the NekoHTML permissive HTML parser
        final DOMParser parser = new DOMParser();
        try {
            // Turn off SAX namespace processing for this parse.
            parser.setFeature("http://xml.org/sax/features/namespaces", false);
            // Ask NekoHTML to report both element and attribute names in lower case.
            parser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
            parser.setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
            parser.parse(new InputSource(scriptStream));
        } catch(Exception e) {
            // Any failure while configuring or parsing is reported as a ScriptException
            // naming the script path; the original exception is attached as its cause.
            final ScriptException se = new ScriptException("Error parsing script " + scriptPath);
            se.initCause(e);
            throw se;
        }
        // The DOM built from the script; only reached when parsing succeeded.
        final Document template = parser.getDocument();


        // compute default rendering
        final StringWriter defaultRendering = new StringWriter();
        if(n!=null) {
            final PrintWriter pw = new PrintWriter(defaultRendering);

View Full Code Here

  }


  /**
   * Parses {@code source} into a DOM {@link Document}, using one of two strategies.
   *
   * <p>When {@code attemptFullDocParseFirst(source)} returns true, the text is parsed as a
   * whole document with a NekoHTML {@code DOMParser} configured below. Otherwise the text is
   * parsed as a fragment via {@code parseFragmentImpl}, passed through
   * {@code normalizeFragment}, and the fragment's owner document is returned.
   *
   * @param source the markup text to parse
   * @return the parsed document, or the owner document of the parsed fragment
   * @throws SAXException declared by this method; propagated from the underlying parse calls
   * @throws IOException declared by this method; propagated from the underlying parse calls
   * @throws GadgetException declared by this method; presumably raised by the helper
   *     methods called here (not visible in this excerpt; confirm)
   */
  private Document parseDomInternal(String source) throws SAXException, IOException, GadgetException {
    if (attemptFullDocParseFirst(source)) {
      InputSource input = new InputSource(new StringReader(source));
      DOMParser parser = new DOMParser();
      // Force parser not to use HTMLDocumentImpl as document implementation otherwise
      // it forces all element names to uppercase.
      parser.setProperty("http://apache.org/xml/properties/dom/document-class-name",
          "org.apache.xerces.dom.DocumentImpl");
      // Don't convert element names to upper/lowercase
      // NOTE(review): confirm against the NekoHTML settings documentation that the value
      // "default" leaves element names unchanged.
      parser.setProperty("http://cyberneko.org/html/properties/names/elems", "default");
      // Preserve case of attributes
      parser.setProperty("http://cyberneko.org/html/properties/names/attrs", "no-change");
      // Record entity references
      parser.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true);
      parser.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs", true);
      // No need to defer as full DOM is walked later
      parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", false);
      parser.parse(input);
      return parser.getDocument();
    } else {
      // Fragment path: the helper builds the fragment, and the document that owns it
      // is what gets returned to the caller.
      DocumentFragment fragment = parseFragmentImpl(source);
      normalizeFragment(fragment.getOwnerDocument(), fragment);
      return fragment.getOwnerDocument();
    }

View Full Code Here

    public static Document readHtmlDocument(String str) {
        Document document = null;
        try {
            URL url = FlexibleLocation.resolveLocation(str);
            if (url != null) {
                DOMParser parser = new DOMParser();
                parser.setFeature("http://xml.org/sax/features/namespaces", false);
                parser.parse(url.toExternalForm());
                document = parser.getDocument();
            } else {
                Debug.logError("Unable to locate HTML document " + str, module);
            }
        } catch (Exception e) {
            Debug.logError(e, "Error while reading HTML document " + str, module);

View Full Code Here

0 1 2 3

TOP

Related Classes of org.cyberneko.html.parsers.DOMParser

biz.webgate.domino.mywebgate.util.URLFetcher

com.asual.summer.core.faces.FacesResourceProcessor

com.crawljax.util.DomUtils

com.crawljax.util.Helper

com.huangzhimin.contacts.email.OneEightNineImporter

com.huangzhimin.contacts.email.OneSixThreeImporter

com.huangzhimin.contacts.email.OneTwoSixImporter

com.huangzhimin.contacts.email.TomImporter

com.huangzhimin.contacts.email.YahooImporter

com.huangzhimin.contacts.email.YeahImporter

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact: coftware#gmail.com.