Package org.eclipse.ecf.internal.provider.vbulletin

Source Code of org.eclipse.ecf.internal.provider.vbulletin.VBParser

/*******************************************************************************
* Copyright (c) 2005, 2006 Erkki Lindpere and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
*     Erkki Lindpere - initial API and implementation
*******************************************************************************/
package org.eclipse.ecf.internal.provider.vbulletin;

import java.net.URL;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.eclipse.ecf.bulletinboard.BBException;
import org.eclipse.ecf.bulletinboard.IBBObject;
import org.eclipse.ecf.bulletinboard.IMember;
import org.eclipse.ecf.core.identity.ID;
import org.eclipse.ecf.core.identity.IDCreateException;
import org.eclipse.ecf.core.identity.Namespace;
import org.eclipse.ecf.internal.bulletinboard.commons.AbstractParser;
import org.eclipse.ecf.internal.bulletinboard.commons.IBBObjectFactory;
import org.eclipse.ecf.internal.bulletinboard.commons.parsing.DefaultPatternDescriptor;
import org.eclipse.ecf.internal.bulletinboard.commons.parsing.IPatternDescriptor;
import org.eclipse.ecf.internal.bulletinboard.commons.util.StringUtil;
import org.eclipse.ecf.internal.provider.vbulletin.ThreadBrowser2.SkippedStatus;
import org.eclipse.ecf.internal.provider.vbulletin.identity.ThreadMessageID;
import org.eclipse.ecf.internal.provider.vbulletin.internal.ForumFactory;
import org.eclipse.ecf.internal.provider.vbulletin.internal.GuestFactory;
import org.eclipse.ecf.internal.provider.vbulletin.internal.MemberFactory;
import org.eclipse.ecf.internal.provider.vbulletin.internal.MemberGroupFactory;
import org.eclipse.ecf.internal.provider.vbulletin.internal.ThreadMessageFactory;
import org.eclipse.ecf.internal.provider.vbulletin.internal.VBException;

public class VBParser extends AbstractParser {

  public VBParser(Namespace namespace, URL baseURL) {
    super(namespace, baseURL);
  }

  private static final Pattern PAT_TITLE = Pattern
      .compile("<title>(.*?)</title>");

  public String parseTitle(CharSequence seq) {
    Matcher m = PAT_TITLE.matcher(seq);
    if (m.find()) {
      return new String(m.group(1));
    }
    return null;
  }

  @Override
  public IBBObjectFactory getMemberFactory() {
    return new MemberFactory();
  }

  public Pattern getMemberNamePattern() {
    return Pattern.compile("- View Profile: (.*?)</title>");
  }

  public static final Pattern PAT_FORUM = Pattern
      .compile("<a href=\"forumdisplay.php?(?:.*?)f=([0-9]+)\">(.*?)</a>");

  public Map<ID, Forum> parseForums(final CharSequence seq) {
    Map<ID, Forum> forums = new LinkedHashMap<ID, Forum>();
    Matcher matcher = PAT_FORUM.matcher(seq);
    while (matcher.find()) {
      String name = StringUtil.stripHTMLTrim(matcher.group(2));
      // String desc = StringUtil.stripHTMLTrim(matcher.group(3));
      if (StringUtil.notEmptyStr(name)) {
        ForumFactory ff = new ForumFactory();
        String idStr = matcher.group(1);
        ID id = null;
        try {
          id = ff.createBBObjectId(namespace, baseURL, idStr);
        } catch (NumberFormatException e) {
          // TODO Auto-generated catch block
          e.printStackTrace();
        } catch (IDCreateException e) {
          // TODO Auto-generated catch block
          e.printStackTrace();
        }
        Forum forum = (Forum) ff.createBBObject(id, name, null);
        // forum.setDescription(desc);
        forums.put(id, forum);
      }
    }
    return forums;
  }

  @Override
  public IPatternDescriptor getThreadPattern() {
    return DefaultPatternDescriptor
        .defaultCustom(
            Pattern
                .compile(
                    "<a href=\"showthread.php?(?:.*?)t=([0-9]+?)\" id=\"thread_title_(?:[0-9]+?)\">(.*?)</a>(?:.*?)<div class=\"smallfont\">(.*?)</div>",
                    Pattern.DOTALL), new String[] { "id",
                "name", "authorInfo" });
  }

  @Override
  public IBBObjectFactory getThreadFactory() {
    return new ThreadFactory();
  }

  public static final Pattern PAT_MSG_INFORMATION = Pattern
      .compile(
          "<div class=\"panel\">(?:.*?)<blockquote>(.*?)</blockquote>(?:.*?)</td>",
          Pattern.DOTALL);

  public String parseInformationMessage(CharSequence seq) {
    String msg = null;
    Matcher m = PAT_MSG_INFORMATION.matcher(seq);
    if (m.find()) {
      msg = "vBulletin: " + m.group(1);
    }
    return msg;
  }

  public static final Pattern PAT_MSG_POST_ERROR = Pattern
      .compile(
          "<!--POSTERROR do not remove this comment-->(.*?)<!--/POSTERROR do not remove this comment-->",
          Pattern.DOTALL);

  public String parsePostErrorMessage(CharSequence seq) {
    String msg = null;
    Matcher m = PAT_MSG_POST_ERROR.matcher(seq);
    if (m.find()) {
      msg = "vBulletin: " + m.group(1);
    }
    return msg;
  }

  protected BBException createVBException(String msg, CharSequence seq) {
    String vbmsg = parseInformationMessage(seq);
    if (vbmsg == null) {
      vbmsg = parsePostErrorMessage(seq);
    }
    if (vbmsg != null) {
      return new BBException(msg, new VBException(new String(StringUtil
          .stripHTMLTrim(vbmsg))));
    } else {
      return new BBException(msg);
    }
  }

  public static final Pattern PAT_THEAD_ATTRS = Pattern
  // .compile("<title>(?:.*?) :: View topic - (.*?)</title>");
      .compile(
          "<td class=\"navbar\"(?:.*?)><a href=\"/showthread.php\\?t=([0-9]+)(?:.*?)\">(.*?)</td>",
          Pattern.DOTALL);

  public static final Pattern PAT_THEAD_ATTRS_FORUM = Pattern
      .compile("<span class=\"navbar\">&gt; <a href=\"forumdisplay.php\\?f=([0-9]+?)\">(.*?)</a></span>");

  public static final IPatternDescriptor PD_THREAD_ATTRS = DefaultPatternDescriptor
      .defaultIdAndName(PAT_THEAD_ATTRS);

  public static final IPatternDescriptor PD_THREAD_ATTRS_FORUM = DefaultPatternDescriptor
      .defaultIdAndName(PAT_THEAD_ATTRS_FORUM);

  public Thread parseThreadPageForThreadAttributes(CharSequence seq)
      throws BBException {
    Thread t = (Thread) genericParser.parseSingleIdName(PD_THREAD_ATTRS,
        seq, new ThreadFactory());
    if (t != null) {
      Map<ID, IBBObject> forums = genericParser.parseMultiIdName(
          PD_THREAD_ATTRS_FORUM, seq, new ForumFactory(), true);
      Forum prev = null;
      Forum f = null;
      for (IBBObject obj : forums.values()) {
        f = (Forum) obj;
        if (prev != null) {
          prev.subforums.add(f);
        }
        f.setParent(prev);
        prev = f;
      }
      t.forum = f;
      return t;
    } else {
      throw new BBException("Failed to parse the thread.");
    }
  }

  public static final Pattern PAT_MSG = Pattern.compile(
      "<!-- post #([0-9]+) -->(.*)<!-- / post #\\1 -->", Pattern.DOTALL);

  public List<ThreadMessage> parseMessages2(final CharSequence seq,
      final ID lastReadId, boolean desc, SkippedStatus skipped)
      throws BBException {
    Matcher m;
    ThreadMessage msg;
    List<ThreadMessage> messages = new ArrayList<ThreadMessage>();
    m = PAT_MSG.matcher(seq);
    while (m.find()) {
      ThreadMessageFactory tmf = new ThreadMessageFactory();
      ThreadMessageID id = null;
      try {
        id = (ThreadMessageID) tmf.createBBObjectId(namespace, baseURL,
            m.group(1));
      } catch (NumberFormatException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
      } catch (IDCreateException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
      }
      if (lastReadId == null
          || id.getLongValue() > ((ThreadMessageID) lastReadId)
              .getLongValue()) {
        String msgSrc = m.group(2);
        msg = parseMessage2(id, msgSrc);
        if (msg != null) {
          if (desc) {
            messages.add(0, msg);
          } else {
            messages.add(msg);
          }
        }
      } else {
        skipped.messagesSkipped = true;
      }
    }
    return messages;
  }

  public static final Pattern PAT_MSG_USER = Pattern
      .compile("<a class=\"bigusername\" href=\"member.php?(.*)u=([0-9]+)\">(.*)</a>");

  public static final Pattern PAT_MSG_TITLE = Pattern.compile(
      "<!-- icon and title -->(.*)<!-- / icon and title -->",
      Pattern.DOTALL);

  public static final Pattern PAT_MSG_MESSAGE = Pattern.compile(
      "<!-- message -->(.*)<!-- / message -->", Pattern.DOTALL);

  public static final Pattern PAT_MSG_TIMESTAMP = Pattern.compile(
      "<!-- status icon and date -->(.*)<!-- / status icon and date -->",
      Pattern.DOTALL);

  @Override
  public Long parseTimestamp(CharSequence seq) {
    Long l = null;
    final Locale locale = Locale.ENGLISH;
    final String dateFormat = "MM-dd-yyyy";
    final String timeFormat = "hh:mm aa";
    final String dateTimeSeparator = ", ";
    final DateFormat fmtTimestamp = new SimpleDateFormat(dateFormat
        + dateTimeSeparator + timeFormat, locale);
    final DateFormat fmtTime = new SimpleDateFormat(timeFormat, locale);
    Matcher matcher;
    matcher = PAT_MSG_TIMESTAMP.matcher(seq);
    if (matcher.find()) {
      String timestamp = StringUtil.stripHTMLFullTrim(matcher.group(1));
      timestamp = timestamp.replaceAll("1st", "1");
      timestamp = timestamp.replaceAll("2nd", "2");
      timestamp = timestamp.replaceAll("3rd", "3");
      timestamp = timestamp.replaceAll("th", "");
      if (timestamp.startsWith("Today")
          || timestamp.startsWith("Yesterday")) {
        String[] s = timestamp.split(dateTimeSeparator);
        try {
          Calendar now = Calendar.getInstance(fmtTime.getTimeZone());
          if ("Yesterday".equals(s[0])) {
            now.add(Calendar.DATE, -1);
          }

          Date d = fmtTime.parse(s[1]);
          Calendar then = Calendar.getInstance(fmtTime.getTimeZone());
          then.setTime(d);
          then.set(now.get(Calendar.YEAR), now.get(Calendar.MONTH),
              now.get(Calendar.DATE));
          l = new Long(then.getTimeInMillis());
        } catch (ParseException e) {
          e.printStackTrace();
        }
      } else {
        try {
          l = new Long(fmtTimestamp.parse(timestamp).getTime());
        } catch (ParseException e) {
          e.printStackTrace();
        }
      }
    }
    return l;
  }

  private ThreadMessage parseMessage2(final ID id, final CharSequence seq) {
    ThreadMessage msg = null;
    ThreadMessageFactory tmf = new ThreadMessageFactory();
    msg = (ThreadMessage) tmf.createBBObject(id, null, null);
    Matcher m;
    String uname;
    Long l = parseTimestamp(seq);
    if (l != null) {
      msg.timePosted = new Date(l);
    }
    m = Pattern.compile(
        "<div id=\"postmenu_" + ((ThreadMessageID) id).getLongValue()
            + "\">(.*?)</div>", Pattern.DOTALL).matcher(seq);
    if (m.find()) {
      String userInfoStr = m.group(1);
      m = PAT_MSG_USER.matcher(userInfoStr);
      if (m.find()) {
        MemberFactory mf = new MemberFactory();
        uname = new String(StringUtil.simpleStripHTML(m.group(3)));
        ID uid = null;
        try {
          uid = mf.createBBObjectId(namespace, baseURL, m.group(2));
        } catch (NumberFormatException e) {
          // TODO Auto-generated catch block
          e.printStackTrace();
        } catch (IDCreateException e) {
          // TODO Auto-generated catch block
          e.printStackTrace();
        }
        msg.author = (IMember) mf.createBBObject(uid, uname, null);
      } else {
        // Didn't find a registered author, so the userinfo should
        // contain only the username.
        msg.author = new Member(new String(userInfoStr.trim()));
      }
    }
    m = Pattern.compile(
        "#<a href=\"showpost.php\\?p="
            + ((ThreadMessageID) id).getLongValue()
            + "(?:.*?)><strong>([0-9]+)</strong></a>").matcher(seq);
    m.find();
    msg.number = Integer.parseInt(m.group(1));
    m = PAT_MSG_TITLE.matcher(seq);
    m.find();
    msg.setNameInternal(new String(StringUtil.stripHTMLTrim(m.group(1))));
    m = PAT_MSG_MESSAGE.matcher(seq);
    m.find();
    String message = StringUtil.stripHTMLFullTrim(m.group(1));
    msg.message = message;
    return msg;
  }

  public static final Pattern PAT_PAGES = Pattern
      .compile("<td class=\"vbmenu_control\"(?:.*?)>Page ([0-9]+) of ([0-9]+)</td>");

  public int parseNextPage(CharSequence seq) {
    Matcher m = PAT_PAGES.matcher(seq);
    int next = -1;
    if (m.find()) {
      int current = Integer.parseInt(m.group(1));
      int last = Integer.parseInt(m.group(2));
      if (current < last) {
        next = current + 1;
      }
    }
    return next;
  }

  public int parsePrevPage(CharSequence seq) {
    Matcher m = PAT_PAGES.matcher(seq);
    int prev = -1;
    if (m.find()) {
      int current = Integer.parseInt(m.group(1));
      if (current > 1) {
        prev = current - 1;
      }
    }
    return prev;
  }

  @Override
  public IPatternDescriptor getMemberPattern() {
    return DefaultPatternDescriptor.defaultIdAndName(Pattern
        .compile("<a href=\"member.php\\?u=([0-9]+?)\">(.*?)</a>"));
  }

  @Override
  public IPatternDescriptor getAuthorInfoMemberPattern() {
    return DefaultPatternDescriptor
        .defaultIdAndName(Pattern
            .compile("<span(?:.*?)onclick=\"window.open('member.php\\?u=([0-9]+?)', '_self')\">(.*?)</span>"));
  }

  @Override
  public Pattern getMemberGroupContainerPattern() {
    return Pattern
        .compile(
            "<form action=\"profile.php\\?do=joingroup\" method=\"post\">(.*?)</form>",
            Pattern.DOTALL);
  }

  @Override
  public IBBObjectFactory getMemberGroupFactory() {
    return new MemberGroupFactory();
  }

  @Override
  public IPatternDescriptor getMemberGroupPattern() {
    return DefaultPatternDescriptor
        .reverseIdAndName(Pattern
            .compile(
                "<tr>(?:.*?)<td class=\"alt(?:[12]{1})\">(.*?)<div class=\"smallfont\">(?:.*?)</div>(?:.*?)<label for=\"rb_join_([0-9]+?)\">(?:.*?)</tr>",
                Pattern.DOTALL));
  }

  @Override
  public void throwException(final String msg, final CharSequence seq)
      throws BBException {
    throw createVBException(msg, seq);
  }

  @Override
  public IBBObjectFactory getGuestFactory() {
    return new GuestFactory();
  }

}
TOP

Related Classes of org.eclipse.ecf.internal.provider.vbulletin.VBParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.