Package com.aelitis.azureus.core.metasearch.impl

Source Code of com.aelitis.azureus.core.metasearch.impl.DateParserRegex

/*
* Created on May 6, 2008
* Created by Paul Gardner
*
* Copyright 2008 Vuze, Inc.  All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License only.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
*/

package com.aelitis.azureus.core.metasearch.impl;

import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class DateParserRegex extends DateParser {
 
  static boolean DEBUG = false;
 
  TimeZone timeZone;
  DateFormat userDateFormat;
  boolean auto;
 
  private static final Pattern hasLettersPattern = Pattern.compile("(?i).*[a-z]");
  private static final Pattern isAgeBasedPattern = Pattern.compile("(?i)(ago)|(min)|(hour)|(day)|(week)|(month)|(year)|([0-9](h|d|w|m|y))");
  private static final Pattern getTimeComponent = Pattern.compile("(?i)([0-9]{2}):([0-9]{2})(:([0-9]{2}))?( ?(a|p)m)?");
  private static final Pattern timeBasedDateWithLettersPattern = Pattern.compile("(?i)([0-9]{1,2})[^ ]{0,2}(?: |-)([a-z]{3,10})\\.?(?: |-)?([0-9]{2,4})?");
  private static final Pattern timeBasedDateWithLettersPatternMonthFirst = Pattern.compile("(?i)([a-z]{3,10})\\.?(?: |-)?([0-9]{1,2})[^ ]{0,2}(?: |-)([0-9]{2,4})?");
  private static final Pattern todayPattern = Pattern.compile("(?i)(t.?day)");
  private static final Pattern yesterdayPattern = Pattern.compile("(?i)(y[a-z\\-]+day)");
  private static final Pattern agoSpacerPattern = Pattern.compile("(?i)([0-9])([a-z])");
  private static final Pattern agoTimeRangePattern = Pattern.compile("(?i)([0-9.]+) ([a-z\\(\\)]+)");
  private static final Pattern numbersOnlyDatePattern = Pattern.compile("([0-9]{2,4})[ \\-\\./]([0-9]{2,4})[ \\-\\./]?([0-9]{2,4})?");
 
  private static final String[] MONTHS_LIST = new String[] {
    " january janvier enero januar",
    " february fevrier f�vrier febrero februar",
    " march mars marzo marz marz m�rz" ,
    " april avril abril april ",
    " may mai mayo mai",
    " june juin junio juni",
    " july juillet julio juli",
    " august aout ao�t agosto august",
    " september septembre septiembre september",
    " october octobre octubre oktober",
    " november novembre noviembre november",
    " december decembre d�cembre diciembre dezember"};
 
  public DateParserRegex() {
    this("GMT-7",true,null);
  }
 
  public DateParserRegex(String timeZone,boolean auto, String dateFormat) {
   
    this.timeZone = TimeZone.getTimeZone(timeZone);
    this.auto = auto;
   
    if(!auto) {
      if(dateFormat != null) {
        userDateFormat = new SimpleDateFormat(dateFormat);
        userDateFormat.setTimeZone(this.timeZone);
      } else {
        //TODO : in debug mode throw an Exception telling the user he needs to provide a dateFormat in manual mode
      }
    }

  }
 
  public Date parseDate(String date) {
    Date result =  null;
    if(auto) {
       result = parseDateInternal(date);
    } else {
      if(userDateFormat != null) {
        try {
          result = userDateFormat.parse(date);
        } catch(Exception e) {
          //TODO : in debug mode, throw an exception to tell the user that his dateFormat is invalid / didn't parse a date
        }
      }
    }
    if(DEBUG && result != null) {
      System.out.println(date + " > " + result.toString());
    }
   
    return result;
  }
 
  private Date parseDateInternal(String input) {
   
    if(input == null) {
      return null;
    }
   
    String s = input;
   
    Calendar calendar = new GregorianCalendar(timeZone);
   
    //Find if there is any time information in the date
    Matcher matcher = getTimeComponent.matcher(s);
    //Remove the time information in order to not confuse the date parsing
    s = matcher.replaceFirst("").trim();
   
      // handle date with format "2009-01-12 at 03:36:38" by removing trailing " at";
   
    if ( s.endsWith( " at" )){
     
      s = s.substring(0,s.length()-3).trim();
    }
   
    //Find if the date contains letters
    matcher = hasLettersPattern.matcher(s);
    if(matcher.find()) {
      //We have a date with letters, could be age-based or time based (with a literal month)
     
      //Try to determine if it is age-based or time-based
      matcher = isAgeBasedPattern.matcher(s);
      if(matcher.find()) {
        //Age Based date
       
        matcher = todayPattern.matcher(s);
        if(matcher.find()) {
          //Nothing to do for today as we base our initial date on the current one
        } else {
          matcher = yesterdayPattern.matcher(s);
          if(matcher.find()) {
            calendar.add(Calendar.DATE, -1);
          } else {
            //We're in the real "ago" case, let's remove " ago" if it's there
            s = s.replaceAll("ago","").trim();
            matcher = agoSpacerPattern.matcher(s);
            s = matcher.replaceAll("$1 $2");
            matcher = agoTimeRangePattern.matcher(s);
            boolean seenHoursAsLowerCaseH = false;
            while(matcher.find()) {
              String unit = matcher.group(2);
             
              if(unit.equals("h")) {
                seenHoursAsLowerCaseH = true;
              }
             
             
              String lUnit = unit.toLowerCase();
              float value = Float.parseFloat(matcher.group(1));
              int intValue = (int) value;
              if(lUnit.startsWith("sec")) {
                calendar.add(Calendar.SECOND, -intValue);
              } else if(lUnit.startsWith("min") || (unit.equals("m") && seenHoursAsLowerCaseH)) {
                calendar.add(Calendar.MINUTE, -intValue);
                int seconds = (int) ((value - intValue)*60f);
                calendar.add(Calendar.SECOND, -seconds);
              } else if(lUnit.startsWith("h")) {
                calendar.add(Calendar.HOUR_OF_DAY, -intValue);
                int seconds = (int) ((value - intValue)*3600f);
                calendar.add(Calendar.SECOND, -seconds);
              } else if(lUnit.startsWith("d")) {
                calendar.add(Calendar.DATE, -intValue);
                int seconds = (int) ((value - intValue)*86400f);
                calendar.add(Calendar.SECOND, -seconds);
              } else if(lUnit.startsWith("w")) {
                calendar.add(Calendar.WEEK_OF_YEAR, -intValue);
                //604800 seconds in a week
                int seconds = (int) ((value - intValue)*640800f);
                calendar.add(Calendar.SECOND, -seconds);
              } //The month case when m is not a minute
                else if(lUnit.startsWith("m")) {
                calendar.add(Calendar.MONTH, -intValue);
                //about 720 hours per month
                int hours = (int) ((value - intValue)*720f);
                calendar.add(Calendar.HOUR_OF_DAY, -hours);
              } else if(lUnit.startsWith("y")) {
                calendar.add(Calendar.YEAR, -intValue);
                //about 8760 hours per year
                int hours = (int) ((value - intValue)*8760);
                calendar.add(Calendar.HOUR_OF_DAY, -hours);
              } else {
                //System.out.println("Unit not matched : " + unit);
              }
            }
           
          }
         
        }
       
        //System.out.println(input + " > " + calendar.getTime());
       
       
      } else {
        //Time based date
        //System.out.println("DL : " + s);
        matcher = timeBasedDateWithLettersPattern.matcher(s);
        if(matcher.find()) {
          int day = Integer.parseInt(matcher.group(1));
          calendar.set(Calendar.DAY_OF_MONTH,day);
         
          String monthStr = " " + matcher.group(2).toLowerCase();
          int month = -1;
          for(int i = 0 ; i < MONTHS_LIST.length ; i++) {
            if(MONTHS_LIST[i].indexOf(monthStr) != -1) {
              month = i;
            }
          }
          if(month > -1) {
            calendar.set(Calendar.MONTH,month);
          }
         
          boolean hasYear = matcher.group(3) != null;
          if(hasYear) {
            int year = Integer.parseInt(matcher.group(3));
            if(year < 100) {
              year += 2000;
            }
            calendar.set(Calendar.YEAR,year);
          }
         
          calendar.set(Calendar.HOUR_OF_DAY,0);
          calendar.set(Calendar.MINUTE,0);
          calendar.set(Calendar.SECOND,0);
          calendar.set(Calendar.MILLISECOND,0);

          //System.out.println(input + " > " + calendar.getTime() + "( " + calendar.getTimeZone() + " )");
         
        } else {
          matcher = timeBasedDateWithLettersPatternMonthFirst.matcher(s);
          if(matcher.find()) {
            int day = Integer.parseInt(matcher.group(2));
            calendar.set(Calendar.DAY_OF_MONTH,day);
           
            String monthStr = " " + matcher.group(1).toLowerCase();
            int month = -1;
            for(int i = 0 ; i < MONTHS_LIST.length ; i++) {
              if(MONTHS_LIST[i].indexOf(monthStr) != -1) {
                month = i;
              }
            }
            if(month > -1) {
              calendar.set(Calendar.MONTH,month);
            }
           
            boolean hasYear = matcher.group(3) != null;
            if(hasYear) {
              int year = Integer.parseInt(matcher.group(3));
              if(year < 100) {
                year += 2000;
              }
              calendar.set(Calendar.YEAR,year);
            }
           
            calendar.set(Calendar.HOUR_OF_DAY,0);
            calendar.set(Calendar.MINUTE,0);
            calendar.set(Calendar.SECOND,0);
            calendar.set(Calendar.MILLISECOND,0);

            //System.out.println(input + " > " + calendar.getTime() + "( " + calendar.getTimeZone() + " )");
           
          } else {
            System.err.println("Unparseable date : " + input);
          }
        }
      }
    } else {
      //We have a date with only numbers
      //System.out.println("DN : " + s );//+ "(" + input + ")");
      //Let's assume a default order of m/d and switch if it doesn't make sense
      matcher = numbersOnlyDatePattern.matcher(s);
      if(matcher.find()) {
        try {
       
          String g1 = matcher.group(1);
          String g2 = matcher.group(2);
          String g3 = matcher.group(3);
         
          int i1 = Integer.parseInt(g1);
          int i2 = Integer.parseInt(g2);
         
          if(g3 != null) {
            int i3 = Integer.parseInt(g3);
           
            int day = i1;
            int month = i2;
            int year = i3;
           
            if(month > 12) {
              day = i2;
              month = i1;
            }
           
            if(year < 100) {
              year += 2000;
            }
           
            if(g1.length() == 4) {
              year = i1;
              day = i3;
            }

            calendar.set(Calendar.YEAR,year);
            calendar.set(Calendar.MONTH,month-1);
            calendar.set(Calendar.DAY_OF_MONTH,day);
           
          } else {
            //2 numbers only, we assume it's day and month
            int month = i1;
            int day = i2;
            if(month > 12) {
              day = i1;
              month = i2;
            }
            if(month > 12) {
              //TODO : fire an exception ?
              System.err.println("Unparseable date : " + input);
            } else {
              calendar.set(Calendar.MONTH, month-1);
              calendar.set(Calendar.DAY_OF_MONTH, day);
            }
          }
         
          calendar.set(Calendar.HOUR_OF_DAY,0);
          calendar.set(Calendar.MINUTE,0);
          calendar.set(Calendar.SECOND,0);
          calendar.set(Calendar.MILLISECOND,0);

        } catch (Exception e) {
          e.printStackTrace();
        }
       
      }
     
      //System.out.println(input + " > " + calendar.getTime());
    }
   
    //Extract the time information
    matcher = getTimeComponent.matcher(input);
    if(matcher.find()) {
      try {
        int hours = Integer.parseInt(matcher.group(1));
       
        int minutes = Integer.parseInt(matcher.group(2));
        calendar.set(Calendar.MINUTE,minutes);
       
        boolean amPMModifier = matcher.group(5) !=  null;
       
        boolean hasSeconds = matcher.group(4) != null;
       
        if(hasSeconds) {
          int seconds = Integer.parseInt(matcher.group(4));
          calendar.set(Calendar.SECOND, seconds);
        }
       
        if(amPMModifier) {
          String amPm = matcher.group(5).trim().toLowerCase();
          if(amPm.equals("am")) {
            calendar.set(Calendar.AM_PM,Calendar.AM);
          } else {
            calendar.set(Calendar.AM_PM,Calendar.PM);
          }
          calendar.set(Calendar.HOUR, hours);
         
        } else {
          calendar.set(Calendar.HOUR_OF_DAY, hours);
        }
      } catch(Exception e) {
        e.printStackTrace();
      }
    }
   
    int nbBack = 0;
    Calendar calendarCompare = new GregorianCalendar();
   
//    if(calendar.after(calendarCompare)) {
//      System.err.println("maintenant ici: "+calendarCompare.getTimeInMillis()+"  --  "+calendarCompare.getTime()+"  --  "+calendarCompare);
//      System.err.println("maintenant la bas: "+calendar.getTimeInMillis()+"  --  "+calendar.getTime()+"  --  "+calendar);
//      System.err.println("CALENDARS: after? " + calendar.after(calendarCompare) + " // before? " + calendar.before(calendarCompare));
//    }
    while(calendar.after(calendarCompare) && nbBack++ < 50) {
      calendar.add(Calendar.YEAR, -1);
    }
   
    //calendar.setTimeZone(TimeZone.getDefault());
    return calendar.getTime();
  }
 
 
  public static void main(String args[]) {
    DEBUG = true;
    DateParserRegex dateParser = new DateParserRegex();
   
    dateParser.parseDate("Today 05:34");
    dateParser.parseDate("Y-Day 21:55");
    dateParser.parseDate("07-25 2006");
    dateParser.parseDate("02-01 02:53");
    dateParser.parseDate("02-01 02:53 am");
    dateParser.parseDate("02-01 02:53 pm");
    dateParser.parseDate("03 Mar 2006");
    dateParser.parseDate("0 minute ago");
    dateParser.parseDate("3 hours ago");
    dateParser.parseDate("2 days ago");
    dateParser.parseDate("10 months ago");
    dateParser.parseDate("45 mins ago");
    dateParser.parseDate("Today");
    dateParser.parseDate("Yesterday");
    dateParser.parseDate("16.9w");
    dateParser.parseDate("22.6h");
    dateParser.parseDate("1.7d");
    dateParser.parseDate("2d 7h");
    dateParser.parseDate("1w");
    dateParser.parseDate("1w 4d");
    dateParser.parseDate("1mon 1w");
    dateParser.parseDate("22.11.");    // 22 nov
    dateParser.parseDate("22 Apr 08")//
    dateParser.parseDate("3 months")//
    dateParser.parseDate("1 day");    //
    dateParser.parseDate("3 weeks")//
    dateParser.parseDate("1 year");    //
    dateParser.parseDate("4 hours ago")//
    dateParser.parseDate("yesterday")//
    dateParser.parseDate("2 days ago")//
    dateParser.parseDate("1 month ago")//
    dateParser.parseDate("2 months ago")//
    dateParser.parseDate("06/18");    // 18 Jun
    dateParser.parseDate("02:10");    // at 2:10am today
    dateParser.parseDate("2005-02-26 20:55:10")//
    dateParser.parseDate("2005-02-26 10:55:10 PM");
    dateParser.parseDate("2005-02-26 10:55:10 AM");
    dateParser.parseDate("25-04-08")//
    dateParser.parseDate("142 Day(s) ago")//
    dateParser.parseDate("6 Minute(s) ago")//
    dateParser.parseDate("1 Hour(s) ago")//
    dateParser.parseDate("1.4h");    // 1.4 hours ago
    dateParser.parseDate("3.5d");    // 3 and a half days ago
    dateParser.parseDate("392w");    // 392 weeks ago
    dateParser.parseDate("01st Mar")//
    dateParser.parseDate("19th Apr")//
    dateParser.parseDate("03rd Apr")//
    dateParser.parseDate("2nd Apr")//
    dateParser.parseDate("3rd Nov")//
    dateParser.parseDate("04-28");    //
    dateParser.parseDate("2007-07-14")//
    dateParser.parseDate("2008.04.28")//
    dateParser.parseDate("16/04/08")//
    dateParser.parseDate("20-Dec-07")//
    dateParser.parseDate("2009-01-12 at 03:36:38" );
  }
 

}
TOP

Related Classes of com.aelitis.azureus.core.metasearch.impl.DateParserRegex

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.