Package de.peacei.gae.foodsupplier.parser

Source Code of de.peacei.gae.foodsupplier.parser.AbstractMensaParser

package de.peacei.gae.foodsupplier.parser;


import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import de.peacei.gae.foodsupplier.data.Food;
import de.peacei.gae.foodsupplier.data.Mensa;
import de.peacei.gae.foodsupplier.data.Menu;
import de.peacei.gae.foodsupplier.data.Weekplan;
import de.peacei.gae.foodsupplier.util.CalendarUtil;

public abstract class AbstractMensaParser implements MensaParser{
 
  protected Mensa mensa;
 
  protected FoodConfig[] configs;
 
  protected AbstractMensaParser(Mensa mensa, FoodConfig[] configs) {
    this.mensa = mensa;
    this.configs = configs;
  }
 
  protected Date getWeekdayForWeekAndIndex(Date week, int index) {
    Calendar calendar = CalendarUtil.getCalendar(week);
    calendar.set(Calendar.DAY_OF_WEEK, index + 2);
    return calendar.getTime();
  }
 
  protected int parseWeekNumber(String html, int weekNumber) {
      int kw = -1;
      int start = 0;
      int end = 0;
     
      if(html!=null) {
        try {
          Matcher matcher = Pattern.compile("<tr>").matcher(html);
          if(!matcher.find()) return -1;
         
          if(!matcher.find(matcher.end())) return -1;
          start = matcher.end();
         
          matcher = Pattern.compile("</td>").matcher(html);
          if(!matcher.find(start)) return -1;
          end = matcher.start();
         
         
          String kwString = html.substring(start, end);
         
          matcher = Pattern.compile("<b>").matcher(kwString);
          if(!matcher.find()) return -1;
          start = matcher.end();
         
          matcher = Pattern.compile("</b>").matcher(kwString);
          if(!matcher.find()) return -1;
          end = matcher.start();
         
          matcher = Pattern.compile("\\d{1,2}").matcher(kwString);
          if(!matcher.find(start)) return -1;
          kw = Integer.parseInt(kwString.substring(matcher.start(), matcher.end()));
        }
        catch(Exception ex) {}
      }
     
      if(kw != weekNumber) return parseWeekNumberAlt(html, weekNumber);
      return kw;
    }
 
  public int parseWeekNumberAlt(String html, int weekNumber) {
    int kw = -1;
      int start = 0;
      int end = 0;
   
    try {
      Matcher matcher = Pattern.compile("<tr>").matcher(html);
      if(!matcher.find()) return -1;
      if(!matcher.find(matcher.end())) return -1;
      if(!matcher.find(matcher.end())) return -1;
      start = matcher.end();
     
      matcher = Pattern.compile("</td>").matcher(html);
      if(!matcher.find(start)) return -1;
      start = matcher.end();

      if(!matcher.find(matcher.end())) return -1;
      end = matcher.start();
     
      String datumString = html.substring(start, end);
     
      matcher = Pattern.compile("<b>").matcher(datumString);
      if(!matcher.find()) return -1;
      start = matcher.end();
     
      matcher = Pattern.compile("</b>").matcher(datumString);
      if(!matcher.find()) return -1;
      end = matcher.start();
     
      datumString = datumString.substring(start, end);
      String[] datumArr = datumString.split("\\.");
      //GregorianCalendar datum = new GregorianCalendar(2012, 0, 17);
      GregorianCalendar datum = new GregorianCalendar(
          Integer.valueOf(datumArr[2]).intValue(),
          Integer.valueOf(datumArr[1]).intValue()-1,
          Integer.valueOf(datumArr[0]).intValue());
      datum.setTimeZone(TimeZone.getTimeZone("Europe/Berlin"));
     
     
      kw =  datum.get(Calendar.WEEK_OF_YEAR);
     
    } catch (Exception e) {}
    if(kw != weekNumber) return -1;
    return kw;
  }
 
  //@Override
  public Weekplan readWeekplanForWeek(String html, int weekNumber) {
   
    if(weekNumber == parseWeekNumber(html, weekNumber)) {
      Weekplan weekplan = new Weekplan();
      weekplan.setMensa(mensa);
      weekplan.setWeek(weekNumber);
      weekplan.setMenus(readMenus(html, new Date()));
   
      return weekplan;
    }
    else return null;
  }
 
    protected Food[] parse(String html, String keyWord) {
      int startIndex = 0;
      int endIndex = 0;
      int index = 0;
      String htmlPart = null;
      Matcher matcher = null;
      Food[] food = new Food[5];
     
      try {
     
        startIndex = html.indexOf(keyWord);
        index = html.indexOf("</tr>", startIndex)-1;
        htmlPart = html.substring(startIndex,
                html.indexOf("</tr>", startIndex)-1);
       
        startIndex = 10;
        String description;
        byte type;
        byte extra;

        for (byte i=0; i<5; i++) {
          type = Food.NO_TYPE;
          extra = Food.NO_EXTRA;
         
          startIndex = htmlPart.indexOf("<td", startIndex)+3;
          endIndex = htmlPart.indexOf("</td>", startIndex);

          description = htmlPart.substring(startIndex-3, endIndex);
           
                // Zeilenumbruch durch Leerzeichen ersetzen
                description=Pattern.compile("<br>").matcher(description).
                  replaceAll(" ");
               
                // Parsen der Kategorie
                matcher = Pattern.compile("\\([VGSRFWL]\\)").matcher(description);
                if(matcher.find()) {
                    type = (byte) description.charAt(matcher.start()+1);
                    description = matcher.replaceAll("");
                }
                else {
                  matcher = Pattern.compile(
                      "(schwein\\.gif|Rind\\.gif|Vegetarisch\\.gif|Huhn\\.gif|" +
                      "fisch\\.gif|SunFlower\\.gif|=\" \"|Wild\\.gif|wild\\.gif|Lamm\\.gif|lamm\\.gif)").matcher(description);
                  if(matcher.find()) {
                    char c = description.charAt(matcher.start());
                    switch (c) {
                      case 'R': type = Food.BEEF; break;
                      case 'V': type = Food.VEGETARIAN; break;
                      case 'H': type = Food.POULTRY; break;
                      case 's': type = Food.PORK; break;
                      case 'f': type = Food.FISH; break;
                      case 'S': type = Food.VEGAN; break;
                      case '=': case 'w': case 'W':  type = Food.GAME; break;
                      case 'l': case 'L': type = Food.LAMB; break;
                      default: type = Food.NO_TYPE;
                    }
                    description = matcher.replaceAll("");
                  }
                }
               
                // Entfernen der HTML-Tags
                //description=Pattern.compile("<[^>]+>").matcher(description).
                //    replaceAll("");
                description = Pattern.compile(">>(.*)<<").matcher(description).replaceAll("$1");
                description = Pattern.compile("<[a-zA-Z0-9\\ \"\\-=#&?\\/\\.\\\\]*>").matcher(description).replaceAll("");
                description = Pattern.compile("\\*").matcher(description).replaceAll("");

                // Parsen und Entfernen der Tagessuppe bzw. des Desserts
                matcher = Pattern.compile("Tagessuppe").matcher(description);
                if(matcher.find()) {
                  description = matcher.replaceAll("");
                  extra = Food.SOUP;
                }
                else {
                  matcher = Pattern.compile("(Dessert|Tagesdessert)").matcher(description);
                  if(matcher.find()) {
                    description = matcher.replaceAll("");
                      extra = Food.DESSERT;
                  }
                }
             
                // Aufräumen der Setzung von Anführungszeichen
                //matcher = Pattern.compile("&quot;[a-zA-Z]&quot;").matcher(description);
                matcher = Pattern.compile("\"[a-zA-Zäöüß\\s]+\"").matcher(description);
                if(matcher.find()) {
                  String tmp = matcher.group();
                  tmp = tmp.substring(1, tmp.length()-1);
                  tmp = "\""+tmp.trim()+"\"";
                  description = description.replaceAll("\"[a-zA-Zäöüß\\s]+\"", tmp);
                }

                // Entfernen doppelter Leerzeichen
                description=Pattern.compile("\\s{2,}").matcher(description).
                    replaceAll(" ");
               
                // Ersetzen des &-Zeichens
                description=Pattern.compile("\\s&\\s").matcher(description).
                  replaceAll(" und ");
               
                // Entfernen eines "und" am Ende
                if (description.endsWith(" und ")) {
                  description = description.substring(0, description.length()-5);
                }
                else if (description.endsWith(" und")) {
                  description = description.substring(0, description.length()-4);
                }
               
                // Entfernen ungültiger Zeichen
                description=Pattern.compile("([*<>\\.*+#&/]|nbsp;)+").matcher(description).
                  replaceAll("");
               
                // Entfernen eines Leerzeichens am Beginn oder Ende
                description = description.trim();
               
                description = String.valueOf(description.charAt(0)).toUpperCase(Locale.GERMANY)+description.substring(1);
           
                if(description.length()==0) description = "Entfällt";
           
                for (FoodConfig foodConfig : configs) {
                  boolean match = false;
                  if (foodConfig.matchOnlyFirstLetter) {
                    match = keyWord.charAt(0) == foodConfig.keyword.charAt(0);
                  } else {
                    match = keyWord.equals(foodConfig.keyword);
                  }
                  if (match) {
                    food[i] = new Food(foodConfig.label, description, type, extra);
                    break;
                  }
                }
        }
      } catch(Exception e) {
             return null;
        }
   
      setPrices(food, html.substring(index+17, html.indexOf("</tr>", index+4)), keyWord);
   
      return food;
    }
   
    protected void setPrices(Food[] food, String priceSources, String keyWord) {
      Matcher matcher;
      //int startIndex = 4;
      //int endIndex = 4;
      int startIndex = priceSources.indexOf("<td ", 0);
    int endIndex = priceSources.indexOf("</td>", 0);
     
    /*
      if(mensa.getId().equals(Mensa.Airport.getId()) && keyWord.equals("ESSEN II")) {
      startIndex = priceSources.indexOf("<td ", endIndex);
      endIndex = priceSources.indexOf("</td>", startIndex);
    }
      */
    for(int i=0; i<5; i++) {
      try {
        food[i].setStudentPrice("---");
        food[i].setStaffPrice("---");
       
        for(int j=0; j<4; j++) {
           
          switch(j) {
              case 1:
                String bla = priceSources.substring(startIndex, endIndex);
                matcher = Pattern.compile("\\d,\\d\\d").matcher(bla);
                if(matcher.find()) {
                  food[i].setStudentPrice(matcher.group().trim()+'€');
                } break;
              case 3:
                String blub = priceSources.substring(startIndex, endIndex);
                matcher = Pattern.compile("\\d,\\d\\d").matcher(blub);
                if(matcher.find()) {
                  food[i].setStaffPrice(matcher.group().trim()+'€');
                }
              default: break;   
          }
          startIndex = priceSources.indexOf("<td ", endIndex);
            endIndex = priceSources.indexOf("</td>", startIndex);
        }
      }
      catch(Exception ex) { }
    }
    }
   
  protected List<Menu> readMenus(final String html, final Date week) {
    List<Menu> menus = new ArrayList<Menu>();
        if(html != null) {
         
          Vector<Food[]> foods = new Vector<Food[]>(0);
         
          Food[] foodArr;
          for (FoodConfig foodConfig : configs) {
            if((foodArr=parse(html, foodConfig.keyword))!=null)foods.add(foodArr);
          }
         
          for (int i = 0; i < 5; i++) {
            Menu menu = new Menu();
            menu.setDay(getWeekdayForWeekAndIndex(week, i));
            for (Food[] foods2 : foods) {
              menu.getFoods().add(foods2[i]);
            }
            menus.add(menu);
          }
           
        }
    return menus;
  }
   
    protected static class FoodConfig {
     
      private String keyword;
     
      private String label;
     
      private boolean matchOnlyFirstLetter;

    public FoodConfig(String keyword, String label, boolean matchOnlyFirstLetter) {
      this.keyword = keyword;
      this.label = label;
      this.matchOnlyFirstLetter = matchOnlyFirstLetter;
    }
   
    }

}
TOP

Related Classes of de.peacei.gae.foodsupplier.parser.AbstractMensaParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.