Package com.alimama.quanjingmonitor.mdrillImport.parse.for416tmp

Source Code of com.alimama.quanjingmonitor.mdrillImport.parse.for416tmp.app_log_parse$DataIterParse

package com.alimama.quanjingmonitor.mdrillImport.parse.for416tmp;

import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;

import com.alimama.mdrill.json.JSONException;
import com.alimama.mdrill.json.JSONObject;
import com.alimama.mdrillImport.InvalidEntryException;
import com.alimama.quanjingmonitor.mdrillImport.parse.FetchAdid2Pid;


/**
* Parser for usertrack logs.
* @author yannian.mu
*
*/
public class app_log_parse extends com.alimama.mdrillImport.DataParser{
  private static final long serialVersionUID = 1L;
  public volatile long groupCreateerror=0;

  private static Logger LOG = Logger.getLogger(app_log_parse.class);

  private volatile long lines=0;
  private volatile long lines_sb=0;
  private volatile long lines_sb3=0;

  private static long TS_MAX=3600l*24*31;

  private volatile long laststartts=System.currentTimeMillis()/1000-TS_MAX;
  private volatile long lastendts=System.currentTimeMillis()/1000+TS_MAX;

  private volatile long timediff=System.currentTimeMillis();
  private volatile long timediff3=System.currentTimeMillis();

  public static void main(String[] args) {
    System.out.println(111);
  }

  public static JSONObject parseSb(String str) throws JSONException
  {
    String[] split=str.split("[ ]*,[ ]*",-1);
    for(String s:split)
    {
      if(!s.startsWith("_sb="))
      {
        continue;
      }
      String json=decodeString(s.substring(4));
      return new JSONObject(json);
    }
    return new JSONObject();
  }
 
   private static String decodeString(String args) {
      try {
        return new String(java.net.URLDecoder.decode(args,"UTF-8".getBytes("UTF-8"), "UTF-8");
      } catch (Throwable e) {
        try {
          return new String(java.net.URLDecoder.decode(args,"GBK".getBytes("UTF-8"), "UTF-8");
        } catch (Throwable e2) {
          return args;
        }
      }
    }
 
  
   public String formatRows(String[] clicklog)
   {
     StringBuilder b = new StringBuilder();
      for (int i = 0; i < clicklog.length; i++) {
        b.append(i);
        b.append("=");
        b.append(String.valueOf(clicklog[i]));
        b.append(",");
      }
     
      return b.toString();
   }
 
     private String matchGet(String s,Pattern pat,int index)
     {
       if(s==null)
       {
         return null;
       }
         Matcher mat = pat.matcher(s);

       while (mat.find()) {
             return mat.group(index);
          }
       return null;
     }
    
     private String get_json_object(String s,String key)
     {
       if(s==null)
       {
         return null;
       }
       try{
         JSONObject obj= new JSONObject(s);
         if(obj.has(key))
         {
           return String.valueOf(obj.get(key));
         }
       }catch(Throwable e)
       {
         return null;
       }
      
       return null;
     }
    
    
    
     private static Pattern pat_arg1=Pattern.compile("(.*)(lwfrom|point|_sb)=([^&=,]+)(.*)");
     private static Pattern pat_reserves=Pattern.compile("(.*)(lwfrom|_sb|point)=([^&=,]+)(.*)");
     private static Pattern pat_args=Pattern.compile("(.*)(lwfrom|_sb|point)=([^&=,]+)(.*)");
     private static Pattern pat_last=Pattern.compile("(\\w+)[#]*(.*)");
    
    

     /**
      2014-04-10 00:04:48 0=59.174.28.74,1=5.0.1,2=860623029116421,3=460012710506535,4=Huawei,5=ARMv7 Processor rev 2 (v7l),6=860623029116421,7=HUAWEI G610-U00,8=960*540,9=中国联通,10=Wi-Fi,11=Unknown,12=227200,13=12278902,14=4.1.2,15=tbzhanglihua,16=tbzhanglihua,17=-,18=Unknown,19=Unknown,20=Android,21=4.2.1,22=Android,23=1.3.8,24=KORRWMGMFVKHQQLJLUTIBEED_12278902_1397059405807,25=KORRWMGMFVKHQQLJLUTIBEED,26=-,27=-,28=-,29=_uid=121123981,_cc=227200,_oc=227200,30=2014-04-10 00:03:38,31=1397059418449,32=Page_Webview,33=2001,34=Page_Home,35=Page_Home_Button-home-1-5-1,36=6161,37=action=kpv,list_param=1_72091_h18019_首焦-来往-0409-0410_home-1-5-1,list_type=activity,from=lw,url=http://m.laiwang.com/go/market/laiwang/mingrenmingxing.php?locate=home-1-5-1&actparam=1_72091_h18019_%E9%A6%96%E7%84%A6-%E6%9D%A5%E5%BE%80-0409-0410&lwfrom=20140404152605385&imei=860623029116421&imsi=460012710506535&ttid=227200@taobao_android_4.1.2,dep=16,idx=1955,38=1397059200,39=1397059200,40=1397059488,41=0,
      * @param arg1
      * @param reserves
      * @param args
      * @param key
      * @return
      */
   private String parseGet(String arg1,String reserves,String args)
   {
     String rtn=null;
     if(rtn==null&&arg1!=null&&arg1.indexOf("lwfrom")>=0)
     {
       String lower_decode_arg1=decodeString(String.valueOf(arg1)).toLowerCase();
       rtn=matchGet(lower_decode_arg1,pat_arg1,3);
     }
    
     if(rtn==null&&reserves!=null&&reserves.indexOf("lwfrom")>=0)
     {
       String lower_decode_reserves=decodeString(String.valueOf(reserves)).toLowerCase();
       rtn=matchGet(lower_decode_reserves,pat_reserves,3);

     }
    
     if(rtn==null&&args!=null&&args.indexOf("lwfrom")>=0)
     {
       String lower_decode_args=decodeString(String.valueOf(args)).toLowerCase();
       rtn=matchGet(lower_decode_args,pat_args,3);
     }
    
     if(rtn==null)
     {
       return null;
     }
    
     return matchGet(rtn,pat_last,1)
   }
  
   private boolean isempty(String refpid)
   {
     return refpid==null||refpid.isEmpty()||refpid.length()<5||refpid.length()>500;
   }
  
  @Override
  public DataIter parseLine(String line) throws InvalidEntryException {
   
   
    try {
      if(line==null)
      {
        return null;
      }
     
     
      String[] clicklog=line.split("\001",-1);
      if(clicklog.length<41)
      {
        return null;
      }
     
           
      if(clicklog[40].isEmpty()||clicklog[40].length()<=5)
      {
        return null;
      }
     
      String app_key = clicklog[13];//应用的标识
      String event_id=clicklog[33];
      String arg1=clicklog[34];
      String args=clicklog[37];
      String reserves =clicklog[29];
     
//      boolean match_app_key="12278902".equals(app_key)||"12087020".equals(app_key)||"12500477".equals(app_key);
//      if(!match_app_key)
//      {
//        return null;
//      }
//     
//      boolean match_event_id="21032".equals(event_id)||"2001".equals(event_id)||"30001".equals(event_id);
//      if(!match_event_id)
//      {
//        return null;
//
//      }
     
      boolean match_lwfrom=arg1.indexOf("lwfrom")>=0||args.indexOf("lwfrom")>=0||reserves.indexOf("lwfrom")>=0;
      boolean match_refpid=arg1.indexOf("refpid")>=0||args.indexOf("refpid")>=0||reserves.indexOf("refpid")>=0;
     
      if(!(match_lwfrom||match_refpid))
      {
        return null;
      }
   
      this.lines++;
      if(this.lines>100000)
      {
        this.laststartts=(System.currentTimeMillis()/1000)-TS_MAX;
        this.lastendts=(System.currentTimeMillis()/1000)+TS_MAX;
        this.lines=0;
      }

      long ts = Long.parseLong(clicklog[40]);
      this.lines_sb++;
      if(this.lines_sb>5000)
      {
        this.lines_sb=0;
        long nowts=System.currentTimeMillis();
        if(nowts-timediff>30000)
        {
          timediff=nowts;
          LOG.info("parseLine_sb_"+ColsDefine.formatDayMin.format(new Date(ts*1000))+" "+formatRows(clicklog));
        }
      }

      if(ts<laststartts||ts>lastendts)
      {
        return null;
      }
       
      String refpid=null;//parseGet(arg1, reserves, args, "refpid");;
      String lwfrom=parseGet(arg1, reserves, args);;
     
      if(refpid==null&&lwfrom!=null)
      {
        String strday=ColsDefine.formatDay.format(new Date(ts*1000));
        refpid=FetchAdid2PidWireLess.fetch().get(strday+"@"+String.valueOf(lwfrom));
      }
     
      if(isempty(refpid))
      {
        this.lines_sb3++;
        if(this.lines_sb3>100)
        {
          this.lines_sb3=0;
          long nowts=System.currentTimeMillis();
          if(nowts-timediff3>30000)
          {
            timediff3=nowts;
            LOG.info("parse error :"+ColsDefine.formatDayMin.format(new Date(ts*1000))+" "+formatRows(clicklog));
          }
        }
       
        return null;
      }
      return new DataIterParse(ts,clicklog,refpid);
    } catch (Throwable nfe) {
      if(groupCreateerror<100)
      {
        LOG.error("InvalidEntryException:"+line,nfe);
        groupCreateerror++;
      }
     
      throw new InvalidEntryException("Invalid log `" + line + "'\n" , nfe);
    }
  }
 
  public static class DataIterParse implements DataIter{
    private String[] pvlog=null;
    String refpid=null;

    long ts;
    public DataIterParse(long ts,String[] pvlog,String refpid) {
      this.pvlog = pvlog;
      this.ts=ts;
      this.refpid=refpid;
    }

    @Override
    public boolean next() {
      return false;
    }


    @Override
    public Number[] getSum() {
      return new Number[] { 0, 0, 0, 0, 0, 1, 0, 0,0 };
    }
   
    @Override
    public long getTs() {
       return (ts/10)*10000;
    }
   

      //wdm_v3_user_track
    @Override
    public Object[] getGroup() {
      long ts300 = (this.ts / 300) * 300000;
      Date d = new Date(ts300);
      String channel = String.valueOf(pvlog[20]).toLowerCase();
      if (channel.indexOf("android") >= 0) {
        channel = "android";
      } else if (channel.indexOf("iphone") >= 0||channel.indexOf("ios") >= 0) {
        channel = "ios";
      } else {
        channel = "other";
      }
      return new String[] {
          String.valueOf(ColsDefine.formatDay.format(d)),
          String.valueOf(ColsDefine.formatMin.format(d)),
          "wireless",
          "app_log_parse",
          String.valueOf(this.refpid)
          , channel // channel
          ,""
          ,""
          ,ColsDefine.version
      };

    }
  }

  @Override
  public String[] getSumName() {
    return ColsDefine.colSumName;

  }

  @Override
  public String getTableName() {
    return ColsDefine.tablename;
  }
 

  
  @Override
  public String[] getGroupName() {
    return ColsDefine.colname;
  }
}

 
TOP

Related Classes of com.alimama.quanjingmonitor.mdrillImport.parse.for416tmp.app_log_parse$DataIterParse

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.