Package com.alimama.quanjingmonitor.mdrillImport.parse

Source Code of com.alimama.quanjingmonitor.mdrillImport.parse.app_log_parse$DataIterParse

package com.alimama.quanjingmonitor.mdrillImport.parse;

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.io.Text;
import org.apache.log4j.Logger;

import com.alimama.mdrill.json.JSONException;
import com.alimama.mdrill.json.JSONObject;
import com.alimama.mdrillImport.InvalidEntryException;


/**
* usertrack日志
*
*
* @author yannian.mu
*
*/
public class app_log_parse extends com.alimama.mdrillImport.DataParser{
  private static final long serialVersionUID = 1L;
  public volatile long groupCreateerror=0;

  private static Logger LOG = Logger.getLogger(app_log_parse.class);

  private volatile long lines=0;
  private volatile long lines_sb=0;
  private volatile long lines_sb2=0;
  private volatile long lines_sb3=0;

  private static long TS_MAX=3600l*24*31;

  private volatile long laststartts=System.currentTimeMillis()/1000-TS_MAX;
  private volatile long lastendts=System.currentTimeMillis()/1000+TS_MAX;

  private volatile long timediff=System.currentTimeMillis();
  private volatile long timediff2=System.currentTimeMillis();
  private volatile long timediff3=System.currentTimeMillis();

  public static void main(String[] args) {
    System.out.println(111);
  }

  public static JSONObject parseSb(String str) throws JSONException
  {
    String[] split=str.split("[ ]*,[ ]*",-1);
    for(String s:split)
    {
      if(!s.startsWith("_sb="))
      {
        continue;
      }
      String json=decodeString(s.substring(4));
      return new JSONObject(json);
    }
    return new JSONObject();
  }
 
   private static String decodeString(String args) {
      try {
        return new String(java.net.URLDecoder.decode(args,"UTF-8".getBytes("UTF-8"), "UTF-8");
      } catch (Throwable e) {
        try {
          return new String(java.net.URLDecoder.decode(args,"GBK".getBytes("UTF-8"), "UTF-8");
        } catch (Throwable e2) {
          return args;
        }
      }
    }
 
  
   public String formatRows(String[] clicklog)
   {
     StringBuilder b = new StringBuilder();
      for (int i = 0; i < clicklog.length; i++) {
        b.append(i);
        b.append("=");
        b.append(String.valueOf(clicklog[i]));
        b.append(",");
      }
     
      return b.toString();
   }
 
     private String matchGet(String s,Pattern pat,int index)
     {
       if(s==null)
       {
         return null;
       }
         Matcher mat = pat.matcher(s);

       while (mat.find()) {
             return mat.group(index);
          }
       return null;
     }
    
     private String get_json_object(String s,String key)
     {
       if(s==null)
       {
         return null;
       }
       try{
         JSONObject obj= new JSONObject(s);
         if(obj.has(key))
         {
           return String.valueOf(obj.get(key));
         }
       }catch(Throwable e)
       {
         return null;
       }
      
       return null;
     }
    
    
    
     private static Pattern pat_arg1=Pattern.compile("(.*)(point|_sb)=(\\{.*\\})(.*)");
     private static Pattern pat_reserves=Pattern.compile("(.*)(_sb|point)=(\\{.*\\})(.*)");
     private static Pattern pat_args=Pattern.compile("(.*)(_sb|point)=(\\{.*\\})(.*)");
     private static Pattern pat_last=Pattern.compile("(\\w+)[#]*(.*)");

   private String parseGet(String arg1,String reserves,String args,String key)
   {
     String rtn=null;
     if(rtn==null&&arg1!=null&&arg1.indexOf(key)>=0)
     {
       String lower_decode_arg1=decodeString(String.valueOf(arg1)).toLowerCase();
       rtn=get_json_object(matchGet(lower_decode_arg1,pat_arg1,3),key);
     }
    
     if(rtn==null&&reserves!=null&&reserves.indexOf(key)>=0)
     {
       String lower_decode_reserves=decodeString(String.valueOf(reserves)).toLowerCase();
       rtn=get_json_object(matchGet(lower_decode_reserves,pat_reserves,3),key);
     }
    
     if(rtn==null&&args!=null&&args.indexOf(key)>=0)
     {
       String lower_decode_args=decodeString(String.valueOf(args)).toLowerCase();
       rtn=get_json_object(matchGet(lower_decode_args,pat_args,3),key);
     }
    
     if(rtn==null)
     {
       return null;
     }
    
     return matchGet(rtn,pat_last,1)
   }
  
   private boolean isempty(String refpid)
   {
     return refpid==null||refpid.isEmpty()||refpid.length()<5||refpid.length()>500;
   }
  
  @Override
  public DataIter parseLine(String line) throws InvalidEntryException {
   
   
    try {
      if(line==null)
      {
        return null;
      }
     
     
      String[] clicklog=line.split("\001",-1);
      if(clicklog.length<41)
      {
        return null;
      }
     
           
      if(clicklog[40].isEmpty()||clicklog[40].length()<=5)
      {
        return null;
      }
     
      String app_key = clicklog[13];//应用的标识
      String event_id=clicklog[33];
      String arg1=clicklog[34];
      String args=clicklog[37];
      String reserves =clicklog[29];
     
      boolean match_app_key="12278902".equals(app_key)||"12087020".equals(app_key)||"12500477".equals(app_key);
      if(!match_app_key)
      {
        return null;
      }
     
      boolean match_event_id="21032".equals(event_id)||"2001".equals(event_id)||"30001".equals(event_id);
      if(!match_event_id)
      {
        return null;

      }
     
      boolean match_ad_id=arg1.indexOf("ad_id")>=0||args.indexOf("ad_id")>=0||reserves.indexOf("ad_id")>=0;
      boolean match_refpid=arg1.indexOf("refpid")>=0||args.indexOf("refpid")>=0||reserves.indexOf("refpid")>=0;
     
      if(!(match_ad_id||match_refpid))
      {
        return null;
      }
   
      this.lines++;
      if(this.lines>100000)
      {
        this.laststartts=(System.currentTimeMillis()/1000)-TS_MAX;
        this.lastendts=(System.currentTimeMillis()/1000)+TS_MAX;
        this.lines=0;
      }

      long ts = Long.parseLong(clicklog[40]);
      this.lines_sb++;
      if(this.lines_sb>5000)
      {
        this.lines_sb=0;
        long nowts=System.currentTimeMillis();
        if(nowts-timediff>30000)
        {
          timediff=nowts;
          LOG.info("parseLine_sb_"+formatDayMin.format(new Date(ts*1000))+" "+formatRows(clicklog));
        }
      }

      if(ts<laststartts||ts>lastendts)
      {
        return null;
      }
       
      String refpid=parseGet(arg1, reserves, args, "refpid");;
      String ad_id=parseGet(arg1, reserves, args, "ad_id");;
     
      boolean iswireless=true;
      if(refpid==null&&ad_id!=null)
      {
        String strday=formatDay.format(new Date(ts*1000));
        refpid=FetchAdid2Pid.fetch().get(strday+"@"+String.valueOf(ad_id));
        iswireless=false;
      }
     
      if(isempty(refpid))
      {
        this.lines_sb3++;
        if(this.lines_sb3>100)
        {
          this.lines_sb3=0;
          long nowts=System.currentTimeMillis();
          if(nowts-timediff3>30000)
          {
            timediff3=nowts;
            LOG.info("parse error :"+formatDayMin.format(new Date(ts*1000))+" "+formatRows(clicklog));
          }
        }
       
        return null;
      }
      return new DataIterParse(ts,clicklog,refpid,iswireless);
    } catch (Throwable nfe) {
      if(groupCreateerror<100)
      {
        LOG.error("InvalidEntryException:"+line,nfe);
        groupCreateerror++;
      }
     
      throw new InvalidEntryException("Invalid log `" + line + "'\n" , nfe);
    }
  }
 
  public static class DataIterParse implements DataIter{
    private String[] pvlog=null;
    String refpid=null;
    boolean iswareless;

    long ts;
    public DataIterParse(long ts,String[] pvlog,String refpid,boolean iswareless) {
      this.pvlog = pvlog;
      this.ts=ts;
      this.refpid=refpid;
      this.iswareless=iswareless;
    }

    @Override
    public boolean next() {
      return false;
    }


    @Override
    public Number[] getSum() {
      return new Number[] { 0, 0, 0, 0, 0, 1, 0, 0 };
    }
   
    @Override
    public long getTs() {
       return (ts/10)*10000;
    }
   

      //wdm_v3_user_track
    @Override
    public Object[] getGroup() {
      long ts300 = (this.ts / 300) * 300000;
      Date d = new Date(ts300);
      String channel = String.valueOf(pvlog[20]).toLowerCase();
      if (channel.indexOf("android") >= 0) {
        channel = "android";
      } else if (channel.indexOf("iphone") >= 0||channel.indexOf("ios") >= 0) {
        channel = "ios";
      } else {
        channel = "other";
      }
      return new String[] {
          String.valueOf(formatDay.format(d)),
          String.valueOf(formatMin.format(d)),
          "wireless",
          "app_log_parse",
          (this.iswareless?"wireless":"pc"),
          String.valueOf(this.refpid) // media_pid,需要解析 reserves
          , channel // channel
          ,DebugVersion.version
      };

    }
  }
 
 
   
    public static String getNameNodecode(String url,String keyname)
  {
      try{
      String[] tem = url.split("\\?", 2);
      String params=tem[0];
      if (tem.length >= 2){
        params=tem[1];
      }
   
      for (String s: params.split("&", -1)) {
          String[] tem1 = s.split("=", -1);
          String key = decodeString(tem1[0]);
        if(key.equals(keyname))
        {
          String value = (tem1.length < 2
              ? "" : decodeString(tem1[1]));
          return value;
        }
      }
      }catch(Throwable e){}
    return null;
   }
   
    public static String getName(String url,String keyname)
  {
      try{
      String[] tem = decodeString(url).split("\\?", 2);
      String params=tem[0];
      if (tem.length >= 2){
        params=tem[1];
      }
   
      for (String s: params.split("&", -1)) {
          String[] tem1 = s.split("=", -1);
          String key = decodeString(tem1[0]);
        if(key.equals(keyname))
        {
          String value = (tem1.length < ? "" : decodeString(tem1[1]));
          return value;
        }
      }
      }catch(Throwable e){}
    return null;
   }
 

 
  private static String[] colname={
    "thedate"
    ,"miniute_5"
    ,"source"
    ,"sub_source"
    ,"media_name"
    ,"media_pid"
    ,"channel"
    ,"o2o"
};

 
  private static String[] colSumName={
    "pv_2"
    ,"click_1"
    ,"click_2"
    ,"promise_click"
    ,"pc_2_wap"
    ,"weakup"
    ,"backup_1"
    ,"backup_2"
};
 
 
  @Override
  public String[] getSumName() {
    return colSumName;

  }

  @Override
  public String getTableName() {
    return "rpt_adpmp_3_8_online";
  }
 
    private static SimpleDateFormat formatDayMin = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    private static SimpleDateFormat formatDay = new SimpleDateFormat("yyyyMMdd");
    private static SimpleDateFormat formatMin = new SimpleDateFormat("HHmm");


  @Override
  public String[] getGroupName() {
    return colname;
  }
}

 
TOP

Related Classes of com.alimama.quanjingmonitor.mdrillImport.parse.app_log_parse$DataIterParse

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.