00

a9dc207f · mengxiangxuan · 38c4dde2 · a9dc207f · a9dc207f
Commit a9dc207f authored Oct 15, 2018 by mengxiangxuan
Show whitespace changes
Inline Side-by-side

Showing with 42 additions and 14 deletions

ad_slot_set.py auto-spread/auto_manage/ad_slot_set.py +4 -3

samples_create.py auto-spread/auto_manage/samples_create.py +38 -11

No files found.
--- a/auto-spread/auto_manage/ad_slot_set.py
+++ b/auto-spread/auto_manage/ad_slot_set.py
@@ -25,6 +25,7 @@ cursor.execute(sql11)
 df11 = pd.DataFrame(cursor.fetchall())
 df11.columns=['slotid','app_id','cnt']
 df11=df11.ix[df11['slotid'].notnull()].astype('int')
+df11=df11.ix[df11['cnt']>10] #剔除流量很低的广告位
 df11=df11.drop(['cnt'],axis=1)
 #全部有转化埋点非免费广告约1000
@@ -37,7 +38,7 @@ cursor.execute(sql12)
 df12 = pd.DataFrame(cursor.fetchall())
 df12.columns=['advert_id']
-#全部ad_slot组合约 250w
+#all ad_slot combinations: about 3 million (300w)
 print('all ad_slot')
 df1=pd.DataFrame(columns=['slotid','advert_id'])
 for i in df12['advert_id']:
@@ -46,7 +47,7 @@ for i in df12['advert_id']:
    df1=df1.append(df_temp)
-#有历史发券且置信或能拿到预估cvr的  ad+slot 约12w
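+#ad+slot pairs that have launch (coupon-issuing) history and are confident, or for which a predicted cvr is available: about 100k (10w)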
 sql='''
 select advert_id,slotid
 from (
@@ -60,7 +61,7 @@ left outer join
 tmp.tmp_cpc_act_advert_df b
 on a.advert_id = b.advert_id
 group by a.advert_id,a.slotid) t
-where act_click_cnt>2 or launch_cnt>100
+where  launch_cnt>100
 '''.format(yestodayn,yestoday1)
 cursor.execute(sql)
 ad_slot_launch = pd.DataFrame(cursor.fetchall())

--- a/auto-spread/auto_manage/samples_create.py
+++ b/auto-spread/auto_manage/samples_create.py
@@ -72,7 +72,7 @@ sql2='''CREATE TABLE if not exists advert.dws_not_luanch_create_samples_mxx as
 	  where dt>='{0}' and dt<='{1}') c  
      distribute by slotid   sort by slotid,rank_num desc  
      )a  
-  where rnb<=500'''.format(yestodayn,yestoday1)
+  where rnb<=200'''.format(yestodayn,yestoday1)
 #未定向组合
 # sql3='''drop table advert.dws_not_luanch_slot_ad_mxx'''
 # sql4='''create table advert.dws_not_luanch_slot_ad_mxx  (slotid string,advert_id string)
@@ -100,15 +100,20 @@ cursor.execute(sql2)
 ###广告相关信息
-sql='''select distinct advert_id,account_id,case when length(match_tag_nums)=16 then substr(match_tag_nums,7)
+sql='''select advert_id,account_id,
+     case when length(match_tag_nums)=16 then substr(match_tag_nums,7)
     when length(match_tag_nums)=22 then substr(match_tag_nums,13)
-     else match_tag_nums end match_tag_nums
+     else match_tag_nums end match_tag_nums,avg(fee) fee
 from  advert.dws_advert_order_wide_v4_level_6_di
-where dt>='{0}' and dt<='{1}'
+where dt>='{0}' and dt<='{1}' and advert_id is not null
-and advert_id is not null'''.format(yestodayn,yestoday1)
+group by advert_id,account_id,
+     case when length(match_tag_nums)=16 then substr(match_tag_nums,7)
+     when length(match_tag_nums)=22 then substr(match_tag_nums,13)
+     else match_tag_nums end'''.format(yestodayn,yestoday1)
 cursor.execute(sql)
 ad_info = pd.DataFrame(cursor.fetchall())
-ad_info.columns=['advert_id','account_id','match_tag_nums']
+ad_info.columns=['advert_id','account_id','match_tag_nums','fee']
+ad_info=ad_info.ix[ad_info['fee']>0]
 ad_info_match_slot=pd.merge(ad_slot_df,ad_info,how='left',on='advert_id')
 ad_info_match_slot['account_id']=ad_info_match_slot['account_id'].fillna(value=0).astype('int').astype('str')
@@ -198,16 +203,38 @@ cursor.execute(sql)
 df1 = pd.DataFrame(cursor.fetchall())
 df1.columns=list(la)
 df1=df1.astype('str')
-df1.to_csv('not_luanch_scene.csv',index=False,sep='|')
+df1.to_csv('scene_info.csv',index=False,sep='|')
 #广告特征数据
 df2=ad_info_match_slot
-df2.columns=['f108001','f101001','f106001','f102001']
+df2.columns=['f108001','f101001','f106001','f102001','fee']
 df2=df2.astype('str')
-df2.to_csv('not_luanch_ad_info.csv',index=False,sep='|')
+df2.to_csv('ad_info.csv',index=False,sep='|')
-slotid=list(set(df1['f108001']) & set(df2['f108001']))
-pd.DataFrame(slotid).to_csv('slotid.csv',index=None)
+#minimum launch arpu per slot, taken over its [slot, config] groups
+sql_min_arpu='''
+select slotid,min(arpu) min_arpu from
+(select slotid,advert_id,orientation_id,
+sum(charge_fees) cost,
+count(1) launch_cnt,
+sum(charge_fees)/count(1) arpu
+from advert.dws_advert_order_wide_v4_level_6_di
+where dt>='{0}' and dt<='{1}'
+group by slotid,advert_id,orientation_id) a
+where arpu>0 and launch_cnt>5
+group by slotid
+'''.format(yestodayn,yestoday1)
+cursor.execute(sql_min_arpu)
+slot_min_arpu = pd.DataFrame(cursor.fetchall())
+slot_min_arpu.columns=['slotid','min_arpu']
+slot_min_arpu=slot_min_arpu.ix[pd.notnull(slot_min_arpu['slotid'])]
+slot_min_arpu['slotid']=slot_min_arpu['slotid'].astype('int').astype('str')
+slotid_df=pd.DataFrame(list(set(df1['f108001']) & set(df2['f108001'])),columns=['slotid'])
+slot_min_arpu=pd.merge(slot_min_arpu,slotid_df,how='inner',on=['slotid'])
+slot_min_arpu.to_csv('slot_min_arpu.csv',index=None)
 #############################
 #java读取 场景info,广告info，slotid，拼接样本，流式处理