Commit e7b2c71b authored by mengxiangxuan

00

parent a9dc207f
......@@ -49,6 +49,7 @@ sql_ctr_roc = '''select ctr_label,pre_ctr,
WHEN rcmd_type=241 THEN 'fnn2'
WHEN rcmd_type=242 THEN 'deepFm2'
WHEN rcmd_type=243 THEN 'dcn'
WHEN rcmd_type=244 THEN 'dcn2'
END AS rcmd_name
from advert.dws_advert_order_wide_v4_level_6_di
where dt='{}' and pre_ctr is not null
......@@ -86,6 +87,7 @@ sql_cvr_roc = '''select cvr_label,pre_cvr,
WHEN rcmd_type=241 THEN 'fnn2'
WHEN rcmd_type=242 THEN 'deepFm2'
WHEN rcmd_type=243 THEN 'dcn'
WHEN rcmd_type=244 THEN 'dcn2'
END AS rcmd_name
from
(select * from advert.dws_advert_order_wide_v4_level_6_di where dt='{}' and charge_cnt>0) a
......@@ -104,13 +106,21 @@ cursor.execute(sql_cvr_roc)
# Load the rows fetched from the cursor into a DataFrame with three named
# columns: cvr_label, pre_cvr and rcmd_name.
cvr_roc_data=pd.DataFrame(cursor.fetchall(),
columns=['cvr_label','pre_cvr','rcmd_name'])
# alg=['alg-4.0','alg-4.0.1',
# 'alg-4.0.2','alg-4.0.4','alg-4.1','alg-4.2.1',
# 'alg-610','alg-act-tab','alg-online-learn','alg-online-learn223',
# 'alg-online-learn224','alg-online-weight',
# 'alg-app-optimize', 'alg-act-count',
# 'alg-fm-backend','BTM_AND_PC_31','BTM_AND_PC_32','BTM_AND_PC_33','Material_reform',
# 'v501','v502','fix_bias_2','fix_bias_3','fnn2','deepFm2','dcn']
alg=['alg-4.0','alg-4.0.1',
'alg-4.0.2','alg-4.0.4','alg-4.1','alg-4.2.1',
'alg-610','alg-act-tab','alg-online-learn','alg-online-learn223',
'alg-online-learn224','alg-online-weight',
'alg-app-optimize', 'alg-act-count',
'alg-fm-backend','BTM_AND_PC_31','BTM_AND_PC_32','BTM_AND_PC_33','Material_reform',
'v501','v502','fix_bias_2','fix_bias_3','fnn2','deepFm2','dcn']
'alg-fm-backend','BTM_AND_PC_31','Material_reform',
'v501','v502','fix_bias_2','fix_bias_3','fnn2','deepFm2','dcn2']
########roc曲线###=============================================================================
part=[(0,0),(0,1),(0,2),(0,3),(1,0),(1,1),(1,2),(1,3),(2,0),(2,1),(2,2),(2,3),(3,0),(3,1),(3,2),(3,3),
......
......@@ -9,12 +9,12 @@ cursor = hive.connect(host='10.50.10.11', port=10000, username='mengxiangxuan',
# Build the date strings (formatted YYYY-MM-DD) derived from the current time.
now = datetime.datetime.now()
today=now.strftime('%Y-%m-%d')
delta1 = datetime.timedelta(days=1)
# NOTE(review): deltan is assigned twice in this listing (7 days, then 5 days).
# The second assignment overwrites the first, so 5 days is the value in effect.
deltan = datetime.timedelta(days=7)
deltan = datetime.timedelta(days=5)
# yestoday1 is now minus one day; yestodayn is now minus deltan.
yestoday1=(now - delta1).strftime('%Y-%m-%d')
yestodayn=(now - deltan).strftime('%Y-%m-%d')
# All ad + slot pairs (ads that have conversion tracking and fee != 0).
# About 3500 ad-slot ids that issued coupons.
# About 2300 ad-slot ids that issued coupons.
print('all slotid')
sql11='''
select slotid,app_id,count(1) cnt from advert.dws_advert_order_wide_v4_level_3_di
......@@ -25,7 +25,7 @@ cursor.execute(sql11)
# Load the fetched rows into a DataFrame and name its columns
# (slotid, app_id, cnt).
df11 = pd.DataFrame(cursor.fetchall())
df11.columns=['slotid','app_id','cnt']
# NOTE(review): the `.ix` indexer was deprecated in pandas 0.20 and removed in
# pandas 1.0; `.loc` is the documented replacement for boolean-mask selection.
# Keep only rows whose slotid is not null, then cast every column to int.
df11=df11.ix[df11['slotid'].notnull()].astype('int')
# NOTE(review): two cnt filters appear back to back in this listing; applied in
# sequence, only rows with cnt > 100 remain.
df11=df11.ix[df11['cnt']>10] # drop ad slots with very low traffic
df11=df11.ix[df11['cnt']>100] # drop ad slots with very low traffic
# Drop the cnt column once the filtering is done.
df11=df11.drop(['cnt'],axis=1)
# All non-free ads with conversion tracking: about 1000.
......@@ -52,16 +52,12 @@ sql='''
select advert_id,slotid
from (
select a.advert_id,a.slotid,
sum(if(act_click_cnt is not null ,1,0)) as act_click_cnt,
count(1) as launch_cnt
from
(select * from advert.dws_advert_order_wide_v4_level_3_di
where dt>='{0}' and dt<='{1}') a
left outer join
tmp.tmp_cpc_act_advert_df b
on a.advert_id = b.advert_id
group by a.advert_id,a.slotid) t
where launch_cnt>100
where launch_cnt>50
'''.format(yestodayn,yestoday1)
# Execute the query held in `sql` and load every returned row into a DataFrame.
cursor.execute(sql)
ad_slot_launch = pd.DataFrame(cursor.fetchall())
......
......@@ -12,11 +12,11 @@ delta1 = datetime.timedelta(days=1)
# Date strings (formatted YYYY-MM-DD): now minus delta1 and now minus 7 days.
deltan = datetime.timedelta(days=7)
yestoday1=(now - delta1).strftime('%Y-%m-%d')
yestodayn=(now - deltan).strftime('%Y-%m-%d')
#from hdfs.client import Client
# NOTE(review): this star import appears twice in this listing (here and a few
# lines below); it pulls every public name of ad_slot_set into this namespace.
from ad_slot_set import *
#from ad_slot_not_orient import *
# Connect through hive.connect(...) and keep a cursor for the queries that follow.
cursor = hive.connect(host='10.50.10.11', port=10000, username='mengxiangxuan', database='default').cursor()
#from hdfs.client import Client
#from ad_slot_not_orient import *
from ad_slot_set import *
#流量信息
......@@ -115,8 +115,8 @@ ad_info = pd.DataFrame(cursor.fetchall())
# Name the ad_info columns and keep only rows with a positive fee.
ad_info.columns=['advert_id','account_id','match_tag_nums','fee']
# NOTE(review): the `.ix` indexer was deprecated in pandas 0.20 and removed in
# pandas 1.0; `.loc` is the documented replacement for boolean-mask selection.
ad_info=ad_info.ix[ad_info['fee']>0]
# NOTE(review): ad_info_match_slot is built twice in this listing. The left
# merge (with account_id filled with 0 and cast to int, then str) is
# overwritten by the inner merge below, so the inner-merge result followed by
# dropna() is what remains.
ad_info_match_slot=pd.merge(ad_slot_df,ad_info,how='left',on='advert_id')
ad_info_match_slot['account_id']=ad_info_match_slot['account_id'].fillna(value=0).astype('int').astype('str')
# Inner merge on advert_id keeps only ads present in both frames; dropna()
# then removes any row that still contains a missing value.
ad_info_match_slot=pd.merge(ad_slot_df,ad_info,how='inner',on='advert_id')
ad_info_match_slot=ad_info_match_slot.dropna()
featrue_id=[
......@@ -213,9 +213,9 @@ df2.to_csv('ad_info.csv',index=False,sep='|')
#[slot,配置]发券最小arpu
#[slot,配置]发券平均arpu
sql_min_arpu='''
select slotid,min(arpu) min_arpu from
select slotid,avg(arpu) min_arpu from
(select slotid,advert_id,orientation_id,
sum(charge_fees) cost,
count(1) launch_cnt,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment