Commit 24efd996 authored by mxx's avatar mxx

1

parent e69f4ef2
......@@ -27,288 +27,305 @@ yestoday10 = (now - delta10).strftime('%Y-%m-%d')
yestoday15 = (now - delta15).strftime('%Y-%m-%d')
# No-history candidates ---------------------------------------------
# Load the predicted ctr/cvr per "<slotid>_<advert_id>" pair and split the
# combined id into its two parts.
nolunch_pre_cvr = pd.read_table(r'slot_ad_cvr.txt', sep=',')
nolunch_pre_cvr.columns = ['slotad', 'ctr', 'cvr']
nolunch_pre_cvr[['slotid', 'advert_id']] = nolunch_pre_cvr['slotad'].str.split('_', expand=True)
# NOTE: the industry-matching and bias-correction steps that used to live here
# were fully commented out (dead code) and have been dropped; this variant
# ranks on the raw predicted cvr.
# Keep, for every advert, at most the 500 slots with the highest predicted cvr.
nolunch_pre_cvr = nolunch_pre_cvr.sort_values(by=['cvr'], ascending=False)
# .copy() so the column assignments below write to an independent frame
# instead of a slice of nolunch_pre_cvr.
pre_slotad_cvr_good = nolunch_pre_cvr.groupby('advert_id').head(500).copy()
candidate_count = pre_slotad_cvr_good.shape[0]
# The same cvr is repeated three times; bias/confidence/price are constant
# placeholder lists for this no-history set.
pre_slotad_cvr_good['cvrSet'] = pre_slotad_cvr_good['cvr'].map(lambda x: [round(x, 6)] * 3)
pre_slotad_cvr_good['biasSet'] = [[-1.0, -1.0, -1.0]] * candidate_count
pre_slotad_cvr_good['confidenceSet'] = [[0.0, 0.0, 0.0]] * candidate_count
pre_slotad_cvr_good['priceSection'] = [[0.0, 0.0]] * candidate_count
# Redis key is "NZ_K076_<slotid>_<advert_id>"; value is the JSON of the four sets.
pre_slotad_cvr_good['key'] = "NZ_K076_" + pre_slotad_cvr_good['slotad']
pre_slotad_cvr_good['value'] = pre_slotad_cvr_good[
    ['cvrSet', 'biasSet', 'confidenceSet', 'priceSection']
].apply(lambda x: x.to_json(orient='index'), axis=1)
# Re-number rows 0..n-1 so positional batching in the writer below works.
pre_slotad_cvr_good.index = range(candidate_count)
# Connect to the nezha Redis instance.
# NOTE(review): host and password are hard-coded in source; consider moving
# them to configuration that is not committed.
pool = redis.ConnectionPool(host='r-bp18da0abeaddc94285.redis.rds.aliyuncs.com',
                            password='hteK73Zxx3ji9LGCy2jBAZDJ6', port=6379, db=0)
r = redis.Redis(connection_pool=pool)
pipe = r.pipeline(transaction=True)
# Retire yesterday's candidate set: the keys are not deleted, they are
# re-written with a 900 s TTL so any key not refreshed below expires shortly.
print('pre_slotad_cvr_good2-----')
pre_slotad_cvr_good_old = pd.read_csv('pre_slotad_cvr_good2.csv')
# Keep a backup of the previous set before it is overwritten at the end.
pre_slotad_cvr_good_old.to_csv('pre_slotad_cvr_good_old2.csv', index=False)
for i, (key, value) in enumerate(zip(pre_slotad_cvr_good_old['key'],
                                     pre_slotad_cvr_good_old['value'])):
    pipe.set(key, value, ex=900)
    # Flush the pipeline every 5000 rows and pause briefly between batches.
    if i % 5000 == 0:
        pipe.execute()
        print(i)
        time.sleep(0.5)
pipe.execute()
# Write today's set. key: NZ_K076_<slotId>_<advertId>,
# value: JSON with cvr / bias / confidence / price sets; TTL 432000 s (5 days).
for i, (key, value) in enumerate(zip(pre_slotad_cvr_good['key'],
                                     pre_slotad_cvr_good['value'])):
    pipe.set(key, value, ex=432000)
    if i % 5000 == 0:
        pipe.execute()
        print(i)
        time.sleep(0.5)
pipe.execute()
# Persist today's set; the next run reads it back as "yesterday's" set.
pre_slotad_cvr_good.to_csv('pre_slotad_cvr_good2.csv', index=False)
from dingtalkchatbot.chatbot import DingtalkChatbot
# DingTalk robot webhook URL.
# NOTE(review): the access token is embedded in source — confirm this is intended.
webhook = 'https://oapi.dingtalk.com/robot/send?access_token=4f28ce996ab4f2601c0362fbfd0d48f58b0250a76953ff117ca41e9f1ec8e565'
# Create the DingTalk chatbot client used for the status messages below.
xiaoding = DingtalkChatbot(webhook)
# Passed as at_mobiles to every send_text call below.
at_mobiles = ['18668032242']
# Build and publish the no-history candidate set, then report the outcome to
# DingTalk. Any failure inside the try is reported instead of crashing the script.
try:
    # No-history candidates -----------------------------------------
    # Load the predicted ctr/cvr per "<slotid>_<advert_id>" pair.
    nolunch_pre_cvr = pd.read_table(r'slot_ad_cvr.txt', sep=',')
    nolunch_pre_cvr.columns = ['slotad', 'ctr', 'cvr']
    nolunch_pre_cvr[['slotid', 'advert_id']] = nolunch_pre_cvr['slotad'].str.split('_', expand=True)
    # NOTE: the commented-out industry-matching step was dead code and has been dropped.
    # Bias correction: per advert, ratio of average predicted cvr to average
    # statistical cvr over the last 7 days of result logs.
    sql = '''
select advert_id,avg(pre_cvr)/avg(stat_cvr) pre_diff
from logs.dwd_nezha_result_log_di
where dt>='{0}' and dt<='{1}' and order_id is not null
group by advert_id
'''.format(yestoday7, yestoday1)
    cursor.execute(sql)
    # Passing columns= keeps this working when the query returns no rows.
    ad_pre_diff = pd.DataFrame(cursor.fetchall(), columns=['advert_id', 'ad_diff'])
    ad_pre_diff['advert_id'] = ad_pre_diff['advert_id'].astype('str')
    # Correct the predicted cvr: where the prediction is more than 1.5x the
    # statistical value, divide it by that ratio; otherwise keep it unchanged.
    pre_slotad_stat_cvr = pd.merge(nolunch_pre_cvr, ad_pre_diff, how='left', on=['advert_id'])
    overestimated = pre_slotad_stat_cvr['ad_diff'] > 1.5
    pre_slotad_stat_cvr['fix_cvr'] = pre_slotad_stat_cvr['cvr'].mask(
        overestimated, pre_slotad_stat_cvr['cvr'] / pre_slotad_stat_cvr['ad_diff'])
    # Keep, for every advert, at most the 500 slots with the highest corrected cvr.
    nolunch_pre_cvr = pre_slotad_stat_cvr.sort_values(by=['fix_cvr'], ascending=False)
    # .copy() so the column assignments below do not write into a slice.
    pre_slotad_cvr_good = nolunch_pre_cvr.groupby('advert_id').head(500).copy()
    candidate_count = pre_slotad_cvr_good.shape[0]
    pre_slotad_cvr_good['cvrSet'] = pre_slotad_cvr_good['fix_cvr'].map(lambda x: [round(x, 6)] * 3)
    pre_slotad_cvr_good['biasSet'] = [[-1.0, -1.0, -1.0]] * candidate_count
    pre_slotad_cvr_good['confidenceSet'] = [[0.0, 0.0, 0.0]] * candidate_count
    pre_slotad_cvr_good['priceSection'] = [[0.0, 0.0]] * candidate_count
    pre_slotad_cvr_good['key'] = "NZ_K076_" + pre_slotad_cvr_good['slotad']
    pre_slotad_cvr_good['value'] = pre_slotad_cvr_good[
        ['cvrSet', 'biasSet', 'confidenceSet', 'priceSection']
    ].apply(lambda x: x.to_json(orient='index'), axis=1)
    pre_slotad_cvr_good.index = range(candidate_count)
    # Connect to the nezha Redis instance.
    # NOTE(review): host and password are hard-coded in source.
    pool = redis.ConnectionPool(host='r-bp18da0abeaddc94285.redis.rds.aliyuncs.com',
                                password='hteK73Zxx3ji9LGCy2jBAZDJ6', port=6379, db=0)
    r = redis.Redis(connection_pool=pool)
    pipe = r.pipeline(transaction=True)
    # Retire yesterday's set: keys are re-written with a 900 s TTL (not
    # deleted) so anything not refreshed below expires shortly.
    print('pre_slotad_cvr_good2-----')
    pre_slotad_cvr_good_old = pd.read_csv('pre_slotad_cvr_good2.csv')
    pre_slotad_cvr_good_old.to_csv('pre_slotad_cvr_good_old2.csv', index=False)
    for i, (key, value) in enumerate(zip(pre_slotad_cvr_good_old['key'],
                                         pre_slotad_cvr_good_old['value'])):
        pipe.set(key, value, ex=900)
        # Flush every 5000 rows and pause briefly between batches.
        if i % 5000 == 0:
            pipe.execute()
            print(i)
            time.sleep(0.5)
    pipe.execute()
    # Write today's set. key: NZ_K076_<slotId>_<advertId>,
    # value: JSON with cvr / bias / confidence / price sets; TTL 432000 s.
    for i, (key, value) in enumerate(zip(pre_slotad_cvr_good['key'],
                                         pre_slotad_cvr_good['value'])):
        pipe.set(key, value, ex=432000)
        if i % 5000 == 0:
            pipe.execute()
            print(i)
            time.sleep(0.5)
    pipe.execute()
    # Persist today's set; the next run reads it back as "yesterday's" set.
    pre_slotad_cvr_good.to_csv('pre_slotad_cvr_good2.csv', index=False)
except Exception:
    # Print the traceback so the failure can be diagnosed from the job log,
    # then alert; KeyboardInterrupt/SystemExit are no longer swallowed.
    import traceback
    traceback.print_exc()
    xiaoding.send_text(msg='候选集(无历史数据部分存储)程序异常!!!请排查!', at_mobiles=at_mobiles)
else:
    xiaoding.send_text(msg='候选集(无历史数据部分)存储成功', at_mobiles=at_mobiles)
###########################################################################################
# Advert-level historical data -----------------------------------------------------------
# Per (app, slot, advert): cost, statistical cvr, predicted cvr and bias for
# three bid tiers over the last 15 days.
# Tier 0: orders whose fee is below the advert's 33rd-percentile fee.
sql_ad_fee0 = '''
select app_id,slotid,advert_id,
sum(charge_fees) cost,
sum(charge_fees)/sum(act_click_cnt) costconvert,
sum(act_click_cnt)/sum(charge_cnt) stat_cvr,
avg(pre_cvr) pre_cvr,
avg(pre_cvr)/(sum(act_click_cnt)/sum(charge_cnt)) bias ,
avg(b.fee0) fee0
from
(select * from advert.dws_advert_order_wide_v4_level_6_di where dt>='{0}' and dt<='{1}') a
left outer join
(select advert_id,percentile(fee,0.33) fee0
from advert.dws_advert_order_wide_v4_level_6_di
where dt>='{0}' and dt<='{1}' and fee>0
group by advert_id) b
on a.advert_id=b.advert_id
where a.fee<b.fee0 and a.slotid is not null
group by app_id,slotid,a.advert_id
'''.format(yestoday15, yestoday1)
# Tier 1: orders whose fee is below the advert's 66th-percentile fee.
sql_ad_fee1 = '''
select app_id,slotid,advert_id,
sum(charge_fees) cost,
sum(charge_fees)/sum(act_click_cnt) costconvert,
sum(act_click_cnt)/sum(charge_cnt) stat_cvr,
avg(pre_cvr) pre_cvr,
avg(pre_cvr)/(sum(act_click_cnt)/sum(charge_cnt)) bias,
avg(b.fee1) fee1
from
(select * from advert.dws_advert_order_wide_v4_level_6_di where dt>='{0}' and dt<='{1}') a
left outer join
(select advert_id,percentile(fee,0.66) fee1
from advert.dws_advert_order_wide_v4_level_6_di
where dt>='{0}' and dt<='{1}' and fee>0
group by advert_id) b
on a.advert_id=b.advert_id
where a.fee<b.fee1 and a.slotid is not null
group by app_id,slotid,a.advert_id
'''.format(yestoday15, yestoday1)
# Tier 2: all orders, no fee filter.
sql_ad_fee2='''select app_id,slotid,advert_id,
sum(charge_fees) cost,
sum(charge_fees)/sum(act_click_cnt) costconvert,
sum(act_click_cnt)/sum(charge_cnt) stat_cvr,
avg(pre_cvr) pre_cvr,
avg(pre_cvr)/(sum(act_click_cnt)/sum(charge_cnt)) bias
from advert.dws_advert_order_wide_v4_level_6_di
where dt>='{0}' and dt<='{1}' and slotid is not null
group by app_id,slotid,advert_id
'''.format(yestoday15, yestoday1)
# Advert-wide cost per conversion, used as the scale for confidence.
sql_ad_costconvert='''select advert_id,
sum(charge_fees)/sum(act_click_cnt) ad_costconvert
from advert.dws_advert_order_wide_v4_level_6_di
where dt>='{0}' and dt<='{1}'
group by advert_id
'''.format(yestoday15, yestoday1)
join_keys = ['app_id', 'slotid', 'advert_id']
tier_metrics = ['cost', 'costconvert', 'stat_cvr', 'pre_cvr', 'bias']
# Fetch each tier, suffix its metric columns with the tier number and keep
# only rows with positive cost. columns= keeps empty results from crashing.
tier_frames = {}
for tier, tier_sql, fee_cols in (('0', sql_ad_fee0, ['fee0']),
                                 ('1', sql_ad_fee1, ['fee1']),
                                 ('2', sql_ad_fee2, [])):
    cursor.execute(tier_sql)
    tier_frame = pd.DataFrame(
        cursor.fetchall(),
        columns=join_keys + [metric + tier for metric in tier_metrics] + fee_cols)
    tier_frames[tier] = tier_frame.loc[tier_frame['cost' + tier] > 0]
stat_slotad_cvr_fee0 = tier_frames['0']
stat_slotad_cvr_fee1 = tier_frames['1']
stat_slotad_cvr_fee2 = tier_frames['2']
cursor.execute(sql_ad_costconvert)
ad_costconvert = pd.DataFrame(cursor.fetchall(), columns=['advert_id', 'ad_costconvert'])
# The all-orders tier is the base; lower tiers and the advert cost are left-joined.
stat_slotad_cvr_fee21 = pd.merge(stat_slotad_cvr_fee2, stat_slotad_cvr_fee1, how='left', on=join_keys)
stat_slotad_cvr_fee210 = pd.merge(stat_slotad_cvr_fee21, stat_slotad_cvr_fee0, how='left', on=join_keys)
stat_slotad_cvr = pd.merge(stat_slotad_cvr_fee210, ad_costconvert, how='left', on=['advert_id'])
# .copy() so the assignments below do not write into a filtered slice.
stat_slotad_cvr = stat_slotad_cvr.loc[pd.notnull(stat_slotad_cvr['ad_costconvert'])].copy()
for tier in ('0', '1', '2'):
    conf_col, cvr_col, pre_col = 'confidence' + tier, 'stat_cvr' + tier, 'pre_cvr' + tier
    # confidence = tier cost relative to 5 advert-level conversions, capped at 1.
    stat_slotad_cvr[conf_col] = stat_slotad_cvr['cost' + tier] / (stat_slotad_cvr['ad_costconvert'] * 5)
    stat_slotad_cvr.loc[stat_slotad_cvr[conf_col] > 1, conf_col] = 1
    # Missing statistical cvr: 0 when confidence >= 0.2, otherwise fall back
    # to the predicted cvr of the same tier.
    cvr_missing = pd.isnull(stat_slotad_cvr[cvr_col])
    stat_slotad_cvr.loc[(stat_slotad_cvr[conf_col] >= 0.2) & cvr_missing, cvr_col] = 0
    low_conf_missing = (stat_slotad_cvr[conf_col] < 0.2) & cvr_missing
    stat_slotad_cvr.loc[low_conf_missing, cvr_col] = stat_slotad_cvr.loc[low_conf_missing, pre_col]
cvr_cols = ['stat_cvr0', 'stat_cvr1', 'stat_cvr2']
stat_slotad_cvr[cvr_cols] = stat_slotad_cvr[cvr_cols].fillna(value=0)
# Missing all-orders bias: 5.0 when its cvr is 0, else 1.5; lower tiers default to 5.0.
bias2_missing = pd.isnull(stat_slotad_cvr['bias2'])
stat_slotad_cvr.loc[(stat_slotad_cvr['stat_cvr2'] == 0) & bias2_missing, 'bias2'] = 5.0
stat_slotad_cvr.loc[(stat_slotad_cvr['stat_cvr2'] != 0) & bias2_missing, 'bias2'] = 1.5
stat_slotad_cvr[['bias0', 'bias1']] = stat_slotad_cvr[['bias0', 'bias1']].fillna(value=5.0)
# Round each member column, turn it into a string and collect the members
# into one list-of-strings column per set.
# NOTE(review): these sets hold strings while the no-history sets hold
# floats — confirm the consumer accepts both.
for set_col, member_cols, fill_value, digits in (
        ('confidenceSet', ['confidence0', 'confidence1', 'confidence2'], 0, 2),
        ('cvrSet', cvr_cols, 0, 6),
        ('biasSet', ['bias0', 'bias1', 'bias2'], 5.0, 6),
        ('priceSection', ['fee0', 'fee1'], 9999.0, 1)):
    for member in member_cols:
        stat_slotad_cvr[member] = (
            stat_slotad_cvr[member].fillna(value=fill_value).round(digits).astype('str'))
    stat_slotad_cvr[set_col] = pd.Series(
        stat_slotad_cvr[member_cols].values.tolist(), index=stat_slotad_cvr.index)
# Select the slot/advert columns and build the Redis key/value pairs.
set_cols = ['cvrSet', 'biasSet', 'confidenceSet', 'priceSection']
stat_slotad_cvr_good = stat_slotad_cvr[['slotid', 'advert_id'] + set_cols].copy()
stat_slotad_cvr_good[['slotid', 'advert_id']] = stat_slotad_cvr_good[['slotid', 'advert_id']].astype('str')
stat_slotad_cvr_good['key'] = "NZ_K076_" + stat_slotad_cvr_good['slotid'] + "_" + stat_slotad_cvr_good['advert_id']
stat_slotad_cvr_good['value'] = stat_slotad_cvr_good[set_cols].apply(
    lambda x: x.to_json(orient='index'), axis=1)
# Re-number rows 0..n-1 so positional batching in the writer works.
stat_slotad_cvr_good.index = range(stat_slotad_cvr_good.shape[0])
# Connect to the nezha Redis instance.
# NOTE(review): host and password are hard-coded in source; consider moving
# them to configuration that is not committed.
pool = redis.ConnectionPool(host='r-bp18da0abeaddc94285.redis.rds.aliyuncs.com',
                            password='hteK73Zxx3ji9LGCy2jBAZDJ6', port=6379, db=0)
r = redis.Redis(connection_pool=pool)
pipe = r.pipeline(transaction=True)
# Retire yesterday's candidate set: the keys are not deleted, they are
# re-written with a 900 s TTL so any key not refreshed below expires shortly.
print('stat_slotad_cvr_good2-----')
stat_slotad_cvr_good_old = pd.read_csv('stat_slotad_cvr_good2.csv')
# Keep a backup of the previous set before it is overwritten at the end.
stat_slotad_cvr_good_old.to_csv('stat_slotad_cvr_good_old2.csv', index=False)
for i, (key, value) in enumerate(zip(stat_slotad_cvr_good_old['key'],
                                     stat_slotad_cvr_good_old['value'])):
    pipe.set(key, value, ex=900)
    # Flush the pipeline every 5000 rows and pause briefly between batches.
    if i % 5000 == 0:
        pipe.execute()
        print(i)
        time.sleep(0.5)
pipe.execute()
# Write today's set. key: NZ_K076_<slotId>_<advertId>,
# value: JSON with cvr / bias / confidence / price sets; TTL 432000 s (5 days).
for i, (key, value) in enumerate(zip(stat_slotad_cvr_good['key'],
                                     stat_slotad_cvr_good['value'])):
    pipe.set(key, value, ex=432000)
    if i % 5000 == 0:
        pipe.execute()
        print(i)
        time.sleep(0.5)
pipe.execute()
# Persist today's set; the next run reads it back as "yesterday's" set.
stat_slotad_cvr_good.to_csv('stat_slotad_cvr_good2.csv', index=False)
# Build and publish the with-history candidate set, then report the outcome to
# DingTalk. Any failure inside the try is reported instead of crashing the script.
try:
    # Per (app, slot, advert): cost, statistical cvr, predicted cvr and bias
    # for three bid tiers over the last 15 days.
    # Tier 0: orders whose fee is below the advert's 33rd-percentile fee.
    sql_ad_fee0 = '''
select app_id,slotid,advert_id,
sum(charge_fees) cost,
sum(charge_fees)/sum(act_click_cnt) costconvert,
sum(act_click_cnt)/sum(charge_cnt) stat_cvr,
avg(pre_cvr) pre_cvr,
avg(pre_cvr)/(sum(act_click_cnt)/sum(charge_cnt)) bias ,
avg(b.fee0) fee0
from
(select * from advert.dws_advert_order_wide_v4_level_6_di where dt>='{0}' and dt<='{1}') a
left outer join
(select advert_id,percentile(fee,0.33) fee0
from advert.dws_advert_order_wide_v4_level_6_di
where dt>='{0}' and dt<='{1}' and fee>0
group by advert_id) b
on a.advert_id=b.advert_id
where a.fee<b.fee0 and a.slotid is not null
group by app_id,slotid,a.advert_id
'''.format(yestoday15, yestoday1)
    # Tier 1: orders whose fee is below the advert's 66th-percentile fee.
    sql_ad_fee1 = '''
select app_id,slotid,advert_id,
sum(charge_fees) cost,
sum(charge_fees)/sum(act_click_cnt) costconvert,
sum(act_click_cnt)/sum(charge_cnt) stat_cvr,
avg(pre_cvr) pre_cvr,
avg(pre_cvr)/(sum(act_click_cnt)/sum(charge_cnt)) bias,
avg(b.fee1) fee1
from
(select * from advert.dws_advert_order_wide_v4_level_6_di where dt>='{0}' and dt<='{1}') a
left outer join
(select advert_id,percentile(fee,0.66) fee1
from advert.dws_advert_order_wide_v4_level_6_di
where dt>='{0}' and dt<='{1}' and fee>0
group by advert_id) b
on a.advert_id=b.advert_id
where a.fee<b.fee1 and a.slotid is not null
group by app_id,slotid,a.advert_id
'''.format(yestoday15, yestoday1)
    # Tier 2: all orders, no fee filter.
    sql_ad_fee2='''select app_id,slotid,advert_id,
sum(charge_fees) cost,
sum(charge_fees)/sum(act_click_cnt) costconvert,
sum(act_click_cnt)/sum(charge_cnt) stat_cvr,
avg(pre_cvr) pre_cvr,
avg(pre_cvr)/(sum(act_click_cnt)/sum(charge_cnt)) bias
from advert.dws_advert_order_wide_v4_level_6_di
where dt>='{0}' and dt<='{1}' and slotid is not null
group by app_id,slotid,advert_id
'''.format(yestoday15, yestoday1)
    # Advert-wide cost per conversion, used as the scale for confidence.
    sql_ad_costconvert='''select advert_id,
sum(charge_fees)/sum(act_click_cnt) ad_costconvert
from advert.dws_advert_order_wide_v4_level_6_di
where dt>='{0}' and dt<='{1}'
group by advert_id
'''.format(yestoday15, yestoday1)
    join_keys = ['app_id', 'slotid', 'advert_id']
    tier_metrics = ['cost', 'costconvert', 'stat_cvr', 'pre_cvr', 'bias']
    # Fetch each tier, suffix its metric columns with the tier number and
    # keep only rows with positive cost. columns= tolerates empty results.
    tier_frames = {}
    for tier, tier_sql, fee_cols in (('0', sql_ad_fee0, ['fee0']),
                                     ('1', sql_ad_fee1, ['fee1']),
                                     ('2', sql_ad_fee2, [])):
        cursor.execute(tier_sql)
        tier_frame = pd.DataFrame(
            cursor.fetchall(),
            columns=join_keys + [metric + tier for metric in tier_metrics] + fee_cols)
        tier_frames[tier] = tier_frame.loc[tier_frame['cost' + tier] > 0]
    stat_slotad_cvr_fee0 = tier_frames['0']
    stat_slotad_cvr_fee1 = tier_frames['1']
    stat_slotad_cvr_fee2 = tier_frames['2']
    cursor.execute(sql_ad_costconvert)
    ad_costconvert = pd.DataFrame(cursor.fetchall(), columns=['advert_id', 'ad_costconvert'])
    # The all-orders tier is the base; lower tiers and advert cost are left-joined.
    stat_slotad_cvr_fee21 = pd.merge(stat_slotad_cvr_fee2, stat_slotad_cvr_fee1, how='left', on=join_keys)
    stat_slotad_cvr_fee210 = pd.merge(stat_slotad_cvr_fee21, stat_slotad_cvr_fee0, how='left', on=join_keys)
    stat_slotad_cvr = pd.merge(stat_slotad_cvr_fee210, ad_costconvert, how='left', on=['advert_id'])
    # .copy() so the assignments below do not write into a filtered slice.
    stat_slotad_cvr = stat_slotad_cvr.loc[pd.notnull(stat_slotad_cvr['ad_costconvert'])].copy()
    for tier in ('0', '1', '2'):
        conf_col, cvr_col, pre_col = 'confidence' + tier, 'stat_cvr' + tier, 'pre_cvr' + tier
        # confidence = tier cost relative to 5 advert-level conversions, capped at 1.
        stat_slotad_cvr[conf_col] = stat_slotad_cvr['cost' + tier] / (stat_slotad_cvr['ad_costconvert'] * 5)
        stat_slotad_cvr.loc[stat_slotad_cvr[conf_col] > 1, conf_col] = 1
        # Missing statistical cvr: 0 when confidence >= 0.2, otherwise fall
        # back to the predicted cvr of the same tier.
        cvr_missing = pd.isnull(stat_slotad_cvr[cvr_col])
        stat_slotad_cvr.loc[(stat_slotad_cvr[conf_col] >= 0.2) & cvr_missing, cvr_col] = 0
        low_conf_missing = (stat_slotad_cvr[conf_col] < 0.2) & cvr_missing
        stat_slotad_cvr.loc[low_conf_missing, cvr_col] = stat_slotad_cvr.loc[low_conf_missing, pre_col]
    cvr_cols = ['stat_cvr0', 'stat_cvr1', 'stat_cvr2']
    stat_slotad_cvr[cvr_cols] = stat_slotad_cvr[cvr_cols].fillna(value=0)
    # Missing all-orders bias: 5.0 when its cvr is 0, else 1.5; lower tiers default to 5.0.
    bias2_missing = pd.isnull(stat_slotad_cvr['bias2'])
    stat_slotad_cvr.loc[(stat_slotad_cvr['stat_cvr2'] == 0) & bias2_missing, 'bias2'] = 5.0
    stat_slotad_cvr.loc[(stat_slotad_cvr['stat_cvr2'] != 0) & bias2_missing, 'bias2'] = 1.5
    stat_slotad_cvr[['bias0', 'bias1']] = stat_slotad_cvr[['bias0', 'bias1']].fillna(value=5.0)
    # Round each member column, turn it into a string and collect the
    # members into one list-of-strings column per set.
    # NOTE(review): these sets hold strings while the no-history sets hold
    # floats — confirm the consumer accepts both.
    for set_col, member_cols, fill_value, digits in (
            ('confidenceSet', ['confidence0', 'confidence1', 'confidence2'], 0, 2),
            ('cvrSet', cvr_cols, 0, 6),
            ('biasSet', ['bias0', 'bias1', 'bias2'], 5.0, 6),
            ('priceSection', ['fee0', 'fee1'], 9999.0, 1)):
        for member in member_cols:
            stat_slotad_cvr[member] = (
                stat_slotad_cvr[member].fillna(value=fill_value).round(digits).astype('str'))
        stat_slotad_cvr[set_col] = pd.Series(
            stat_slotad_cvr[member_cols].values.tolist(), index=stat_slotad_cvr.index)
    # Select the slot/advert columns and build the Redis key/value pairs.
    set_cols = ['cvrSet', 'biasSet', 'confidenceSet', 'priceSection']
    stat_slotad_cvr_good = stat_slotad_cvr[['slotid', 'advert_id'] + set_cols].copy()
    stat_slotad_cvr_good[['slotid', 'advert_id']] = stat_slotad_cvr_good[['slotid', 'advert_id']].astype('str')
    stat_slotad_cvr_good['key'] = "NZ_K076_" + stat_slotad_cvr_good['slotid'] + "_" + stat_slotad_cvr_good['advert_id']
    stat_slotad_cvr_good['value'] = stat_slotad_cvr_good[set_cols].apply(
        lambda x: x.to_json(orient='index'), axis=1)
    stat_slotad_cvr_good.index = range(stat_slotad_cvr_good.shape[0])
    # Connect to the nezha Redis instance.
    # NOTE(review): host and password are hard-coded in source.
    pool = redis.ConnectionPool(host='r-bp18da0abeaddc94285.redis.rds.aliyuncs.com',
                                password='hteK73Zxx3ji9LGCy2jBAZDJ6', port=6379, db=0)
    r = redis.Redis(connection_pool=pool)
    pipe = r.pipeline(transaction=True)
    # Retire yesterday's set: keys are re-written with a 900 s TTL (not
    # deleted) so anything not refreshed below expires shortly.
    print('stat_slotad_cvr_good2-----')
    stat_slotad_cvr_good_old = pd.read_csv('stat_slotad_cvr_good2.csv')
    stat_slotad_cvr_good_old.to_csv('stat_slotad_cvr_good_old2.csv', index=False)
    for i, (key, value) in enumerate(zip(stat_slotad_cvr_good_old['key'],
                                         stat_slotad_cvr_good_old['value'])):
        pipe.set(key, value, ex=900)
        # Flush every 5000 rows and pause briefly between batches.
        if i % 5000 == 0:
            pipe.execute()
            print(i)
            time.sleep(0.5)
    pipe.execute()
    # Write today's set. key: NZ_K076_<slotId>_<advertId>,
    # value: JSON with cvr / bias / confidence / price sets; TTL 432000 s.
    for i, (key, value) in enumerate(zip(stat_slotad_cvr_good['key'],
                                         stat_slotad_cvr_good['value'])):
        pipe.set(key, value, ex=432000)
        if i % 5000 == 0:
            pipe.execute()
            print(i)
            time.sleep(0.5)
    pipe.execute()
    # Persist today's set; the next run reads it back as "yesterday's" set.
    stat_slotad_cvr_good.to_csv('stat_slotad_cvr_good2.csv', index=False)
except Exception:
    # Print the traceback so the failure can be diagnosed from the job log,
    # then alert; KeyboardInterrupt/SystemExit are no longer swallowed.
    import traceback
    traceback.print_exc()
    xiaoding.send_text(msg='候选集(有历史数据部分存储)程序异常!!!请排查!', at_mobiles=at_mobiles)
else:
    xiaoding.send_text(msg='候选集(有历史数据部分)存储成功', at_mobiles=at_mobiles)
##############################################################################################
###############################################################################################
......
......@@ -12,7 +12,7 @@ from dingtalkchatbot.chatbot import DingtalkChatbot
# DingTalk robot webhook URL.
# NOTE(review): the access token is embedded in source — confirm this is intended.
webhook = 'https://oapi.dingtalk.com/robot/send?access_token=4f28ce996ab4f2601c0362fbfd0d48f58b0250a76953ff117ca41e9f1ec8e565'
# Create the DingTalk chatbot client. The earlier `DtalkChatbot(webhook)` call
# was removed: that name is never imported (only DingtalkChatbot is) and
# would raise NameError.
xiaoding = DingtalkChatbot(webhook)
at_mobiles = ['18668032242']
# Work from the project directory.
os.chdir('/home/db_dlp/mengxiangxuan/auto_spread')
......@@ -54,7 +54,7 @@ try:
pre_slotad_stat_cvr['slotid'] = bb[:, 0]
pre_slotad_stat_cvr['advert_id'] = bb[:, 1]
pre_slotad_stat_cvr['cvr'] = ll[:, 1]
# pre_slotad_stat_cvr.head()
pre_slotad_stat_cvr['cvr'] = pre_slotad_stat_cvr['cvr'].map(lambda x: x.replace('E-', 'E-5'))
pre_slotad_stat_cvr['cvr'] = pre_slotad_stat_cvr['cvr'].astype('float')
# 匹配广告行业
......@@ -184,7 +184,7 @@ try:
for i in pre_slotad_cvr_good_old.index:
key = pre_slotad_cvr_good_old.ix[i, 'key']
value = pre_slotad_cvr_good_old.ix[i, 'value']
pipe.set(key, value, ex=200)
pipe.set(key, value, ex=600)
if i % 5000 == 0:
pipe.execute()
print(i)
......
import os
import pandas as pd
import numpy as np
from pyhive import hive
from sqlalchemy import create_engine
import pymysql
import redis
import datetime
import time
# Run from the project directory; the data files below use relative paths.
os.chdir('/home/db_dlp/mengxiangxuan/auto_spread')
# Hive cursor shared by every query in this script.
cursor = hive.connect(host='10.50.10.11', port=10000, username='mengxiangxuan', database='default').cursor()
# Reference timestamp and the 'YYYY-MM-DD' date strings used as dt bounds.
now = datetime.datetime.now()
today = now.strftime('%Y-%m-%d')
# Look-back windows of 1, 3, 5, 7, 10 and 15 days.
delta1, delta3, delta5, delta7, delta10, delta15 = (
    datetime.timedelta(days=span) for span in (1, 3, 5, 7, 10, 15)
)
# The date that lies each window before now (original spelling "yestoday" kept).
yestoday1, yestoday3, yestoday5, yestoday7, yestoday10, yestoday15 = (
    (now - window).strftime('%Y-%m-%d')
    for window in (delta1, delta3, delta5, delta7, delta10, delta15)
)
# Experiment (author's verdict: finding price split points from the bias is
# not feasible). Per advert, bucket each order's fee into a 0-100 level between
# the advert's min and max fee, then aggregate cost and conversions per level.
sql='''select advert_id,
fee_level,
avg(fee) fee,
sum(charge_fees) cost,
sum(act_click_cnt) convert,
avg(ad_costconvert) ad_costconvert
from
(select a.advert_id,
a.charge_fees,a.act_click_cnt,a.fee,
b.min_fee,b.max_fee,
floor((a.fee-b.min_fee)*100/(b.max_fee-b.min_fee)) fee_level,
b.ad_costconvert
from (select * from advert.dws_advert_order_wide_v4_level_3_di
where dt>='{0}' and dt<='{1}' and fee>0 and charge_fees>0) a
left outer join
(select
advert_id,min(fee) min_fee,max(fee) max_fee,
sum(charge_fees)/sum(act_click_cnt) ad_costconvert
from advert.dws_advert_order_wide_v4_level_3_di
where dt>='{0}' and dt<='{1}' and fee>0 and charge_fees>0
group by advert_id) b
on a.advert_id=b.advert_id) t
group by advert_id,fee_level'''.format(yestoday3, yestoday1)
cursor.execute(sql)
# Passing columns= keeps this working when the query returns no rows.
fee_level_data = pd.DataFrame(
    cursor.fetchall(),
    columns=['advert_id', 'fee_level', 'fee', 'cost', 'convert', 'ad_costconvert'])
fee_level_data['convert'] = fee_level_data['convert'].fillna(value=0)
# The query has no ORDER BY, so rows arrive in arbitrary order; sort by advert
# and fee level so the running totals accumulate from the lowest level upward.
fee_level_data = fee_level_data.sort_values(['advert_id', 'fee_level']).reset_index(drop=True)
fee_level_data['cost_cumsum'] = fee_level_data['cost'].groupby(fee_level_data['advert_id']).cumsum()
fee_level_data['convert_cumsum'] = fee_level_data['convert'].groupby(fee_level_data['advert_id']).cumsum()
# Cumulative cost per conversion up to each level, relative to the advert-wide figure.
fee_level_data['costconvert'] = fee_level_data['cost_cumsum'] / fee_level_data['convert_cumsum']
fee_level_data['costconvert_bias'] = fee_level_data['costconvert'] / fee_level_data['ad_costconvert']
fee_level_data.to_csv('fee_level_data.csv', index=False)
# Split points by quantile -- evaluation of the predicted cvr.
# Load the predicted ctr/cvr per "<slotid>_<advert_id>" pair.
nolunch_pre_cvr = pd.read_table(r'slot_ad_cvr.txt', sep=',')
nolunch_pre_cvr.columns = ['slotad', 'ctr', 'cvr']
nolunch_pre_cvr[['slotid', 'advert_id']] = nolunch_pre_cvr['slotad'].str.split('_', expand=True)
# ctr-weighted average of the predicted cvr per advert.
nolunch_pre_cvr['w_sum_cvr'] = nolunch_pre_cvr['ctr'] * nolunch_pre_cvr['cvr']
# Sum only the numeric columns and keep advert_id as a plain column, so the
# merge key below is not both an index level and a column.
nolunch_ad_cvr = nolunch_pre_cvr.groupby('advert_id', as_index=False)[['ctr', 'cvr', 'w_sum_cvr']].sum()
nolunch_ad_cvr['pre_ad_cvr'] = nolunch_ad_cvr['w_sum_cvr'] / nolunch_ad_cvr['ctr']
# Average statistical cvr per advert from the last 15 days of result logs.
sql = '''
select advert_id,avg(stat_cvr)
from logs.dwd_nezha_result_log_di
where dt>='{0}' and dt<='{1}' and order_id is not null
group by advert_id
'''.format(yestoday15, yestoday1)
cursor.execute(sql)
# Passing columns= keeps this working when the query returns no rows.
ad_pre_eva = pd.DataFrame(cursor.fetchall(), columns=['advert_id', 'ad_cvr'])
ad_pre_eva['advert_id'] = ad_pre_eva['advert_id'].astype('str')
ad_pre_cvr_eva = pd.merge(nolunch_ad_cvr, ad_pre_eva, how='left', on=['advert_id'])
ad_pre_cvr_eva['ad_pre_diff'] = ad_pre_cvr_eva['pre_ad_cvr'] / ad_pre_cvr_eva['ad_cvr']
# Share of adverts whose predicted cvr exceeds 1.5x the statistical cvr.
# The original computed this and discarded it (and would truncate under
# Python 2 integer division); it is now kept and printed.
# NOTE(review): the original comment recorded "85% accuracy" for this figure.
advert_total = ad_pre_cvr_eva.shape[0]
over_predicted = (ad_pre_cvr_eva['ad_pre_diff'] > 1.5).sum()
over_predicted_ratio = float(over_predicted) / advert_total if advert_total else float('nan')
print(over_predicted_ratio)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment