Commit 62f80b41 authored by mxx's avatar mxx

1

parent 0ba69625
# -*- coding: utf-8 -*-
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from pyhive import hive
......@@ -10,12 +11,13 @@ from sklearn import metrics
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import log_loss
import os
os.chdir('/home/db_dlp/mengxiangxuan/model_evaluat')
# Partition date: yesterday, formatted to match the Hive `dt` partition (YYYY-MM-DD).
now = datetime.datetime.now()
delta = datetime.timedelta(days=1)
yestoday = (now - delta).strftime('%Y-%m-%d')
# Collapsed the duplicated assignment (`dt=yestoday` / `dt = yestoday`) that was a
# flattened old/new pair from a diff; both set the same value.
dt = yestoday
# NOTE(review): Hive host/username are hard-coded; consider moving to configuration.
cursor = hive.connect(host='10.50.10.11', port=10000, username='mengxiangxuan', database='default').cursor()
sql_ctr_roc = '''select ctr_label,pre_ctr,
......@@ -66,6 +68,8 @@ sql_ctr_roc = '''select ctr_label,pre_ctr,
WHEN rcmd_type=258 THEN 'ffm001'
WHEN rcmd_type=259 THEN 'ffm004'
WHEN rcmd_type=260 THEN 'esmm003'
WHEN rcmd_type=261 THEN 'xDeepFm5'
WHEN rcmd_type=262 THEN 'xDeepFm6'
END AS rcmd_name
from advert.dws_advert_order_wide_v4_level_6_di
where dt='{}' and pre_ctr is not null
......@@ -122,6 +126,7 @@ sql_cvr_roc = '''select cvr_label,pre_cvr,
WHEN rcmd_type=260 THEN 'esmm003'
WHEN rcmd_type=261 THEN 'xDeepFm5'
WHEN rcmd_type=262 THEN 'xDeepFm6'
END AS rcmd_name
from
(select * from advert.dws_advert_order_wide_v4_level_6_di
......@@ -140,24 +145,22 @@ cursor.execute(sql_cvr_roc)
cvr_roc_data=pd.DataFrame(cursor.fetchall(),
columns=['cvr_label','pre_cvr','rcmd_name'])
# Algorithm buckets to evaluate.  The source contained both the pre- and
# post-diff versions of this list flattened together; this is the "after"
# version (the superseded entries live in VCS history).
#'alg-4.0',
alg=['alg-4.0.1','alg-4.0.2','alg-4.1','alg-4.2.1',
     'alg-online-learn223',
     'alg-app-optimize',
     'BTM_AND_PC_31',
     'fix_bias_2','fix_bias_3','ffm004',
     'fnn2','dcn2',
     'essm_online1','Material_reform_online',
     'xDeepFm4','essm_deep3','essm_deep4']
######## ROC curves =============================================================================
# Subplot coordinates for a 5x4 grid: one cell per algorithm (17 of the 20
# cells are used); matches the (5, 4) shape passed to subplot2grid below.
part=[(0,0),(0,1),(0,2),(0,3),(1,0),(1,1),(1,2),(1,3),(2,0),(2,1),(2,2),(2,3),(3,0),(3,1),(3,2),(3,3),
      (4,0),(4,1),(4,2),(4,3)]
#ctr
ctr_cnt=[]
plt.figure(figsize=(16,32))
plt.figure(figsize=(16,20))
plt.style.use('ggplot')
for i in range(len(alg)):
y_ctr = ctr_roc_data.ix[ctr_roc_data['rcmd_name']==alg[i],'ctr_label'].values
......@@ -166,7 +169,7 @@ for i in range(len(alg)):
ctr_auc=auc(fpr, tpr)
ctr_logloss=log_loss(y_ctr,scores_ctr)
ctr_cnt.append([alg[i], ctr_auc, len(y_ctr)])
plt.subplot2grid((9, 4), part[i])
plt.subplot2grid((5, 4), part[i])
plt.plot(fpr,tpr)
plt.title(alg[i])
plt.text(0.5,0.5,'auc={0} \nlog_loss={1} \ncnt={2}'.format(round(ctr_auc,4),ctr_logloss,len(y_ctr)))
......@@ -180,7 +183,7 @@ pd.DataFrame(ctr_cnt,columns=['ctr_alg','auc','cnt']).to_csv('ctr_auc_cnt.csv')
#cvr
cvr_cnt=[]
plt.figure(figsize=(16,32))
plt.figure(figsize=(16,20))
plt.style.use('ggplot')
for i in range(len(alg)):
y_cvr = cvr_roc_data.ix[cvr_roc_data['rcmd_name']==alg[i],'cvr_label'].values
......@@ -189,7 +192,7 @@ for i in range(len(alg)):
cvr_auc=auc(fpr, tpr)
cvr_logloss=log_loss(y_cvr,scores_cvr)
cvr_cnt.append([alg[i], cvr_auc, len(y_cvr)])
plt.subplot2grid((9, 4), part[i])
plt.subplot2grid((5, 4), part[i])
plt.plot(fpr,tpr)
plt.title(alg[i])
plt.text(0.5,0.5,'auc={0} \nlog_loss={1} \ncnt={2}'.format(round(cvr_auc,4),cvr_logloss,len(y_cvr)))
......
This diff is collapsed.
......@@ -265,3 +265,223 @@ stat_slotad_cvr_merge.ix[(stat_slotad_cvr_merge['stat_cvr0_x']>0.05) & (stat_slo
#########################################################################################
## Baseline survey for defining delivery targets: per (app, slot, advert)
## spend, conversions, predicted vs. realized CVR and their ratio (bias).
sql='''
select app_id,slotid,advert_id,
sum(charge_fees) cost,
sum(act_click_cnt) convert,
avg(pre_cvr) pre_cvr,
sum(act_click_cnt)/sum(charge_cnt) stat_cvr_0,
avg(pre_cvr)/(sum(act_click_cnt)/sum(charge_cnt)) bias_0,
sum(charge_fees)/sum(act_click_cnt) costconvert,
avg(afee) afee
from advert.dws_advert_order_wide_v4_level_8_di
where dt>='2019-02-13' and dt<='2019-02-15'
and new_trade!='应用分发' and charge_type=1
and slotid is not null
group by app_id,slotid,advert_id
'''
cursor.execute(sql)
df = pd.DataFrame(cursor.fetchall())
df.columns=['app_id', 'slotid', 'advert_id','cost','convert','pre_cvr','stat_cvr','bias','costconvert','afee']
# `.ix` was removed in pandas 1.0; `.loc` is the supported boolean/label indexer.
df=df.loc[df['cost']>0]
# Interactive inspection only — the result of .shape is discarded.
df.loc[df['bias']==0].shape
# Cost bias = realized cost-per-convert over the advertiser's target fee.
df['cb_bias']=df['costconvert']/df['afee']
# Bucket rows into bias bands.  Later assignments overwrite earlier ones, so
# each row ends in the narrowest matching band.  Rows with spend but no
# conversions (costconvert is null) keep the '有消耗无转化' label because the
# numeric comparisons below are False for NaN.
# (`.loc` replaces the removed pandas `.ix` API; same boolean-mask semantics.)
df.loc[pd.isnull(df['costconvert']),'cb_bias_level']='有消耗无转化'
df.loc[df['cb_bias']>1.2,'cb_bias_level']='成本偏差>1.2'
df.loc[df['cb_bias']>1.5,'cb_bias_level']='成本偏差>1.5'
df.loc[df['cb_bias']>1.7,'cb_bias_level']='成本偏差>1.7'
df.loc[df['cb_bias']>2,'cb_bias_level']='成本偏差>2'
df.loc[df['cb_bias']>2.5,'cb_bias_level']='成本偏差>2.5'
df.loc[df['cb_bias']<1.2,'cb_bias_level']='成本偏差小于1.2'
# Spend with zero conversions (有消耗无转化).  These are interactive REPL
# expressions — results are discarded; the trailing comments record the
# observed values.  (`.loc` replaces the removed pandas `.ix` API.)
df.loc[pd.isnull(df['costconvert']),'cost'].sum()/df['cost'].sum() # spend share ~7.7%
# Share of (app, slot, advert) combos
df.loc[pd.isnull(df['costconvert'])].shape[0]/df.shape[0] # 74% of combos spend without converting
df.loc[(pd.isnull(df['costconvert'])) & (df['cost']>10000)].shape[0]/df.shape[0] # 0%
df.loc[(pd.isnull(df['costconvert'])) & (df['cost']>2000)].shape[0]/df.shape[0] # 2.5%
df.loc[(pd.isnull(df['costconvert'])) & (df['cost']>1000)].shape[0]/df.shape[0] # 7%
df.loc[pd.isnull(df['costconvert']),'cost'].sum()/df['cost'].sum() # 7.7%
df.loc[(pd.isnull(df['costconvert'])) & (df['cost']>2000),'cost'].sum()/df['cost'].sum() # 2.6%
df.loc[(pd.isnull(df['costconvert'])) & (df['cost']>1000),'cost'].sum()/df['cost'].sum() # 4%
# Conclusion: ~74% of combos (7.7% of spend) convert nothing; concentrated in
# low-confidence combinations.
# Overall bucket sizes
df.groupby('cb_bias_level').size()
# Bucket counts (console output pasted into the file; commented out so the
# module stays syntactically valid)
# 成本偏差(1.2,1.5)    2049
# 成本偏差>1.5,1.7      805
# 成本偏差>1.7,2        791
# 成本偏差>2 ,2.5       706
# 成本偏差>2.5~         769
# 成本偏差~1.2        16412
# 有消耗无转化         62251
# Spend per bucket
df['cost'].groupby(df['cb_bias_level']).sum()
# 成本偏差>1.2,1.5     29361048.0
# 成本偏差>1.5,1.7      8748328.0
# 成本偏差>1.7,2        6154347.0
# 成本偏差>2,2.5        4257706.0
# 成本偏差>2.5~         4235043.0
# 成本偏差<1.2        218790894.0
# 有消耗无转化         22564716.0
# Cost breakdown by prediction-bias band.  Each query is followed by its
# pasted console output, commented out to keep the file importable.
# (`.loc` replaces the removed pandas `.ix` API throughout.)
df.loc[df['bias']>2.5,'cost'].sum()/df['cost'].sum() # 2%
df.loc[df['bias']>2.5].groupby('cb_bias_level').size()
# 成本偏差>1.2      43
# 成本偏差>1.5      43
# 成本偏差>1.7      93
# 成本偏差>2       221
# 成本偏差>2.5     552
# 成本偏差小于1.2   65
df.loc[df['bias']>2.5,'cost'].groupby(df['cb_bias_level']).sum()
# 成本偏差>1.2      395990.0
# 成本偏差>1.5      292962.0
# 成本偏差>1.7      527933.0
# 成本偏差>2        979272.0
# 成本偏差>2.5     3012548.0
# 成本偏差小于1.2   669782.0
df.loc[(df['bias']>2) & (df['bias']<2.5),'cost'].sum()/df['cost'].sum() # 2%
df.loc[(df['bias']>2) & (df['bias']<2.5)].groupby('cb_bias_level').size()
# 成本偏差>1.2      87
# 成本偏差>1.5     106
# 成本偏差>1.7     203
# 成本偏差>2       252
# 成本偏差>2.5     107
# 成本偏差小于1.2   43
df.loc[(df['bias']>2) & (df['bias']<2.5),'cost'].groupby(df['cb_bias_level']).sum()
# 成本偏差>1.2     1041230.0
# 成本偏差>1.5      899478.0
# 成本偏差>1.7     1379212.0
# 成本偏差>2       1328635.0
# 成本偏差>2.5      410938.0
# 成本偏差小于1.2   463864.0
df.loc[(df['bias']>1.7) & (df['bias']<2),'cost'].sum()/df['cost'].sum() # 2%
df.loc[(df['bias']>1.7) & (df['bias']<2)].groupby('cb_bias_level').size()
# 成本偏差>1.2     207
# 成本偏差>1.5     208
# 成本偏差>1.7     224
# 成本偏差>2       112
# 成本偏差>2.5      52
# 成本偏差小于1.2   101
df.loc[(df['bias']>1.7) & (df['bias']<2),'cost'].groupby(df['cb_bias_level']).sum()
# 成本偏差>1.2     1499413.0
# 成本偏差>1.5     1201947.0
# 成本偏差>1.7     1265642.0
# 成本偏差>2        509800.0
# 成本偏差>2.5      436086.0
# 成本偏差小于1.2    847811.0
df.loc[(df['bias']>1.5) & (df['bias']<1.7),'cost'].sum()/df['cost'].sum() # 3%
df.loc[(df['bias']>1.5) & (df['bias']<1.7)].groupby('cb_bias_level').size()
# 成本偏差>1.2     350
# 成本偏差>1.5     173
# 成本偏差>1.7     108
# 成本偏差>2        45
# 成本偏差>2.5      21
# 成本偏差小于1.2   178
df.loc[(df['bias']>1.5) & (df['bias']<1.7),'cost'].groupby(df['cb_bias_level']).sum()
# 成本偏差>1.2     3343895.0
# 成本偏差>1.5     1099647.0
# 成本偏差>1.7     1119278.0
# 成本偏差>2        585935.0
# 成本偏差>2.5      109465.0
# 成本偏差小于1.2   1463302.0
df.loc[(df['bias']>1.2) & (df['bias']<1.5),'cost'].sum()/df['cost'].sum() # 12%
df.loc[(df['bias']>1.2) & (df['bias']<1.5)].groupby('cb_bias_level').size()
# 成本偏差>1.2      797
# 成本偏差>1.5      166
# 成本偏差>1.7       91
# 成本偏差>2         36
# 成本偏差>2.5       17
# 成本偏差小于1.2   1016
df.loc[(df['bias']>1.2) & (df['bias']<1.5),'cost'].groupby(df['cb_bias_level']).sum()
# 成本偏差>1.2     10675576.0
# 成本偏差>1.5      2071616.0
# 成本偏差>1.7       633924.0
# 成本偏差>2         328151.0
# 成本偏差>2.5        94828.0
# 成本偏差小于1.2   22603625.0
df.loc[df['bias']<1.2,'cost'].sum()/df['cost'].sum() # 71.5%
df.loc[df['bias']<1.2].groupby('cb_bias_level').size()
# 成本偏差>1.2      565
# 成本偏差>1.5      109
# 成本偏差>1.7       72
# 成本偏差>2         40
# 成本偏差>2.5       20
# 成本偏差小于1.2   15009
df.loc[df['bias']<1.2,'cost'].groupby(df['cb_bias_level']).sum()
# 成本偏差>1.2     12404944.0
# 成本偏差>1.5      3182678.0
# 成本偏差>1.7      1228358.0
# 成本偏差>2         525913.0
# 成本偏差>2.5       171178.0
# 成本偏差小于1.2   192742510.0
# Performance on expansion (拓新) traffic: re-pull the same stats one day
# later and join against the candidate set exported on 02-15.
sql='''
select app_id,slotid,advert_id,
sum(charge_fees) cost,
sum(act_click_cnt) convert,
avg(pre_cvr) pre_cvr,
sum(act_click_cnt)/sum(charge_cnt) stat_cvr_0,
avg(pre_cvr)/(sum(act_click_cnt)/sum(charge_cnt)) bias_0,
sum(charge_fees)/sum(act_click_cnt) costconvert,
avg(afee) afee
from advert.dws_advert_order_wide_v4_level_8_di
where dt>='2019-02-14' and dt<='2019-02-16'
and new_trade!='应用分发' and charge_type=1
and slotid is not null
group by app_id,slotid,advert_id
'''
cursor.execute(sql)
df = pd.DataFrame(cursor.fetchall())
df.columns=['app_id', 'slotid', 'advert_id','cost','convert','pre_cvr','stat_cvr','bias','costconvert','afee']
# Normalize join keys to str on both sides before merging.
df[['app_id', 'slotid', 'advert_id']]=df[['app_id', 'slotid', 'advert_id']].astype('str')
dd=pd.read_csv('not_download_stat_slotad_cvr_old215.csv')
dd[['slotid', 'advert_id']]=dd[['slotid', 'advert_id']].astype('str')
df=pd.merge(df,dd,how='inner',on=['slotid', 'advert_id'])
df.groupby('cb_bias_level').size()
# `.ix` was removed in pandas 1.0; `.loc` is the supported boolean indexer.
df=df.loc[df['cost']>0]
df.loc[df['bias']==0].shape
df['cb_bias']=df['costconvert']/df['afee']
# Same bucketing as the baseline survey: later assignments overwrite earlier
# ones, so each row ends in the narrowest matching band.
df.loc[pd.isnull(df['costconvert']),'cb_bias_level']='有消耗无转化'
df.loc[df['cb_bias']>1.2,'cb_bias_level']='成本偏差>1.2'
df.loc[df['cb_bias']>1.5,'cb_bias_level']='成本偏差>1.5'
df.loc[df['cb_bias']>1.7,'cb_bias_level']='成本偏差>1.7'
df.loc[df['cb_bias']>2,'cb_bias_level']='成本偏差>2'
df.loc[df['cb_bias']>2.5,'cb_bias_level']='成本偏差>2.5'
df.loc[df['cb_bias']<1.2,'cb_bias_level']='成本偏差小于1.2'
df.groupby('cb_bias_level').size()
# Bucket counts (pasted console output, commented out so the file stays
# syntactically valid)
# 成本偏差>1.2       1986
# 成本偏差>1.5        756
# 成本偏差>1.7        792
# 成本偏差>2          651
# 成本偏差>2.5        819
# 成本偏差小于1.2   14863
# 有消耗无转化      53907
# Spend per bucket
df['cost'].groupby(df['cb_bias_level']).sum()
# 成本偏差>1.2     25590365.0
# 成本偏差>1.5      7804690.0
# 成本偏差>1.7      6907655.0
# 成本偏差>2        3673685.0
# 成本偏差>2.5      4701507.0
# 成本偏差<1.2    203751319.0
# 有消耗无转化     20987495.0
\ No newline at end of file
# Publish trusteeship tuning parameters for the nezha service: serialize one
# nested dict to JSON and store it in Redis under a single agreed key.
import redis
import json
# Per release-target parameter sets: three "feedBackParams" groups and three
# "slotRecommendParams" groups.  The factor semantics are consumed by the
# online service and are not visible here — confirm meanings against the
# nezha-side reader before changing any value.  The dict's literal key order
# is preserved by json.dumps, so reordering entries changes the stored payload.
params_dict={
"releaseTarget3feedBackParams": {
"fuse1OrientCostG1dFactor":20000,
"fuse2OrientCostG1dFactor":50000,
"fuseOrientCostConvertBiasFactor":2.0,
"wSlotOrientationConfidenceFactor":0.2,
"wSlotOrientationCostConvertBiasFactor":1.2,
"bOrientConfidenceFactor":1.0,
"bOrientCostConvertBiasFactor":1.2,
"bSlotOrientationConfidenceFactor":0.5,
"bSlotOrientationCostConvertBiasFactor":3.5
},
"releaseTarget2feedBackParams": {
"fuse1OrientCostG1dFactor":20000,
"fuse2OrientCostG1dFactor":50000,
"fuseOrientCostConvertBiasFactor":2.0,
"wSlotOrientationConfidenceFactor":0.2,
"wSlotOrientationCostConvertBiasFactor":1.1,
"bOrientConfidenceFactor":1.0,
"bOrientCostConvertBiasFactor":1.2,
"bSlotOrientationConfidenceFactor":0.5,
"bSlotOrientationCostConvertBiasFactor":3.5
},
"releaseTarget1feedBackParams": {
"fuse1OrientCostG1dFactor":20000,
"fuse2OrientCostG1dFactor":50000,
"fuseOrientCostConvertBiasFactor":2.0,
"wSlotOrientationConfidenceFactor":0.2,
"wSlotOrientationCostConvertBiasFactor":1.3,
"bOrientConfidenceFactor":1.0,
"bOrientCostConvertBiasFactor":1.3,
"bSlotOrientationConfidenceFactor":0.8,
"bSlotOrientationCostConvertBiasFactor":4.0
},
"releaseTarget3slotRecommendParams": {
"startFactor":0.5,
"cpaBiasRatioFactor":1.5,
"cpaOrientRatioFactor":1.0,
"cpaBiasThresholdFactor":2.5,
"cpcTargetRatioFactor":1.0,
"cpcOrientRatioFactor":0.1,
"cpcBiasThresholdFactor":1.5
},
"releaseTarget2slotRecommendParams": {
"startFactor":0.5,
"cpaBiasRatioFactor":0.8,
"cpaOrientRatioFactor":0.5,
"cpaBiasThresholdFactor":1.5,
"cpcTargetRatioFactor":0.8,
"cpcOrientRatioFactor":0.1,
"cpcBiasThresholdFactor":1
},
"releaseTarget1slotRecommendParams": {
"startFactor":0.6,
"cpaBiasRatioFactor":1.6,
"cpaOrientRatioFactor":1.0,
"cpaBiasThresholdFactor":3.0,
"cpcTargetRatioFactor":1.0,
"cpcOrientRatioFactor":0.1,
"cpcBiasThresholdFactor":1.5
}
}
# Agreed key consumed by the online side.
params_key="NZ_K86_trusteeship_params"
params_value=json.dumps(params_dict)
# NOTE(review): Redis host and password are hard-coded in source — move them
# to configuration / a secret store and rotate the credential.
pool = redis.ConnectionPool(host='r-bp18da0abeaddc94285.redis.rds.aliyuncs.com',
password='hteK73Zxx3ji9LGCy2jBAZDJ6', port=6379, db=0)
r = redis.Redis(connection_pool=pool)
# No TTL is set: the key persists until overwritten by the next run.
r.set(params_key,params_value)
print(params_value)
\ No newline at end of file
import pandas as pd
from pyhive import hive
import redis
import os
os.chdir('/home/db_dlp/mengxiangxuan/auto_spread')
# NOTE(review): Hive host/username hard-coded; consider moving to configuration.
cursor = hive.connect(host='10.50.10.11', port=10000, username='mengxiangxuan', database='default').cursor()
# Candidate sets exported by upstream jobs.
pre_slotad_stat_cvr=pd.read_csv('pre_slotad_stat_cvr.csv')                        # no historical data
not_download_stat_slotad_cvr=pd.read_csv('not_download_stat_slotad_cvr.csv')      # has historical data
historyDownload_stat_slotad_cvr=pd.read_csv('historyDownload_stat_slotad_cvr.csv')  # app-download trade
# Risk-control data: slots with shielded adverts.
sql='''
select slot_id,level,shield_advert from tmp.slot_shile_advert_merge
'''
cursor.execute(sql)
sheld_data = pd.DataFrame(cursor.fetchall())
sheld_data.columns=['slotid','level','advert_id']
# advert_id != -1: shield one specific (slot, advert) pair.
# `.loc` replaces the removed pandas `.ix` API; `.copy()` avoids the
# chained-assignment ambiguity when the 'key'/'value' columns are added below.
risk_data=sheld_data.loc[sheld_data['advert_id']!=-1].copy()
risk_data['key'] = "NZ_K76_" + risk_data['slotid'].astype('str') + "_" + risk_data['advert_id'].astype('str')
# Sentinel payload: zero spend/cvr, maximal bias — effectively blocks the pair.
risk_data['value'] = '{"cost20d":0.0,"cvrSet":["0.0","0","0","0","0"],"biasSet":["99.0","99","99","99","99"]}'
# Connect to nezha-redis.
# NOTE(review): credentials hard-coded in source — move to a secret store.
pool = redis.ConnectionPool(host='r-bp18da0abeaddc94285.redis.rds.aliyuncs.com',
                            password='hteK73Zxx3ji9LGCy2jBAZDJ6', port=6379, db=0)
r = redis.Redis(connection_pool=pool)
pipe = r.pipeline(transaction=True)
# NOTE(review): ex=10 gives these keys a 10-second TTL, which looks very short
# for a blacklist — confirm it is intended (a frequent rerun may rely on it).
for i in risk_data.index:
    key = risk_data.loc[i, 'key']
    value = risk_data.loc[i, 'value']
    pipe.set(key, value, ex=10)
    if i % 100 == 0:
        print(i)
pipe.execute()
# advert_id == -1: shield the whole slot — expand to every candidate row on
# that slot from the three candidate sets.
risk_data_level6=sheld_data.loc[sheld_data['advert_id']==-1]
risk_pre=pd.merge(pre_slotad_stat_cvr,risk_data_level6,on=['slotid'],how='inner')
risk_history=pd.merge(not_download_stat_slotad_cvr,risk_data_level6,on=['slotid'],how='inner')
risk_down=pd.merge(historyDownload_stat_slotad_cvr,risk_data_level6,on=['slotid'],how='inner')
risk_data_l6=pd.concat([risk_pre[['key','value']],risk_history[['key','value']],risk_down[['key','value']]])
risk_data_l6.index = range(risk_data_l6.shape[0])
for i in risk_data_l6.index:
    key = risk_data_l6.loc[i, 'key']
    value = risk_data_l6.loc[i, 'value']
    pipe.set(key, value, ex=10)
    if i % 100 == 0:
        print(i)
pipe.execute()
\ No newline at end of file
This diff is collapsed.
......@@ -27,32 +27,105 @@ yestoday10 = (now - delta10).strftime('%Y-%m-%d')
yestoday15 = (now - delta15).strftime('%Y-%m-%d')
# Per (advert, orientation, day): spend/conversions split by whether the
# launch was target-app limited.
sql = '''
select
a.advert_id,
a.data1 orientation_id,
dt,
targetapp_limit,
sum(charge_fees) cost,
sum(act_click_cnt) convert,
sum(charge_fees)/sum(act_click_cnt) cost_convert
from
(select * from logs.dwd_tuia_launch_log_di where dt>='2018-11-16' and dt<='2018-12-12') a
inner join
(select order_id,charge_fees,act_click_cnt from advert.dws_advert_order_wide_v4_level_6_di
where dt>='2018-11-16' and dt<='2018-12-12') b
on a.order_id=b.order_id
group by a.advert_id,a.data1,dt,targetapp_limit
'''
cursor.execute(sql)
ad_orient1 = pd.DataFrame(cursor.fetchall())
ad_orient1.columns = ['advert_id','orientation_id','dt','targetapp_limit','cost','convert','cost_convert']
# Flag rows whose targetapp_limit is '2' (targeted); everything else -> 0.
# (`.loc` replaces the removed pandas `.ix` API.)
ad_orient1.loc[ad_orient1['targetapp_limit']=='2','istg']=1
ad_orient1['istg']=ad_orient1['istg'].fillna(value=0)
# Composite key per advert/orientation pair.
ad_orient1['ad_ori']=ad_orient1['advert_id'].astype('str')+'_'+ad_orient1['orientation_id'].astype('str')
# Keep only pairs that were targeted at least once over the window.
a=pd.DataFrame(ad_orient1['istg'].groupby(ad_orient1['ad_ori']).sum())
a['ad_ori']=a.index
ad_orient=pd.merge(ad_orient1,a.loc[a['istg']>0],how='inner',on=['ad_ori'])
from dingtalkchatbot.chatbot import DingtalkChatbot
# WebHook address for the DingTalk alert robot.
# NOTE(review): the access token is committed in source — move it to config.
webhook = 'https://oapi.dingtalk.com/robot/send?access_token=4f28ce996ab4f2601c0362fbfd0d48f58b0250a76953ff117ca41e9f1ec8e565'
# Initialise the robot ("xiaoding").
xiaoding = DingtalkChatbot(webhook)
# Phone numbers to @-mention in alert messages.
at_mobiles = ['18668032242']
## Non-app-download trade with historical data: per (app, slot, advert) spend,
## mean predicted CVR, realized CVR and their ratio (bias_0) over the window
## [yestoday15, yestoday1] (both date strings are defined earlier in the file).
sql_ad_slot='''select
app_id,slotid,advert_id,
sum(charge_fees) cost,
avg(pre_cvr) pre_cvr_0,
sum(act_click_cnt)/sum(charge_cnt) stat_cvr_0,
avg(pre_cvr)/(sum(act_click_cnt)/sum(charge_cnt)) bias_0
from advert.dws_advert_order_wide_v4_level_8_di
where dt>='{0}' and dt<='{1}' and slotid is not null
and fee>0 and new_trade!='应用分发'
group by app_id,slotid,advert_id
'''.format(yestoday15, yestoday1)
cursor.execute(sql_ad_slot)
stat_slotad_cvr = pd.DataFrame(cursor.fetchall())
# 'cost20d' here is the spend over the 15-day window above.
stat_slotad_cvr.columns=['app_id', 'slotid', 'advert_id','cost20d','pre_cvr_0','stat_cvr_0','bias_0']
# TODO notes that were pasted as bare text in the original (a SyntaxError);
# kept here as comments, translated:
# - rows whose bias is null but confident should be set to 5 (not confident);
# - move the confidence computation online: cost / backend target-convert bid,
#   with a per-backend-type confidence formula;
# - treat cost==0 specially: bias=5 combined with a cost-based rule; the
#   online side decides the targeting condition.
# (original: bias为空置信的部分要改成5,不置信 / 把置信度拿到线上去算,cost/后端目标转化出价,
#  后端类型不同置信计算方式不同 / 看消耗=0特殊处理,bias=5并配合消耗值特殊处理,线上决定定向条件)
# Fill missing 20-day spend with 0 so the JSON value always carries a number.
stat_slotad_cvr['cost20d']=stat_slotad_cvr['cost20d'].fillna(value=0)
# Slots 1-4 of biasSet/cvrSet are placeholders (only slot 0 is computed here);
# they default to 99 (bias) / 0 (cvr).  Built in loops instead of the original
# copy-pasted per-column statements — same operations, same column order.
_bias_cols = ['bias_0', 'bias_1', 'bias_2', 'bias_3', 'bias_4']
for _c in _bias_cols[1:]:
    stat_slotad_cvr[_c] = None
stat_slotad_cvr[_bias_cols] = stat_slotad_cvr[_bias_cols].fillna(value=99)
for _c in _bias_cols:
    stat_slotad_cvr[_c] = stat_slotad_cvr[_c].round(6).astype('str')
# List of the five stringified biases.  The original joined with ',' and split
# again; building the per-row list directly is equivalent because the rounded
# string values cannot contain commas.
stat_slotad_cvr['biasSet'] = stat_slotad_cvr[_bias_cols].values.tolist()
_cvr_cols = ['stat_cvr_0', 'stat_cvr_1', 'stat_cvr_2', 'stat_cvr_3', 'stat_cvr_4']
for _c in _cvr_cols[1:]:
    stat_slotad_cvr[_c] = None
stat_slotad_cvr[_cvr_cols] = stat_slotad_cvr[_cvr_cols].fillna(value=0)
for _c in _cvr_cols:
    stat_slotad_cvr[_c] = stat_slotad_cvr[_c].round(6).astype('str')
stat_slotad_cvr['cvrSet'] = stat_slotad_cvr[_cvr_cols].values.tolist()
stat_slotad_cvr[['slotid', 'advert_id']]=stat_slotad_cvr[['slotid', 'advert_id']].astype('str')
# Agreed key format: NZ_K76_<slotId>_<advertId>
stat_slotad_cvr['key'] = "NZ_K76_" + stat_slotad_cvr['slotid'] + "_" + stat_slotad_cvr['advert_id']
# Value: JSON object {"cost20d": ..., "cvrSet": [...], "biasSet": [...]}
stat_slotad_cvr['value'] = stat_slotad_cvr[['cost20d','cvrSet','biasSet']].apply(lambda x: x.to_json(orient='index'), axis=1)
stat_slotad_cvr.index = range(stat_slotad_cvr.shape[0])
# Connect to nezha-redis.
# NOTE(review): credentials are hard-coded in source — move to a secret store.
pool = redis.ConnectionPool(host='r-bp18da0abeaddc94285.redis.rds.aliyuncs.com',
                            password='hteK73Zxx3ji9LGCy2jBAZDJ6', port=6379, db=0)
r = redis.Redis(connection_pool=pool)
pipe = r.pipeline(transaction=True)
# Age out yesterday's candidate set first: rewrite yesterday's keys with a
# short TTL (900 s) so they expire instead of being deleted outright.
print('stat_slotad_cvr_good2-----')
stat_slotad_cvr_old=pd.read_csv('stat_slotad_cvr.csv')
stat_slotad_cvr_old.to_csv('stat_slotad_cvr_old.csv', index=False)
# NOTE(review): `time` is used below but no `import time` is visible in this
# part of the file — confirm it is imported above.
# (`.loc` replaces the removed pandas `.ix` API throughout.)
for i in stat_slotad_cvr_old.index:
    key = stat_slotad_cvr_old.loc[i, 'key']
    value = stat_slotad_cvr_old.loc[i, 'value']
    pipe.set(key, value, ex=900)
    if i % 5000 == 0:
        pipe.execute()
        print(i)
        time.sleep(0.5)
pipe.execute()
# Write today's set: key NZ_K76_<slotId>_<advertId>, value JSON with cvr,
# prediction bias and confidence.  Keys live 5 days (ex=432000).
# NOTE(review): `.head(99)` uploads only the first 99 rows — this looks like a
# debugging leftover; confirm whether the full frame should be written.
for i in stat_slotad_cvr.head(99).index:
    key = stat_slotad_cvr.loc[i, 'key']
    value = stat_slotad_cvr.loc[i, 'value']
    pipe.set(key, value, ex=432000)
    if i % 5000 == 0:
        pipe.execute()
        print(i)
        time.sleep(0.5)
pipe.execute()
stat_slotad_cvr.to_csv('stat_slotad_cvr.csv', index=False)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment