Commit f1f0bbbf authored by mxx's avatar mxx

1

parent 18dbee01
......@@ -30,136 +30,56 @@ yestoday15 = (now - delta15).strftime('%Y-%m-%d')
# No historical data ---------------------------------------------------
# Parse the predicted CVR per (slot, advert) pair out of slot_ad_cvr.txt.
nolunch_pre_cvr = pd.read_table(r'slot_ad_cvr.txt')
nolunch_pre_cvr.columns = ['c']
# Strip the braces from every row and split it on single spaces into entries.
a = nolunch_pre_cvr['c'].map(lambda x: x.replace('{', '').replace('}', '').split(' '))
# a=nolunch_pre_cvr['c'].map(lambda x:x.replace('=',':'))
# Flatten the per-row entry lists into one list. A comprehension is used
# instead of the original `for i in a: l.extend(i)` loop, whose body had lost
# its indentation and therefore could not be parsed.
l = [entry for row_entries in a for entry in row_entries]
# Drop the last character of every entry, then split it on '=' into [key, value].
# NOTE(review): x[:-1] removes the final character unconditionally; presumably
# it is a trailing ',' separator - confirm that the last entry of each row also
# ends with one, otherwise its value is truncated.
ll = [x[:-1].split('=') for x in l]
ll = np.array(ll)
pre_slotad_stat_cvr = pd.DataFrame()
# The key part is split on ',': first field -> slotid, second field -> advert_id.
b = [s.split(',') for s in ll[:, 0]]
bb = np.array(b)
pre_slotad_stat_cvr['slotid'] = bb[:, 0]
pre_slotad_stat_cvr['advert_id'] = bb[:, 1]
pre_slotad_stat_cvr['cvr'] = ll[:, 1]
# pre_slotad_stat_cvr.head()
# The value part arrives as text; convert it to float.
pre_slotad_stat_cvr['cvr'] = pre_slotad_stat_cvr['cvr'].astype('float')
# Attach the advert industry tag (match_tag_nums) to every advert.
# The query keeps only adverts whose match_tag_nums has length 10, 16 or 22;
# 16-character tags are cut with substr(...,7) and 22-character tags with
# substr(...,13), 10-character tags are returned unchanged.
# The partition date is taken from yestoday1 (defined outside this fragment).
sql = '''select id,
case when length(match_tag_nums)=16 then substr(match_tag_nums,7)
when length(match_tag_nums)=22 then substr(match_tag_nums,13)
else match_tag_nums end match_tag_nums
from advert.dwd_advert_df
where dt='{0}' and length(match_tag_nums) in (10,16,22) '''.format(yestoday1)
cursor.execute(sql)
# NOTE(review): assigning two column names below raises if the query returns
# no rows (the empty frame has zero columns) - confirm that is acceptable.
advert_trid = pd.DataFrame(cursor.fetchall())
advert_trid.columns = ['advert_id', 'match_tag_nums']
# Cast the id to str so it joins against the string advert_id parsed from the file.
advert_trid['advert_id'] = advert_trid['advert_id'].astype('str')
# Left join: rows without a matching advert keep a null match_tag_nums.
pre_slotad_stat_cvr = pd.merge(pre_slotad_stat_cvr, advert_trid, how='left', on=['advert_id'])
# Bias correction
# 1 Correct using the deviation between predicted and observed values at
#   industry + slot level in historical data.
# Re-read slot_ad_cvr.txt, this time as comma-separated text with three columns.
# NOTE(review): this rebinds nolunch_pre_cvr, which is also loaded from the same
# file earlier with a single column; in this rendering the two loads look like
# the old and new versions of one statement - confirm which one is live.
nolunch_pre_cvr = pd.read_table(r'slot_ad_cvr.txt',sep=',')
nolunch_pre_cvr.columns = ['slotad','ctr','cvr']
# 'slotad' is split on '_' into exactly two new columns, slotid and advert_id.
nolunch_pre_cvr[['slotid','advert_id']]=nolunch_pre_cvr['slotad'].str.split('_',expand=True)
#
# # 匹配广告行业
# sql = '''select id,
# case when length(match_tag_nums)=16 then substr(match_tag_nums,7)
# when length(match_tag_nums)=22 then substr(match_tag_nums,13)
# else match_tag_nums end match_tag_nums
# from advert.dwd_advert_df
# where dt='{0}' and length(match_tag_nums) in (10,16,22) '''.format(yestoday1)
# cursor.execute(sql)
# advert_trid = pd.DataFrame(cursor.fetchall())
# advert_trid.columns = ['advert_id', 'match_tag_nums']
# advert_trid['advert_id'] = advert_trid['advert_id'].astype('str')
# pre_slotad_stat_cvr = pd.merge(pre_slotad_stat_cvr, advert_trid, how='left', on=['advert_id'])
#
# # 纠偏
# # 2 历史数据广告维度预估值和统计值偏差纠偏
# sql = '''
# select match_tag_nums,app_id,avg(pre_cvr)/avg(stat_cvr) pre_diff from
# (select advert_id,app_id,pre_cvr,stat_cvr
# select advert_id,avg(pre_cvr)/avg(stat_cvr) pre_diff
# from logs.dwd_nezha_result_log_di
# where dt>='{0}' and dt<='{1}' and order_id is not null ) p1
# left outer join
# (select id,case when length(match_tag_nums)>10 then substr(match_tag_nums,7)
# else match_tag_nums end match_tag_nums
# from advert.dwd_advert_df where dt>='{0}' and dt<='{1}') p2
# on p1.advert_id=p2.id
# group by match_tag_nums,app_id
# where dt>='{0}' and dt<='{1}' and order_id is not null
# group by advert_id
# '''.format(yestoday7,yestoday1)
# cursor.execute(sql)
# trid_slot_pre_diff = pd.DataFrame(cursor.fetchall())
# trid_slot_pre_diff.columns = ['match_tag_nums', 'slotid', 'trid_slot_diff']
# # trid_slot_pre_diff=trid_slot_pre_diff.ix[trid_slot_pre_diff['trid_slot_diff']>2]
# trid_slot_pre_diff['slotid'] = trid_slot_pre_diff['slotid'].fillna(value=-11).astype('int').astype('str')
# 2 Bias correction using the advert-level deviation between predicted and
#   observed values in historical data.
# Per advert_id, the query returns avg(pre_cvr)/avg(stat_cvr) over rows with a
# non-null order_id whose dt lies between yestoday3 and yestoday1 (inclusive).
sql = '''
select advert_id,avg(pre_cvr)/avg(stat_cvr) pre_diff
from logs.dwd_nezha_result_log_di
where dt>='{0}' and dt<='{1}' and order_id is not null
group by advert_id
'''.format(yestoday3,yestoday1)
cursor.execute(sql)
ad_pre_diff = pd.DataFrame(cursor.fetchall())
# The ratio column is renamed from the SQL alias pre_diff to ad_diff here.
ad_pre_diff.columns = ['advert_id', 'ad_diff']
# ad_pre_diff=ad_pre_diff.ix[ad_pre_diff['ad_diff']>2]
# Cast to str so the later merge on advert_id matches the string ids.
ad_pre_diff['advert_id'] = ad_pre_diff['advert_id'].astype('str')
# Correct the predicted CVR ############
#pre_slotad_stat_cvr = pd.merge(pre_slotad_stat_cvr, trid_slot_pre_diff, how='left', on=['slotid', 'match_tag_nums'])
# Left join the per-advert bias ratio; adverts without history get a null ad_diff.
pre_slotad_stat_cvr = pd.merge(pre_slotad_stat_cvr, ad_pre_diff, how='left', on=['advert_id'])
# pre_slotad_stat_cvr.ix[pd.isnull(pre_slotad_stat_cvr['trid_slot_diff']), 'trid_slot_diff'] = pre_slotad_stat_cvr.ix[
# pd.isnull(pre_slotad_stat_cvr['trid_slot_diff']), 'ad_diff']
# pre_slotad_stat_cvr.ix[pre_slotad_stat_cvr['trid_slot_diff'] > 2, 'fix_cvr'] = \
# pre_slotad_stat_cvr.ix[pre_slotad_stat_cvr['trid_slot_diff'] > 2, 'cvr'] / pre_slotad_stat_cvr.ix[
# pre_slotad_stat_cvr['trid_slot_diff'] > 2, 'trid_slot_diff']
# `.loc` replaces the original `.ix` indexer, which was removed in pandas 1.0;
# for a boolean row mask plus a column label the two select the same cells.
# Where the prediction exceeds the observed rate by more than 20%
# (ad_diff > 1.2), divide the predicted cvr by that ratio.
overestimated = pre_slotad_stat_cvr['ad_diff'] > 1.2
pre_slotad_stat_cvr.loc[overestimated, 'fix_cvr'] = (
    pre_slotad_stat_cvr.loc[overestimated, 'cvr'] / pre_slotad_stat_cvr.loc[overestimated, 'ad_diff'])
# Every other row (ratio <= 1.2 or missing) keeps the uncorrected prediction.
uncorrected = pd.isnull(pre_slotad_stat_cvr['fix_cvr'])
pre_slotad_stat_cvr.loc[uncorrected, 'fix_cvr'] = pre_slotad_stat_cvr.loc[uncorrected, 'cvr']
##### Attach the average fee per industry tag + slot
# The inner query cuts match_tag_nums longer than 10 characters with
# substr(...,7); the outer query averages fee per (match_tag_nums, slotid)
# for the single partition dt = yestoday1.
sql_fee1='''
select match_tag_nums,slotid,avg(fee) fee from
(select case when length(match_tag_nums)>10 then substr(match_tag_nums,7)
else match_tag_nums end match_tag_nums,
slotid,fee
from advert.dws_advert_order_wide_v4_level_3_di
where dt="{0}") a
group by match_tag_nums,slotid
'''.format(yestoday1)
cursor.execute(sql_fee1)
tride_slot_fee = pd.DataFrame(cursor.fetchall())
tride_slot_fee.columns = ['match_tag_nums','slotid','m_s_fee']
# Convert slotid to str and drop '.0' so it matches the string slotid used elsewhere.
# NOTE(review): replace('.0','') removes every occurrence of '.0', not only a
# trailing one - presumably slotid arrives as a float such as 1234.0; confirm.
tride_slot_fee['slotid']=tride_slot_fee['slotid'].astype('str').map(lambda x:x.replace('.0',''))
# Left join: (industry, slot) pairs without fee data get a null m_s_fee.
pre_slotad_stat_cvr=pd.merge(pre_slotad_stat_cvr,tride_slot_fee,on=['match_tag_nums','slotid'],how='left')
##### Attach the average fee per advert
# Averages fee per advert_id for the single partition dt = yestoday1.
sql_fee2='''
select advert_id,avg(fee) fee
from advert.dws_advert_order_wide_v4_level_3_di
where dt="{0}"
group by advert_id
'''.format(yestoday1)
cursor.execute(sql_fee2)
advert_fee = pd.DataFrame(cursor.fetchall())
advert_fee.columns = ['advert_id','ad_fee']
# Cast to str so the merge key matches the string advert_id in pre_slotad_stat_cvr.
advert_fee['advert_id']=advert_fee['advert_id'].astype('str')
pre_slotad_stat_cvr=pd.merge(pre_slotad_stat_cvr,advert_fee,on=['advert_id'],how='left')
# Ratio of the advert's own average fee to the average fee of its
# (industry, slot) group; it is null when either side is missing.
pre_slotad_stat_cvr['pre_launch_r']=pre_slotad_stat_cvr['ad_fee']/pre_slotad_stat_cvr['m_s_fee']
#### Select the (slot, advert) combinations predicted to be worth serving
# `.loc` / `sort_values` replace `.ix` / `sort_index(by=...)`, both of which were
# removed in pandas 1.0. The explicit `.copy()` calls make the column
# assignments below operate on independent frames rather than on slices.
# Keep rows whose fee ratio is above 1 and whose raw predicted cvr is above 0.01.
candidate_mask = (pre_slotad_stat_cvr['pre_launch_r']>1) & (pre_slotad_stat_cvr['cvr']>0.01)
pre_slotad_cvr = pre_slotad_stat_cvr.loc[candidate_mask, ['slotid', 'advert_id', 'fix_cvr', 'ad_diff']].copy()
pre_slotad_cvr['confidence'] = 0
# Rename: fix_cvr -> cvr, ad_diff -> bias.
pre_slotad_cvr.columns = ['slotid', 'advert_id', 'cvr', 'bias','confidence']
# Rows with a null bias fail the `< 1.1` comparison and are dropped here too.
pre_slotad_cvr_good = pre_slotad_cvr.loc[pre_slotad_cvr['bias']<1.1]
pre_slotad_cvr_good = pre_slotad_cvr_good.sort_values(by=['bias'])
#pre_slotad_cvr_good.groupby('advert_id').size()
# After the ascending sort, keep at most the 20 lowest-bias rows per advert.
pre_slotad_cvr_good = pre_slotad_cvr_good.groupby('advert_id').head(20).copy()
pre_slotad_cvr_good[['slotid', 'advert_id']]=pre_slotad_cvr_good[['slotid', 'advert_id']].astype('str')
# Key is "NZ_K76_<slotid>_<advert_id>"; value is the JSON object of cvr/bias/confidence.
pre_slotad_cvr_good['key'] = "NZ_K76_" + pre_slotad_cvr_good['slotid'] + "_" + pre_slotad_cvr_good['advert_id']
pre_slotad_cvr_good['value'] = pre_slotad_cvr_good[['cvr', 'bias', 'confidence']].apply(lambda x: x.to_json(orient='index'), axis=1)
# ad_pre_diff = pd.DataFrame(cursor.fetchall())
# ad_pre_diff.columns = ['advert_id', 'ad_diff']
# ad_pre_diff['advert_id'] = ad_pre_diff['advert_id'].astype('str')
#
# # 预估cvr纠偏############
# pre_slotad_stat_cvr = pd.merge(pre_slotad_stat_cvr, ad_pre_diff, how='left', on=['advert_id'])
#
# pre_slotad_stat_cvr.ix[pre_slotad_stat_cvr['ad_diff'] > 1.2, 'fix_cvr'] = \
# pre_slotad_stat_cvr.ix[pre_slotad_stat_cvr['ad_diff'] > 1.2, 'cvr'] / pre_slotad_stat_cvr.ix[
# pre_slotad_stat_cvr['ad_diff'] > 1.2, 'ad_diff']
#
# pre_slotad_stat_cvr.ix[pd.isnull(pre_slotad_stat_cvr['fix_cvr']), 'fix_cvr'] = pre_slotad_stat_cvr.ix[
# pd.isnull(pre_slotad_stat_cvr['fix_cvr']), 'cvr']
# Build the candidate set straight from the predicted CVR table.
# `sort_values` replaces `sort_index(by=...)`, which was removed in pandas 1.0.
nolunch_pre_cvr = nolunch_pre_cvr.sort_values(by=['cvr'], ascending=False)
# After the descending sort, keep at most the 500 highest-cvr rows per advert.
# `.copy()` makes the column assignments below act on an independent frame.
pre_slotad_cvr_good = nolunch_pre_cvr.groupby('advert_id').head(500).copy()
# cvrSet repeats the cvr, rounded to 6 decimals, three times.
pre_slotad_cvr_good['cvrSet']=pre_slotad_cvr_good['cvr'].map(lambda x:[round(x,6)]*3)
# The remaining sets are constant placeholders, identical for every row.
pre_slotad_cvr_good['biasSet']=[[-1.0,-1.0,-1.0]]*pre_slotad_cvr_good.shape[0]
pre_slotad_cvr_good['confidenceSet']=[[0.0,0.0,0.0]]*pre_slotad_cvr_good.shape[0]
pre_slotad_cvr_good['priceSection']=[[0.0,0.0]]*pre_slotad_cvr_good.shape[0]
# Key is "NZ_K076_" followed by the original slotad string.
pre_slotad_cvr_good['key'] = "NZ_K076_" + pre_slotad_cvr_good['slotad']
# Value is the JSON object holding the four list-valued fields.
pre_slotad_cvr_good['value'] = pre_slotad_cvr_good[[ 'cvrSet', 'biasSet', 'confidenceSet','priceSection']].apply(lambda x: x.to_json(orient='index'), axis=1)
# Reset to a 0..n-1 index so positional checks such as `i % N` on the index work.
pre_slotad_cvr_good.index = range(pre_slotad_cvr_good.shape[0])
......@@ -172,9 +92,9 @@ pipe = r.pipeline(transaction=True)
# First delete yesterday's candidate set.
# The previously written candidate CSV is loaded and saved under an "_old" name.
# NOTE(review): the load/backup pair appears twice with different file names
# (…good.csv and …good2.csv); in this rendering they look like the old and new
# versions of the same statements - confirm which pair is live. As written, the
# second read_csv rebinds pre_slotad_cvr_good_old.
print('pre_slotad_cvr_good-----')
pre_slotad_cvr_good_old=pd.read_csv('pre_slotad_cvr_good.csv')
pre_slotad_cvr_good_old.to_csv('pre_slotad_cvr_good_old.csv', index=False)
print('pre_slotad_cvr_good2-----')
pre_slotad_cvr_good_old=pd.read_csv('pre_slotad_cvr_good2.csv')
pre_slotad_cvr_good_old.to_csv('pre_slotad_cvr_good_old2.csv', index=False)
for i in pre_slotad_cvr_good_old.index:
key = pre_slotad_cvr_good_old.ix[i, 'key']
value = pre_slotad_cvr_good_old.ix[i, 'value']
......@@ -197,7 +117,7 @@ for i in pre_slotad_cvr_good.index:
# Send whatever commands are still queued on the redis pipeline.
pipe.execute()
# Write the current candidate set to CSV without the index column.
# NOTE(review): the same frame is written to two file names; these look like the
# old and new versions of one statement - confirm which file is still needed.
pre_slotad_cvr_good.to_csv('pre_slotad_cvr_good.csv', index=False)
pre_slotad_cvr_good.to_csv('pre_slotad_cvr_good2.csv', index=False)
......@@ -205,7 +125,6 @@ pre_slotad_cvr_good.to_csv('pre_slotad_cvr_good.csv', index=False)
##--------------------------------------------------------------------------------------
###----广告粒度历史数据
#广告高中低出价预测发券 统计cvr, bias
sql_ad_fee0 = '''
select app_id,slotid,advert_id,
sum(charge_fees) cost,
......@@ -215,7 +134,7 @@ avg(pre_cvr) pre_cvr,
avg(pre_cvr)/(sum(act_click_cnt)/sum(charge_cnt)) bias ,
avg(b.fee0) fee0
from
(select * from advert.dws_advert_order_wide_v4_level_3_di where dt>='{0}' and dt<='{1}') a
(select * from advert.dws_advert_order_wide_v4_level_6_di where dt>='{0}' and dt<='{1}') a
left outer join
(select advert_id,percentile(fee,0.33) fee0
from advert.dws_advert_order_wide_v4_level_3_di
......@@ -370,7 +289,7 @@ stat_slotad_cvr_good_old.to_csv('stat_slotad_cvr_good_old2.csv', index=False)
for i in stat_slotad_cvr_good_old.index:
key = stat_slotad_cvr_good_old.ix[i, 'key']
value = stat_slotad_cvr_good_old.ix[i, 'value']
pipe.set(key, value, ex=200)
pipe.set(key, value, ex=600)
if i % 2000 == 0:
pipe.execute()
print(i)
......
......@@ -179,7 +179,7 @@ for i in pre_slotad_cvr_good_old.index:
key = pre_slotad_cvr_good_old.ix[i, 'key']
value = pre_slotad_cvr_good_old.ix[i, 'value']
pipe.set(key, value, ex=200)
if i % 2000 == 0:
if i % 5000 == 0:
pipe.execute()
print(i)
time.sleep(0.5)
......@@ -189,8 +189,8 @@ pipe.execute()
for i in pre_slotad_cvr_good.index:
key = pre_slotad_cvr_good.ix[i, 'key']
value = pre_slotad_cvr_good.ix[i, 'value']
pipe.set(key, value, ex=90000)
if i % 2000 == 0:
pipe.set(key, value, ex=432000)
if i % 5000 == 0:
pipe.execute()
print(i)
time.sleep(0.5)
......@@ -288,7 +288,7 @@ stat_slotad_cvr_good_old.to_csv('stat_slotad_cvr_good_old.csv', index=False)
for i in stat_slotad_cvr_good_old.index:
key = stat_slotad_cvr_good_old.ix[i, 'key']
value = stat_slotad_cvr_good_old.ix[i, 'value']
pipe.set(key, value, ex=200)
pipe.set(key, value, ex=600)
if i % 2000 == 0:
pipe.execute()
print(i)
......@@ -299,7 +299,7 @@ pipe.execute()
for i in stat_slotad_cvr_good.index:
key = stat_slotad_cvr_good.ix[i, 'key']
value = stat_slotad_cvr_good.ix[i, 'value']
pipe.set(key, value, ex=187200)
pipe.set(key, value, ex=432000)
if i % 5000 == 0:
pipe.execute()
print(i)
......
import redis
import json
# Parameter set that is serialized to JSON and stored in redis under params_key.
# NOTE(review): the dict holds flat "...Facter" keys as well as the nested
# "feedBackParams" / "slotRecommendParams" groups with "...Factor" keys. They
# look like the old and new layouts of the same settings - confirm whether the
# flat keys are still read by any consumer.
params_dict={
# recommend method
'startFacter':0.5,'cpaBiasRatioFacter':1.0,
'cpaOrientRatioFacter':0.5,
'cpaBiasThresholdFacter':2.0,
'cpcTargetRatioFacter':0.5,
'cpcOrientRatioFacter':0.01,
'cpcBiasThresholdFacter':1.0,
# circuit breaker (fuse)
'fuseOrientCostG1dFacter':50000.0,
'fuseOrientCostConvertbiasFacter':2.0,
# whitelist parameters
'wSlotOrientationConfidenceFacter':0.2,
'wSlotOrientationCostConvertBiasFacter':1.2,
# high-confidence blacklist parameters
'bOrientConfidenceFacter1':1.0,
'bOrientCostConvertbiasFacter1':1.2,
'bSlotOrientationConfidenceFacter1':0.5,
'bSlotOrientationCostConvertBiasFacter1':3.5,
# low-confidence blacklist parameters
'bOrientConfidenceFacter2':1.0,
'bOrientCostConvertbiasFacter2':1.5,
'bSlotOrientationConfidenceFacter2':0.5,
'bSlotOrientRadioFacter2':2.0,
"feedBackParams": {
"bOrientConfidenceFactor1": 1,
"bOrientConfidenceFactor2": 1,
"bOrientCostConvertBiasFactor1": 1.2,
"bOrientCostConvertBiasFactor2": 1.5,
"bSlotOrientRadioFactor2": 2,
"bSlotOrientationConfidenceFactor1": 0.5,
"bSlotOrientationConfidenceFactor2": 0.5,
"bSlotOrientationCostConvertBiasFactor1": 3.5,
"fuseOrientCostConvertBiasFactor": 2,
"fuseOrientCostG1dFactor": 50000,
"wSlotOrientationConfidenceFactor": 0.2,
"wSlotOrientationCostConvertBiasFactor": 1.2
},
"slotRecommendParams": {
"cpaBiasRatioFactor": 1,
"cpaBiasThresholdFactor": 2,
"cpaOrientRatioFactor": 0.6,
"cpcBiasThresholdFactor": 1,
"cpcOrientRatioFactor": 0.1,
"cpcTargetRatioFactor": 0.5,
"startFactor": 0.5
}
}
# NOTE(review): the first assignment is overwritten by the next line before it
# is ever used, so only "NZ_K86_trusteeship_params" is written to redis.
params_key="NZ_K??_auto_manage_params"
params_key="NZ_K86_trusteeship_params"
params_value=json.dumps(params_dict)
# NOTE(review): the redis host and password are hard-coded in source control;
# rotate this credential and load it from configuration or the environment.
pool = redis.ConnectionPool(host='r-bp18da0abeaddc94285.redis.rds.aliyuncs.com',
password='hteK73Zxx3ji9LGCy2jBAZDJ6', port=6379, db=0)
r = redis.Redis(connection_pool=pool)
# Stored without an expiry: no `ex` argument is passed to set().
r.set(params_key,params_value)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment