Commit 18dbee01 authored by mxx's avatar mxx

1

parent e7b2c71b
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<project version="4"> <project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5.2 (D:\Program Files\Anaconda3\python.exe)" project-jdk-type="Python SDK" /> <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5.2 (D:\Program Files\anaconda3\python.exe)" project-jdk-type="Python SDK" />
</project> </project>
\ No newline at end of file
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
<module type="PYTHON_MODULE" version="4"> <module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager"> <component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" /> <content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" /> <orderEntry type="jdk" jdkName="Python 3.5.2 (D:\Program Files\anaconda3\python.exe)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" /> <orderEntry type="sourceFolder" forTests="false" />
</component> </component>
</module> </module>
\ No newline at end of file
...@@ -8,7 +8,7 @@ import redis ...@@ -8,7 +8,7 @@ import redis
import datetime import datetime
import time import time
#注意:目前只有测试的广告上传redis
os.chdir('/home/db_dlp/mengxiangxuan/auto_spread') os.chdir('/home/db_dlp/mengxiangxuan/auto_spread')
cursor = hive.connect(host='10.50.10.11', port=10000, username='mengxiangxuan', database='default').cursor() cursor = hive.connect(host='10.50.10.11', port=10000, username='mengxiangxuan', database='default').cursor()
now = datetime.datetime.now() now = datetime.datetime.now()
...@@ -30,7 +30,7 @@ yestoday15 = (now - delta15).strftime('%Y-%m-%d') ...@@ -30,7 +30,7 @@ yestoday15 = (now - delta15).strftime('%Y-%m-%d')
# 历史无数据--------------------------------------------------- # 历史无数据---------------------------------------------------
# 解出广告位-广告维度预估cvr # 解出广告位-广告维度预估cvr
nolunch_pre_cvr = pd.read_table(r'slot_ad_stat_cvr.txt') nolunch_pre_cvr = pd.read_table(r'slot_ad_cvr.txt')
nolunch_pre_cvr.columns = ['c'] nolunch_pre_cvr.columns = ['c']
a = nolunch_pre_cvr['c'].map(lambda x: x.replace('{', '').replace('}', '').split(' ')) a = nolunch_pre_cvr['c'].map(lambda x: x.replace('{', '').replace('}', '').split(' '))
# a=nolunch_pre_cvr['c'].map(lambda x:x.replace('=',':')) # a=nolunch_pre_cvr['c'].map(lambda x:x.replace('=',':'))
...@@ -204,72 +204,156 @@ pre_slotad_cvr_good.to_csv('pre_slotad_cvr_good.csv', index=False) ...@@ -204,72 +204,156 @@ pre_slotad_cvr_good.to_csv('pre_slotad_cvr_good.csv', index=False)
########################################################################################### ###########################################################################################
##-------------------------------------------------------------------------------------- ##--------------------------------------------------------------------------------------
###----广告粒度历史数据 ###----广告粒度历史数据
sql_ad = ''' #广告高中低出价预测发券 统计cvr, bias
select a.app_id,a.slot_id,a.advert_id,
a.stat_cvr, sql_ad_fee0 = '''
b.pre_cvr/a.stat_cvr as bias, select app_id,slotid,advert_id,
case when act_click_cnt>0 then act_click_cnt/5 sum(charge_fees) cost,
when act_click_cnt=0 then cost/(c.afee*5) end confidence, sum(charge_fees)/sum(act_click_cnt) costconvert,
a.cost, sum(act_click_cnt)/sum(charge_cnt) stat_cvr,
a.act_click_cnt, avg(pre_cvr) pre_cvr,
a.launch_cnt, avg(pre_cvr)/(sum(act_click_cnt)/sum(charge_cnt)) bias ,
c.afee, avg(b.fee0) fee0
cpc_fee
from from
(select (select * from advert.dws_advert_order_wide_v4_level_3_di where dt>='{0}' and dt<='{1}') a
app_id,slot_id,advert_id,
sum(ad_consume) cost,
sum(effect_pv) act_click_cnt,
sum(effect_pv)/sum(tuia_consumer_count) stat_cvr,
count(1) launch_cnt
from advert.dws_advert_effect_analyse_di
where dt>='{0}' and dt<='{1}'
group by app_id,slot_id,advert_id) a
left outer join left outer join
(select advert_id,percentile(fee,0.33) fee0
(select app_id,slot_id, from advert.dws_advert_order_wide_v4_level_3_di
advert_id,avg(pre_cvr) pre_cvr,avg(fee) cpc_fee where dt>='{0}' and dt<='{1}' and fee>0
from ( group by advert_id) b
select app_id,slot_id, advert_id,package_id, on a.advert_id=b.advert_id
pre_cvr, where a.fee<b.fee0 and a.slotid is not null
case when charge_type=1 then fee else 0 end fee group by app_id,slotid,a.advert_id
from logs.dwd_nezha_result_log_di '''.format(yestoday15, yestoday1)
where dt='{1}' ) m
group by app_id,slot_id,advert_id) b sql_ad_fee1 = '''
on a.app_id=b.app_id and a.slot_id=b.slot_id and a.advert_id=b.advert_id select app_id,slotid,advert_id,
sum(charge_fees) cost,
sum(charge_fees)/sum(act_click_cnt) costconvert,
sum(act_click_cnt)/sum(charge_cnt) stat_cvr,
avg(pre_cvr) pre_cvr,
avg(pre_cvr)/(sum(act_click_cnt)/sum(charge_cnt)) bias,
avg(b.fee1) fee1
from
(select * from advert.dws_advert_order_wide_v4_level_3_di where dt>='{0}' and dt<='{1}') a
left outer join left outer join
(select advert_id,percentile(fee,0.66) fee1
from advert.dws_advert_order_wide_v4_level_3_di
where dt>='{0}' and dt<='{1}' and fee>0
group by advert_id) b
on a.advert_id=b.advert_id
where a.fee<b.fee1 and a.slotid is not null
group by app_id,slotid,a.advert_id
'''.format(yestoday15, yestoday1)
sql_ad_fee2='''select app_id,slotid,advert_id,
sum(charge_fees) cost,
sum(charge_fees)/sum(act_click_cnt) costconvert,
sum(act_click_cnt)/sum(charge_cnt) stat_cvr,
avg(pre_cvr) pre_cvr,
avg(pre_cvr)/(sum(act_click_cnt)/sum(charge_cnt)) bias
from advert.dws_advert_order_wide_v4_level_3_di
where dt>='{0}' and dt<='{1}' and slotid is not null
group by app_id,slotid,advert_id
'''.format(yestoday15, yestoday1)
(select sql_ad_costconvert='''select advert_id,
advert_id, sum(charge_fees)/sum(act_click_cnt) ad_costconvert
sum(ad_consume)/sum(effect_pv) afee from advert.dws_advert_order_wide_v4_level_3_di
from advert.dws_advert_effect_analyse_di where dt>='{0}' and dt<='{1}'
where dt='{1}' group by advert_id
group by advert_id) c '''.format(yestoday15, yestoday1)
on a.advert_id=c.advert_id'''.format(yestoday15, yestoday1)
cursor.execute(sql_ad) cursor.execute(sql_ad_fee0)
stat_slotad_cvr = pd.DataFrame(cursor.fetchall()) stat_slotad_cvr_fee0 = pd.DataFrame(cursor.fetchall())
stat_slotad_cvr.columns = ['app_id', 'slotid', 'advert_id', stat_slotad_cvr_fee0.columns=['app_id', 'slotid', 'advert_id','cost0','costconvert0','stat_cvr0','pre_cvr0','bias0','fee0']
'cvr', 'bias', 'confidence','cost','act_click_cnt','launch_cnt','afee','cpc_fee'] stat_slotad_cvr_fee0=stat_slotad_cvr_fee0.ix[stat_slotad_cvr_fee0['cost0']>0]
stat_slotad_cvr['confidence'][stat_slotad_cvr['confidence'] > 1] = 1 cursor.execute(sql_ad_fee1)
stat_slotad_cvr['costconvert']=stat_slotad_cvr['cost']/stat_slotad_cvr['act_click_cnt'] stat_slotad_cvr_fee1 = pd.DataFrame(cursor.fetchall())
stat_slotad_cvr['costconvert_bias']=stat_slotad_cvr['costconvert']/stat_slotad_cvr['afee'] stat_slotad_cvr_fee1.columns=['app_id', 'slotid', 'advert_id','cost1','costconvert1','stat_cvr1','pre_cvr1','bias1','fee1']
stat_slotad_cvr['cpc_target_cvr']=stat_slotad_cvr['cpc_fee']/stat_slotad_cvr['afee'] stat_slotad_cvr_fee1=stat_slotad_cvr_fee1.ix[stat_slotad_cvr_fee1['cost1']>0]
# ---------------------------------------------------------------------------
# Build per (app_id, slotid, advert_id) statistics for three bid tiers and pack
# them into list-valued columns: cvrSet, biasSet, confidenceSet, priceSection.
#   tier 0 -> columns *0, from stat_slotad_cvr_fee0 (result of sql_ad_fee0)
#   tier 1 -> columns *1, from stat_slotad_cvr_fee1 (result of sql_ad_fee1)
#   tier 2 -> columns *2, from sql_ad_fee2, fetched right below
# Row selection uses .loc rather than .ix: pandas deprecated .ix in 0.20 and
# removed it in 1.0, and every selection here is a boolean mask plus a column
# label, for which the two indexers behave the same.
# ---------------------------------------------------------------------------


def _pack_as_str_lists(frame, columns, fill_value, digits):
    """Stringify *columns* in place and return them packed as one list per row.

    Each listed column has its missing values replaced by *fill_value*, is
    rounded to *digits* decimals and converted to ``str`` (the frame itself is
    modified).  The returned Series holds, for every row, the list of those
    strings in the order given by *columns*.
    """
    for name in columns:
        frame[name] = frame[name].fillna(value=fill_value).round(digits).astype('str')
    joined = frame[columns[0]]
    for name in columns[1:]:
        joined = joined + ',' + frame[name]
    # Join-then-split keeps every list element a string.
    return joined.map(lambda text: text.split(','))


# Tier 2 statistics; rows whose cost is not positive are dropped.
cursor.execute(sql_ad_fee2)
stat_slotad_cvr_fee2 = pd.DataFrame(cursor.fetchall())
stat_slotad_cvr_fee2.columns = ['app_id', 'slotid', 'advert_id',
                                'cost2', 'costconvert2', 'stat_cvr2', 'pre_cvr2', 'bias2']
stat_slotad_cvr_fee2 = stat_slotad_cvr_fee2.loc[stat_slotad_cvr_fee2['cost2'] > 0]

# Advert-level cost per conversion, used as the yardstick for confidence.
cursor.execute(sql_ad_costconvert)
ad_costconvert = pd.DataFrame(cursor.fetchall())
ad_costconvert.columns = ['advert_id', 'ad_costconvert']

# Tier 2 is the base table; tiers 1 and 0 are left-joined onto it, so a
# slot/advert pair missing from a tier gets NaN in that tier's columns.
slot_ad_keys = ['app_id', 'slotid', 'advert_id']
stat_slotad_cvr_fee21 = pd.merge(stat_slotad_cvr_fee2, stat_slotad_cvr_fee1,
                                 how='left', on=slot_ad_keys)
stat_slotad_cvr_fee210 = pd.merge(stat_slotad_cvr_fee21, stat_slotad_cvr_fee0,
                                  how='left', on=slot_ad_keys)
stat_slotad_cvr = pd.merge(stat_slotad_cvr_fee210, ad_costconvert,
                           how='left', on=['advert_id'])
# .copy() makes the filtered frame independent, so the column assignments
# below do not write into a slice of the merge result.
stat_slotad_cvr = stat_slotad_cvr.loc[pd.notnull(stat_slotad_cvr['ad_costconvert'])].copy()

# confidence = tier cost / (5 * advert cost per conversion), capped at 1.
for tier in ('0', '1', '2'):
    confidence_col = 'confidence' + tier
    stat_slotad_cvr[confidence_col] = (
        stat_slotad_cvr['cost' + tier] / (stat_slotad_cvr['ad_costconvert'] * 5))
    stat_slotad_cvr.loc[stat_slotad_cvr[confidence_col] > 1, confidence_col] = 1

# Fill a missing statistical CVR per tier:
#   confidence >= 0.2 -> 0
#   confidence <  0.2 -> fall back to the tier's pre_cvr
# Rows whose confidence is NaN match neither mask and are zero-filled below.
for tier in ('2', '1', '0'):
    cvr_col = 'stat_cvr' + tier
    cvr_missing = pd.isnull(stat_slotad_cvr[cvr_col])
    confident = (stat_slotad_cvr['confidence' + tier] >= 0.2) & cvr_missing
    unconfident = (stat_slotad_cvr['confidence' + tier] < 0.2) & cvr_missing
    stat_slotad_cvr.loc[confident, cvr_col] = 0
    stat_slotad_cvr.loc[unconfident, cvr_col] = stat_slotad_cvr.loc[unconfident, 'pre_cvr' + tier]

cvr_cols = ['stat_cvr0', 'stat_cvr1', 'stat_cvr2']
stat_slotad_cvr[cvr_cols] = stat_slotad_cvr[cvr_cols].fillna(value=0)

# Missing tier-2 bias: 5.0 when the tier-2 CVR is zero, otherwise 1.5.
# bias0 / bias1 receive their 5.0 default when they are packed below.
bias2_missing = pd.isnull(stat_slotad_cvr['bias2'])
cvr2_is_zero = (stat_slotad_cvr['stat_cvr2'] == 0) & bias2_missing
cvr2_non_zero = (stat_slotad_cvr['stat_cvr2'] != 0) & bias2_missing
stat_slotad_cvr.loc[cvr2_is_zero, 'bias2'] = 5.0
stat_slotad_cvr.loc[cvr2_non_zero, 'bias2'] = 1.5

# Pack the per-tier values, ordered tier 0, 1, 2, into list-valued columns.
stat_slotad_cvr['confidenceSet'] = _pack_as_str_lists(
    stat_slotad_cvr, ['confidence0', 'confidence1', 'confidence2'], 0, 2)
stat_slotad_cvr['cvrSet'] = _pack_as_str_lists(stat_slotad_cvr, cvr_cols, 0, 6)
stat_slotad_cvr['biasSet'] = _pack_as_str_lists(
    stat_slotad_cvr, ['bias0', 'bias1', 'bias2'], 5.0, 6)
# 9999.0 stands in for a missing fee0 / fee1 value.
stat_slotad_cvr['priceSection'] = _pack_as_str_lists(
    stat_slotad_cvr, ['fee0', 'fee1'], 9999.0, 1)
#选广告位 #选广告位
stat_slotad_cvr_good=stat_slotad_cvr.ix[(stat_slotad_cvr['costconvert_bias'] <= 1.5) stat_slotad_cvr_good= stat_slotad_cvr[['slotid', 'advert_id', 'cvrSet', 'biasSet', 'confidenceSet','priceSection']]
& (stat_slotad_cvr['cvr'] >= 0.01)
& (stat_slotad_cvr['confidence'] >= 0.1)]
stat_slotad_cvr_good = stat_slotad_cvr_good[['slotid', 'advert_id', 'cvr', 'bias', 'confidence']]
stat_slotad_cvr_good[['slotid', 'advert_id']]=stat_slotad_cvr_good[['slotid', 'advert_id']].astype('str') stat_slotad_cvr_good[['slotid', 'advert_id']]=stat_slotad_cvr_good[['slotid', 'advert_id']].astype('str')
stat_slotad_cvr_good['key'] = "NZ_K76_" + stat_slotad_cvr_good['slotid'] + "_" + stat_slotad_cvr_good['advert_id'] stat_slotad_cvr_good['key'] = "NZ_K076_" + stat_slotad_cvr_good['slotid'] + "_" + stat_slotad_cvr_good['advert_id']
stat_slotad_cvr_good['value'] = stat_slotad_cvr_good[['cvr', 'bias', 'confidence']].apply(lambda x: x.to_json(orient='index'), axis=1) stat_slotad_cvr_good['value'] = stat_slotad_cvr_good[[ 'cvrSet', 'biasSet', 'confidenceSet','priceSection']].apply(lambda x: x.to_json(orient='index'), axis=1)
stat_slotad_cvr_good.index = range(stat_slotad_cvr_good.shape[0]) stat_slotad_cvr_good.index = range(stat_slotad_cvr_good.shape[0])
stat_slotad_cvr_good['bias']=stat_slotad_cvr_good['bias'].fillna(value=1.4)
# 连接nezha-redis # 连接nezha-redis
pool = redis.ConnectionPool(host='r-bp18da0abeaddc94285.redis.rds.aliyuncs.com', pool = redis.ConnectionPool(host='r-bp18da0abeaddc94285.redis.rds.aliyuncs.com',
...@@ -280,9 +364,9 @@ pipe = r.pipeline(transaction=True) ...@@ -280,9 +364,9 @@ pipe = r.pipeline(transaction=True)
#先删除昨日候选集 #先删除昨日候选集
print('stat_slotad_cvr_good-----') print('stat_slotad_cvr_good2-----')
stat_slotad_cvr_good_old=pd.read_csv('stat_slotad_cvr_good.csv') stat_slotad_cvr_good_old=pd.read_csv('stat_slotad_cvr_good2.csv')
stat_slotad_cvr_good_old.to_csv('stat_slotad_cvr_good_old.csv', index=False) stat_slotad_cvr_good_old.to_csv('stat_slotad_cvr_good_old2.csv', index=False)
for i in stat_slotad_cvr_good_old.index: for i in stat_slotad_cvr_good_old.index:
key = stat_slotad_cvr_good_old.ix[i, 'key'] key = stat_slotad_cvr_good_old.ix[i, 'key']
value = stat_slotad_cvr_good_old.ix[i, 'value'] value = stat_slotad_cvr_good_old.ix[i, 'value']
...@@ -305,7 +389,7 @@ for i in stat_slotad_cvr_good.index: ...@@ -305,7 +389,7 @@ for i in stat_slotad_cvr_good.index:
pipe.execute() pipe.execute()
stat_slotad_cvr_good.to_csv('stat_slotad_cvr_good.csv', index=False) stat_slotad_cvr_good.to_csv('stat_slotad_cvr_good2.csv', index=False)
############################################################################################## ##############################################################################################
############################################################################################### ###############################################################################################
......
import redis
import json
# Tunable factors, listed group by group in the order they are serialised.
# NOTE(review): the key names keep their original "Facter" spelling because
# they are runtime strings -- presumably looked up verbatim by the consumer;
# confirm before renaming.
_PARAM_GROUPS = (
    # "recommend" method
    (
        ('startFacter', 0.5),
        ('cpaBiasRatioFacter', 1.0),
        ('cpaOrientRatioFacter', 0.5),
        ('cpaBiasThresholdFacter', 2.0),
        ('cpcTargetRatioFacter', 0.5),
        ('cpcOrientRatioFacter', 0.01),
        ('cpcBiasThresholdFacter', 1.0),
    ),
    # circuit breaker ("fuse")
    (
        ('fuseOrientCostG1dFacter', 50000.0),
        ('fuseOrientCostConvertbiasFacter', 2.0),
    ),
    # whitelist parameters
    (
        ('wSlotOrientationConfidenceFacter', 0.2),
        ('wSlotOrientationCostConvertBiasFacter', 1.2),
    ),
    # high-confidence blacklist parameters
    (
        ('bOrientConfidenceFacter1', 1.0),
        ('bOrientCostConvertbiasFacter1', 1.2),
        ('bSlotOrientationConfidenceFacter1', 0.5),
        ('bSlotOrientationCostConvertBiasFacter1', 3.5),
    ),
    # low-confidence blacklist parameters
    (
        ('bOrientConfidenceFacter2', 1.0),
        ('bOrientCostConvertbiasFacter2', 1.5),
        ('bSlotOrientationConfidenceFacter2', 0.5),
        ('bSlotOrientRadioFacter2', 2.0),
    ),
)

# Flatten the groups into one mapping, preserving the listing order.
params_dict = {}
for _group in _PARAM_GROUPS:
    params_dict.update(_group)

# NOTE(review): "??" looks like an unfilled key-number placeholder -- confirm
# the real key before this value is written anywhere.
params_key = "NZ_K??_auto_manage_params"
# JSON text of the whole parameter mapping.
params_value = json.dumps(params_dict)
...@@ -105,7 +105,7 @@ sql='''select advert_id,account_id, ...@@ -105,7 +105,7 @@ sql='''select advert_id,account_id,
when length(match_tag_nums)=22 then substr(match_tag_nums,13) when length(match_tag_nums)=22 then substr(match_tag_nums,13)
else match_tag_nums end match_tag_nums,avg(fee) fee else match_tag_nums end match_tag_nums,avg(fee) fee
from advert.dws_advert_order_wide_v4_level_6_di from advert.dws_advert_order_wide_v4_level_6_di
where dt>='{0}' and dt<='{1}' and advert_id is not null where dt>='{0}' and dt<='{1}' and advert_id is not null and fee>0
group by advert_id,account_id, group by advert_id,account_id,
case when length(match_tag_nums)=16 then substr(match_tag_nums,7) case when length(match_tag_nums)=16 then substr(match_tag_nums,7)
when length(match_tag_nums)=22 then substr(match_tag_nums,13) when length(match_tag_nums)=22 then substr(match_tag_nums,13)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment