Commit e69f4ef2 authored by mxx's avatar mxx

1

parent c408b2a6
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<project version="4"> <project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5.2 (D:\Program Files\Anaconda3\python.exe)" project-jdk-type="Python SDK" /> <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5.2 (D:\Program Files\anaconda3\python.exe)" project-jdk-type="Python SDK" />
</project> </project>
\ No newline at end of file
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
<module type="PYTHON_MODULE" version="4"> <module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager"> <component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" /> <content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.5.2 (D:\Program Files\Anaconda3\python.exe)" jdkType="Python SDK" /> <orderEntry type="jdk" jdkName="Python 3.5.2 (D:\Program Files\anaconda3\python.exe)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" /> <orderEntry type="sourceFolder" forTests="false" />
</component> </component>
</module> </module>
\ No newline at end of file
...@@ -98,8 +98,8 @@ pre_slotad_cvr_good_old.to_csv('pre_slotad_cvr_good_old2.csv', index=False) ...@@ -98,8 +98,8 @@ pre_slotad_cvr_good_old.to_csv('pre_slotad_cvr_good_old2.csv', index=False)
for i in pre_slotad_cvr_good_old.index: for i in pre_slotad_cvr_good_old.index:
key = pre_slotad_cvr_good_old.ix[i, 'key'] key = pre_slotad_cvr_good_old.ix[i, 'key']
value = pre_slotad_cvr_good_old.ix[i, 'value'] value = pre_slotad_cvr_good_old.ix[i, 'value']
pipe.set(key, value, ex=200) pipe.set(key, value, ex=900)
if i % 2000 == 0: if i % 5000 == 0:
pipe.execute() pipe.execute()
print(i) print(i)
time.sleep(0.5) time.sleep(0.5)
...@@ -109,8 +109,8 @@ pipe.execute() ...@@ -109,8 +109,8 @@ pipe.execute()
for i in pre_slotad_cvr_good.index: for i in pre_slotad_cvr_good.index:
key = pre_slotad_cvr_good.ix[i, 'key'] key = pre_slotad_cvr_good.ix[i, 'key']
value = pre_slotad_cvr_good.ix[i, 'value'] value = pre_slotad_cvr_good.ix[i, 'value']
pipe.set(key, value, ex=90000) pipe.set(key, value, ex=432000)
if i % 2000 == 0: if i % 5000 == 0:
pipe.execute() pipe.execute()
print(i) print(i)
time.sleep(0.5) time.sleep(0.5)
...@@ -137,7 +137,7 @@ from ...@@ -137,7 +137,7 @@ from
(select * from advert.dws_advert_order_wide_v4_level_6_di where dt>='{0}' and dt<='{1}') a (select * from advert.dws_advert_order_wide_v4_level_6_di where dt>='{0}' and dt<='{1}') a
left outer join left outer join
(select advert_id,percentile(fee,0.33) fee0 (select advert_id,percentile(fee,0.33) fee0
from advert.dws_advert_order_wide_v4_level_3_di from advert.dws_advert_order_wide_v4_level_6_di
where dt>='{0}' and dt<='{1}' and fee>0 where dt>='{0}' and dt<='{1}' and fee>0
group by advert_id) b group by advert_id) b
on a.advert_id=b.advert_id on a.advert_id=b.advert_id
...@@ -154,10 +154,10 @@ avg(pre_cvr) pre_cvr, ...@@ -154,10 +154,10 @@ avg(pre_cvr) pre_cvr,
avg(pre_cvr)/(sum(act_click_cnt)/sum(charge_cnt)) bias, avg(pre_cvr)/(sum(act_click_cnt)/sum(charge_cnt)) bias,
avg(b.fee1) fee1 avg(b.fee1) fee1
from from
(select * from advert.dws_advert_order_wide_v4_level_3_di where dt>='{0}' and dt<='{1}') a (select * from advert.dws_advert_order_wide_v4_level_6_di where dt>='{0}' and dt<='{1}') a
left outer join left outer join
(select advert_id,percentile(fee,0.66) fee1 (select advert_id,percentile(fee,0.66) fee1
from advert.dws_advert_order_wide_v4_level_3_di from advert.dws_advert_order_wide_v4_level_6_di
where dt>='{0}' and dt<='{1}' and fee>0 where dt>='{0}' and dt<='{1}' and fee>0
group by advert_id) b group by advert_id) b
on a.advert_id=b.advert_id on a.advert_id=b.advert_id
...@@ -171,14 +171,14 @@ sum(charge_fees)/sum(act_click_cnt) costconvert, ...@@ -171,14 +171,14 @@ sum(charge_fees)/sum(act_click_cnt) costconvert,
sum(act_click_cnt)/sum(charge_cnt) stat_cvr, sum(act_click_cnt)/sum(charge_cnt) stat_cvr,
avg(pre_cvr) pre_cvr, avg(pre_cvr) pre_cvr,
avg(pre_cvr)/(sum(act_click_cnt)/sum(charge_cnt)) bias avg(pre_cvr)/(sum(act_click_cnt)/sum(charge_cnt)) bias
from advert.dws_advert_order_wide_v4_level_3_di from advert.dws_advert_order_wide_v4_level_6_di
where dt>='{0}' and dt<='{1}' and slotid is not null where dt>='{0}' and dt<='{1}' and slotid is not null
group by app_id,slotid,advert_id group by app_id,slotid,advert_id
'''.format(yestoday15, yestoday1) '''.format(yestoday15, yestoday1)
sql_ad_costconvert='''select advert_id, sql_ad_costconvert='''select advert_id,
sum(charge_fees)/sum(act_click_cnt) ad_costconvert sum(charge_fees)/sum(act_click_cnt) ad_costconvert
from advert.dws_advert_order_wide_v4_level_3_di from advert.dws_advert_order_wide_v4_level_6_di
where dt>='{0}' and dt<='{1}' where dt>='{0}' and dt<='{1}'
group by advert_id group by advert_id
'''.format(yestoday15, yestoday1) '''.format(yestoday15, yestoday1)
...@@ -289,19 +289,19 @@ stat_slotad_cvr_good_old.to_csv('stat_slotad_cvr_good_old2.csv', index=False) ...@@ -289,19 +289,19 @@ stat_slotad_cvr_good_old.to_csv('stat_slotad_cvr_good_old2.csv', index=False)
for i in stat_slotad_cvr_good_old.index: for i in stat_slotad_cvr_good_old.index:
key = stat_slotad_cvr_good_old.ix[i, 'key'] key = stat_slotad_cvr_good_old.ix[i, 'key']
value = stat_slotad_cvr_good_old.ix[i, 'value'] value = stat_slotad_cvr_good_old.ix[i, 'value']
pipe.set(key, value, ex=600) pipe.set(key, value, ex=900)
if i % 2000 == 0: if i % 5000 == 0:
pipe.execute() pipe.execute()
print(i) print(i)
time.sleep(0.5) time.sleep(0.5)
pipe.execute() pipe.execute()
# 转化成key: NZ_K76_slotId_advertId, value:json cvr&预估偏差&置信度 # 转化成key: NZ_K076_slotId_advertId, value:json cvr&预估偏差&置信度
for i in stat_slotad_cvr_good.index: for i in stat_slotad_cvr_good.index:
key = stat_slotad_cvr_good.ix[i, 'key'] key = stat_slotad_cvr_good.ix[i, 'key']
value = stat_slotad_cvr_good.ix[i, 'value'] value = stat_slotad_cvr_good.ix[i, 'value']
pipe.set(key, value, ex=187200) pipe.set(key, value, ex=432000)
if i % 2000 == 0: if i % 5000 == 0:
pipe.execute() pipe.execute()
print(i) print(i)
time.sleep(0.5) time.sleep(0.5)
......
import os
import pandas as pd
import numpy as np
from pyhive import hive
from sqlalchemy import create_engine
import pymysql
import redis
import datetime
import time
from dingtalkchatbot.chatbot import DingtalkChatbot
# DingTalk robot used for success/failure notifications.
# WebHook URL of the robot.
webhook = 'https://oapi.dingtalk.com/robot/send?access_token=4f28ce996ab4f2601c0362fbfd0d48f58b0250a76953ff117ca41e9f1ec8e565'
# Initialise the robot ("xiaoding") and the list of mobiles to @-mention.
xiaoding = DingtalkChatbot(webhook)
at_mobiles = ['18668032242']

# All relative file paths used below resolve against this directory.
os.chdir('/home/db_dlp/mengxiangxuan/auto_spread')

# Hive cursor shared by every query in this script.
cursor = hive.connect(
    host='10.50.10.11',
    port=10000,
    username='mengxiangxuan',
    database='default',
).cursor()

# Reference timestamp; every date string below is derived from it.
now = datetime.datetime.now()
today = now.strftime('%Y-%m-%d')

# Look-back offsets of 1, 3, 5, 7, 10 and 15 days.
delta1, delta3, delta5, delta7, delta10, delta15 = (
    datetime.timedelta(days=n) for n in (1, 3, 5, 7, 10, 15)
)

# 'YYYY-MM-DD' strings for "now minus N days" (names keep the original spelling).
yestoday1, yestoday3, yestoday5, yestoday7, yestoday10, yestoday15 = (
    (now - d).strftime('%Y-%m-%d')
    for d in (delta1, delta3, delta5, delta7, delta10, delta15)
)
def _query_frame(hive_cursor, sql, columns):
    """Run *sql* on *hive_cursor* and return the fetched rows as a DataFrame.

    The column labels are assigned after construction, so an empty result
    set raises (label count mismatch) instead of silently flowing on.
    """
    hive_cursor.execute(sql)
    frame = pd.DataFrame(hive_cursor.fetchall())
    frame.columns = columns
    return frame


def _push_to_redis(redis_pipe, frame, ttl_seconds, batch_size=5000):
    """Queue ``SET key value EX ttl_seconds`` for every row of *frame*.

    *frame* must provide 'key' and 'value' columns. The pipeline is executed
    whenever the row position is a multiple of *batch_size* (position 0
    included) and once more after the last row.
    """
    for position, (key, value) in enumerate(zip(frame['key'], frame['value'])):
        redis_pipe.set(key, value, ex=ttl_seconds)
        if position % batch_size == 0:
            redis_pipe.execute()
            print(position)
            # NOTE(review): the scraped source lost its indentation; the progress
            # print and the pause are assumed to belong to this branch - confirm.
            time.sleep(0.5)
    redis_pipe.execute()


try:
    # ---- Slot/advert pairs without historical data ----------------------
    # Parse the predicted CVR per (slot, advert) pair out of the text dump.
    nolunch_pre_cvr = pd.read_table(r'slot_ad_stat_cvr.txt')
    nolunch_pre_cvr.columns = ['c']
    token_lists = nolunch_pre_cvr['c'].map(
        lambda x: x.replace('{', '').replace('}', '').split(' '))
    tokens = []
    for chunk in token_lists:
        tokens.extend(chunk)
    # NOTE(review): x[:-1] drops the last character of every token, presumably
    # a trailing comma - confirm the final token of each line carries one too.
    pairs = np.array([x[:-1].split('=') for x in tokens])
    slot_advert = np.array([s.split(',') for s in pairs[:, 0]])
    pre_slotad_stat_cvr = pd.DataFrame()
    pre_slotad_stat_cvr['slotid'] = slot_advert[:, 0]
    pre_slotad_stat_cvr['advert_id'] = slot_advert[:, 1]
    pre_slotad_stat_cvr['cvr'] = pairs[:, 1]
    pre_slotad_stat_cvr['cvr'] = pre_slotad_stat_cvr['cvr'].astype('float')

    # Attach the advert industry tag.
    sql = '''select id,
case when length(match_tag_nums)=16 then substr(match_tag_nums,7)
when length(match_tag_nums)=22 then substr(match_tag_nums,13)
else match_tag_nums end match_tag_nums
from advert.dwd_advert_df
where dt='{0}' and length(match_tag_nums) in (10,16,22) '''.format(yestoday1)
    advert_trid = _query_frame(cursor, sql, ['advert_id', 'match_tag_nums'])
    advert_trid['advert_id'] = advert_trid['advert_id'].astype('str')
    pre_slotad_stat_cvr = pd.merge(pre_slotad_stat_cvr, advert_trid, how='left', on=['advert_id'])

    # Bias correction.
    # An earlier variant corrected by the industry + slot level deviation
    # (avg(pre_cvr)/avg(stat_cvr) grouped by match_tag_nums, app_id); it was
    # disabled in the source, so only the advert-level correction is applied.
    # Advert-level deviation between predicted and statistical CVR.
    sql = '''
select advert_id,avg(pre_cvr)/avg(stat_cvr) pre_diff
from logs.dwd_nezha_result_log_di
where dt>='{0}' and dt<='{1}' and order_id is not null
group by advert_id
'''.format(yestoday3,yestoday1)
    ad_pre_diff = _query_frame(cursor, sql, ['advert_id', 'ad_diff'])
    ad_pre_diff['advert_id'] = ad_pre_diff['advert_id'].astype('str')

    # Correct the predicted CVR: divide by the deviation where it exceeds 1.2,
    # otherwise (including a missing deviation) keep the raw prediction.
    pre_slotad_stat_cvr = pd.merge(pre_slotad_stat_cvr, ad_pre_diff, how='left', on=['advert_id'])
    needs_fix = pre_slotad_stat_cvr['ad_diff'] > 1.2
    pre_slotad_stat_cvr['fix_cvr'] = pre_slotad_stat_cvr['cvr'].where(
        ~needs_fix, pre_slotad_stat_cvr['cvr'] / pre_slotad_stat_cvr['ad_diff'])

    # Average fee per industry + slot.
    sql_fee1='''
select match_tag_nums,slotid,avg(fee) fee from
(select case when length(match_tag_nums)>10 then substr(match_tag_nums,7)
else match_tag_nums end match_tag_nums,
slotid,fee
from advert.dws_advert_order_wide_v4_level_3_di
where dt="{0}") a
group by match_tag_nums,slotid
'''.format(yestoday1)
    tride_slot_fee = _query_frame(cursor, sql_fee1, ['match_tag_nums', 'slotid', 'm_s_fee'])
    # Strip the '.0' left by the string conversion so slot ids match the parsed dump.
    tride_slot_fee['slotid'] = tride_slot_fee['slotid'].astype('str').map(lambda x: x.replace('.0', ''))
    pre_slotad_stat_cvr = pd.merge(pre_slotad_stat_cvr, tride_slot_fee, on=['match_tag_nums', 'slotid'], how='left')

    # Average fee per advert.
    sql_fee2='''
select advert_id,avg(fee) fee
from advert.dws_advert_order_wide_v4_level_3_di
where dt="{0}"
group by advert_id
'''.format(yestoday1)
    advert_fee = _query_frame(cursor, sql_fee2, ['advert_id', 'ad_fee'])
    advert_fee['advert_id'] = advert_fee['advert_id'].astype('str')
    pre_slotad_stat_cvr = pd.merge(pre_slotad_stat_cvr, advert_fee, on=['advert_id'], how='left')
    pre_slotad_stat_cvr['pre_launch_r'] = pre_slotad_stat_cvr['ad_fee'] / pre_slotad_stat_cvr['m_s_fee']

    # Keep the pairs predicted to be launchable: the advert fee exceeds the
    # industry+slot fee and the raw predicted CVR is above 0.01.
    launchable = (pre_slotad_stat_cvr['pre_launch_r'] > 1) & (pre_slotad_stat_cvr['cvr'] > 0.01)
    # .copy() so the column added below lands on an independent frame.
    pre_slotad_cvr = pre_slotad_stat_cvr.loc[
        launchable, ['slotid', 'advert_id', 'fix_cvr', 'ad_diff']].copy()
    pre_slotad_cvr['confidence'] = 0
    pre_slotad_cvr.columns = ['slotid', 'advert_id', 'cvr', 'bias', 'confidence']

    # Per advert, keep the 100 rows with the smallest bias among bias < 1.1.
    pre_slotad_cvr_good = pre_slotad_cvr.loc[pre_slotad_cvr['bias'] < 1.1]
    pre_slotad_cvr_good = pre_slotad_cvr_good.sort_values(by=['bias'])
    pre_slotad_cvr_good = pre_slotad_cvr_good.groupby('advert_id').head(100).copy()
    pre_slotad_cvr_good[['slotid', 'advert_id']] = pre_slotad_cvr_good[['slotid', 'advert_id']].astype('str')
    pre_slotad_cvr_good['key'] = "NZ_K76_" + pre_slotad_cvr_good['slotid'] + "_" + pre_slotad_cvr_good['advert_id']
    pre_slotad_cvr_good['value'] = pre_slotad_cvr_good[['cvr', 'bias', 'confidence']].apply(
        lambda x: x.to_json(orient='index'), axis=1)
    pre_slotad_cvr_good.index = range(pre_slotad_cvr_good.shape[0])

    # Connect to the nezha Redis instance.
    # NOTE(review): the password is hard-coded in source; consider moving it
    # to configuration or an environment variable.
    pool = redis.ConnectionPool(host='r-bp18da0abeaddc94285.redis.rds.aliyuncs.com',
                                password='hteK73Zxx3ji9LGCy2jBAZDJ6', port=6379, db=0)
    r = redis.Redis(connection_pool=pool)
    pipe = r.pipeline(transaction=True)

    # Retire yesterday's candidate set first: its keys are re-written with a
    # 200 second expiry.
    print('pre_slotad_cvr_good-----')
    pre_slotad_cvr_good_old = pd.read_csv('pre_slotad_cvr_good.csv')
    pre_slotad_cvr_good_old.to_csv('pre_slotad_cvr_good_old.csv', index=False)
    _push_to_redis(pipe, pre_slotad_cvr_good_old, 200)

    # Store today's set as key: NZ_K76_slotId_advertId,
    # value: JSON of cvr, predicted bias and confidence.
    _push_to_redis(pipe, pre_slotad_cvr_good, 432000)
    pre_slotad_cvr_good.to_csv('pre_slotad_cvr_good.csv', index=False)
except Exception:
    # Catch Exception rather than everything, so SystemExit/KeyboardInterrupt
    # still propagate, and print the traceback so the alert can be investigated.
    import traceback
    traceback.print_exc()
    xiaoding.send_text(msg='候选集(无历史数据部分存储)程序异常!!!请排查!', at_mobiles=at_mobiles)
else:
    xiaoding.send_text(msg='候选集(无历史数据部分)存储成功', at_mobiles=at_mobiles)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment