Commit 7216c29b authored by mxx's avatar mxx

1

parent 5b5e25c3
......@@ -61,6 +61,9 @@ sql_ctr_roc = '''select ctr_label,pre_ctr,
WHEN rcmd_type=253 THEN 'xDeepFm4'
WHEN rcmd_type=254 THEN 'essm_deep1'
WHEN rcmd_type=255 THEN 'essm_deep2'
WHEN rcmd_type=258 THEN 'ffm001'
WHEN rcmd_type=259 THEN 'ffm004'
WHEN rcmd_type=260 THEN 'esmm003'
END AS rcmd_name
from advert.dws_advert_order_wide_v4_level_6_di
where dt='{}' and pre_ctr is not null
......@@ -110,6 +113,9 @@ sql_cvr_roc = '''select cvr_label,pre_cvr,
WHEN rcmd_type=253 THEN 'xDeepFm4'
WHEN rcmd_type=254 THEN 'essm_deep1'
WHEN rcmd_type=255 THEN 'essm_deep2'
WHEN rcmd_type=258 THEN 'ffm001'
WHEN rcmd_type=259 THEN 'ffm004'
WHEN rcmd_type=260 THEN 'esmm003'
END AS rcmd_name
from
(select * from advert.dws_advert_order_wide_v4_level_6_di where dt='{}' and charge_cnt>0) a
......@@ -145,12 +151,12 @@ alg=['alg-4.0','alg-4.0.1',
'v502','fix_bias_2','fix_bias_3',
'fnn2','deepFm2','dcn2','dcn3','xDeepFm2','xDeepFm3',
'essm_online1','essm_online2','Material_reform_online',
'xDeepFm4','essm_deep1','essm_deep2']
'xDeepFm4','essm_deep1','essm_deep2','ffm001','ffm004','esmm003']
########roc曲线###=============================================================================
part=[(0,0),(0,1),(0,2),(0,3),(1,0),(1,1),(1,2),(1,3),(2,0),(2,1),(2,2),(2,3),(3,0),(3,1),(3,2),(3,3),
(4,0),(4,1),(4,2),(4,3),(5,0),(5,1),(5,2),(5,3),(6,0),(6,1),(6,2),(6,3),(7,0),(7,1),(7,2),(7,3),
]
(8, 0),(8, 1),(8, 2),(8, 3)]
#ctr
ctr_cnt=[]
plt.figure(figsize=(16,24))
......@@ -162,7 +168,7 @@ for i in range(len(alg)):
ctr_auc=auc(fpr, tpr)
ctr_logloss=log_loss(y_ctr,scores_ctr)
ctr_cnt.append([alg[i], ctr_auc, len(y_ctr)])
plt.subplot2grid((7, 4), part[i])
plt.subplot2grid((9, 4), part[i])
plt.plot(fpr,tpr)
plt.title(alg[i])
plt.text(0.5,0.5,'auc={0} \nlog_loss={1} \ncnt={2}'.format(round(ctr_auc,4),ctr_logloss,len(y_ctr)))
......@@ -185,7 +191,7 @@ for i in range(len(alg)):
cvr_auc=auc(fpr, tpr)
cvr_logloss=log_loss(y_cvr,scores_cvr)
cvr_cnt.append([alg[i], cvr_auc, len(y_cvr)])
plt.subplot2grid((7, 4), part[i])
plt.subplot2grid((9, 4), part[i])
plt.plot(fpr,tpr)
plt.title(alg[i])
plt.text(0.5,0.5,'auc={0} \nlog_loss={1} \ncnt={2}'.format(round(cvr_auc,4),cvr_logloss,len(y_cvr)))
......
import os
import pandas as pd
import numpy as np
from pyhive import hive
from sqlalchemy import create_engine
import pymysql
import redis
import datetime
import time
# Work from the project directory so that relative output paths (the CSV
# written at the end of the script) resolve there.
os.chdir('/home/db_dlp/mengxiangxuan/auto_spread')

# One Hive cursor is shared by every query in this script.
cursor = hive.connect(
    host='10.50.10.11',
    port=10000,
    username='mengxiangxuan',
    database='default',
).cursor()

# Reference timestamp and its 'YYYY-MM-DD' rendering.
now = datetime.datetime.now()
today = now.strftime('%Y-%m-%d')

# Look-back offsets of 1, 3, 5, 7, 10 and 15 days.
delta1, delta3, delta5, delta7, delta10, delta15 = (
    datetime.timedelta(days=n_days) for n_days in (1, 3, 5, 7, 10, 15)
)

# The date strings that lie each of those offsets before `now`.
yestoday1, yestoday3, yestoday5, yestoday7, yestoday10, yestoday15 = (
    (now - offset).strftime('%Y-%m-%d')
    for offset in (delta1, delta3, delta5, delta7, delta10, delta15)
)
# Query 1: app targeting rows from ods_credits.tb_advert_target_app for the
# 2018-12-09 partition, left-joined to the orientation package table to pick
# up the package row id (`b.id`).  On the package side the join key is 0 for
# default packages (is_default=1) and the package id otherwise.
sql = '''
select a.*,b.id from
(select advert_id,orientation_package_id,app_ids from ods_credits.tb_advert_target_app where dt='2018-12-09') a
left outer join
(select id,advert_id,case when is_default=1 then 0 else id end orientation_package_id,is_default
from advert.dwd_advert_orientation_package_df where dt='2018-12-09') b
on a.advert_id=b.advert_id and a.orientation_package_id=b.orientation_package_id
'''
cursor.execute(sql)
# Name the columns in the constructor rather than assigning `.columns`
# afterwards: an empty result set builds a zero-column frame, and assigning
# four names to it raises a length-mismatch ValueError.
ad_orient1 = pd.DataFrame(
    cursor.fetchall(),
    columns=['advert_id', 'orientation_package_id', 'app_ids', 'id'],
)
##----------------------------
# Query 2: app ids reached through app packages (advert -> app package ->
# app_ids) for the 2018-12-09 partition.
# NOTE(review): the query returns a.orientation_package_id, which is labelled
# 'id' below and then matched against the package row id from the first
# query -- confirm both columns really hold the same identifier.
sql = '''
select a.advert_id,a.orientation_package_id,b.app_ids from
(select * from ods_credits.tb_advert_app_package where dt='2018-12-09') a
inner join
(select * from ods_credits.tb_app_package where dt='2018-12-09') b
on a.app_package_id=b.id
'''
cursor.execute(sql)
# Column names go to the constructor so that an empty result set still
# produces a correctly-labelled (empty) frame instead of raising when the
# names are assigned afterwards.
ad_orient2 = pd.DataFrame(
    cursor.fetchall(),
    columns=['advert_id', 'id', 'app_ids'],
)

# Combine both targeting sources; the outer join keeps plans that appear in
# only one of them.  The overlapping 'app_ids' column becomes app_ids_x
# (direct targeting) and app_ids_y (via app package).
ad_orient = pd.merge(ad_orient1, ad_orient2, how='outer', on=['advert_id', 'id'])
ad_orient = ad_orient.fillna(value='')

# Split the comma-separated id strings into lists.  A missing side was filled
# with '' above and therefore splits to [''], so a plan without any app on
# either side ends up as ['', ''] -- later code matches on exactly that form.
ad_orient['app_ids_1'] = ad_orient['app_ids_x'].map(lambda x: str(x).split(','))
ad_orient['app_ids_2'] = ad_orient['app_ids_y'].map(lambda x: str(x).split(','))
ad_orient['app_ids'] = ad_orient['app_ids_1'] + ad_orient['app_ids_2']
# Spend per advert configuration (advert_id + orientation_id), summed over
# the partitions strictly between 2018-12-02 and 2018-12-09.
sql = '''
select advert_id,orientation_id,sum(charge_fees) cost
from advert.dws_advert_order_wide_v4_level_3_di
where dt>'2018-12-02' and dt<'2018-12-09'
group by advert_id,orientation_id
'''
cursor.execute(sql)
# Column names go to the constructor so an empty result set does not raise
# when the names are attached.
ad_orient_cost = pd.DataFrame(
    cursor.fetchall(),
    columns=['advert_id', 'orientation_package_id', 'cost'],
)

# Compare the join keys as strings on both sides.
ad_orient_cost[['advert_id', 'orientation_package_id']] = (
    ad_orient_cost[['advert_id', 'orientation_package_id']].astype('str')
)
ad_orient[['advert_id', 'orientation_package_id']] = (
    ad_orient[['advert_id', 'orientation_package_id']].astype('str')
)
# Ids that went through a float column stringify as '123.0'.  Strip only a
# trailing '.0'; a plain str.replace would also remove '.0' elsewhere in the
# value.
ad_orient['orientation_package_id'] = ad_orient['orientation_package_id'].map(
    lambda x: x[:-2] if x.endswith('.0') else x
)
ad_orient = pd.merge(
    ad_orient, ad_orient_cost,
    how='left', on=['advert_id', 'orientation_package_id'],
)

# Keep configurations whose spend exceeds 100000.
# NOTE(review): the variable name says 1000 while the threshold is 100000 --
# presumably cost is stored in cents; confirm.
# `.loc` replaces `.ix` (removed in pandas 1.0), and `.copy()` makes the
# column assignments below write to an independent frame, not a slice.
ad_orient_cost_1000 = ad_orient.loc[ad_orient['cost'] > 100000].copy()
# String form of the id list; later code compares it with "['', '']".
ad_orient_cost_1000['app_ids_str'] = ad_orient_cost_1000['app_ids'].map(str)
# Count real app ids only.  The '' placeholders that stand for "no apps on
# this side" used to be counted too, inflating the count by one or two and
# misclassifying plans around the ">20 apps" boundary.
ad_orient_cost_1000['app_cnt'] = ad_orient_cost_1000['app_ids'].map(
    lambda ids: sum(1 for app_id in ids if app_id)
)
# Potential untargeted ("deliver to all apps") plans: both app-id lists are
# empty, i.e. the string form of app_ids is exactly "['', ''].
# NOTE(review): the original comment says cost covers 8 days, but the cost
# query filters dt>'2018-12-02' and dt<'2018-12-09' -- confirm the window.
# `.loc` replaces `.ix`, which was removed in pandas 1.0.
ad_orient_app0 = ad_orient_cost_1000.loc[
    ad_orient_cost_1000['app_ids_str'] == "['', '']"
]
# Total spend per advert.  reset_index() turns the advert_id group key into
# an ordinary column; copying the index into a column of the same name left
# 'advert_id' as both index level and column, which pd.merge(on='advert_id')
# rejects as ambiguous on current pandas.
ad_orient_0_apps = (
    ad_orient_app0['cost'].groupby(ad_orient_app0['advert_id']).sum().reset_index()
)
ad_orient0 = set(ad_orient_0_apps['advert_id'])
ad_orient_0_apps['isall'] = 1
# Plans that target more than 20 apps.
# `.loc` replaces `.ix`, which was removed in pandas 1.0.
ad_orient_app20 = ad_orient_cost_1000.loc[ad_orient_cost_1000['app_cnt'] > 20]
# Total spend per advert.  reset_index() turns the advert_id group key into
# an ordinary column; copying the index into a column of the same name left
# 'advert_id' as both index level and column, which pd.merge(on='advert_id')
# rejects as ambiguous on current pandas.
ad_orient_20_apps = (
    ad_orient_app20['cost'].groupby(ad_orient_app20['advert_id']).sum().reset_index()
)
ad_orient20 = set(ad_orient_20_apps['advert_id'])
ad_orient_20_apps['is20'] = 1
# Merge the two per-advert summaries: one row per advert that has an
# untargeted plan (isall), a plan targeting more than 20 apps (is20), or both.
# The index is dropped first so that 'advert_id' exists only as a column;
# when it is also the index name, current pandas raises
# "'advert_id' is both an index level and a column label, which is ambiguous".
orient20_or_all = pd.merge(
    ad_orient_0_apps.reset_index(drop=True),
    ad_orient_20_apps.reset_index(drop=True),
    how='outer',
    on=['advert_id'],
)
# Adverts present on one side only get 0 for the other side's flag and cost.
orient20_or_all = orient20_or_all.fillna(value=0)
# cost_x is the spend of the untargeted plans, cost_y that of the >20-app plans.
orient20_or_all['cost'] = orient20_or_all['cost_x'] + orient20_or_all['cost_y']
orient20_or_all[['advert_id', 'isall', 'is20', 'cost']].to_csv('orient20_or_all.csv', index=False)
# # Adverts that have both an untargeted plan and a plan targeting more than 20 apps (110 adverts)
# orient20_and_all=set(ad_orient_20_apps['advert_id']) & set(ad_orient_0_apps['advert_id'])
# # Untargeted-only adverts
# orient0=ad_orient0-orient20_and_all
# # Adverts that only target more than 20 apps
# orient20=ad_orient20-orient20_and_all
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment