Commit ae9e1e99 authored by mengxiangxuan's avatar mengxiangxuan

00

parent 47e321d9
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5.2 (D:\Program Files\anaconda3\python.exe)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5.2 (D:\Program Files\Anaconda3\python.exe)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
......@@ -2,7 +2,7 @@
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.5.2 (D:\Program Files\anaconda3\python.exe)" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Python 3.5.2 (D:\Program Files\Anaconda3\python.exe)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
......@@ -50,6 +50,9 @@ sql_ctr_roc = '''select ctr_label,pre_ctr,
WHEN rcmd_type=242 THEN 'deepFm2'
WHEN rcmd_type=243 THEN 'dcn'
WHEN rcmd_type=244 THEN 'dcn2'
WHEN rcmd_type=245 THEN 'dcn3'
WHEN rcmd_type=246 THEN 'xDeepFm2'
WHEN rcmd_type=247 THEN 'xDeepFm3'
END AS rcmd_name
from advert.dws_advert_order_wide_v4_level_6_di
where dt='{}' and pre_ctr is not null
......@@ -88,6 +91,9 @@ sql_cvr_roc = '''select cvr_label,pre_cvr,
WHEN rcmd_type=242 THEN 'deepFm2'
WHEN rcmd_type=243 THEN 'dcn'
WHEN rcmd_type=244 THEN 'dcn2'
WHEN rcmd_type=245 THEN 'dcn3'
WHEN rcmd_type=246 THEN 'xDeepFm2'
WHEN rcmd_type=247 THEN 'xDeepFm3'
END AS rcmd_name
from
(select * from advert.dws_advert_order_wide_v4_level_6_di where dt='{}' and charge_cnt>0) a
......@@ -116,15 +122,16 @@ cvr_roc_data=pd.DataFrame(cursor.fetchall(),
alg=['alg-4.0','alg-4.0.1',
'alg-4.0.2','alg-4.0.4','alg-4.1','alg-4.2.1',
'alg-610','alg-act-tab','alg-online-learn','alg-online-learn223',
'alg-610','alg-act-tab','alg-online-learn223',
'alg-online-learn224','alg-online-weight',
'alg-app-optimize', 'alg-act-count',
'alg-fm-backend','BTM_AND_PC_31','Material_reform',
'v501','v502','fix_bias_2','fix_bias_3','fnn2','deepFm2','dcn2']
'v501','v502','fix_bias_2','fix_bias_3',
'fnn2','deepFm2','dcn2','dcn3','xDeepFm2','xDeepFm3']
########roc曲线###=============================================================================
part=[(0,0),(0,1),(0,2),(0,3),(1,0),(1,1),(1,2),(1,3),(2,0),(2,1),(2,2),(2,3),(3,0),(3,1),(3,2),(3,3),
(4,0),(4,1),(4,2),(4,3),(5,0),(5,1),(5,2),(5,3),(6,0),(6,1),(6,2)]
(4,0),(4,1),(4,2),(4,3),(5,0),(5,1),(5,2),(5,3),(6,0),(6,1),(6,2),(6,3)]
#ctr
ctr_cnt=[]
plt.figure(figsize=(16,24))
......
......@@ -129,8 +129,6 @@ try:
except:
xiaoding.send_text(msg='候选集(无历史数据部分存储)程序异常!!!请排查!', at_mobiles=at_mobiles)
else:
xiaoding.send_text(msg='候选集(无历史数据部分)存储成功', at_mobiles=at_mobiles)
###########################################################################################
......@@ -324,8 +322,7 @@ try:
except:
xiaoding.send_text(msg='候选集(有历史数据部分存储)程序异常!!!请排查!', at_mobiles=at_mobiles)
else:
xiaoding.send_text(msg='候选集(有历史数据部分)存储成功', at_mobiles=at_mobiles)
##############################################################################################
###############################################################################################
......
This diff is collapsed.
import os
import pandas as pd
import numpy as np
from pyhive import hive
from sqlalchemy import create_engine
import pymysql
import redis
import datetime
import time
# Relative file names used further down (CSV / TXT) resolve against this directory.
os.chdir('/home/db_dlp/mengxiangxuan/auto_spread')
# Module-level Hive cursor; every query in this script is executed through it.
hive_connection = hive.connect(
    host='10.50.10.11',
    port=10000,
    username='mengxiangxuan',
    database='default',
)
cursor = hive_connection.cursor()
# Feature ids used as column labels for the sample frame.  They are applied
# positionally to the result of `sql1`, so the order here has to follow the
# order of that query's select list (column named in the trailing comment).
featrue_id = [
    "f101001",  # advert_id
    "f106001",  # account_id
    "f102001",  # match_tag_nums
    "f201001",  # app_id
    "f108001",  # slotid
    "f301001",  # activity_id
    "f501001",  # ua
    "f611001",  # put_index
    "f110001",  # times
    "f502001",  # gmt_create_hour
    "f505001",  # price_section
    "f502002",  # gmt_create_weekday
    "f601001",  # day_order_rank_level
    "f603001",  # day_activity_order_rank_level
    "f602001",  # order_rank_level
    "f604001",  # activity_order_rank_level
    "f605001",  # order_gmt_intervel_level
    "f606001",  # activity_order_gmt_intervel_level
    "f607001",  # activity_last_charge_status
    "f608001",  # last_charge_status
    "f609001",  # last_activity_equal_status
    "f503001",  # city_id
    "f306001",  # activity_use_type
    "f610001",  # day_last_match_tag_nums_equal_status
]
# Accuracy evaluation for the "no history" prediction: use historical data as
# samples to test how accurate the model is.
#
# The query reads partition dt='2018-11-11' of
# advert.dws_advert_order_wide_v4_level_6_di twice:
#   a - the 24 feature columns, filtered by rand()>0.5
#       (presumably keeps about half of the rows - TODO confirm);
#   b - up to 200 advert_ids (group by advert_id limit 200).  There is no
#       ORDER BY, so the query itself does not pin down which 200 are kept.
# The inner join keeps only the rows of `a` whose advert_id appears in `b`.
# The select list order matters: the result columns are labelled positionally
# with `featrue_id` after the fetch.
sql1='''
select a.* from
(select
advert_id,
account_id,
match_tag_nums,
app_id,
slotid,
activity_id,
ua,
put_index,
times,
gmt_create_hour,
price_section,
gmt_create_weekday,
day_order_rank_level,
day_activity_order_rank_level,
order_rank_level,
activity_order_rank_level,
order_gmt_intervel_level,
activity_order_gmt_intervel_level,
activity_last_charge_status,
last_charge_status,
last_activity_equal_status,
city_id,
activity_use_type,
day_last_match_tag_nums_equal_status
from advert.dws_advert_order_wide_v4_level_6_di
where dt='2018-11-11'
and rand()>0.5) a
inner join
(select advert_id,count(1) cnt
from advert.dws_advert_order_wide_v4_level_6_di
where dt='2018-11-11'
group by advert_id limit 200) b
on a.advert_id=b.advert_id
'''
# Run the sampling query and load the full result set into a frame; missing
# values are replaced with the literal string '.0' before anything else.
cursor.execute(sql1)
samples_df = pd.DataFrame(cursor.fetchall()).fillna(value='.0')
# Label the columns with the feature ids (positional match with the select list).
samples_df.columns = featrue_id
# Dump the evaluation samples as a pipe-separated file without the index column.
samples_df.to_csv("samples_eva.csv", sep='|', index=False)
# Load the comma-separated prediction file and give its three columns fixed names.
nolunch_pre_cvr = pd.read_table('slot_ad_cvr_eva.txt', sep=',')
nolunch_pre_cvr.columns = ['slotad', 'ctr', 'cvr']
# 'slotad' is split on '_' into the two join keys used by the merge further down.
slotad_parts = nolunch_pre_cvr['slotad'].str.split('_', expand=True)
nolunch_pre_cvr[['slotid', 'advert_id']] = slotad_parts
# Per (slotid, advert_id) on dt='2018-11-11', restricted to rows that have an
# order_id: the average stat_cvr and the ratio avg(pre_cvr)/avg(stat_cvr).
sql = '''
select slotid,advert_id,avg(stat_cvr),avg(pre_cvr)/avg(stat_cvr) from
advert.dws_advert_order_wide_v4_level_6_di
where dt='2018-11-11' and order_id is not null
group by slotid,advert_id
'''
cursor.execute(sql)
ad_pre_eva = pd.DataFrame(cursor.fetchall())
ad_pre_eva.columns = ['slotid','advert_id','ad_cvr','diff']
# Turn both join keys into strings so they can be merged with the keys split
# out of 'slotad'.  Only a *trailing* '.0' (float formatting of an integer id)
# is removed; a blanket replace('.0', '') would also eat a '.0' in the middle
# of a value (e.g. '12.05' -> '125').
ad_pre_eva['slotid'] = ad_pre_eva['slotid'].astype('str').map(
    lambda x: x[:-2] if x.endswith('.0') else x)
ad_pre_eva['advert_id'] = ad_pre_eva['advert_id'].astype('str')
# NOTE: the bare ratio expressions below are not stored or printed; their value
# is only visible when the script is run statement by statement in an
# interactive session.  `.loc` replaces the removed `.ix` indexer; for boolean
# masks the two select the same rows.
# Rows with diff < 1.5 divided by rows with diff > 0 (online model, all pairs).
ad_pre_eva.loc[ad_pre_eva['diff']<1.5].shape[0]/ad_pre_eva.loc[ad_pre_eva['diff']>0].shape[0]
# Keep only the (slotid, advert_id) pairs present in both frames and compare
# the file's predicted cvr with the observed average cvr.
ad_pre_cvr_eva = pd.merge(nolunch_pre_cvr, ad_pre_eva, how='inner', on=['slotid','advert_id'])
ad_pre_cvr_eva['ad_pre_diff']=ad_pre_cvr_eva['cvr']/ad_pre_cvr_eva['ad_cvr']
# Author's recorded results: overall 80%; high cvr 81%; low cvr<0.02 35%; cvr<0.01 10%
ratio_below_limit = ad_pre_cvr_eva['ad_pre_diff']<1.5
ratio_positive = ad_pre_cvr_eva['ad_pre_diff']>0
high_cvr = ad_pre_cvr_eva['ad_cvr']>0.05
low_cvr = ad_pre_cvr_eva['ad_cvr']<=0.02
# All merged pairs (author's note on this line: 26% accuracy).
ad_pre_cvr_eva.loc[ratio_below_limit].shape[0]/ad_pre_cvr_eva.loc[ratio_positive].shape[0]
# Same ratio restricted to pairs with ad_cvr > 0.05.
ad_pre_cvr_eva.loc[ratio_below_limit & high_cvr].shape[0]/ad_pre_cvr_eva.loc[ratio_positive & high_cvr].shape[0]
# Same ratio restricted to pairs with ad_cvr <= 0.02.
ad_pre_cvr_eva.loc[ratio_below_limit & low_cvr].shape[0]/ad_pre_cvr_eva.loc[ratio_positive & low_cvr].shape[0]
......@@ -43,7 +43,8 @@ sql2='''CREATE TABLE if not exists advert.dws_not_luanch_create_samples_mxx as
last_charge_status,
last_activity_equal_status,
city_id,
activity_use_type
activity_use_type,
day_last_match_tag_nums_equal_status
from (
select
app_id,
......@@ -65,7 +66,7 @@ sql2='''CREATE TABLE if not exists advert.dws_not_luanch_create_samples_mxx as
last_charge_status,
last_activity_equal_status,
city_id,
activity_use_type,
activity_use_type
rank_num,ROW_NUMBER() OVER(PARTITION BY slotid ORDER BY rank_num) rnb
from
(select *,cast(rand()*100000000 as double) rank_num from advert.dws_advert_order_wide_v4_level_6_di
......@@ -142,7 +143,9 @@ featrue_id=[
"f505001",
"f611001",
"f110001",
"f306001"]
"f306001",
"f610001"
]
featrue_name=[
"ua",
......@@ -167,7 +170,8 @@ featrue_name=[
"price_section",
"put_index",
"times",
"activity_use_type"]
"activity_use_type",
"day_last_match_tag_nums_equal_status"]
d=pd.DataFrame([featrue_id,featrue_name]).T
d.index=d[1]
......@@ -191,7 +195,8 @@ l=['app_id',
'last_charge_status',
'last_activity_equal_status',
'city_id',
"activity_use_type"]
"activity_use_type",
"day_last_match_tag_nums_equal_status"]
la=list(d.ix[l].ix[:,0])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment