mengxiangxuan / tuia-alg-engineering-py · Commits · 62f80b41

Commit 62f80b41 authored Feb 28, 2019 by mxx
1 parent 0ba69625 · Changes 7
Showing 7 changed files with 1177 additions and 45 deletions (+1177, -45)
- alg-evaluat-system/model_evaluat/roc_ks.py (+19, -16)
- auto-spread/auto_manage/candidate_set_new.py (+369, -0)
- auto-spread/auto_manage/effect_eva.py (+220, -0)
- auto-spread/auto_manage/params_new.py (+78, -0)
- auto-spread/auto_manage/risk_data_remove.py (+56, -0)
- auto-spread/auto_manage/samples_create_new.py (+333, -0)
- auto-spread/auto_manage/test.py (+102, -29)
alg-evaluat-system/model_evaluat/roc_ks.py
# -*- coding: utf-8 -*-
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from pyhive import hive
...
...
@@ -10,12 +11,13 @@ from sklearn import metrics
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import log_loss
import os
os.chdir('/home/db_dlp/mengxiangxuan/model_evaluat')
now = datetime.datetime.now()
delta = datetime.timedelta(days=1)
yestoday = (now - delta).strftime('%Y-%m-%d')
dt = yestoday
dt = yestoday
cursor = hive.connect(host='10.50.10.11', port=10000, username='mengxiangxuan', database='default').cursor()
sql_ctr_roc = '''select ctr_label,pre_ctr,
...
...
@@ -66,6 +68,8 @@ sql_ctr_roc = '''select ctr_label,pre_ctr,
WHEN rcmd_type=258 THEN 'ffm001'
WHEN rcmd_type=259 THEN 'ffm004'
WHEN rcmd_type=260 THEN 'esmm003'
WHEN rcmd_type=261 THEN 'xDeepFm5'
WHEN rcmd_type=262 THEN 'xDeepFm6'
END AS rcmd_name
from advert.dws_advert_order_wide_v4_level_6_di
where dt='{}' and pre_ctr is not null
...
...
@@ -122,6 +126,7 @@ sql_cvr_roc = '''select cvr_label,pre_cvr,
WHEN rcmd_type=260 THEN 'esmm003'
WHEN rcmd_type=261 THEN 'xDeepFm5'
WHEN rcmd_type=262 THEN 'xDeepFm6'
END AS rcmd_name
from
(select * from advert.dws_advert_order_wide_v4_level_6_di
...
...
@@ -140,24 +145,22 @@ cursor.execute(sql_cvr_roc)
cvr_roc_data = pd.DataFrame(cursor.fetchall(), columns=['cvr_label', 'pre_cvr', 'rcmd_name'])
alg = ['alg-4.0', 'alg-4.0.1', 'alg-4.0.2', 'alg-4.1', 'alg-4.2.1', 'alg-act-tab', 'alg-online-learn223',
       'alg-online-learn224', 'alg-app-optimize', 'alg-act-count', 'alg-fm-backend', 'BTM_AND_PC_31', 'Material_reform',
#'alg-4.0',
alg = ['alg-4.0.1', 'alg-4.0.2', 'alg-4.1', 'alg-4.2.1', 'alg-online-learn223', 'alg-app-optimize', 'BTM_AND_PC_31',
       'fix_bias_2', 'fix_bias_3', 'ffm004', 'fnn2', 'deepFm2', 'dcn2', 'dcn3', 'xDeepFm2', 'xDeepFm3', 'essm_online1',
       'essm_online2', 'Material_reform_online', 'xDeepFm4', 'essm_deep1', 'essm_deep2', 'essm_deep3', 'esmm003',
       'BTM_AND_PC_61', 'BTM_AND_PC_62']
       'fnn2', 'dcn2', 'essm_online1', 'Material_reform_online', 'xDeepFm4', 'essm_deep3', 'essm_deep4']
########ROC curves###=============================================================================
part = [(0,0),(0,1),(0,2),(0,3),(1,0),(1,1),(1,2),(1,3),(2,0),(2,1),(2,2),(2,3),(3,0),(3,1),(3,2),(3,3),
        (4,0),(4,1),(4,2),(4,3),(5,0),(5,1),(5,2),(5,3),(6,0),(6,1),(6,2),(6,3),(7,0),(7,1),(7,2),(7,3),
        (8,0),(8,1),(8,2),(8,3)]
        (4,0),(4,1),(4,2),(4,3)]
#ctr
ctr_cnt = []
plt.figure(figsize=(16,32))
plt.figure(figsize=(16,20))
plt.style.use('ggplot')
for i in range(len(alg)):
    y_ctr = ctr_roc_data.ix[ctr_roc_data['rcmd_name'] == alg[i], 'ctr_label'].values
...
...
@@ -166,7 +169,7 @@ for i in range(len(alg)):
    ctr_auc = auc(fpr, tpr)
    ctr_logloss = log_loss(y_ctr, scores_ctr)
    ctr_cnt.append([alg[i], ctr_auc, len(y_ctr)])
    plt.subplot2grid((9,4), part[i])
    plt.subplot2grid((5,4), part[i])
    plt.plot(fpr, tpr)
    plt.title(alg[i])
    plt.text(0.5, 0.5, 'auc={0}\nlog_loss={1}\ncnt={2}'.format(round(ctr_auc, 4), ctr_logloss, len(y_ctr)))
...
...
@@ -180,7 +183,7 @@ pd.DataFrame(ctr_cnt,columns=['ctr_alg','auc','cnt']).to_csv('ctr_auc_cnt.csv')
#cvr
cvr_cnt = []
plt.figure(figsize=(16,32))
plt.figure(figsize=(16,20))
plt.style.use('ggplot')
for i in range(len(alg)):
    y_cvr = cvr_roc_data.ix[cvr_roc_data['rcmd_name'] == alg[i], 'cvr_label'].values
...
...
@@ -189,7 +192,7 @@ for i in range(len(alg)):
    cvr_auc = auc(fpr, tpr)
    cvr_logloss = log_loss(y_cvr, scores_cvr)
    cvr_cnt.append([alg[i], cvr_auc, len(y_cvr)])
    plt.subplot2grid((9,4), part[i])
    plt.subplot2grid((5,4), part[i])
    plt.plot(fpr, tpr)
    plt.title(alg[i])
    plt.text(0.5, 0.5, 'auc={0}\nlog_loss={1}\ncnt={2}'.format(round(cvr_auc, 4), cvr_logloss, len(y_cvr)))
...
...
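The roc_ks.py changes above extend the per-algorithm ROC evaluation: for each rcmd_name in alg, the script pulls labels and predicted probabilities from Hive, computes AUC and log loss, and drops the curve into one cell of the subplot grid. A minimal self-contained sketch of that metric step, with made-up labels and scores standing in for ctr_label / pre_ctr (the real arrays come from the elided lines):

# Sketch only: per-algorithm AUC and log loss, as computed inside the loops above.
import numpy as np
from sklearn.metrics import roc_curve, auc, log_loss

labels = np.array([0, 1, 1, 0, 1])              # hypothetical ctr_label values
scores = np.array([0.1, 0.8, 0.65, 0.3, 0.7])   # hypothetical pre_ctr values
fpr, tpr, _ = roc_curve(labels, scores)
print(auc(fpr, tpr), log_loss(labels, scores))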
auto-spread/auto_manage/candidate_set_new.py (new file, 0 → 100644)
(diff collapsed; not shown)
auto-spread/auto_manage/effect_eva.py
...
...
@@ -265,3 +265,223 @@ stat_slotad_cvr_merge.ix[(stat_slotad_cvr_merge['stat_cvr0_x']>0.05) & (stat_slo
#########################################################################################
## Scoping analysis for defining delivery targets
sql = '''
select app_id,slotid,advert_id,
sum(charge_fees) cost,
sum(act_click_cnt) convert,
avg(pre_cvr) pre_cvr,
sum(act_click_cnt)/sum(charge_cnt) stat_cvr_0,
avg(pre_cvr)/(sum(act_click_cnt)/sum(charge_cnt)) bias_0,
sum(charge_fees)/sum(act_click_cnt) costconvert,
avg(afee) afee
from advert.dws_advert_order_wide_v4_level_8_di
where dt>='2019-02-13' and dt<='2019-02-15'
and new_trade!='应用分发' and charge_type=1
and slotid is not null
group by app_id,slotid,advert_id
'''
cursor.execute(sql)
df = pd.DataFrame(cursor.fetchall())
df.columns = ['app_id', 'slotid', 'advert_id', 'cost', 'convert', 'pre_cvr', 'stat_cvr', 'bias', 'costconvert', 'afee']
df = df.ix[df['cost'] > 0]
df.ix[df['bias'] == 0].shape
df['cb_bias'] = df['costconvert'] / df['afee']
# cb_bias_level labels (kept in Chinese because they are data values):
#   有消耗无转化 = cost but no conversions; 成本偏差 = cost bias (costconvert / afee); 小于 = less than
df.ix[pd.isnull(df['costconvert']), 'cb_bias_level'] = '有消耗无转化'
df.ix[df['cb_bias'] > 1.2, 'cb_bias_level'] = '成本偏差>1.2'
df.ix[df['cb_bias'] > 1.5, 'cb_bias_level'] = '成本偏差>1.5'
df.ix[df['cb_bias'] > 1.7, 'cb_bias_level'] = '成本偏差>1.7'
df.ix[df['cb_bias'] > 2, 'cb_bias_level'] = '成本偏差>2'
df.ix[df['cb_bias'] > 2.5, 'cb_bias_level'] = '成本偏差>2.5'
df.ix[df['cb_bias'] < 1.2, 'cb_bias_level'] = '成本偏差小于1.2'
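# The chained .ix assignments above bucket cb_bias by overwriting earlier matches with
# later ones (e.g. a row with cb_bias = 3 ends up as '成本偏差>2.5'). Sketch only, not in
# the commit: the same tiering with pd.cut, written into a separate illustrative column
# (cb_bias_level_cut) so the original column is left untouched.
import numpy as np
cut_bins = [-np.inf, 1.2, 1.5, 1.7, 2, 2.5, np.inf]
cut_labels = ['成本偏差小于1.2', '成本偏差>1.2', '成本偏差>1.5', '成本偏差>1.7', '成本偏差>2', '成本偏差>2.5']
df['cb_bias_level_cut'] = pd.cut(df['cb_bias'], bins=cut_bins, labels=cut_labels).astype(str)
df.loc[df['costconvert'].isnull(), 'cb_bias_level_cut'] = '有消耗无转化'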
# Cost but no conversions
df.ix[pd.isnull(df['costconvert']), 'cost'].sum() / df['cost'].sum()
# 7.7% of total cost
# Share of combinations
df.ix[pd.isnull(df['costconvert'])].shape[0] / df.shape[0]
# 74% of combinations have cost but no conversions
df.ix[(pd.isnull(df['costconvert'])) & (df['cost'] > 10000)].shape[0] / df.shape[0]
# 0%
df.ix[(pd.isnull(df['costconvert'])) & (df['cost'] > 2000)].shape[0] / df.shape[0]
# 2.5%
df.ix[(pd.isnull(df['costconvert'])) & (df['cost'] > 1000)].shape[0] / df.shape[0]
# 7%
df.ix[pd.isnull(df['costconvert']), 'cost'].sum() / df['cost'].sum()
# 7.7%
df.ix[(pd.isnull(df['costconvert'])) & (df['cost'] > 2000), 'cost'].sum() / df['cost'].sum()
# 2.6%
df.ix[(pd.isnull(df['costconvert'])) & (df['cost'] > 1000), 'cost'].sum() / df['cost'].sum()
# 4%
# Conclusion: combinations with cost but no conversions are ~74% of combinations and ~7.7% of cost,
# and they are concentrated in low-confidence combinations.
# Overall tiering
df.groupby('cb_bias_level').size()
# Counts per tier:
# 成本偏差(1.2,1.5)      2049
# 成本偏差>1.5,1.7        805
# 成本偏差>1.7,2          791
# 成本偏差>2,2.5          706
# 成本偏差>2.5~           769
# 成本偏差~1.2          16412
# 有消耗无转化          62251
# Cost share per tier
df['cost'].groupby(df['cb_bias_level']).sum()
# 成本偏差>1.2,1.5     29361048.0
# 成本偏差>1.5,1.7      8748328.0
# 成本偏差>1.7,2        6154347.0
# 成本偏差>2,2.5        4257706.0
# 成本偏差>2.5~         4235043.0
# 成本偏差<1.2        218790894.0
# 有消耗无转化         22564716.0
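# Sketch only (not in the commit): the "cost share per tier" view as shares rather than
# absolute sums, derived from the same groupby as above.
cost_by_tier = df['cost'].groupby(df['cb_bias_level']).sum()
print((cost_by_tier / df['cost'].sum()).round(3))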
# Cost tiering across different prediction-bias ('bias') levels
df.ix[df['bias'] > 2.5, 'cost'].sum() / df['cost'].sum()
# 2%
df.ix[df['bias'] > 2.5].groupby('cb_bias_level').size()
# 成本偏差>1.2         43
# 成本偏差>1.5         43
# 成本偏差>1.7         93
# 成本偏差>2          221
# 成本偏差>2.5        552
# 成本偏差小于1.2      65
df.ix[df['bias'] > 2.5, 'cost'].groupby(df['cb_bias_level']).sum()
# 成本偏差>1.2      395990.0
# 成本偏差>1.5      292962.0
# 成本偏差>1.7      527933.0
# 成本偏差>2        979272.0
# 成本偏差>2.5     3012548.0
# 成本偏差小于1.2   669782.0
df.ix[(df['bias'] > 2) & (df['bias'] < 2.5), 'cost'].sum() / df['cost'].sum()
# 2%
df.ix[(df['bias'] > 2) & (df['bias'] < 2.5)].groupby('cb_bias_level').size()
# 成本偏差>1.2         87
# 成本偏差>1.5        106
# 成本偏差>1.7        203
# 成本偏差>2          252
# 成本偏差>2.5        107
# 成本偏差小于1.2      43
df.ix[(df['bias'] > 2) & (df['bias'] < 2.5), 'cost'].groupby(df['cb_bias_level']).sum()
# 成本偏差>1.2     1041230.0
# 成本偏差>1.5      899478.0
# 成本偏差>1.7     1379212.0
# 成本偏差>2       1328635.0
# 成本偏差>2.5      410938.0
# 成本偏差小于1.2   463864.0
df.ix[(df['bias'] > 1.7) & (df['bias'] < 2), 'cost'].sum() / df['cost'].sum()
# 2%
df.ix[(df['bias'] > 1.7) & (df['bias'] < 2)].groupby('cb_bias_level').size()
# 成本偏差>1.2        207
# 成本偏差>1.5        208
# 成本偏差>1.7        224
# 成本偏差>2          112
# 成本偏差>2.5         52
# 成本偏差小于1.2     101
df.ix[(df['bias'] > 1.7) & (df['bias'] < 2), 'cost'].groupby(df['cb_bias_level']).sum()
# 成本偏差>1.2     1499413.0
# 成本偏差>1.5     1201947.0
# 成本偏差>1.7     1265642.0
# 成本偏差>2        509800.0
# 成本偏差>2.5      436086.0
# 成本偏差小于1.2   847811.0
df.ix[(df['bias'] > 1.5) & (df['bias'] < 1.7), 'cost'].sum() / df['cost'].sum()
# 3%
df.ix[(df['bias'] > 1.5) & (df['bias'] < 1.7)].groupby('cb_bias_level').size()
# 成本偏差>1.2        350
# 成本偏差>1.5        173
# 成本偏差>1.7        108
# 成本偏差>2           45
# 成本偏差>2.5         21
# 成本偏差小于1.2     178
df.ix[(df['bias'] > 1.5) & (df['bias'] < 1.7), 'cost'].groupby(df['cb_bias_level']).sum()
# 成本偏差>1.2     3343895.0
# 成本偏差>1.5     1099647.0
# 成本偏差>1.7     1119278.0
# 成本偏差>2        585935.0
# 成本偏差>2.5      109465.0
# 成本偏差小于1.2  1463302.0
df.ix[(df['bias'] > 1.2) & (df['bias'] < 1.5), 'cost'].sum() / df['cost'].sum()
# 12%
df.ix[(df['bias'] > 1.2) & (df['bias'] < 1.5)].groupby('cb_bias_level').size()
# 成本偏差>1.2        797
# 成本偏差>1.5        166
# 成本偏差>1.7         91
# 成本偏差>2           36
# 成本偏差>2.5         17
# 成本偏差小于1.2    1016
df.ix[(df['bias'] > 1.2) & (df['bias'] < 1.5), 'cost'].groupby(df['cb_bias_level']).sum()
# 成本偏差>1.2     10675576.0
# 成本偏差>1.5      2071616.0
# 成本偏差>1.7       633924.0
# 成本偏差>2         328151.0
# 成本偏差>2.5        94828.0
# 成本偏差小于1.2  22603625.0
df.ix[df['bias'] < 1.2, 'cost'].sum() / df['cost'].sum()
# 71.5%
df.ix[df['bias'] < 1.2].groupby('cb_bias_level').size()
# 成本偏差>1.2        565
# 成本偏差>1.5        109
# 成本偏差>1.7         72
# 成本偏差>2           40
# 成本偏差>2.5         20
# 成本偏差小于1.2   15009
df.ix[df['bias'] < 1.2, 'cost'].groupby(df['cb_bias_level']).sum()
# 成本偏差>1.2      12404944.0
# 成本偏差>1.5       3182678.0
# 成本偏差>1.7       1228358.0
# 成本偏差>2          525913.0
# 成本偏差>2.5        171178.0
# 成本偏差小于1.2  192742510.0
# Performance of the expansion (拓新) candidates
sql = '''
select app_id,slotid,advert_id,
sum(charge_fees) cost,
sum(act_click_cnt) convert,
avg(pre_cvr) pre_cvr,
sum(act_click_cnt)/sum(charge_cnt) stat_cvr_0,
avg(pre_cvr)/(sum(act_click_cnt)/sum(charge_cnt)) bias_0,
sum(charge_fees)/sum(act_click_cnt) costconvert,
avg(afee) afee
from advert.dws_advert_order_wide_v4_level_8_di
where dt>='2019-02-14' and dt<='2019-02-16'
and new_trade!='应用分发' and charge_type=1
and slotid is not null
group by app_id,slotid,advert_id
'''
cursor.execute(sql)
df = pd.DataFrame(cursor.fetchall())
df.columns = ['app_id', 'slotid', 'advert_id', 'cost', 'convert', 'pre_cvr', 'stat_cvr', 'bias', 'costconvert', 'afee']
df[['app_id', 'slotid', 'advert_id']] = df[['app_id', 'slotid', 'advert_id']].astype('str')
dd = pd.read_csv('not_download_stat_slotad_cvr_old215.csv')
dd[['slotid', 'advert_id']] = dd[['slotid', 'advert_id']].astype('str')
df = pd.merge(df, dd, how='inner', on=['slotid', 'advert_id'])
df.groupby('cb_bias_level').size()
df = df.ix[df['cost'] > 0]
df.ix[df['bias'] == 0].shape
df['cb_bias'] = df['costconvert'] / df['afee']
df.ix[pd.isnull(df['costconvert']), 'cb_bias_level'] = '有消耗无转化'
df.ix[df['cb_bias'] > 1.2, 'cb_bias_level'] = '成本偏差>1.2'
df.ix[df['cb_bias'] > 1.5, 'cb_bias_level'] = '成本偏差>1.5'
df.ix[df['cb_bias'] > 1.7, 'cb_bias_level'] = '成本偏差>1.7'
df.ix[df['cb_bias'] > 2, 'cb_bias_level'] = '成本偏差>2'
df.ix[df['cb_bias'] > 2.5, 'cb_bias_level'] = '成本偏差>2.5'
df.ix[df['cb_bias'] < 1.2, 'cb_bias_level'] = '成本偏差小于1.2'
df.groupby('cb_bias_level').size()
# Counts per tier:
# 成本偏差>1.2         1986
# 成本偏差>1.5          756
# 成本偏差>1.7          792
# 成本偏差>2            651
# 成本偏差>2.5          819
# 成本偏差小于1.2     14863
# 有消耗无转化        53907
# Cost share per tier
df['cost'].groupby(df['cb_bias_level']).sum()
# 成本偏差>1.2      25590365.0
# 成本偏差>1.5       7804690.0
# 成本偏差>1.7       6907655.0
# 成本偏差>2         3673685.0
# 成本偏差>2.5       4701507.0
# 成本偏差<1.2     203751319.0
# 有消耗无转化      20987495.0
\ No newline at end of file
auto-spread/auto_manage/params_new.py (new file, 0 → 100644)
import redis
import json

params_dict = {
    "releaseTarget3feedBackParams": {
        "fuse1OrientCostG1dFactor": 20000,
        "fuse2OrientCostG1dFactor": 50000,
        "fuseOrientCostConvertBiasFactor": 2.0,
        "wSlotOrientationConfidenceFactor": 0.2,
        "wSlotOrientationCostConvertBiasFactor": 1.2,
        "bOrientConfidenceFactor": 1.0,
        "bOrientCostConvertBiasFactor": 1.2,
        "bSlotOrientationConfidenceFactor": 0.5,
        "bSlotOrientationCostConvertBiasFactor": 3.5
    },
    "releaseTarget2feedBackParams": {
        "fuse1OrientCostG1dFactor": 20000,
        "fuse2OrientCostG1dFactor": 50000,
        "fuseOrientCostConvertBiasFactor": 2.0,
        "wSlotOrientationConfidenceFactor": 0.2,
        "wSlotOrientationCostConvertBiasFactor": 1.1,
        "bOrientConfidenceFactor": 1.0,
        "bOrientCostConvertBiasFactor": 1.2,
        "bSlotOrientationConfidenceFactor": 0.5,
        "bSlotOrientationCostConvertBiasFactor": 3.5
    },
    "releaseTarget1feedBackParams": {
        "fuse1OrientCostG1dFactor": 20000,
        "fuse2OrientCostG1dFactor": 50000,
        "fuseOrientCostConvertBiasFactor": 2.0,
        "wSlotOrientationConfidenceFactor": 0.2,
        "wSlotOrientationCostConvertBiasFactor": 1.3,
        "bOrientConfidenceFactor": 1.0,
        "bOrientCostConvertBiasFactor": 1.3,
        "bSlotOrientationConfidenceFactor": 0.8,
        "bSlotOrientationCostConvertBiasFactor": 4.0
    },
    "releaseTarget3slotRecommendParams": {
        "startFactor": 0.5,
        "cpaBiasRatioFactor": 1.5,
        "cpaOrientRatioFactor": 1.0,
        "cpaBiasThresholdFactor": 2.5,
        "cpcTargetRatioFactor": 1.0,
        "cpcOrientRatioFactor": 0.1,
        "cpcBiasThresholdFactor": 1.5
    },
    "releaseTarget2slotRecommendParams": {
        "startFactor": 0.5,
        "cpaBiasRatioFactor": 0.8,
        "cpaOrientRatioFactor": 0.5,
        "cpaBiasThresholdFactor": 1.5,
        "cpcTargetRatioFactor": 0.8,
        "cpcOrientRatioFactor": 0.1,
        "cpcBiasThresholdFactor": 1
    },
    "releaseTarget1slotRecommendParams": {
        "startFactor": 0.6,
        "cpaBiasRatioFactor": 1.6,
        "cpaOrientRatioFactor": 1.0,
        "cpaBiasThresholdFactor": 3.0,
        "cpcTargetRatioFactor": 1.0,
        "cpcOrientRatioFactor": 0.1,
        "cpcBiasThresholdFactor": 1.5
    }
}

params_key = "NZ_K86_trusteeship_params"
params_value = json.dumps(params_dict)

pool = redis.ConnectionPool(host='r-bp18da0abeaddc94285.redis.rds.aliyuncs.com',
                            password='hteK73Zxx3ji9LGCy2jBAZDJ6', port=6379, db=0)
r = redis.Redis(connection_pool=pool)
r.set(params_key, params_value)
print(params_value)
\ No newline at end of file
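params_new.py stores all tuning parameters as one JSON blob under the key NZ_K86_trusteeship_params. A minimal sketch of how a consumer could read them back with the same connection settings (the reader side is an assumption, not part of this commit):

# Sketch only: reading the trusteeship params back from Redis.
import json
import redis

r = redis.Redis(host='r-bp18da0abeaddc94285.redis.rds.aliyuncs.com',
                password='hteK73Zxx3ji9LGCy2jBAZDJ6', port=6379, db=0)
raw = r.get("NZ_K86_trusteeship_params")
if raw is not None:
    params = json.loads(raw)
    # e.g. the CPA bias threshold used for releaseTarget1 slot recommendation
    print(params["releaseTarget1slotRecommendParams"]["cpaBiasThresholdFactor"])  # 3.0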
auto-spread/auto_manage/risk_data_remove.py (new file, 0 → 100644)
import pandas as pd
from pyhive import hive
import redis
import os

os.chdir('/home/db_dlp/mengxiangxuan/auto_spread')
cursor = hive.connect(host='10.50.10.11', port=10000, username='mengxiangxuan', database='default').cursor()

# Candidates with no history data
pre_slotad_stat_cvr = pd.read_csv('pre_slotad_stat_cvr.csv')
# Candidates with history data
not_download_stat_slotad_cvr = pd.read_csv('not_download_stat_slotad_cvr.csv')
# App-distribution (应用分发) candidates
historyDownload_stat_slotad_cvr = pd.read_csv('historyDownload_stat_slotad_cvr.csv')

# Risk-control (shielded) data
sql = '''
select slot_id,level,shield_advert from tmp.slot_shile_advert_merge
'''
cursor.execute(sql)
sheld_data = pd.DataFrame(cursor.fetchall())
sheld_data.columns = ['slotid', 'level', 'advert_id']

risk_data = sheld_data.ix[sheld_data['advert_id'] != -1]
risk_data['key'] = "NZ_K76_" + risk_data['slotid'].astype('str') + "_" + risk_data['advert_id'].astype('str')
risk_data['value'] = '{"cost20d":0.0,"cvrSet":["0.0","0","0","0","0"],"biasSet":["99.0","99","99","99","99"]}'

# Connect to the nezha Redis
pool = redis.ConnectionPool(host='r-bp18da0abeaddc94285.redis.rds.aliyuncs.com',
                            password='hteK73Zxx3ji9LGCy2jBAZDJ6', port=6379, db=0)
r = redis.Redis(connection_pool=pool)
pipe = r.pipeline(transaction=True)
for i in risk_data.index:
    key = risk_data.ix[i, 'key']
    value = risk_data.ix[i, 'value']
    pipe.set(key, value, ex=10)
    if i % 100 == 0:
        print(i)
pipe.execute()

risk_data_level6 = sheld_data.ix[sheld_data['advert_id'] == -1]
risk_pre = pd.merge(pre_slotad_stat_cvr, risk_data_level6, on=['slotid'], how='inner')
risk_history = pd.merge(not_download_stat_slotad_cvr, risk_data_level6, on=['slotid'], how='inner')
risk_down = pd.merge(historyDownload_stat_slotad_cvr, risk_data_level6, on=['slotid'], how='inner')
risk_data_l6 = pd.concat([risk_pre[['key', 'value']], risk_history[['key', 'value']], risk_down[['key', 'value']]])
risk_data_l6.index = range(risk_data_l6.shape[0])
for i in risk_data_l6.index:
    key = risk_data_l6.ix[i, 'key']
    value = risk_data_l6.ix[i, 'value']
    pipe.set(key, value, ex=10)
    if i % 100 == 0:
        print(i)
pipe.execute()
\ No newline at end of file
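risk_data_remove.py overwrites the stats entry for each shielded slot/advert pair with zeroed cvr values, bias 99, and a 10-second TTL, presumably so the pair drops out of the online candidate set almost immediately. A sketch of what one such entry looks like (the ids below are made up):

# Sketch only: one shielded entry in the NZ_K76_<slotid>_<advert_id> key space.
import json

key = "NZ_K76_{}_{}".format(12345, 67890)   # hypothetical slotid / advert_id
value = json.loads('{"cost20d":0.0,"cvrSet":["0.0","0","0","0","0"],"biasSet":["99.0","99","99","99","99"]}')
print(key, value["biasSet"])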
auto-spread/auto_manage/samples_create_new.py (new file, 0 → 100644)
(diff collapsed; not shown)
auto-spread/auto_manage/test.py
...
...
@@ -27,32 +27,105 @@ yestoday10 = (now - delta10).strftime('%Y-%m-%d')
yestoday15 = (now - delta15).strftime('%Y-%m-%d')
sql = '''
select
a.advert_id,
a.data1 orientation_id,
dt,
targetapp_limit,
sum(charge_fees) cost,
sum(act_click_cnt) convert,
sum(charge_fees)/sum(act_click_cnt) cost_convert
from
(select * from logs.dwd_tuia_launch_log_di where dt>='2018-11-16' and dt<='2018-12-12') a
inner join
(select order_id,charge_fees,act_click_cnt from advert.dws_advert_order_wide_v4_level_6_di
where dt>='2018-11-16' and dt<='2018-12-12') b
on a.order_id=b.order_id
group by a.advert_id,a.data1,dt,targetapp_limit
'''
cursor.execute(sql)
ad_orient1 = pd.DataFrame(cursor.fetchall())
ad_orient1.columns = ['advert_id', 'orientation_id', 'dt', 'targetapp_limit', 'cost', 'convert', 'cost_convert']
ad_orient1.ix[ad_orient1['targetapp_limit'] == '2', 'istg'] = 1
ad_orient1['istg'] = ad_orient1['istg'].fillna(value=0)
ad_orient1['ad_ori'] = ad_orient1['advert_id'].astype('str') + '_' + ad_orient1['orientation_id'].astype('str')
a = pd.DataFrame(ad_orient1['istg'].groupby(ad_orient1['ad_ori']).sum())
a['ad_ori'] = a.index
ad_orient = pd.merge(ad_orient1, a.ix[a['istg'] > 0], how='inner', on=['ad_ori'])
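# Sketch only, not in the commit: the groupby/merge above keeps advert/orientation pairs
# that were ever run with targetapp_limit == '2'; the same filter can be written with
# transform, into an illustrative variable ad_orient_alt.
ad_orient_alt = ad_orient1[ad_orient1.groupby('ad_ori')['istg'].transform('sum') > 0]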
from dingtalkchatbot.chatbot import DingtalkChatbot
# WebHook URL
webhook = 'https://oapi.dingtalk.com/robot/send?access_token=4f28ce996ab4f2601c0362fbfd0d48f58b0250a76953ff117ca41e9f1ec8e565'
# Initialize the DingTalk bot (xiaoding)
xiaoding = DingtalkChatbot(webhook)
at_mobiles = ['18668032242']
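# Sketch only: the actual send call is outside the shown hunk; this illustrates how the
# bot initialized above is typically used (the message text is hypothetical; send_text
# and at_mobiles come from the dingtalkchatbot package).
xiaoding.send_text(msg='auto_manage: candidate set refreshed', at_mobiles=at_mobiles)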
## Non-app-distribution combinations with history data
sql_ad_slot = '''select
app_id,slotid,advert_id,
sum(charge_fees) cost,
avg(pre_cvr) pre_cvr_0,
sum(act_click_cnt)/sum(charge_cnt) stat_cvr_0,
avg(pre_cvr)/(sum(act_click_cnt)/sum(charge_cnt)) bias_0
from advert.dws_advert_order_wide_v4_level_8_di
where dt>='{0}' and dt<='{1}' and slotid is not null
and fee>0 and new_trade!='应用分发'
group by app_id,slotid,advert_id
'''.format(yestoday15, yestoday1)
cursor.execute(sql_ad_slot)
stat_slotad_cvr = pd.DataFrame(cursor.fetchall())
stat_slotad_cvr.columns = ['app_id', 'slotid', 'advert_id', 'cost20d', 'pre_cvr_0', 'stat_cvr_0', 'bias_0']
# Notes:
# - Where bias is null but the pair is confident, set bias to 5; otherwise it is not confident.
# - Move the confidence calculation online: cost / back-end target conversion bid
#   (the confidence formula differs by back-end type).
# - Treat cost = 0 as a special case; bias = 5 combined with the cost value is handled
#   specially, and the online side decides the targeting conditions.
stat_slotad_cvr['cost20d'] = stat_slotad_cvr['cost20d'].fillna(value=0)
stat_slotad_cvr['bias_1'] = None
stat_slotad_cvr['bias_2'] = None
stat_slotad_cvr['bias_3'] = None
stat_slotad_cvr['bias_4'] = None
stat_slotad_cvr[['bias_0', 'bias_1', 'bias_2', 'bias_3', 'bias_4']] = \
    stat_slotad_cvr[['bias_0', 'bias_1', 'bias_2', 'bias_3', 'bias_4']].fillna(value=99)
stat_slotad_cvr['bias_0'] = stat_slotad_cvr['bias_0'].round(6).astype('str')
stat_slotad_cvr['bias_1'] = stat_slotad_cvr['bias_1'].round(6).astype('str')
stat_slotad_cvr['bias_2'] = stat_slotad_cvr['bias_2'].round(6).astype('str')
stat_slotad_cvr['bias_3'] = stat_slotad_cvr['bias_3'].round(6).astype('str')
stat_slotad_cvr['bias_4'] = stat_slotad_cvr['bias_4'].round(6).astype('str')
stat_slotad_cvr['biasSet'] = stat_slotad_cvr['bias_0'] + ',' + stat_slotad_cvr['bias_1'] + ',' + stat_slotad_cvr['bias_2'] \
    + ',' + stat_slotad_cvr['bias_3'] + ',' + stat_slotad_cvr['bias_4']
stat_slotad_cvr['biasSet'] = stat_slotad_cvr['biasSet'].map(lambda x: x.split(','))
stat_slotad_cvr['stat_cvr_1'] = None
stat_slotad_cvr['stat_cvr_2'] = None
stat_slotad_cvr['stat_cvr_3'] = None
stat_slotad_cvr['stat_cvr_4'] = None
stat_slotad_cvr[['stat_cvr_0', 'stat_cvr_1', 'stat_cvr_2', 'stat_cvr_3', 'stat_cvr_4']] = \
    stat_slotad_cvr[['stat_cvr_0', 'stat_cvr_1', 'stat_cvr_2', 'stat_cvr_3', 'stat_cvr_4']].fillna(value=0)
stat_slotad_cvr['stat_cvr_0'] = stat_slotad_cvr['stat_cvr_0'].round(6).astype('str')
stat_slotad_cvr['stat_cvr_1'] = stat_slotad_cvr['stat_cvr_1'].round(6).astype('str')
stat_slotad_cvr['stat_cvr_2'] = stat_slotad_cvr['stat_cvr_2'].round(6).astype('str')
stat_slotad_cvr['stat_cvr_3'] = stat_slotad_cvr['stat_cvr_3'].round(6).astype('str')
stat_slotad_cvr['stat_cvr_4'] = stat_slotad_cvr['stat_cvr_4'].round(6).astype('str')
stat_slotad_cvr['cvrSet'] = stat_slotad_cvr['stat_cvr_0'] + ',' + stat_slotad_cvr['stat_cvr_1'] + ',' + stat_slotad_cvr['stat_cvr_2'] \
    + ',' + stat_slotad_cvr['stat_cvr_3'] + ',' + stat_slotad_cvr['stat_cvr_4']
stat_slotad_cvr['cvrSet'] = stat_slotad_cvr['cvrSet'].map(lambda x: x.split(','))
stat_slotad_cvr[['slotid', 'advert_id']] = stat_slotad_cvr[['slotid', 'advert_id']].astype('str')
# Agreed key format
stat_slotad_cvr['key'] = "NZ_K76_" + stat_slotad_cvr['slotid'] + "_" + stat_slotad_cvr['advert_id']
stat_slotad_cvr['value'] = stat_slotad_cvr[['cost20d', 'cvrSet', 'biasSet']].apply(lambda x: x.to_json(orient='index'), axis=1)
stat_slotad_cvr.index = range(stat_slotad_cvr.shape[0])
# Connect to the nezha Redis
pool = redis.ConnectionPool(host='r-bp18da0abeaddc94285.redis.rds.aliyuncs.com',
                            password='hteK73Zxx3ji9LGCy2jBAZDJ6', port=6379, db=0)
r = redis.Redis(connection_pool=pool)
pipe = r.pipeline(transaction=True)
# First remove yesterday's candidate set
print('stat_slotad_cvr_good2-----')
stat_slotad_cvr_old = pd.read_csv('stat_slotad_cvr.csv')
stat_slotad_cvr_old.to_csv('stat_slotad_cvr_old.csv', index=False)
for i in stat_slotad_cvr_old.index:
    key = stat_slotad_cvr_old.ix[i, 'key']
    value = stat_slotad_cvr_old.ix[i, 'value']
    pipe.set(key, value, ex=900)
    if i % 5000 == 0:
        pipe.execute()
        print(i)
        time.sleep(0.5)
pipe.execute()
# Convert to key: NZ_K76_<slotId>_<advertId>, value: JSON with cvr set, prediction-bias set and confidence
for i in stat_slotad_cvr.head(99).index:
    key = stat_slotad_cvr.ix[i, 'key']
    value = stat_slotad_cvr.ix[i, 'value']
    pipe.set(key, value, ex=432000)
    if i % 5000 == 0:
        pipe.execute()
        print(i)
        time.sleep(0.5)
pipe.execute()
stat_slotad_cvr.to_csv('stat_slotad_cvr.csv', index=False)
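For reference, a sketch of what one candidate-set entry produced by the to_json(orient='index') apply above might look like; the ids and statistics are made up:

# Sketch only: one NZ_K76_<slotId>_<advertId> entry as written by the loop above.
example_key = "NZ_K76_12345_67890"
example_value = '{"cost20d":1234.0,"cvrSet":["0.012345","0.0","0.0","0.0","0.0"],"biasSet":["1.2","99.0","99.0","99.0","99.0"]}'
# pipe.set(example_key, example_value, ex=432000)   # five-day TTL, matching the loop above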