Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tuia-alg-engineering-py
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
mengxiangxuan
tuia-alg-engineering-py
Commits
7216c29b
Commit
7216c29b
authored
Dec 13, 2018
by
mxx
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
1
parent
5b5e25c3
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
123 additions
and
4 deletions
+123
-4
roc_ks.py
alg-evaluat-system/model_evaluat/roc_ks.py
+10
-4
test.py
auto-spread/auto_manage/test.py
+113
-0
No files found.
alg-evaluat-system/model_evaluat/roc_ks.py
View file @
7216c29b
...
...
@@ -61,6 +61,9 @@ sql_ctr_roc = '''select ctr_label,pre_ctr,
WHEN rcmd_type=253 THEN 'xDeepFm4'
WHEN rcmd_type=254 THEN 'essm_deep1'
WHEN rcmd_type=255 THEN 'essm_deep2'
WHEN rcmd_type=258 THEN 'ffm001'
WHEN rcmd_type=259 THEN 'ffm004'
WHEN rcmd_type=260 THEN 'esmm003'
END AS rcmd_name
from advert.dws_advert_order_wide_v4_level_6_di
where dt='{}' and pre_ctr is not null
...
...
@@ -110,6 +113,9 @@ sql_cvr_roc = '''select cvr_label,pre_cvr,
WHEN rcmd_type=253 THEN 'xDeepFm4'
WHEN rcmd_type=254 THEN 'essm_deep1'
WHEN rcmd_type=255 THEN 'essm_deep2'
WHEN rcmd_type=258 THEN 'ffm001'
WHEN rcmd_type=259 THEN 'ffm004'
WHEN rcmd_type=260 THEN 'esmm003'
END AS rcmd_name
from
(select * from advert.dws_advert_order_wide_v4_level_6_di where dt='{}' and charge_cnt>0) a
...
...
@@ -145,12 +151,12 @@ alg=['alg-4.0','alg-4.0.1',
'v502'
,
'fix_bias_2'
,
'fix_bias_3'
,
'fnn2'
,
'deepFm2'
,
'dcn2'
,
'dcn3'
,
'xDeepFm2'
,
'xDeepFm3'
,
'essm_online1'
,
'essm_online2'
,
'Material_reform_online'
,
'xDeepFm4'
,
'essm_deep1'
,
'essm_deep2'
]
'xDeepFm4'
,
'essm_deep1'
,
'essm_deep2'
,
'ffm001'
,
'ffm004'
,
'esmm003'
]
########roc曲线###=============================================================================
part
=
[(
0
,
0
),(
0
,
1
),(
0
,
2
),(
0
,
3
),(
1
,
0
),(
1
,
1
),(
1
,
2
),(
1
,
3
),(
2
,
0
),(
2
,
1
),(
2
,
2
),(
2
,
3
),(
3
,
0
),(
3
,
1
),(
3
,
2
),(
3
,
3
),
(
4
,
0
),(
4
,
1
),(
4
,
2
),(
4
,
3
),(
5
,
0
),(
5
,
1
),(
5
,
2
),(
5
,
3
),(
6
,
0
),(
6
,
1
),(
6
,
2
),(
6
,
3
),(
7
,
0
),(
7
,
1
),(
7
,
2
),(
7
,
3
),
]
(
8
,
0
),(
8
,
1
),(
8
,
2
),(
8
,
3
)
]
#ctr
ctr_cnt
=
[]
plt
.
figure
(
figsize
=
(
16
,
24
))
...
...
@@ -162,7 +168,7 @@ for i in range(len(alg)):
ctr_auc
=
auc
(
fpr
,
tpr
)
ctr_logloss
=
log_loss
(
y_ctr
,
scores_ctr
)
ctr_cnt
.
append
([
alg
[
i
],
ctr_auc
,
len
(
y_ctr
)])
plt
.
subplot2grid
((
7
,
4
),
part
[
i
])
plt
.
subplot2grid
((
9
,
4
),
part
[
i
])
plt
.
plot
(
fpr
,
tpr
)
plt
.
title
(
alg
[
i
])
plt
.
text
(
0.5
,
0.5
,
'auc={0}
\n
log_loss={1}
\n
cnt={2}'
.
format
(
round
(
ctr_auc
,
4
),
ctr_logloss
,
len
(
y_ctr
)))
...
...
@@ -185,7 +191,7 @@ for i in range(len(alg)):
cvr_auc
=
auc
(
fpr
,
tpr
)
cvr_logloss
=
log_loss
(
y_cvr
,
scores_cvr
)
cvr_cnt
.
append
([
alg
[
i
],
cvr_auc
,
len
(
y_cvr
)])
plt
.
subplot2grid
((
7
,
4
),
part
[
i
])
plt
.
subplot2grid
((
9
,
4
),
part
[
i
])
plt
.
plot
(
fpr
,
tpr
)
plt
.
title
(
alg
[
i
])
plt
.
text
(
0.5
,
0.5
,
'auc={0}
\n
log_loss={1}
\n
cnt={2}'
.
format
(
round
(
cvr_auc
,
4
),
cvr_logloss
,
len
(
y_cvr
)))
...
...
auto-spread/auto_manage/test.py
0 → 100644
View file @
7216c29b
import
os
import
pandas
as
pd
import
numpy
as
np
from
pyhive
import
hive
from
sqlalchemy
import
create_engine
import
pymysql
import
redis
import
datetime
import
time
# Run from the auto_spread working directory so that relative paths used by
# the script (e.g. the CSV written at the end) resolve there.
os.chdir('/home/db_dlp/mengxiangxuan/auto_spread')

# Connection settings for the Hive server; only the cursor is kept, and the
# connection object itself is not retained.
_HIVE_PARAMS = dict(
    host='10.50.10.11',
    port=10000,
    username='mengxiangxuan',
    database='default',
)
cursor = hive.connect(**_HIVE_PARAMS).cursor()
# Reference dates for the run, all rendered as 'YYYY-MM-DD' strings.
# A single format constant replaces the literal that was repeated on every
# strftime call.
_DATE_FMT = '%Y-%m-%d'

# Timestamp taken once so every derived date is relative to the same instant.
now = datetime.datetime.now()
today = now.strftime(_DATE_FMT)

# Look-back windows, in days.
delta1 = datetime.timedelta(days=1)
delta3 = datetime.timedelta(days=3)
delta5 = datetime.timedelta(days=5)
delta7 = datetime.timedelta(days=7)
delta10 = datetime.timedelta(days=10)
delta15 = datetime.timedelta(days=15)

# Date strings N days before `now` (names keep the original "yestoday"
# spelling so existing references continue to work).
yestoday1 = (now - delta1).strftime(_DATE_FMT)
yestoday3 = (now - delta3).strftime(_DATE_FMT)
yestoday5 = (now - delta5).strftime(_DATE_FMT)
yestoday7 = (now - delta7).strftime(_DATE_FMT)
yestoday10 = (now - delta10).strftime(_DATE_FMT)
yestoday15 = (now - delta15).strftime(_DATE_FMT)
# App ids targeted directly per (advert, orientation package), joined to the
# orientation-package table to pick up the package row id. In the joined
# table a default package (is_default=1) is matched as package id 0.
# NOTE(review): the partition date is hard-coded to 2018-12-09.
sql = '''
select a.*,b.id from
(select advert_id,orientation_package_id,app_ids from ods_credits.tb_advert_target_app where dt='2018-12-09') a
left outer join
(select id,advert_id,case when is_default=1 then 0 else id end orientation_package_id,is_default
from advert.dwd_advert_orientation_package_df where dt='2018-12-09') b
on a.advert_id=b.advert_id and a.orientation_package_id=b.orientation_package_id
'''
cursor.execute(sql)
# Column names go to the constructor: assigning `.columns` afterwards raises
# a length-mismatch ValueError when the query returns no rows, because an
# empty result produces a frame with zero columns.
ad_orient1 = pd.DataFrame(
    cursor.fetchall(),
    columns=['advert_id', 'orientation_package_id', 'app_ids', 'id'],
)
##----------------------------
# App ids attached through app packages: each advert/orientation-package row
# is joined to its app package to obtain that package's app_ids.
# NOTE(review): the partition date is hard-coded to 2018-12-09.
sql = '''
select a.advert_id,a.orientation_package_id,b.app_ids from
(select * from ods_credits.tb_advert_app_package where dt='2018-12-09') a
inner join
(select * from ods_credits.tb_app_package where dt='2018-12-09') b
on a.app_package_id=b.id
'''
cursor.execute(sql)
# The query's orientation_package_id is labelled 'id' here so it lines up
# with the 'id' column of ad_orient1 when the two frames are merged.
# Column names go to the constructor so an empty result still yields a frame
# with the expected columns instead of a length-mismatch ValueError.
ad_orient2 = pd.DataFrame(
    cursor.fetchall(),
    columns=['advert_id', 'id', 'app_ids'],
)
def _split_app_ids(raw):
    """Turn a comma-separated id string into a list of id strings."""
    return str(raw).split(',')


# Combine both sources of app targeting on (advert_id, id). The outer join
# keeps rows present in only one source; their missing values become ''.
ad_orient = pd.merge(
    ad_orient1,
    ad_orient2,
    how='outer',
    on=['advert_id', 'id'],
).fillna(value='')

# 'app_ids_x' comes from ad_orient1 and 'app_ids_y' from ad_orient2 (merge
# suffixes). An empty string splits to [''], so each list has at least one
# element.
ad_orient['app_ids_1'] = ad_orient['app_ids_x'].map(_split_app_ids)
ad_orient['app_ids_2'] = ad_orient['app_ids_y'].map(_split_app_ids)

# Element-wise list concatenation: every row gets the ids from both sources.
ad_orient['app_ids'] = ad_orient['app_ids_1'] + ad_orient['app_ids_2']
# Spend per advert orientation configuration.
# NOTE(review): the date filter is hard-coded and exclusive on both ends, so
# it covers 2018-12-03 through 2018-12-08 (6 days) — confirm that this is the
# intended window.
sql = '''
select advert_id,orientation_id,sum(charge_fees) cost
from advert.dws_advert_order_wide_v4_level_3_di
where dt>'2018-12-02' and dt<'2018-12-09'
group by advert_id,orientation_id
'''
cursor.execute(sql)
# orientation_id is labelled 'orientation_package_id' so it can be used as a
# join key against ad_orient. Column names go to the constructor so an empty
# result does not raise a length-mismatch ValueError.
ad_orient_cost = pd.DataFrame(
    cursor.fetchall(),
    columns=['advert_id', 'orientation_package_id', 'cost'],
)

# Join keys are compared as strings on both sides.
_COST_KEY_COLS = ['advert_id', 'orientation_package_id']
ad_orient_cost[_COST_KEY_COLS] = ad_orient_cost[_COST_KEY_COLS].astype('str')
ad_orient[_COST_KEY_COLS] = ad_orient[_COST_KEY_COLS].astype('str')


def _strip_float_suffix(text):
    """Drop one trailing '.0' (e.g. '12.0' -> '12'); leave other text as is."""
    # Only the suffix is removed; a blanket replace('.0', '') would also
    # delete '.0' occurring in the middle of a value.
    return text[:-2] if text.endswith('.0') else text


# Package ids may have been stringified from floats (presumably because the
# earlier outer merge introduced missing values — confirm); normalise them so
# they match the string form used in ad_orient_cost.
ad_orient['orientation_package_id'] = (
    ad_orient['orientation_package_id'].map(_strip_float_suffix)
)

# Left join: every configuration is kept; those without spend get NaN cost.
ad_orient = pd.merge(ad_orient, ad_orient_cost, how='left', on=_COST_KEY_COLS)
# Keep only configurations whose cost exceeds 100000 (rows with missing cost
# compare False and are dropped).
# NOTE(review): the variable name says 1000 but the threshold is 100000 —
# confirm the unit of `cost`.
# `.loc` replaces the removed `.ix` indexer, and `.copy()` makes the result an
# independent frame so the column assignments below do not write to a slice
# of ad_orient.
ad_orient_cost_1000 = ad_orient.loc[ad_orient['cost'] > 100000].copy()

# String form of the id list, compared against a literal further down.
ad_orient_cost_1000['app_ids_str'] = ad_orient_cost_1000['app_ids'].map(str)

# Number of entries in the combined id list.
# NOTE(review): the list is built from str.split(','), which yields [''] for
# an empty source, so this count appears to include '' placeholders and can
# exceed the number of real apps — confirm before relying on exact counts.
ad_orient_cost_1000['app_cnt'] = ad_orient_cost_1000['app_ids'].map(len)
# Potential untargeted ("run on all media") plans: configurations whose
# combined app id list is exactly two empty strings, i.e. neither source
# supplied any app id.
# NOTE(review): the original comment says cost is 8 days of spend — verify
# against the date range of the cost query.
# `.loc` replaces the removed `.ix` indexer.
ad_orient_app0 = ad_orient_cost_1000.loc[
    ad_orient_cost_1000['app_ids_str'] == "['', '']"
]

# Total cost per advert. reset_index() turns the group key into an ordinary
# 'advert_id' column with a plain integer index; keeping 'advert_id' as both
# the index name and a column makes a later merge on that key ambiguous.
ad_orient_0_apps = (
    ad_orient_app0.groupby('advert_id')['cost'].sum().reset_index()
)

# Adverts having at least one such configuration.
ad_orient0 = set(ad_orient_0_apps['advert_id'])
ad_orient_0_apps['isall'] = 1
# Plans that target more than 20 media (app_cnt > 20).
# `.loc` replaces the removed `.ix` indexer.
ad_orient_app20 = ad_orient_cost_1000.loc[ad_orient_cost_1000['app_cnt'] > 20]

# Total cost per advert. reset_index() turns the group key into an ordinary
# 'advert_id' column with a plain integer index; keeping 'advert_id' as both
# the index name and a column makes a later merge on that key ambiguous.
ad_orient_20_apps = (
    ad_orient_app20.groupby('advert_id')['cost'].sum().reset_index()
)

# Adverts having at least one such configuration.
ad_orient20 = set(ad_orient_20_apps['advert_id'])
ad_orient_20_apps['is20'] = 1
# Merge the two advert sets into one table.
# The indexes are dropped first so that 'advert_id' exists only as a column
# on each side; a key that is both an index level and a column label is
# rejected as ambiguous by newer pandas versions.
orient20_or_all = pd.merge(
    ad_orient_0_apps.reset_index(drop=True),
    ad_orient_20_apps.reset_index(drop=True),
    how='outer',
    on=['advert_id'],
)

# Adverts present on only one side get 0 for the other side's flag and cost.
orient20_or_all = orient20_or_all.fillna(value=0)

# Combined cost: the sum of both sides' costs for the advert.
orient20_or_all['cost'] = orient20_or_all['cost_x'] + orient20_or_all['cost_y']

orient20_or_all[['advert_id', 'isall', 'is20', 'cost']].to_csv(
    'orient20_or_all.csv', index=False
)

# # Adverts that have both an untargeted configuration and one targeting more than 20 media (110 adverts)
# orient20_and_all=set(ad_orient_20_apps['advert_id']) & set(ad_orient_0_apps['advert_id'])
# # Untargeted adverts
# orient0=ad_orient0-orient20_and_all
# # Adverts targeting more than 20 media
# orient20=ad_orient20-orient20_and_all
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment