Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tuia-alg-engineering-py
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
mengxiangxuan
tuia-alg-engineering-py
Commits
ae9e1e99
Commit
ae9e1e99
authored
Nov 13, 2018
by
mengxiangxuan
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
00
parent
47e321d9
Changes
7
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
217 additions
and
91 deletions
+217
-91
misc.xml
.idea/misc.xml
+1
-1
tuia-alg-engineering-py.iml
.idea/tuia-alg-engineering-py.iml
+1
-1
roc_ks.py
alg-evaluat-system/model_evaluat/roc_ks.py
+10
-3
candidate_set.py
auto-spread/auto_manage/candidate_set.py
+1
-4
effect_eva.py
auto-spread/auto_manage/effect_eva.py
+80
-77
effect_eva2.0.py
auto-spread/auto_manage/effect_eva2.0.py
+114
-0
samples_create.py
auto-spread/auto_manage/samples_create.py
+10
-5
No files found.
.idea/misc.xml
View file @
ae9e1e99
<?xml version="1.0" encoding="UTF-8"?>
<project
version=
"4"
>
<component
name=
"ProjectRootManager"
version=
"2"
project-jdk-name=
"Python 3.5.2 (D:\Program Files\
a
naconda3\python.exe)"
project-jdk-type=
"Python SDK"
/>
<component
name=
"ProjectRootManager"
version=
"2"
project-jdk-name=
"Python 3.5.2 (D:\Program Files\
A
naconda3\python.exe)"
project-jdk-type=
"Python SDK"
/>
</project>
\ No newline at end of file
.idea/tuia-alg-engineering-py.iml
View file @
ae9e1e99
...
...
@@ -2,7 +2,7 @@
<module
type=
"PYTHON_MODULE"
version=
"4"
>
<component
name=
"NewModuleRootManager"
>
<content
url=
"file://$MODULE_DIR$"
/>
<orderEntry
type=
"jdk"
jdkName=
"Python 3.5.2 (D:\Program Files\
a
naconda3\python.exe)"
jdkType=
"Python SDK"
/>
<orderEntry
type=
"jdk"
jdkName=
"Python 3.5.2 (D:\Program Files\
A
naconda3\python.exe)"
jdkType=
"Python SDK"
/>
<orderEntry
type=
"sourceFolder"
forTests=
"false"
/>
</component>
</module>
\ No newline at end of file
alg-evaluat-system/model_evaluat/roc_ks.py
View file @
ae9e1e99
...
...
@@ -50,6 +50,9 @@ sql_ctr_roc = '''select ctr_label,pre_ctr,
WHEN rcmd_type=242 THEN 'deepFm2'
WHEN rcmd_type=243 THEN 'dcn'
WHEN rcmd_type=244 THEN 'dcn2'
WHEN rcmd_type=245 THEN 'dcn3'
WHEN rcmd_type=246 THEN 'xDeepFm2'
WHEN rcmd_type=247 THEN 'xDeepFm3'
END AS rcmd_name
from advert.dws_advert_order_wide_v4_level_6_di
where dt='{}' and pre_ctr is not null
...
...
@@ -88,6 +91,9 @@ sql_cvr_roc = '''select cvr_label,pre_cvr,
WHEN rcmd_type=242 THEN 'deepFm2'
WHEN rcmd_type=243 THEN 'dcn'
WHEN rcmd_type=244 THEN 'dcn2'
WHEN rcmd_type=245 THEN 'dcn3'
WHEN rcmd_type=246 THEN 'xDeepFm2'
WHEN rcmd_type=247 THEN 'xDeepFm3'
END AS rcmd_name
from
(select * from advert.dws_advert_order_wide_v4_level_6_di where dt='{}' and charge_cnt>0) a
...
...
@@ -116,15 +122,16 @@ cvr_roc_data=pd.DataFrame(cursor.fetchall(),
alg
=
[
'alg-4.0'
,
'alg-4.0.1'
,
'alg-4.0.2'
,
'alg-4.0.4'
,
'alg-4.1'
,
'alg-4.2.1'
,
'alg-610'
,
'alg-act-tab'
,
'alg-online-learn
'
,
'alg-online-learn
223'
,
'alg-610'
,
'alg-act-tab'
,
'alg-online-learn223'
,
'alg-online-learn224'
,
'alg-online-weight'
,
'alg-app-optimize'
,
'alg-act-count'
,
'alg-fm-backend'
,
'BTM_AND_PC_31'
,
'Material_reform'
,
'v501'
,
'v502'
,
'fix_bias_2'
,
'fix_bias_3'
,
'fnn2'
,
'deepFm2'
,
'dcn2'
]
'v501'
,
'v502'
,
'fix_bias_2'
,
'fix_bias_3'
,
'fnn2'
,
'deepFm2'
,
'dcn2'
,
'dcn3'
,
'xDeepFm2'
,
'xDeepFm3'
]
########roc曲线###=============================================================================
part
=
[(
0
,
0
),(
0
,
1
),(
0
,
2
),(
0
,
3
),(
1
,
0
),(
1
,
1
),(
1
,
2
),(
1
,
3
),(
2
,
0
),(
2
,
1
),(
2
,
2
),(
2
,
3
),(
3
,
0
),(
3
,
1
),(
3
,
2
),(
3
,
3
),
(
4
,
0
),(
4
,
1
),(
4
,
2
),(
4
,
3
),(
5
,
0
),(
5
,
1
),(
5
,
2
),(
5
,
3
),(
6
,
0
),(
6
,
1
),(
6
,
2
)]
(
4
,
0
),(
4
,
1
),(
4
,
2
),(
4
,
3
),(
5
,
0
),(
5
,
1
),(
5
,
2
),(
5
,
3
),(
6
,
0
),(
6
,
1
),(
6
,
2
)
,(
6
,
3
)
]
#ctr
ctr_cnt
=
[]
plt
.
figure
(
figsize
=
(
16
,
24
))
...
...
auto-spread/auto_manage/candidate_set.py
View file @
ae9e1e99
...
...
@@ -129,8 +129,6 @@ try:
except
:
xiaoding
.
send_text
(
msg
=
'候选集(无历史数据部分存储)程序异常!!!请排查!'
,
at_mobiles
=
at_mobiles
)
else
:
xiaoding
.
send_text
(
msg
=
'候选集(无历史数据部分)存储成功'
,
at_mobiles
=
at_mobiles
)
###########################################################################################
...
...
@@ -324,8 +322,7 @@ try:
except
:
xiaoding
.
send_text
(
msg
=
'候选集(有历史数据部分存储)程序异常!!!请排查!'
,
at_mobiles
=
at_mobiles
)
else
:
xiaoding
.
send_text
(
msg
=
'候选集(有历史数据部分)存储成功'
,
at_mobiles
=
at_mobiles
)
##############################################################################################
###############################################################################################
...
...
auto-spread/auto_manage/effect_eva.py
View file @
ae9e1e99
This diff is collapsed.
Click to expand it.
auto-spread/auto_manage/effect_eva2.0.py
0 → 100644
View file @
ae9e1e99
import
os
import
pandas
as
pd
import
numpy
as
np
from
pyhive
import
hive
from
sqlalchemy
import
create_engine
import
pymysql
import
redis
import
datetime
import
time
# Offline evaluation of the CVR predictions for adverts without launch history.
#
# Steps performed by this script:
#   1. Pull a random sample of historical order rows from Hive and dump them
#      to ``samples_eva.csv`` using the model's feature ids as column names.
#   2. Load ``slot_ad_cvr_eva.txt`` (per slot/advert predicted ctr and cvr).
#   3. Pull the per slot/advert average statistical CVR and the ratio
#      avg(pre_cvr) / avg(stat_cvr) from Hive for the same day.
#   4. Report which share of slot/advert pairs has a predicted-to-actual ratio
#      below 1.5, overall and for high / low CVR segments.


def _strip_trailing_dot_zero(text):
    """Drop one trailing '.0' (float rendering of an integer id) from *text*.

    Only the suffix is removed, so an id that merely contains '.0' somewhere
    in the middle is left untouched.
    """
    return text[:-2] if text.endswith('.0') else text


def _share_within_tolerance(frame, ratio_col, segment=None, upper=1.5):
    """Return the share of rows whose ratio is below *upper*.

    frame     -- DataFrame holding the ratio column
    ratio_col -- name of the column with the predicted / actual ratio
    segment   -- optional boolean mask restricting the rows that are counted
    upper     -- ratios strictly below this value count as acceptable

    The numerator counts rows with ratio < upper, the denominator counts rows
    with ratio > 0. NaN is returned when no row has a positive ratio, instead
    of raising ZeroDivisionError.
    """
    if segment is not None:
        frame = frame.loc[segment]
    acceptable = frame.loc[frame[ratio_col] < upper].shape[0]
    positive = frame.loc[frame[ratio_col] > 0].shape[0]
    if positive == 0:
        return float('nan')
    return acceptable / positive


os.chdir('/home/db_dlp/mengxiangxuan/auto_spread')

# Keep a handle on the connection so it can be closed once both queries ran.
connection = hive.connect(host='10.50.10.11', port=10000,
                          username='mengxiangxuan', database='default')
cursor = connection.cursor()

# Feature ids, listed in the same order as the columns selected by ``sql1``.
featrue_id = ['f101001', 'f106001', 'f102001', 'f201001', 'f108001', 'f301001',
              'f501001', 'f611001', 'f110001', 'f502001', 'f505001', 'f502002',
              'f601001', 'f603001', 'f602001', 'f604001', 'f605001', 'f606001',
              'f607001', 'f608001', 'f609001', 'f503001', 'f306001', 'f610001']

# Accuracy estimate for the "no history" model: historical rows are used as
# samples to test how accurate the model is.
sql1 = '''
select a.* from
(select
advert_id,
account_id,
match_tag_nums,
app_id,
slotid,
activity_id,
ua,
put_index,
times,
gmt_create_hour,
price_section,
gmt_create_weekday,
day_order_rank_level,
day_activity_order_rank_level,
order_rank_level,
activity_order_rank_level,
order_gmt_intervel_level,
activity_order_gmt_intervel_level,
activity_last_charge_status,
last_charge_status,
last_activity_equal_status,
city_id,
activity_use_type,
day_last_match_tag_nums_equal_status
from advert.dws_advert_order_wide_v4_level_6_di
where dt='2018-11-11'
and rand()>0.5) a
inner join
(select advert_id,count(1) cnt
from advert.dws_advert_order_wide_v4_level_6_di
where dt='2018-11-11'
group by advert_id limit 200) b
on a.advert_id=b.advert_id
'''

# Per slot/advert: average statistical CVR and the online predicted / actual
# CVR ratio for the evaluation day.
sql = '''
select slotid,advert_id,avg(stat_cvr),avg(pre_cvr)/avg(stat_cvr) from
advert.dws_advert_order_wide_v4_level_6_di
where dt='2018-11-11' and order_id is not null
group by slotid,advert_id
'''

try:
    cursor.execute(sql1)
    # Passing ``columns`` to the constructor keeps an empty result set from
    # failing with a column-count mismatch.
    samples_df = pd.DataFrame(cursor.fetchall(), columns=featrue_id)
    samples_df = samples_df.fillna(value='.0')
    samples_df.to_csv("samples_eva.csv", index=False, sep='|')

    nolunch_pre_cvr = pd.read_table('slot_ad_cvr_eva.txt', sep=',')
    nolunch_pre_cvr.columns = ['slotad', 'ctr', 'cvr']
    # 'slotad' is split on '_' into the slot id and the advert id.
    nolunch_pre_cvr[['slotid', 'advert_id']] = (
        nolunch_pre_cvr['slotad'].str.split('_', expand=True))

    cursor.execute(sql)
    ad_pre_eva = pd.DataFrame(
        cursor.fetchall(),
        columns=['slotid', 'advert_id', 'ad_cvr', 'diff'])
finally:
    cursor.close()
    connection.close()

# Both join keys become strings so they match the keys split out of 'slotad'.
ad_pre_eva['slotid'] = (
    ad_pre_eva['slotid'].astype('str').map(_strip_trailing_dot_zero))
ad_pre_eva['advert_id'] = ad_pre_eva['advert_id'].astype('str')

# Share of slot/advert pairs whose online pre_cvr / stat_cvr ratio is < 1.5.
online_share = _share_within_tolerance(ad_pre_eva, 'diff')
print('online pre_cvr/stat_cvr < 1.5: {:.2%}'.format(online_share))

ad_pre_cvr_eva = pd.merge(nolunch_pre_cvr, ad_pre_eva,
                          how='inner', on=['slotid', 'advert_id'])
ad_pre_cvr_eva['ad_pre_diff'] = ad_pre_cvr_eva['cvr'] / ad_pre_cvr_eva['ad_cvr']

# Author's recorded figures: overall 80%; high cvr 81%; low cvr (<0.02) 35%;
# cvr<0.01 10%.
overall_share = _share_within_tolerance(ad_pre_cvr_eva, 'ad_pre_diff')
print('no-history cvr/ad_cvr < 1.5 (all pairs): {:.2%}'.format(overall_share))

# Author's recorded figure: 26% accuracy.
high_cvr_share = _share_within_tolerance(
    ad_pre_cvr_eva, 'ad_pre_diff', segment=ad_pre_cvr_eva['ad_cvr'] > 0.05)
print('no-history cvr/ad_cvr < 1.5 (ad_cvr > 0.05): {:.2%}'.format(high_cvr_share))

low_cvr_share = _share_within_tolerance(
    ad_pre_cvr_eva, 'ad_pre_diff', segment=ad_pre_cvr_eva['ad_cvr'] <= 0.02)
print('no-history cvr/ad_cvr < 1.5 (ad_cvr <= 0.02): {:.2%}'.format(low_cvr_share))
auto-spread/auto_manage/samples_create.py
View file @
ae9e1e99
...
...
@@ -43,7 +43,8 @@ sql2='''CREATE TABLE if not exists advert.dws_not_luanch_create_samples_mxx as
last_charge_status,
last_activity_equal_status,
city_id,
activity_use_type
activity_use_type,
day_last_match_tag_nums_equal_status
from (
select
app_id,
...
...
@@ -65,7 +66,7 @@ sql2='''CREATE TABLE if not exists advert.dws_not_luanch_create_samples_mxx as
last_charge_status,
last_activity_equal_status,
city_id,
activity_use_type
,
activity_use_type
rank_num,ROW_NUMBER() OVER(PARTITION BY slotid ORDER BY rank_num) rnb
from
(select *,cast(rand()*100000000 as double) rank_num from advert.dws_advert_order_wide_v4_level_6_di
...
...
@@ -142,7 +143,9 @@ featrue_id=[
"f505001"
,
"f611001"
,
"f110001"
,
"f306001"
]
"f306001"
,
"f610001"
]
featrue_name
=
[
"ua"
,
...
...
@@ -167,7 +170,8 @@ featrue_name=[
"price_section"
,
"put_index"
,
"times"
,
"activity_use_type"
]
"activity_use_type"
,
"day_last_match_tag_nums_equal_status"
]
d
=
pd
.
DataFrame
([
featrue_id
,
featrue_name
])
.
T
d
.
index
=
d
[
1
]
...
...
@@ -191,7 +195,8 @@ l=['app_id',
'last_charge_status'
,
'last_activity_equal_status'
,
'city_id'
,
"activity_use_type"
]
"activity_use_type"
,
"day_last_match_tag_nums_equal_status"
]
la
=
list
(
d
.
ix
[
l
]
.
ix
[:,
0
])
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment