Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tuia-alg-engineering-py
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
mengxiangxuan
tuia-alg-engineering-py
Commits
e7b2c71b
Commit
e7b2c71b
authored
Oct 19, 2018
by
mengxiangxuan
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
00
parent
a9dc207f
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
23 additions
and
17 deletions
+23
-17
roc_ks.py
alg-evaluat-system/model_evaluat/roc_ks.py
+12
-2
ad_slot_set.py
auto-spread/auto_manage/ad_slot_set.py
+4
-8
samples_create.py
auto-spread/auto_manage/samples_create.py
+7
-7
No files found.
alg-evaluat-system/model_evaluat/roc_ks.py
View file @
e7b2c71b
...
...
@@ -49,6 +49,7 @@ sql_ctr_roc = '''select ctr_label,pre_ctr,
WHEN rcmd_type=241 THEN 'fnn2'
WHEN rcmd_type=242 THEN 'deepFm2'
WHEN rcmd_type=243 THEN 'dcn'
WHEN rcmd_type=244 THEN 'dcn2'
END AS rcmd_name
from advert.dws_advert_order_wide_v4_level_6_di
where dt='{}' and pre_ctr is not null
...
...
@@ -86,6 +87,7 @@ sql_cvr_roc = '''select cvr_label,pre_cvr,
WHEN rcmd_type=241 THEN 'fnn2'
WHEN rcmd_type=242 THEN 'deepFm2'
WHEN rcmd_type=243 THEN 'dcn'
WHEN rcmd_type=244 THEN 'dcn2'
END AS rcmd_name
from
(select * from advert.dws_advert_order_wide_v4_level_6_di where dt='{}' and charge_cnt>0) a
...
...
@@ -104,13 +106,21 @@ cursor.execute(sql_cvr_roc)
cvr_roc_data
=
pd
.
DataFrame
(
cursor
.
fetchall
(),
columns
=
[
'cvr_label'
,
'pre_cvr'
,
'rcmd_name'
])
# alg=['alg-4.0','alg-4.0.1',
# 'alg-4.0.2','alg-4.0.4','alg-4.1','alg-4.2.1',
# 'alg-610','alg-act-tab','alg-online-learn','alg-online-learn223',
# 'alg-online-learn224','alg-online-weight',
# 'alg-app-optimize', 'alg-act-count',
# 'alg-fm-backend','BTM_AND_PC_31','BTM_AND_PC_32','BTM_AND_PC_33','Material_reform',
# 'v501','v502','fix_bias_2','fix_bias_3','fnn2','deepFm2','dcn']
alg
=
[
'alg-4.0'
,
'alg-4.0.1'
,
'alg-4.0.2'
,
'alg-4.0.4'
,
'alg-4.1'
,
'alg-4.2.1'
,
'alg-610'
,
'alg-act-tab'
,
'alg-online-learn'
,
'alg-online-learn223'
,
'alg-online-learn224'
,
'alg-online-weight'
,
'alg-app-optimize'
,
'alg-act-count'
,
'alg-fm-backend'
,
'BTM_AND_PC_31'
,
'
BTM_AND_PC_32'
,
'BTM_AND_PC_33'
,
'
Material_reform'
,
'v501'
,
'v502'
,
'fix_bias_2'
,
'fix_bias_3'
,
'fnn2'
,
'deepFm2'
,
'dcn'
]
'alg-fm-backend'
,
'BTM_AND_PC_31'
,
'Material_reform'
,
'v501'
,
'v502'
,
'fix_bias_2'
,
'fix_bias_3'
,
'fnn2'
,
'deepFm2'
,
'dcn
2
'
]
########roc曲线###=============================================================================
part
=
[(
0
,
0
),(
0
,
1
),(
0
,
2
),(
0
,
3
),(
1
,
0
),(
1
,
1
),(
1
,
2
),(
1
,
3
),(
2
,
0
),(
2
,
1
),(
2
,
2
),(
2
,
3
),(
3
,
0
),(
3
,
1
),(
3
,
2
),(
3
,
3
),
...
...
auto-spread/auto_manage/ad_slot_set.py
View file @
e7b2c71b
...
...
@@ -9,12 +9,12 @@ cursor = hive.connect(host='10.50.10.11', port=10000, username='mengxiangxuan',
now
=
datetime
.
datetime
.
now
()
today
=
now
.
strftime
(
'
%
Y-
%
m-
%
d'
)
delta1
=
datetime
.
timedelta
(
days
=
1
)
deltan
=
datetime
.
timedelta
(
days
=
7
)
deltan
=
datetime
.
timedelta
(
days
=
5
)
yestoday1
=
(
now
-
delta1
)
.
strftime
(
'
%
Y-
%
m-
%
d'
)
yestodayn
=
(
now
-
deltan
)
.
strftime
(
'
%
Y-
%
m-
%
d'
)
#所有ad+slot(ad有转化埋点且fee!=0)
#有发券广告位id约
35
00
#有发券广告位id约
23
00
print
(
'all slotid'
)
sql11
=
'''
select slotid,app_id,count(1) cnt from advert.dws_advert_order_wide_v4_level_3_di
...
...
@@ -25,7 +25,7 @@ cursor.execute(sql11)
df11
=
pd
.
DataFrame
(
cursor
.
fetchall
())
df11
.
columns
=
[
'slotid'
,
'app_id'
,
'cnt'
]
df11
=
df11
.
ix
[
df11
[
'slotid'
]
.
notnull
()]
.
astype
(
'int'
)
df11
=
df11
.
ix
[
df11
[
'cnt'
]
>
10
]
#剔除流量很低的广告位
df11
=
df11
.
ix
[
df11
[
'cnt'
]
>
10
0
]
#剔除流量很低的广告位
df11
=
df11
.
drop
([
'cnt'
],
axis
=
1
)
#全部有转化埋点非免费广告约1000
...
...
@@ -52,16 +52,12 @@ sql='''
select advert_id,slotid
from (
select a.advert_id,a.slotid,
sum(if(act_click_cnt is not null ,1,0)) as act_click_cnt,
count(1) as launch_cnt
from
(select * from advert.dws_advert_order_wide_v4_level_3_di
where dt>='{0}' and dt<='{1}') a
left outer join
tmp.tmp_cpc_act_advert_df b
on a.advert_id = b.advert_id
group by a.advert_id,a.slotid) t
where launch_cnt>
10
0
where launch_cnt>
5
0
'''
.
format
(
yestodayn
,
yestoday1
)
cursor
.
execute
(
sql
)
ad_slot_launch
=
pd
.
DataFrame
(
cursor
.
fetchall
())
...
...
auto-spread/auto_manage/samples_create.py
View file @
e7b2c71b
...
...
@@ -12,11 +12,11 @@ delta1 = datetime.timedelta(days=1)
deltan
=
datetime
.
timedelta
(
days
=
7
)
yestoday1
=
(
now
-
delta1
)
.
strftime
(
'
%
Y-
%
m-
%
d'
)
yestodayn
=
(
now
-
deltan
)
.
strftime
(
'
%
Y-
%
m-
%
d'
)
#from hdfs.client import Client
from
ad_slot_set
import
*
#from ad_slot_not_orient import *
cursor
=
hive
.
connect
(
host
=
'10.50.10.11'
,
port
=
10000
,
username
=
'mengxiangxuan'
,
database
=
'default'
)
.
cursor
()
#from hdfs.client import Client
#from ad_slot_not_orient import *
from
ad_slot_set
import
*
#流量信息
...
...
@@ -115,8 +115,8 @@ ad_info = pd.DataFrame(cursor.fetchall())
ad_info
.
columns
=
[
'advert_id'
,
'account_id'
,
'match_tag_nums'
,
'fee'
]
ad_info
=
ad_info
.
ix
[
ad_info
[
'fee'
]
>
0
]
ad_info_match_slot
=
pd
.
merge
(
ad_slot_df
,
ad_info
,
how
=
'
left
'
,
on
=
'advert_id'
)
ad_info_match_slot
[
'account_id'
]
=
ad_info_match_slot
[
'account_id'
]
.
fillna
(
value
=
0
)
.
astype
(
'int'
)
.
astype
(
'str'
)
ad_info_match_slot
=
pd
.
merge
(
ad_slot_df
,
ad_info
,
how
=
'
inner
'
,
on
=
'advert_id'
)
ad_info_match_slot
=
ad_info_match_slot
.
dropna
(
)
featrue_id
=
[
...
...
@@ -213,9 +213,9 @@ df2.to_csv('ad_info.csv',index=False,sep='|')
#[slot,配置]发券
最小
arpu
#[slot,配置]发券
平均
arpu
sql_min_arpu
=
'''
select slotid,
min
(arpu) min_arpu from
select slotid,
avg
(arpu) min_arpu from
(select slotid,advert_id,orientation_id,
sum(charge_fees) cost,
count(1) launch_cnt,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment