Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tuia-alg-engineering-py
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
mengxiangxuan
tuia-alg-engineering-py
Commits
a9dc207f
Commit
a9dc207f
authored
Oct 15, 2018
by
mengxiangxuan
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
00
parent
38c4dde2
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
42 additions
and
14 deletions
+42
-14
ad_slot_set.py
auto-spread/auto_manage/ad_slot_set.py
+4
-3
samples_create.py
auto-spread/auto_manage/samples_create.py
+38
-11
No files found.
auto-spread/auto_manage/ad_slot_set.py
View file @
a9dc207f
...
@@ -25,6 +25,7 @@ cursor.execute(sql11)
...
@@ -25,6 +25,7 @@ cursor.execute(sql11)
df11
=
pd
.
DataFrame
(
cursor
.
fetchall
())
df11
=
pd
.
DataFrame
(
cursor
.
fetchall
())
df11
.
columns
=
[
'slotid'
,
'app_id'
,
'cnt'
]
df11
.
columns
=
[
'slotid'
,
'app_id'
,
'cnt'
]
df11
=
df11
.
ix
[
df11
[
'slotid'
]
.
notnull
()]
.
astype
(
'int'
)
df11
=
df11
.
ix
[
df11
[
'slotid'
]
.
notnull
()]
.
astype
(
'int'
)
df11
=
df11
.
ix
[
df11
[
'cnt'
]
>
10
]
#剔除流量很低的广告位
df11
=
df11
.
drop
([
'cnt'
],
axis
=
1
)
df11
=
df11
.
drop
([
'cnt'
],
axis
=
1
)
#全部有转化埋点非免费广告约1000
#全部有转化埋点非免费广告约1000
...
@@ -37,7 +38,7 @@ cursor.execute(sql12)
...
@@ -37,7 +38,7 @@ cursor.execute(sql12)
df12
=
pd
.
DataFrame
(
cursor
.
fetchall
())
df12
=
pd
.
DataFrame
(
cursor
.
fetchall
())
df12
.
columns
=
[
'advert_id'
]
df12
.
columns
=
[
'advert_id'
]
#全部ad_slot组合约
25
0w
#全部ad_slot组合约
30
0w
print
(
'all ad_slot'
)
print
(
'all ad_slot'
)
df1
=
pd
.
DataFrame
(
columns
=
[
'slotid'
,
'advert_id'
])
df1
=
pd
.
DataFrame
(
columns
=
[
'slotid'
,
'advert_id'
])
for
i
in
df12
[
'advert_id'
]:
for
i
in
df12
[
'advert_id'
]:
...
@@ -46,7 +47,7 @@ for i in df12['advert_id']:
...
@@ -46,7 +47,7 @@ for i in df12['advert_id']:
df1
=
df1
.
append
(
df_temp
)
df1
=
df1
.
append
(
df_temp
)
#有历史发券且置信或能拿到预估cvr的 ad+slot 约1
2
w
#有历史发券且置信或能拿到预估cvr的 ad+slot 约1
0
w
sql
=
'''
sql
=
'''
select advert_id,slotid
select advert_id,slotid
from (
from (
...
@@ -60,7 +61,7 @@ left outer join
...
@@ -60,7 +61,7 @@ left outer join
tmp.tmp_cpc_act_advert_df b
tmp.tmp_cpc_act_advert_df b
on a.advert_id = b.advert_id
on a.advert_id = b.advert_id
group by a.advert_id,a.slotid) t
group by a.advert_id,a.slotid) t
where
act_click_cnt>2 or
launch_cnt>100
where launch_cnt>100
'''
.
format
(
yestodayn
,
yestoday1
)
'''
.
format
(
yestodayn
,
yestoday1
)
cursor
.
execute
(
sql
)
cursor
.
execute
(
sql
)
ad_slot_launch
=
pd
.
DataFrame
(
cursor
.
fetchall
())
ad_slot_launch
=
pd
.
DataFrame
(
cursor
.
fetchall
())
...
...
auto-spread/auto_manage/samples_create.py
View file @
a9dc207f
...
@@ -72,7 +72,7 @@ sql2='''CREATE TABLE if not exists advert.dws_not_luanch_create_samples_mxx as
...
@@ -72,7 +72,7 @@ sql2='''CREATE TABLE if not exists advert.dws_not_luanch_create_samples_mxx as
where dt>='{0}' and dt<='{1}') c
where dt>='{0}' and dt<='{1}') c
distribute by slotid sort by slotid,rank_num desc
distribute by slotid sort by slotid,rank_num desc
)a
)a
where rnb<=
5
00'''
.
format
(
yestodayn
,
yestoday1
)
where rnb<=
2
00'''
.
format
(
yestodayn
,
yestoday1
)
#未定向组合
#未定向组合
# sql3='''drop table advert.dws_not_luanch_slot_ad_mxx'''
# sql3='''drop table advert.dws_not_luanch_slot_ad_mxx'''
# sql4='''create table advert.dws_not_luanch_slot_ad_mxx (slotid string,advert_id string)
# sql4='''create table advert.dws_not_luanch_slot_ad_mxx (slotid string,advert_id string)
...
@@ -100,15 +100,20 @@ cursor.execute(sql2)
...
@@ -100,15 +100,20 @@ cursor.execute(sql2)
###广告相关信息
###广告相关信息
sql
=
'''select distinct advert_id,account_id,case when length(match_tag_nums)=16 then substr(match_tag_nums,7)
sql
=
'''select advert_id,account_id,
case when length(match_tag_nums)=16 then substr(match_tag_nums,7)
when length(match_tag_nums)=22 then substr(match_tag_nums,13)
when length(match_tag_nums)=22 then substr(match_tag_nums,13)
else match_tag_nums end match_tag_nums
else match_tag_nums end match_tag_nums
,avg(fee) fee
from advert.dws_advert_order_wide_v4_level_6_di
from advert.dws_advert_order_wide_v4_level_6_di
where dt>='{0}' and dt<='{1}'
where dt>='{0}' and dt<='{1}' and advert_id is not null
and advert_id is not null'''
.
format
(
yestodayn
,
yestoday1
)
group by advert_id,account_id,
case when length(match_tag_nums)=16 then substr(match_tag_nums,7)
when length(match_tag_nums)=22 then substr(match_tag_nums,13)
else match_tag_nums end'''
.
format
(
yestodayn
,
yestoday1
)
cursor
.
execute
(
sql
)
cursor
.
execute
(
sql
)
ad_info
=
pd
.
DataFrame
(
cursor
.
fetchall
())
ad_info
=
pd
.
DataFrame
(
cursor
.
fetchall
())
ad_info
.
columns
=
[
'advert_id'
,
'account_id'
,
'match_tag_nums'
]
ad_info
.
columns
=
[
'advert_id'
,
'account_id'
,
'match_tag_nums'
,
'fee'
]
ad_info
=
ad_info
.
ix
[
ad_info
[
'fee'
]
>
0
]
ad_info_match_slot
=
pd
.
merge
(
ad_slot_df
,
ad_info
,
how
=
'left'
,
on
=
'advert_id'
)
ad_info_match_slot
=
pd
.
merge
(
ad_slot_df
,
ad_info
,
how
=
'left'
,
on
=
'advert_id'
)
ad_info_match_slot
[
'account_id'
]
=
ad_info_match_slot
[
'account_id'
]
.
fillna
(
value
=
0
)
.
astype
(
'int'
)
.
astype
(
'str'
)
ad_info_match_slot
[
'account_id'
]
=
ad_info_match_slot
[
'account_id'
]
.
fillna
(
value
=
0
)
.
astype
(
'int'
)
.
astype
(
'str'
)
...
@@ -198,16 +203,38 @@ cursor.execute(sql)
...
@@ -198,16 +203,38 @@ cursor.execute(sql)
df1
=
pd
.
DataFrame
(
cursor
.
fetchall
())
df1
=
pd
.
DataFrame
(
cursor
.
fetchall
())
df1
.
columns
=
list
(
la
)
df1
.
columns
=
list
(
la
)
df1
=
df1
.
astype
(
'str'
)
df1
=
df1
.
astype
(
'str'
)
df1
.
to_csv
(
'
not_luanch_scene
.csv'
,
index
=
False
,
sep
=
'|'
)
df1
.
to_csv
(
'
scene_info
.csv'
,
index
=
False
,
sep
=
'|'
)
#广告特征数据
#广告特征数据
df2
=
ad_info_match_slot
df2
=
ad_info_match_slot
df2
.
columns
=
[
'f108001'
,
'f101001'
,
'f106001'
,
'f102001'
]
df2
.
columns
=
[
'f108001'
,
'f101001'
,
'f106001'
,
'f102001'
,
'fee'
]
df2
=
df2
.
astype
(
'str'
)
df2
=
df2
.
astype
(
'str'
)
df2
.
to_csv
(
'not_luanch_ad_info.csv'
,
index
=
False
,
sep
=
'|'
)
df2
.
to_csv
(
'ad_info.csv'
,
index
=
False
,
sep
=
'|'
)
slotid
=
list
(
set
(
df1
[
'f108001'
])
&
set
(
df2
[
'f108001'
]))
pd
.
DataFrame
(
slotid
)
.
to_csv
(
'slotid.csv'
,
index
=
None
)
#[slot,配置]发券最小arpu
sql_min_arpu
=
'''
select slotid,min(arpu) min_arpu from
(select slotid,advert_id,orientation_id,
sum(charge_fees) cost,
count(1) launch_cnt,
sum(charge_fees)/count(1) arpu
from advert.dws_advert_order_wide_v4_level_6_di
where dt>='{0}' and dt<='{1}'
group by slotid,advert_id,orientation_id) a
where arpu>0 and launch_cnt>5
group by slotid
'''
.
format
(
yestodayn
,
yestoday1
)
cursor
.
execute
(
sql_min_arpu
)
slot_min_arpu
=
pd
.
DataFrame
(
cursor
.
fetchall
())
slot_min_arpu
.
columns
=
[
'slotid'
,
'min_arpu'
]
slot_min_arpu
=
slot_min_arpu
.
ix
[
pd
.
notnull
(
slot_min_arpu
[
'slotid'
])]
slot_min_arpu
[
'slotid'
]
=
slot_min_arpu
[
'slotid'
]
.
astype
(
'int'
)
.
astype
(
'str'
)
slotid_df
=
pd
.
DataFrame
(
list
(
set
(
df1
[
'f108001'
])
&
set
(
df2
[
'f108001'
])),
columns
=
[
'slotid'
])
slot_min_arpu
=
pd
.
merge
(
slot_min_arpu
,
slotid_df
,
how
=
'inner'
,
on
=
[
'slotid'
])
slot_min_arpu
.
to_csv
(
'slot_min_arpu.csv'
,
index
=
None
)
#############################
#############################
#java读取 场景info,广告info,slotid,拼接样本,流式处理
#java读取 场景info,广告info,slotid,拼接样本,流式处理
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment