Project: mengxiangxuan / tuia-alg-engineering-py

Commit f1f0bbbf, authored Oct 29, 2018 by mxx
1 parent: 18dbee01

Showing 3 changed files with 85 additions and 166 deletions:
- auto-spread/auto_manage/candidate_set.py (+52, -133)
- auto-spread/auto_manage/candidate_set_temp.py (+5, -5)
- auto-spread/auto_manage/params.py (+28, -28)
auto-spread/auto_manage/candidate_set.py (view file @ f1f0bbbf)
...
...
@@ -30,136 +30,56 @@ yestoday15 = (now - delta15).strftime('%Y-%m-%d')
# No historical data ---------------------------------------------------
# Parse out the predicted CVR at slot-advert granularity
nolunch_pre_cvr = pd.read_table(r'slot_ad_cvr.txt')
nolunch_pre_cvr.columns = ['c']
a = nolunch_pre_cvr['c'].map(lambda x: x.replace('{', '').replace('}', '').split(' '))
# a=nolunch_pre_cvr['c'].map(lambda x:x.replace('=',':'))
l = []
for i in a:
    l.extend(i)
ll = [x[:-1].split('=') for x in l]
ll = np.array(ll)
pre_slotad_stat_cvr = pd.DataFrame()
b = [s.split(',') for s in ll[:, 0]]
bb = np.array(b)
pre_slotad_stat_cvr['slotid'] = bb[:, 0]
pre_slotad_stat_cvr['advert_id'] = bb[:, 1]
pre_slotad_stat_cvr['cvr'] = ll[:, 1]
# pre_slotad_stat_cvr.head()
pre_slotad_stat_cvr['cvr'] = pre_slotad_stat_cvr['cvr'].astype('float')
# Match advert industry
sql = '''select id,
case when length(match_tag_nums)=16 then substr(match_tag_nums,7)
when length(match_tag_nums)=22 then substr(match_tag_nums,13)
else match_tag_nums end match_tag_nums
from advert.dwd_advert_df
where dt='{0}' and length(match_tag_nums) in (10,16,22) '''.format(yestoday1)
cursor.execute(sql)
advert_trid = pd.DataFrame(cursor.fetchall())
advert_trid.columns = ['advert_id', 'match_tag_nums']
advert_trid['advert_id'] = advert_trid['advert_id'].astype('str')
pre_slotad_stat_cvr = pd.merge(pre_slotad_stat_cvr, advert_trid, how='left', on=['advert_id'])
# Bias correction
# 1. Correct using the gap between predicted and observed values at industry + slot granularity in historical data
nolunch_pre_cvr = pd.read_table(r'slot_ad_cvr.txt', sep=',')
nolunch_pre_cvr.columns = ['slotad', 'ctr', 'cvr']
nolunch_pre_cvr[['slotid', 'advert_id']] = nolunch_pre_cvr['slotad'].str.split('_', expand=True)
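
As a point of reference, the parse above implicitly assumes slot_ad_cvr.txt is a comma-separated file whose first field is a "slotid_advertid" pair. The snippet below is only an illustrative sketch of that assumption with made-up rows, not code from this commit.

import io
import pandas as pd

# Hypothetical rows in the assumed "slotad,ctr,cvr" layout (slotad = "slotid_advertid").
sample = io.StringIO("2101_116833,0.034,0.0123\n2101_117902,0.029,0.0087\n")
df = pd.read_table(sample, sep=',', header=None, names=['slotad', 'ctr', 'cvr'])
df[['slotid', 'advert_id']] = df['slotad'].str.split('_', expand=True)
print(df[['slotid', 'advert_id', 'cvr']])
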
#
# # Match advert industry
# sql = '''select id,
# case when length(match_tag_nums)=16 then substr(match_tag_nums,7)
# when length(match_tag_nums)=22 then substr(match_tag_nums,13)
# else match_tag_nums end match_tag_nums
# from advert.dwd_advert_df
# where dt='{0}' and length(match_tag_nums) in (10,16,22) '''.format(yestoday1)
# cursor.execute(sql)
# advert_trid = pd.DataFrame(cursor.fetchall())
# advert_trid.columns = ['advert_id', 'match_tag_nums']
# advert_trid['advert_id'] = advert_trid['advert_id'].astype('str')
# pre_slotad_stat_cvr = pd.merge(pre_slotad_stat_cvr, advert_trid, how='left', on=['advert_id'])
#
# # Bias correction
# # 2. Correct using the gap between predicted and observed values at advert granularity in historical data
# sql = '''
# select match_tag_nums,app_id,avg(pre_cvr)/avg(stat_cvr) pre_diff from
# (select advert_id,app_id,pre_cvr,stat_cvr
# select advert_id,avg(pre_cvr)/avg(stat_cvr) pre_diff
# from logs.dwd_nezha_result_log_di
# where dt>='{0}' and dt<='{1}' and order_id is not null ) p1
# left outer join
# (select id,case when length(match_tag_nums)>10 then substr(match_tag_nums,7)
# else match_tag_nums end match_tag_nums
# from advert.dwd_advert_df where dt>='{0}' and dt<='{1}') p2
# on p1.advert_id=p2.id
# group by match_tag_nums,app_id
# where dt>='{0}' and dt<='{1}' and order_id is not null
# group by advert_id
# '''.format(yestoday7,yestoday1)
# cursor.execute(sql)
# trid_slot_pre_diff = pd.DataFrame(cursor.fetchall())
# trid_slot_pre_diff.columns = ['match_tag_nums', 'slotid', 'trid_slot_diff']
# # trid_slot_pre_diff=trid_slot_pre_diff.ix[trid_slot_pre_diff['trid_slot_diff']>2]
# trid_slot_pre_diff['slotid'] = trid_slot_pre_diff['slotid'].fillna(value=-11).astype('int').astype('str')
# 2. Correct using the gap between predicted and observed values at advert granularity in historical data
sql = '''
select advert_id,avg(pre_cvr)/avg(stat_cvr) pre_diff
from logs.dwd_nezha_result_log_di
where dt>='{0}' and dt<='{1}' and order_id is not null
group by advert_id
'''.format(yestoday3, yestoday1)
cursor.execute(sql)
ad_pre_diff = pd.DataFrame(cursor.fetchall())
ad_pre_diff.columns = ['advert_id', 'ad_diff']
# ad_pre_diff=ad_pre_diff.ix[ad_pre_diff['ad_diff']>2]
ad_pre_diff['advert_id'] = ad_pre_diff['advert_id'].astype('str')
# Predicted CVR bias correction ############
#pre_slotad_stat_cvr = pd.merge(pre_slotad_stat_cvr, trid_slot_pre_diff, how='left', on=['slotid', 'match_tag_nums'])
pre_slotad_stat_cvr = pd.merge(pre_slotad_stat_cvr, ad_pre_diff, how='left', on=['advert_id'])
# pre_slotad_stat_cvr.ix[pd.isnull(pre_slotad_stat_cvr['trid_slot_diff']), 'trid_slot_diff'] = pre_slotad_stat_cvr.ix[
#     pd.isnull(pre_slotad_stat_cvr['trid_slot_diff']), 'ad_diff']
# pre_slotad_stat_cvr.ix[pre_slotad_stat_cvr['trid_slot_diff'] > 2, 'fix_cvr'] = \
#     pre_slotad_stat_cvr.ix[pre_slotad_stat_cvr['trid_slot_diff'] > 2, 'cvr'] / pre_slotad_stat_cvr.ix[
#     pre_slotad_stat_cvr['trid_slot_diff'] > 2, 'trid_slot_diff']
pre_slotad_stat_cvr.ix[pre_slotad_stat_cvr['ad_diff'] > 1.2, 'fix_cvr'] = \
    pre_slotad_stat_cvr.ix[pre_slotad_stat_cvr['ad_diff'] > 1.2, 'cvr'] / pre_slotad_stat_cvr.ix[
        pre_slotad_stat_cvr['ad_diff'] > 1.2, 'ad_diff']
pre_slotad_stat_cvr.ix[pd.isnull(pre_slotad_stat_cvr['fix_cvr']), 'fix_cvr'] = pre_slotad_stat_cvr.ix[
    pd.isnull(pre_slotad_stat_cvr['fix_cvr']), 'cvr']
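
For clarity, here is a minimal stand-alone sketch of that correction rule with made-up numbers, written with .loc rather than the .ix indexer this file relies on (.ix was removed in later pandas releases): the predicted CVR is scaled back down by ad_diff only when prediction exceeds the observed rate by more than 20%, and uncorrected rows keep the raw prediction. This is an illustration, not code from the commit.

import pandas as pd

# Toy frame with hypothetical values; ad_diff = avg(pre_cvr) / avg(stat_cvr).
df = pd.DataFrame({'cvr': [0.030, 0.012], 'ad_diff': [1.5, 0.9]})

# Scale the prediction down only where it over-predicts by more than 20%.
over = df['ad_diff'] > 1.2
df.loc[over, 'fix_cvr'] = df.loc[over, 'cvr'] / df.loc[over, 'ad_diff']

# Rows without a correction fall back to the raw prediction.
df['fix_cvr'] = df['fix_cvr'].fillna(df['cvr'])
print(df)  # row 0 -> fix_cvr 0.02, row 1 -> fix_cvr 0.012
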
##### Match industry + slot bid
sql_fee1 = '''
select match_tag_nums,slotid,avg(fee) fee from
(select case when length(match_tag_nums)>10 then substr(match_tag_nums,7)
else match_tag_nums end match_tag_nums,
slotid,fee
from advert.dws_advert_order_wide_v4_level_3_di
where dt="{0}") a
group by match_tag_nums,slotid
'''.format(yestoday1)
cursor.execute(sql_fee1)
tride_slot_fee = pd.DataFrame(cursor.fetchall())
tride_slot_fee.columns = ['match_tag_nums', 'slotid', 'm_s_fee']
tride_slot_fee['slotid'] = tride_slot_fee['slotid'].astype('str').map(lambda x: x.replace('.0', ''))
pre_slotad_stat_cvr = pd.merge(pre_slotad_stat_cvr, tride_slot_fee, on=['match_tag_nums', 'slotid'], how='left')
##### Match advert bid
sql_fee2 = '''
select advert_id,avg(fee) fee
from advert.dws_advert_order_wide_v4_level_3_di
where dt="{0}"
group by advert_id
'''.format(yestoday1)
cursor.execute(sql_fee2)
advert_fee = pd.DataFrame(cursor.fetchall())
advert_fee.columns = ['advert_id', 'ad_fee']
advert_fee['advert_id'] = advert_fee['advert_id'].astype('str')
pre_slotad_stat_cvr = pd.merge(pre_slotad_stat_cvr, advert_fee, on=['advert_id'], how='left')
pre_slotad_stat_cvr['pre_launch_r'] = pre_slotad_stat_cvr['ad_fee'] / pre_slotad_stat_cvr['m_s_fee']
#### Filter the combinations predicted to be eligible for coupon issuing
pre_slotad_cvr = pre_slotad_stat_cvr.ix[(pre_slotad_stat_cvr['pre_launch_r'] > 1) & (pre_slotad_stat_cvr['cvr'] > 0.01)]
pre_slotad_cvr = pre_slotad_cvr[['slotid', 'advert_id', 'fix_cvr', 'ad_diff']]
pre_slotad_cvr['confidence'] = 0
pre_slotad_cvr.columns = ['slotid', 'advert_id', 'cvr', 'bias', 'confidence']
pre_slotad_cvr_good = pre_slotad_cvr.ix[pre_slotad_cvr['bias'] < 1.1]
pre_slotad_cvr_good = pre_slotad_cvr_good.sort_index(by=['bias'])
#pre_slotad_cvr_good.groupby('advert_id').size()
pre_slotad_cvr_good = pre_slotad_cvr_good.groupby('advert_id').head(20)
pre_slotad_cvr_good[['slotid', 'advert_id']] = pre_slotad_cvr_good[['slotid', 'advert_id']].astype('str')
pre_slotad_cvr_good['key'] = "NZ_K76_" + pre_slotad_cvr_good['slotid'] + "_" + pre_slotad_cvr_good['advert_id']
pre_slotad_cvr_good['value'] = pre_slotad_cvr_good[['cvr', 'bias', 'confidence']].apply(lambda x: x.to_json(orient='index'), axis=1)
# ad_pre_diff = pd.DataFrame(cursor.fetchall())
# ad_pre_diff.columns = ['advert_id', 'ad_diff']
# ad_pre_diff['advert_id'] = ad_pre_diff['advert_id'].astype('str')
#
# # Predicted CVR bias correction ############
# pre_slotad_stat_cvr = pd.merge(pre_slotad_stat_cvr, ad_pre_diff, how='left', on=['advert_id'])
#
# pre_slotad_stat_cvr.ix[pre_slotad_stat_cvr['ad_diff'] > 1.2, 'fix_cvr'] = \
# pre_slotad_stat_cvr.ix[pre_slotad_stat_cvr['ad_diff'] > 1.2, 'cvr'] / pre_slotad_stat_cvr.ix[
# pre_slotad_stat_cvr['ad_diff'] > 1.2, 'ad_diff']
#
# pre_slotad_stat_cvr.ix[pd.isnull(pre_slotad_stat_cvr['fix_cvr']), 'fix_cvr'] = pre_slotad_stat_cvr.ix[
# pd.isnull(pre_slotad_stat_cvr['fix_cvr']), 'cvr']
nolunch_pre_cvr = nolunch_pre_cvr.sort_index(by=['cvr'], ascending=False)
pre_slotad_cvr_good = nolunch_pre_cvr.groupby('advert_id').head(500)
pre_slotad_cvr_good['cvrSet'] = pre_slotad_cvr_good['cvr'].map(lambda x: [round(x, 6)] * 3)
pre_slotad_cvr_good['biasSet'] = [[-1.0, -1.0, -1.0]] * pre_slotad_cvr_good.shape[0]
pre_slotad_cvr_good['confidenceSet'] = [[0.0, 0.0, 0.0]] * pre_slotad_cvr_good.shape[0]
pre_slotad_cvr_good['priceSection'] = [[0.0, 0.0]] * pre_slotad_cvr_good.shape[0]
pre_slotad_cvr_good['key'] = "NZ_K076_" + pre_slotad_cvr_good['slotad']
pre_slotad_cvr_good['value'] = pre_slotad_cvr_good[['cvrSet', 'biasSet', 'confidenceSet', 'priceSection']].apply(lambda x: x.to_json(orient='index'), axis=1)
pre_slotad_cvr_good.index = range(pre_slotad_cvr_good.shape[0])
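
To make the Redis payload concrete, the hypothetical one-row example below (not from the commit) shows what Series.to_json(orient='index') produces for the list-valued columns assembled above: one JSON object per NZ_K076_* key.

import pandas as pd

# Hypothetical candidate row mirroring the columns built above.
row = pd.Series({
    'cvrSet': [0.0123, 0.0123, 0.0123],
    'biasSet': [-1.0, -1.0, -1.0],
    'confidenceSet': [0.0, 0.0, 0.0],
    'priceSection': [0.0, 0.0],
})
print(row.to_json(orient='index'))
# -> {"cvrSet":[0.0123,0.0123,0.0123],"biasSet":[-1.0,-1.0,-1.0],"confidenceSet":[0.0,0.0,0.0],"priceSection":[0.0,0.0]}
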
...
...
@@ -172,9 +92,9 @@ pipe = r.pipeline(transaction=True)
# First delete yesterday's candidate set
print('pre_slotad_cvr_good-----')
pre_slotad_cvr_good_old = pd.read_csv('pre_slotad_cvr_good.csv')
pre_slotad_cvr_good_old.to_csv('pre_slotad_cvr_good_old.csv', index=False)
print('pre_slotad_cvr_good2-----')
pre_slotad_cvr_good_old = pd.read_csv('pre_slotad_cvr_good2.csv')
pre_slotad_cvr_good_old.to_csv('pre_slotad_cvr_good_old2.csv', index=False)
for i in pre_slotad_cvr_good_old.index:
    key = pre_slotad_cvr_good_old.ix[i, 'key']
    value = pre_slotad_cvr_good_old.ix[i, 'value']
...
...
@@ -197,7 +117,7 @@ for i in pre_slotad_cvr_good.index:
pipe.execute()
pre_slotad_cvr_good.to_csv('pre_slotad_cvr_good.csv', index=False)
pre_slotad_cvr_good.to_csv('pre_slotad_cvr_good2.csv', index=False)
...
...
@@ -205,7 +125,6 @@ pre_slotad_cvr_good.to_csv('pre_slotad_cvr_good.csv', index=False)
##--------------------------------------------------------------------------------------
###---- Advert-granularity historical data
# Coupon-issuing prediction at high/mid/low advert bids; observed cvr and bias
sql_ad_fee0 = '''
select app_id,slotid,advert_id,
sum(charge_fees) cost,
...
...
@@ -215,7 +134,7 @@ avg(pre_cvr) pre_cvr,
avg(pre_cvr)/(sum(act_click_cnt)/sum(charge_cnt)) bias ,
avg(b.fee0) fee0
from
(select * from advert.dws_advert_order_wide_v4_level_3_di where dt>='{0}' and dt<='{1}') a
(select * from advert.dws_advert_order_wide_v4_level_6_di where dt>='{0}' and dt<='{1}') a
left outer join
(select advert_id,percentile(fee,0.33) fee0
from advert.dws_advert_order_wide_v4_level_3_di
...
...
@@ -370,7 +289,7 @@ stat_slotad_cvr_good_old.to_csv('stat_slotad_cvr_good_old2.csv', index=False)
for i in stat_slotad_cvr_good_old.index:
    key = stat_slotad_cvr_good_old.ix[i, 'key']
    value = stat_slotad_cvr_good_old.ix[i, 'value']
    pipe.set(key, value, ex=200)
    pipe.set(key, value, ex=600)
    if i % 2000 == 0:
        pipe.execute()
        print(i)
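
The write loops in this file and in candidate_set_temp.py all follow the same batching pattern: queue SET commands with a TTL on a Redis pipeline and flush every N rows. Below is a minimal sketch of that pattern; the local host, the sample key/value, and the final flush are assumptions for illustration, not code from this commit.

import redis

r = redis.Redis(host='localhost', port=6379, db=0)  # assumed local instance
pipe = r.pipeline(transaction=True)

rows = [("NZ_K76_2101_116833", '{"cvr":0.0123,"bias":1.05,"confidence":0}')]  # hypothetical data
for i, (key, value) in enumerate(rows):
    pipe.set(key, value, ex=600)   # queue the write with a 600 s TTL
    if i % 2000 == 0:
        pipe.execute()             # flush a full batch to the server
pipe.execute()                     # flush whatever remains after the loop
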
...
...
auto-spread/auto_manage/candidate_set_temp.py (view file @ f1f0bbbf)
...
...
@@ -179,7 +179,7 @@ for i in pre_slotad_cvr_good_old.index:
    key = pre_slotad_cvr_good_old.ix[i, 'key']
    value = pre_slotad_cvr_good_old.ix[i, 'value']
    pipe.set(key, value, ex=200)
    if i % 2000 == 0:
    if i % 5000 == 0:
        pipe.execute()
        print(i)
        time.sleep(0.5)
...
...
@@ -189,8 +189,8 @@ pipe.execute()
for i in pre_slotad_cvr_good.index:
    key = pre_slotad_cvr_good.ix[i, 'key']
    value = pre_slotad_cvr_good.ix[i, 'value']
    pipe.set(key, value, ex=90000)
    if i % 2000 == 0:
    pipe.set(key, value, ex=432000)
    if i % 5000 == 0:
        pipe.execute()
        print(i)
        time.sleep(0.5)
...
...
@@ -288,7 +288,7 @@ stat_slotad_cvr_good_old.to_csv('stat_slotad_cvr_good_old.csv', index=False)
for i in stat_slotad_cvr_good_old.index:
    key = stat_slotad_cvr_good_old.ix[i, 'key']
    value = stat_slotad_cvr_good_old.ix[i, 'value']
    pipe.set(key, value, ex=200)
    pipe.set(key, value, ex=600)
    if i % 2000 == 0:
        pipe.execute()
        print(i)
...
...
@@ -299,7 +299,7 @@ pipe.execute()
for i in stat_slotad_cvr_good.index:
    key = stat_slotad_cvr_good.ix[i, 'key']
    value = stat_slotad_cvr_good.ix[i, 'value']
    pipe.set(key, value, ex=187200)
    pipe.set(key, value, ex=432000)
    if i % 5000 == 0:
        pipe.execute()
        print(i)
...
...
auto-spread/auto_manage/params.py (view file @ f1f0bbbf)
import redis
import json

params_dict = {
    # recommend method
    'startFacter': 0.5,
    'cpaBiasRatioFacter': 1.0,
    'cpaOrientRatioFacter': 0.5,
    'cpaBiasThresholdFacter': 2.0,
    'cpcTargetRatioFacter': 0.5,
    'cpcOrientRatioFacter': 0.01,
    'cpcBiasThresholdFacter': 1.0,
    # circuit breaker
    'fuseOrientCostG1dFacter': 50000.0,
    'fuseOrientCostConvertbiasFacter': 2.0,
    # whitelist parameters
    'wSlotOrientationConfidenceFacter': 0.2,
    'wSlotOrientationCostConvertBiasFacter': 1.2,
    # high-confidence blacklist parameters
    'bOrientConfidenceFacter1': 1.0,
    'bOrientCostConvertbiasFacter1': 1.2,
    'bSlotOrientationConfidenceFacter1': 0.5,
    'bSlotOrientationCostConvertBiasFacter1': 3.5,
    # low-confidence blacklist parameters
    'bOrientConfidenceFacter2': 1.0,
    'bOrientCostConvertbiasFacter2': 1.5,
    'bSlotOrientationConfidenceFacter2': 0.5,
    'bSlotOrientRadioFacter2': 2.0,
    "feedBackParams": {
        "bOrientConfidenceFactor1": 1,
        "bOrientConfidenceFactor2": 1,
        "bOrientCostConvertBiasFactor1": 1.2,
        "bOrientCostConvertBiasFactor2": 1.5,
        "bSlotOrientRadioFactor2": 2,
        "bSlotOrientationConfidenceFactor1": 0.5,
        "bSlotOrientationConfidenceFactor2": 0.5,
        "bSlotOrientationCostConvertBiasFactor1": 3.5,
        "fuseOrientCostConvertBiasFactor": 2,
        "fuseOrientCostG1dFactor": 50000,
        "wSlotOrientationConfidenceFactor": 0.2,
        "wSlotOrientationCostConvertBiasFactor": 1.2
    },
    "slotRecommendParams": {
        "cpaBiasRatioFactor": 1,
        "cpaBiasThresholdFactor": 2,
        "cpaOrientRatioFactor": 0.6,
        "cpcBiasThresholdFactor": 1,
        "cpcOrientRatioFactor": 0.1,
        "cpcTargetRatioFactor": 0.5,
        "startFactor": 0.5
    }
}
params_key = "NZ_K??_auto_manage_params"
params_key = "NZ_K86_trusteeship_params"
params_value = json.dumps(params_dict)
pool = redis.ConnectionPool(host='r-bp18da0abeaddc94285.redis.rds.aliyuncs.com',
                            password='hteK73Zxx3ji9LGCy2jBAZDJ6', port=6379, db=0)
r = redis.Redis(connection_pool=pool)
r.set(params_key, params_value)
\ No newline at end of file
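
As a usage note, a consumer could read these parameters back from the same key and decode the JSON. The reader below is a hypothetical sketch reusing the connection settings of the writer above; it is not part of the repository.

import json
import redis

r = redis.Redis(host='r-bp18da0abeaddc94285.redis.rds.aliyuncs.com', port=6379, db=0,
                password='<same password as above>')
raw = r.get("NZ_K86_trusteeship_params")
if raw is not None:
    params = json.loads(raw)  # recovers the params_dict structure written by params.py
    print(params["slotRecommendParams"]["startFactor"])  # 0.5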