Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tuia-alg-engineering-py
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
mengxiangxuan
tuia-alg-engineering-py
Commits
e69f4ef2
Commit
e69f4ef2
authored
Oct 31, 2018
by
mxx
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
1
parent
c408b2a6
Changes
5
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
519 additions
and
293 deletions
+519
-293
misc.xml
.idea/misc.xml
+1
-1
tuia-alg-engineering-py.iml
.idea/tuia-alg-engineering-py.iml
+1
-1
candidate_set.py
auto-spread/auto_manage/candidate_set.py
+14
-14
candidate_set_temp.py
auto-spread/auto_manage/candidate_set_temp.py
+292
-277
test_dingding.py
auto-spread/auto_manage/test_dingding.py
+211
-0
No files found.
.idea/misc.xml
View file @
e69f4ef2
<?xml version="1.0" encoding="UTF-8"?>
<project
version=
"4"
>
<component
name=
"ProjectRootManager"
version=
"2"
project-jdk-name=
"Python 3.5.2 (D:\Program Files\
A
naconda3\python.exe)"
project-jdk-type=
"Python SDK"
/>
<component
name=
"ProjectRootManager"
version=
"2"
project-jdk-name=
"Python 3.5.2 (D:\Program Files\
a
naconda3\python.exe)"
project-jdk-type=
"Python SDK"
/>
</project>
\ No newline at end of file
.idea/tuia-alg-engineering-py.iml
View file @
e69f4ef2
...
...
@@ -2,7 +2,7 @@
<module
type=
"PYTHON_MODULE"
version=
"4"
>
<component
name=
"NewModuleRootManager"
>
<content
url=
"file://$MODULE_DIR$"
/>
<orderEntry
type=
"jdk"
jdkName=
"Python 3.5.2 (D:\Program Files\
A
naconda3\python.exe)"
jdkType=
"Python SDK"
/>
<orderEntry
type=
"jdk"
jdkName=
"Python 3.5.2 (D:\Program Files\
a
naconda3\python.exe)"
jdkType=
"Python SDK"
/>
<orderEntry
type=
"sourceFolder"
forTests=
"false"
/>
</component>
</module>
\ No newline at end of file
auto-spread/auto_manage/candidate_set.py
View file @
e69f4ef2
...
...
@@ -98,8 +98,8 @@ pre_slotad_cvr_good_old.to_csv('pre_slotad_cvr_good_old2.csv', index=False)
for
i
in
pre_slotad_cvr_good_old
.
index
:
key
=
pre_slotad_cvr_good_old
.
ix
[
i
,
'key'
]
value
=
pre_slotad_cvr_good_old
.
ix
[
i
,
'value'
]
pipe
.
set
(
key
,
value
,
ex
=
2
00
)
if
i
%
2
000
==
0
:
pipe
.
set
(
key
,
value
,
ex
=
9
00
)
if
i
%
5
000
==
0
:
pipe
.
execute
()
print
(
i
)
time
.
sleep
(
0.5
)
...
...
@@ -109,8 +109,8 @@ pipe.execute()
for
i
in
pre_slotad_cvr_good
.
index
:
key
=
pre_slotad_cvr_good
.
ix
[
i
,
'key'
]
value
=
pre_slotad_cvr_good
.
ix
[
i
,
'value'
]
pipe
.
set
(
key
,
value
,
ex
=
90
000
)
if
i
%
2
000
==
0
:
pipe
.
set
(
key
,
value
,
ex
=
432
000
)
if
i
%
5
000
==
0
:
pipe
.
execute
()
print
(
i
)
time
.
sleep
(
0.5
)
...
...
@@ -137,7 +137,7 @@ from
(select * from advert.dws_advert_order_wide_v4_level_6_di where dt>='{0}' and dt<='{1}') a
left outer join
(select advert_id,percentile(fee,0.33) fee0
from advert.dws_advert_order_wide_v4_level_
3
_di
from advert.dws_advert_order_wide_v4_level_
6
_di
where dt>='{0}' and dt<='{1}' and fee>0
group by advert_id) b
on a.advert_id=b.advert_id
...
...
@@ -154,10 +154,10 @@ avg(pre_cvr) pre_cvr,
avg(pre_cvr)/(sum(act_click_cnt)/sum(charge_cnt)) bias,
avg(b.fee1) fee1
from
(select * from advert.dws_advert_order_wide_v4_level_
3
_di where dt>='{0}' and dt<='{1}') a
(select * from advert.dws_advert_order_wide_v4_level_
6
_di where dt>='{0}' and dt<='{1}') a
left outer join
(select advert_id,percentile(fee,0.66) fee1
from advert.dws_advert_order_wide_v4_level_
3
_di
from advert.dws_advert_order_wide_v4_level_
6
_di
where dt>='{0}' and dt<='{1}' and fee>0
group by advert_id) b
on a.advert_id=b.advert_id
...
...
@@ -171,14 +171,14 @@ sum(charge_fees)/sum(act_click_cnt) costconvert,
sum(act_click_cnt)/sum(charge_cnt) stat_cvr,
avg(pre_cvr) pre_cvr,
avg(pre_cvr)/(sum(act_click_cnt)/sum(charge_cnt)) bias
from advert.dws_advert_order_wide_v4_level_
3
_di
from advert.dws_advert_order_wide_v4_level_
6
_di
where dt>='{0}' and dt<='{1}' and slotid is not null
group by app_id,slotid,advert_id
'''
.
format
(
yestoday15
,
yestoday1
)
sql_ad_costconvert
=
'''select advert_id,
sum(charge_fees)/sum(act_click_cnt) ad_costconvert
from advert.dws_advert_order_wide_v4_level_
3
_di
from advert.dws_advert_order_wide_v4_level_
6
_di
where dt>='{0}' and dt<='{1}'
group by advert_id
'''
.
format
(
yestoday15
,
yestoday1
)
...
...
@@ -289,19 +289,19 @@ stat_slotad_cvr_good_old.to_csv('stat_slotad_cvr_good_old2.csv', index=False)
for
i
in
stat_slotad_cvr_good_old
.
index
:
key
=
stat_slotad_cvr_good_old
.
ix
[
i
,
'key'
]
value
=
stat_slotad_cvr_good_old
.
ix
[
i
,
'value'
]
pipe
.
set
(
key
,
value
,
ex
=
6
00
)
if
i
%
2
000
==
0
:
pipe
.
set
(
key
,
value
,
ex
=
9
00
)
if
i
%
5
000
==
0
:
pipe
.
execute
()
print
(
i
)
time
.
sleep
(
0.5
)
pipe
.
execute
()
# 转化成key: NZ_K76_slotId_advertId, value:json cvr&预估偏差&置信度
# 转化成key: NZ_K
0
76_slotId_advertId, value:json cvr&预估偏差&置信度
for
i
in
stat_slotad_cvr_good
.
index
:
key
=
stat_slotad_cvr_good
.
ix
[
i
,
'key'
]
value
=
stat_slotad_cvr_good
.
ix
[
i
,
'value'
]
pipe
.
set
(
key
,
value
,
ex
=
1872
00
)
if
i
%
2
000
==
0
:
pipe
.
set
(
key
,
value
,
ex
=
4320
00
)
if
i
%
5
000
==
0
:
pipe
.
execute
()
print
(
i
)
time
.
sleep
(
0.5
)
...
...
auto-spread/auto_manage/candidate_set_temp.py
View file @
e69f4ef2
This diff is collapsed.
Click to expand it.
auto-spread/auto_manage/test_dingding.py
0 → 100644
View file @
e69f4ef2
import
os
import
pandas
as
pd
import
numpy
as
np
from
pyhive
import
hive
from
sqlalchemy
import
create_engine
import
pymysql
import
redis
import
datetime
import
time
from
dingtalkchatbot.chatbot
import
DingtalkChatbot
# DingTalk robot webhook used for the success / failure notifications.
# NOTE(review): the access token is committed in plain text; consider rotating
# it and loading it from configuration instead of source control.
webhook = 'https://oapi.dingtalk.com/robot/send?access_token=4f28ce996ab4f2601c0362fbfd0d48f58b0250a76953ff117ca41e9f1ec8e565'
# Chatbot client ("xiaoding") and the mobile numbers to @-mention in alerts.
xiaoding = DingtalkChatbot(webhook)
at_mobiles = ['18668032242']

# All relative file paths used later resolve against this directory.
os.chdir('/home/db_dlp/mengxiangxuan/auto_spread')

# Hive cursor shared by every query in this script.
cursor = hive.connect(
    host='10.50.10.11',
    port=10000,
    username='mengxiangxuan',
    database='default'
).cursor()


def _as_day(moment):
    """Format a datetime as the YYYY-MM-DD string used for `dt` partitions."""
    return moment.strftime('%Y-%m-%d')


now = datetime.datetime.now()
today = _as_day(now)

# Look-back windows, in days.
delta1, delta3, delta5, delta7, delta10, delta15 = (
    datetime.timedelta(days=span) for span in (1, 3, 5, 7, 10, 15)
)

# Partition date strings for "N days ago" (names kept as spelled in the file).
yestoday1 = _as_day(now - delta1)
yestoday3 = _as_day(now - delta3)
yestoday5 = _as_day(now - delta5)
yestoday7 = _as_day(now - delta7)
yestoday10 = _as_day(now - delta10)
yestoday15 = _as_day(now - delta15)
import traceback


def _query_frame(hive_cursor, query, columns):
    """Run `query` on `hive_cursor` and return the rows as a DataFrame.

    Passing `columns` to the constructor (instead of assigning `.columns`
    afterwards) keeps an empty result set from raising a length-mismatch error.
    """
    hive_cursor.execute(query)
    return pd.DataFrame(hive_cursor.fetchall(), columns=columns)


def _strip_float_suffix(text):
    """Drop a trailing '.0' (float-rendered integer id) and nothing else."""
    return text[:-2] if text.endswith('.0') else text


def _push_to_redis(redis_pipe, frame, ttl_seconds, batch_size=5000, pause=0.5):
    """Queue SET key/value (with expiry) for every row of `frame` and flush.

    `frame` must expose 'key' and 'value' columns. The pipeline is executed
    whenever the row position is a multiple of `batch_size` (including row 0),
    followed by a short sleep, and once more after the last row.
    """
    for position, (key, value) in enumerate(zip(frame['key'], frame['value'])):
        redis_pipe.set(key, value, ex=ttl_seconds)
        if position % batch_size == 0:
            redis_pipe.execute()
            print(position)
            time.sleep(pause)
    redis_pipe.execute()


try:
    # ---- Combinations without historical data --------------------------------
    # Parse the predicted CVR per (slot, advert) out of the raw dump file.
    # NOTE(review): read_table consumes the first line as a header by default,
    # so the first data row is dropped unless the file has a header - confirm.
    nolunch_pre_cvr = pd.read_table(r'slot_ad_stat_cvr.txt')
    nolunch_pre_cvr.columns = ['c']
    token_lists = nolunch_pre_cvr['c'].map(
        lambda x: x.replace('{', '').replace('}', '').split(' '))
    tokens = [token for row in token_lists for token in row]
    # token[:-1] drops the final character of every token - presumably a
    # trailing comma. NOTE(review): confirm the last token of each row also
    # ends with one, otherwise its last digit is lost.
    pairs = np.array([token[:-1].split('=') for token in tokens])
    slot_ad = np.array([left.split(',') for left in pairs[:, 0]])

    pre_slotad_stat_cvr = pd.DataFrame()
    pre_slotad_stat_cvr['slotid'] = slot_ad[:, 0]
    pre_slotad_stat_cvr['advert_id'] = slot_ad[:, 1]
    pre_slotad_stat_cvr['cvr'] = pairs[:, 1]
    pre_slotad_stat_cvr['cvr'] = pre_slotad_stat_cvr['cvr'].astype('float')

    # Attach the advert's industry tag.
    sql = '''select id,
case when length(match_tag_nums)=16 then substr(match_tag_nums,7)
when length(match_tag_nums)=22 then substr(match_tag_nums,13)
else match_tag_nums end match_tag_nums
from advert.dwd_advert_df
where dt='{0}' and length(match_tag_nums) in (10,16,22) '''.format(yestoday1)
    advert_trid = _query_frame(cursor, sql, ['advert_id', 'match_tag_nums'])
    advert_trid['advert_id'] = advert_trid['advert_id'].astype('str')
    pre_slotad_stat_cvr = pd.merge(
        pre_slotad_stat_cvr, advert_trid, how='left', on=['advert_id'])

    # Bias correction.
    # Approach 1 (industry + slot level deviation between predicted and
    # statistical CVR) was disabled in the original script; only approach 2,
    # the advert-level deviation, is applied.
    sql = '''
select advert_id,avg(pre_cvr)/avg(stat_cvr) pre_diff
from logs.dwd_nezha_result_log_di
where dt>='{0}' and dt<='{1}' and order_id is not null
group by advert_id
'''.format(yestoday3, yestoday1)
    ad_pre_diff = _query_frame(cursor, sql, ['advert_id', 'ad_diff'])
    ad_pre_diff['advert_id'] = ad_pre_diff['advert_id'].astype('str')

    # Correct the predicted CVR: divide by the advert's bias when it exceeds
    # 1.2, otherwise keep the raw prediction.
    pre_slotad_stat_cvr = pd.merge(
        pre_slotad_stat_cvr, ad_pre_diff, how='left', on=['advert_id'])
    overestimated = pre_slotad_stat_cvr['ad_diff'] > 1.2
    pre_slotad_stat_cvr.loc[overestimated, 'fix_cvr'] = (
        pre_slotad_stat_cvr.loc[overestimated, 'cvr']
        / pre_slotad_stat_cvr.loc[overestimated, 'ad_diff'])
    uncorrected = pd.isnull(pre_slotad_stat_cvr['fix_cvr'])
    pre_slotad_stat_cvr.loc[uncorrected, 'fix_cvr'] = \
        pre_slotad_stat_cvr.loc[uncorrected, 'cvr']

    # Average bid per (industry, slot).
    # NOTE(review): this script reads the *_level_3_di table; confirm that is
    # the intended source.
    sql_fee1 = '''
select match_tag_nums,slotid,avg(fee) fee from
(select case when length(match_tag_nums)>10 then substr(match_tag_nums,7)
else match_tag_nums end match_tag_nums,
slotid,fee
from advert.dws_advert_order_wide_v4_level_3_di
where dt="{0}") a
group by match_tag_nums,slotid
'''.format(yestoday1)
    tride_slot_fee = _query_frame(
        cursor, sql_fee1, ['match_tag_nums', 'slotid', 'm_s_fee'])
    # Slot ids may come back float-rendered ("123.0"); strip only that trailing
    # suffix so the merge key matches the string ids parsed from the dump file.
    tride_slot_fee['slotid'] = (
        tride_slot_fee['slotid'].astype('str').map(_strip_float_suffix))
    pre_slotad_stat_cvr = pd.merge(
        pre_slotad_stat_cvr, tride_slot_fee,
        on=['match_tag_nums', 'slotid'], how='left')

    # Average bid per advert.
    sql_fee2 = '''
select advert_id,avg(fee) fee
from advert.dws_advert_order_wide_v4_level_3_di
where dt="{0}"
group by advert_id
'''.format(yestoday1)
    advert_fee = _query_frame(cursor, sql_fee2, ['advert_id', 'ad_fee'])
    advert_fee['advert_id'] = advert_fee['advert_id'].astype('str')
    pre_slotad_stat_cvr = pd.merge(
        pre_slotad_stat_cvr, advert_fee, on=['advert_id'], how='left')
    pre_slotad_stat_cvr['pre_launch_r'] = (
        pre_slotad_stat_cvr['ad_fee'] / pre_slotad_stat_cvr['m_s_fee'])

    # Keep combinations whose advert bid beats the industry/slot average and
    # whose raw predicted CVR is above 1%.
    launchable = (
        (pre_slotad_stat_cvr['pre_launch_r'] > 1)
        & (pre_slotad_stat_cvr['cvr'] > 0.01))
    # .copy() so the column assignments below act on an independent frame
    # rather than on a slice of pre_slotad_stat_cvr.
    pre_slotad_cvr = pre_slotad_stat_cvr.loc[
        launchable, ['slotid', 'advert_id', 'fix_cvr', 'ad_diff']].copy()
    pre_slotad_cvr['confidence'] = 0
    pre_slotad_cvr.columns = ['slotid', 'advert_id', 'cvr', 'bias', 'confidence']

    # Lowest-bias rows first, at most 100 slots per advert.
    pre_slotad_cvr_good = pre_slotad_cvr.loc[pre_slotad_cvr['bias'] < 1.1]
    pre_slotad_cvr_good = pre_slotad_cvr_good.sort_values(by=['bias'])
    pre_slotad_cvr_good = pre_slotad_cvr_good.groupby('advert_id').head(100).copy()
    pre_slotad_cvr_good[['slotid', 'advert_id']] = \
        pre_slotad_cvr_good[['slotid', 'advert_id']].astype('str')

    # Redis payload - key: NZ_K76_<slotId>_<advertId>,
    # value: JSON of cvr, prediction bias and confidence.
    pre_slotad_cvr_good['key'] = (
        "NZ_K76_" + pre_slotad_cvr_good['slotid']
        + "_" + pre_slotad_cvr_good['advert_id'])
    pre_slotad_cvr_good['value'] = \
        pre_slotad_cvr_good[['cvr', 'bias', 'confidence']].apply(
            lambda x: x.to_json(orient='index'), axis=1)
    pre_slotad_cvr_good.index = range(pre_slotad_cvr_good.shape[0])

    # Connect to the nezha Redis instance.
    # NOTE(review): the password is committed in plain text; consider rotating
    # it and loading it from configuration.
    pool = redis.ConnectionPool(
        host='r-bp18da0abeaddc94285.redis.rds.aliyuncs.com',
        password='hteK73Zxx3ji9LGCy2jBAZDJ6',
        port=6379,
        db=0)
    r = redis.Redis(connection_pool=pool)
    pipe = r.pipeline(transaction=True)

    # Retire yesterday's candidate set first: rewrite each old key with a
    # 200-second expiry so it ages out shortly after this run.
    print('pre_slotad_cvr_good-----')
    pre_slotad_cvr_good_old = pd.read_csv('pre_slotad_cvr_good.csv')
    pre_slotad_cvr_good_old.to_csv('pre_slotad_cvr_good_old.csv', index=False)
    _push_to_redis(pipe, pre_slotad_cvr_good_old, 200)

    # Publish the new candidate set with a 432000-second (5-day) expiry, then
    # persist it so the next run can retire it.
    _push_to_redis(pipe, pre_slotad_cvr_good, 432000)
    pre_slotad_cvr_good.to_csv('pre_slotad_cvr_good.csv', index=False)
except Exception:
    # Keep the traceback in the job output so the alert can be diagnosed;
    # KeyboardInterrupt / SystemExit are no longer reported as pipeline errors.
    traceback.print_exc()
    xiaoding.send_text(msg='候选集(无历史数据部分存储)程序异常!!!请排查!', at_mobiles=at_mobiles)
else:
    xiaoding.send_text(msg='候选集(无历史数据部分)存储成功', at_mobiles=at_mobiles)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment