Project: mengxiangxuan / tuia-alg-engineering-py

Commit f1f0bbbf, authored Oct 29, 2018 by mxx
1 parent: 18dbee01

Showing 3 changed files with 85 additions and 166 deletions:
- auto-spread/auto_manage/candidate_set.py (+52, -133)
- auto-spread/auto_manage/candidate_set_temp.py (+5, -5)
- auto-spread/auto_manage/params.py (+28, -28)
auto-spread/auto_manage/candidate_set.py (view file @ f1f0bbbf)
...
...
@@ -30,136 +30,56 @@ yestoday15 = (now - delta15).strftime('%Y-%m-%d')
# No historical data ---------------------------------------------------
# Parse out the predicted CVR at slot-advert granularity
nolunch_pre_cvr = pd.read_table(r'slot_ad_cvr.txt')
nolunch_pre_cvr.columns = ['c']
a = nolunch_pre_cvr['c'].map(lambda x: x.replace('{', '').replace('}', '').split(' '))
# a=nolunch_pre_cvr['c'].map(lambda x:x.replace('=',':'))
l = []
for i in a:
    l.extend(i)
ll = [x[:-1].split('=') for x in l]
ll = np.array(ll)
pre_slotad_stat_cvr = pd.DataFrame()
b = [s.split(',') for s in ll[:, 0]]
bb = np.array(b)
pre_slotad_stat_cvr['slotid'] = bb[:, 0]
pre_slotad_stat_cvr['advert_id'] = bb[:, 1]
pre_slotad_stat_cvr['cvr'] = ll[:, 1]
# pre_slotad_stat_cvr.head()
pre_slotad_stat_cvr['cvr'] = pre_slotad_stat_cvr['cvr'].astype('float')
# Match advert industry
sql = '''select id,
case when length(match_tag_nums)=16 then substr(match_tag_nums,7)
when length(match_tag_nums)=22 then substr(match_tag_nums,13)
else match_tag_nums end match_tag_nums
from advert.dwd_advert_df
where dt='{0}' and length(match_tag_nums) in (10,16,22) '''.format(yestoday1)
cursor.execute(sql)
advert_trid = pd.DataFrame(cursor.fetchall())
advert_trid.columns = ['advert_id', 'match_tag_nums']
advert_trid['advert_id'] = advert_trid['advert_id'].astype('str')
pre_slotad_stat_cvr = pd.merge(pre_slotad_stat_cvr, advert_trid, how='left', on=['advert_id'])
# Bias correction
# 1. Correct using the gap between predicted and observed values at industry + slot granularity in historical data
nolunch_pre_cvr = pd.read_table(r'slot_ad_cvr.txt', sep=',')
nolunch_pre_cvr.columns = ['slotad', 'ctr', 'cvr']
nolunch_pre_cvr[['slotid', 'advert_id']] = nolunch_pre_cvr['slotad'].str.split('_', expand=True)
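
As a point of reference, the parse above implicitly assumes slot_ad_cvr.txt is a comma-separated file whose first field is a "slotid_advertid" pair. The snippet below is only an illustrative sketch of that assumption with made-up rows, not code from this commit.

import io
import pandas as pd

# Hypothetical rows in the assumed "slotad,ctr,cvr" layout (slotad = "slotid_advertid").
sample = io.StringIO("2101_116833,0.034,0.0123\n2101_117902,0.029,0.0087\n")
df = pd.read_table(sample, sep=',', header=None, names=['slotad', 'ctr', 'cvr'])
df[['slotid', 'advert_id']] = df['slotad'].str.split('_', expand=True)
print(df[['slotid', 'advert_id', 'cvr']])
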
#
# # Match advert industry
# sql = '''select id,
# case when length(match_tag_nums)=16 then substr(match_tag_nums,7)
# when length(match_tag_nums)=22 then substr(match_tag_nums,13)
# else match_tag_nums end match_tag_nums
# from advert.dwd_advert_df
# where dt='{0}' and length(match_tag_nums) in (10,16,22) '''.format(yestoday1)
# cursor.execute(sql)
# advert_trid = pd.DataFrame(cursor.fetchall())
# advert_trid.columns = ['advert_id', 'match_tag_nums']
# advert_trid['advert_id'] = advert_trid['advert_id'].astype('str')
# pre_slotad_stat_cvr = pd.merge(pre_slotad_stat_cvr, advert_trid, how='left', on=['advert_id'])
#
# # Bias correction
# # 2. Correct using the gap between predicted and observed values at advert granularity in historical data
# sql = '''
# select match_tag_nums,app_id,avg(pre_cvr)/avg(stat_cvr) pre_diff from
# (select advert_id,app_id,pre_cvr,stat_cvr
# select advert_id,avg(pre_cvr)/avg(stat_cvr) pre_diff
# from logs.dwd_nezha_result_log_di
# where dt>='{0}' and dt<='{1}' and order_id is not null ) p1
# left outer join
# (select id,case when length(match_tag_nums)>10 then substr(match_tag_nums,7)
# else match_tag_nums end match_tag_nums
# from advert.dwd_advert_df where dt>='{0}' and dt<='{1}') p2
# on p1.advert_id=p2.id
# group by match_tag_nums,app_id
# where dt>='{0}' and dt<='{1}' and order_id is not null
# group by advert_id
# '''.format(yestoday7,yestoday1)
# cursor.execute(sql)
# trid_slot_pre_diff = pd.DataFrame(cursor.fetchall())
# trid_slot_pre_diff.columns = ['match_tag_nums', 'slotid', 'trid_slot_diff']
# # trid_slot_pre_diff=trid_slot_pre_diff.ix[trid_slot_pre_diff['trid_slot_diff']>2]
# trid_slot_pre_diff['slotid'] = trid_slot_pre_diff['slotid'].fillna(value=-11).astype('int').astype('str')
# 2. Correct using the gap between predicted and observed values at advert granularity in historical data
sql = '''
select advert_id,avg(pre_cvr)/avg(stat_cvr) pre_diff
from logs.dwd_nezha_result_log_di
where dt>='{0}' and dt<='{1}' and order_id is not null
group by advert_id
'''.format(yestoday3, yestoday1)
cursor.execute(sql)
ad_pre_diff = pd.DataFrame(cursor.fetchall())
ad_pre_diff.columns = ['advert_id', 'ad_diff']
# ad_pre_diff=ad_pre_diff.ix[ad_pre_diff['ad_diff']>2]
ad_pre_diff['advert_id'] = ad_pre_diff['advert_id'].astype('str')
# Predicted CVR bias correction ############
#pre_slotad_stat_cvr = pd.merge(pre_slotad_stat_cvr, trid_slot_pre_diff, how='left', on=['slotid', 'match_tag_nums'])
pre_slotad_stat_cvr = pd.merge(pre_slotad_stat_cvr, ad_pre_diff, how='left', on=['advert_id'])
# pre_slotad_stat_cvr.ix[pd.isnull(pre_slotad_stat_cvr['trid_slot_diff']), 'trid_slot_diff'] = pre_slotad_stat_cvr.ix[
#     pd.isnull(pre_slotad_stat_cvr['trid_slot_diff']), 'ad_diff']
# pre_slotad_stat_cvr.ix[pre_slotad_stat_cvr['trid_slot_diff'] > 2, 'fix_cvr'] = \
#     pre_slotad_stat_cvr.ix[pre_slotad_stat_cvr['trid_slot_diff'] > 2, 'cvr'] / pre_slotad_stat_cvr.ix[
#     pre_slotad_stat_cvr['trid_slot_diff'] > 2, 'trid_slot_diff']
pre_slotad_stat_cvr.ix[pre_slotad_stat_cvr['ad_diff'] > 1.2, 'fix_cvr'] = \
    pre_slotad_stat_cvr.ix[pre_slotad_stat_cvr['ad_diff'] > 1.2, 'cvr'] / pre_slotad_stat_cvr.ix[
        pre_slotad_stat_cvr['ad_diff'] > 1.2, 'ad_diff']
pre_slotad_stat_cvr.ix[pd.isnull(pre_slotad_stat_cvr['fix_cvr']), 'fix_cvr'] = pre_slotad_stat_cvr.ix[
    pd.isnull(pre_slotad_stat_cvr['fix_cvr']), 'cvr']
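
For clarity, here is a minimal stand-alone sketch of that correction rule with made-up numbers, written with .loc rather than the .ix indexer this file relies on (.ix was removed in later pandas releases): the predicted CVR is scaled back down by ad_diff only when prediction exceeds the observed rate by more than 20%, and uncorrected rows keep the raw prediction. This is an illustration, not code from the commit.

import pandas as pd

# Toy frame with hypothetical values; ad_diff = avg(pre_cvr) / avg(stat_cvr).
df = pd.DataFrame({'cvr': [0.030, 0.012], 'ad_diff': [1.5, 0.9]})

# Scale the prediction down only where it over-predicts by more than 20%.
over = df['ad_diff'] > 1.2
df.loc[over, 'fix_cvr'] = df.loc[over, 'cvr'] / df.loc[over, 'ad_diff']

# Rows without a correction fall back to the raw prediction.
df['fix_cvr'] = df['fix_cvr'].fillna(df['cvr'])
print(df)  # row 0 -> fix_cvr 0.02, row 1 -> fix_cvr 0.012
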
##### Match industry + slot bid
sql_fee1 = '''
select match_tag_nums,slotid,avg(fee) fee from
(select case when length(match_tag_nums)>10 then substr(match_tag_nums,7)
else match_tag_nums end match_tag_nums,
slotid,fee
from advert.dws_advert_order_wide_v4_level_3_di
where dt="{0}") a
group by match_tag_nums,slotid
'''.format(yestoday1)
cursor.execute(sql_fee1)
tride_slot_fee = pd.DataFrame(cursor.fetchall())
tride_slot_fee.columns = ['match_tag_nums', 'slotid', 'm_s_fee']
tride_slot_fee['slotid'] = tride_slot_fee['slotid'].astype('str').map(lambda x: x.replace('.0', ''))
pre_slotad_stat_cvr = pd.merge(pre_slotad_stat_cvr, tride_slot_fee, on=['match_tag_nums', 'slotid'], how='left')
##### Match advert bid
sql_fee2 = '''
select advert_id,avg(fee) fee
from advert.dws_advert_order_wide_v4_level_3_di
where dt="{0}"
group by advert_id
'''.format(yestoday1)
cursor.execute(sql_fee2)
advert_fee = pd.DataFrame(cursor.fetchall())
advert_fee.columns = ['advert_id', 'ad_fee']
advert_fee['advert_id'] = advert_fee['advert_id'].astype('str')
pre_slotad_stat_cvr = pd.merge(pre_slotad_stat_cvr, advert_fee, on=['advert_id'], how='left')
pre_slotad_stat_cvr['pre_launch_r'] = pre_slotad_stat_cvr['ad_fee'] / pre_slotad_stat_cvr['m_s_fee']
#### Filter the combinations predicted to be eligible for coupon issuing
pre_slotad_cvr = pre_slotad_stat_cvr.ix[(pre_slotad_stat_cvr['pre_launch_r'] > 1) & (pre_slotad_stat_cvr['cvr'] > 0.01)]
pre_slotad_cvr = pre_slotad_cvr[['slotid', 'advert_id', 'fix_cvr', 'ad_diff']]
pre_slotad_cvr['confidence'] = 0
pre_slotad_cvr.columns = ['slotid', 'advert_id', 'cvr', 'bias', 'confidence']
pre_slotad_cvr_good = pre_slotad_cvr.ix[pre_slotad_cvr['bias'] < 1.1]
pre_slotad_cvr_good = pre_slotad_cvr_good.sort_index(by=['bias'])
#pre_slotad_cvr_good.groupby('advert_id').size()
pre_slotad_cvr_good = pre_slotad_cvr_good.groupby('advert_id').head(20)
pre_slotad_cvr_good[['slotid', 'advert_id']] = pre_slotad_cvr_good[['slotid', 'advert_id']].astype('str')
pre_slotad_cvr_good['key'] = "NZ_K76_" + pre_slotad_cvr_good['slotid'] + "_" + pre_slotad_cvr_good['advert_id']
pre_slotad_cvr_good['value'] = pre_slotad_cvr_good[['cvr', 'bias', 'confidence']].apply(lambda x: x.to_json(orient='index'), axis=1)
# ad_pre_diff = pd.DataFrame(cursor.fetchall())
# ad_pre_diff.columns = ['advert_id', 'ad_diff']
# ad_pre_diff['advert_id'] = ad_pre_diff['advert_id'].astype('str')
#
# # Predicted CVR bias correction ############
# pre_slotad_stat_cvr = pd.merge(pre_slotad_stat_cvr, ad_pre_diff, how='left', on=['advert_id'])
#
# pre_slotad_stat_cvr.ix[pre_slotad_stat_cvr['ad_diff'] > 1.2, 'fix_cvr'] = \
# pre_slotad_stat_cvr.ix[pre_slotad_stat_cvr['ad_diff'] > 1.2, 'cvr'] / pre_slotad_stat_cvr.ix[
# pre_slotad_stat_cvr['ad_diff'] > 1.2, 'ad_diff']
#
# pre_slotad_stat_cvr.ix[pd.isnull(pre_slotad_stat_cvr['fix_cvr']), 'fix_cvr'] = pre_slotad_stat_cvr.ix[
# pd.isnull(pre_slotad_stat_cvr['fix_cvr']), 'cvr']
nolunch_pre_cvr = nolunch_pre_cvr.sort_index(by=['cvr'], ascending=False)
pre_slotad_cvr_good = nolunch_pre_cvr.groupby('advert_id').head(500)
pre_slotad_cvr_good['cvrSet'] = pre_slotad_cvr_good['cvr'].map(lambda x: [round(x, 6)] * 3)
pre_slotad_cvr_good['biasSet'] = [[-1.0, -1.0, -1.0]] * pre_slotad_cvr_good.shape[0]
pre_slotad_cvr_good['confidenceSet'] = [[0.0, 0.0, 0.0]] * pre_slotad_cvr_good.shape[0]
pre_slotad_cvr_good['priceSection'] = [[0.0, 0.0]] * pre_slotad_cvr_good.shape[0]
pre_slotad_cvr_good['key'] = "NZ_K076_" + pre_slotad_cvr_good['slotad']
pre_slotad_cvr_good['value'] = pre_slotad_cvr_good[['cvrSet', 'biasSet', 'confidenceSet', 'priceSection']].apply(lambda x: x.to_json(orient='index'), axis=1)
pre_slotad_cvr_good.index = range(pre_slotad_cvr_good.shape[0])
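
To make the Redis payload concrete, the hypothetical one-row example below (not from the commit) shows what Series.to_json(orient='index') produces for the list-valued columns assembled above: one JSON object per NZ_K076_* key.

import pandas as pd

# Hypothetical candidate row mirroring the columns built above.
row = pd.Series({
    'cvrSet': [0.0123, 0.0123, 0.0123],
    'biasSet': [-1.0, -1.0, -1.0],
    'confidenceSet': [0.0, 0.0, 0.0],
    'priceSection': [0.0, 0.0],
})
print(row.to_json(orient='index'))
# -> {"cvrSet":[0.0123,0.0123,0.0123],"biasSet":[-1.0,-1.0,-1.0],"confidenceSet":[0.0,0.0,0.0],"priceSection":[0.0,0.0]}
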
...
...
@@ -172,9 +92,9 @@ pipe = r.pipeline(transaction=True)
# First delete yesterday's candidate set
print('pre_slotad_cvr_good-----')
pre_slotad_cvr_good_old = pd.read_csv('pre_slotad_cvr_good.csv')
pre_slotad_cvr_good_old.to_csv('pre_slotad_cvr_good_old.csv', index=False)
print('pre_slotad_cvr_good2-----')
pre_slotad_cvr_good_old = pd.read_csv('pre_slotad_cvr_good2.csv')
pre_slotad_cvr_good_old.to_csv('pre_slotad_cvr_good_old2.csv', index=False)
for i in pre_slotad_cvr_good_old.index:
    key = pre_slotad_cvr_good_old.ix[i, 'key']
    value = pre_slotad_cvr_good_old.ix[i, 'value']
...
...
@@ -197,7 +117,7 @@ for i in pre_slotad_cvr_good.index:
pipe.execute()
pre_slotad_cvr_good.to_csv('pre_slotad_cvr_good.csv', index=False)
pre_slotad_cvr_good.to_csv('pre_slotad_cvr_good2.csv', index=False)
...
...
@@ -205,7 +125,6 @@ pre_slotad_cvr_good.to_csv('pre_slotad_cvr_good.csv', index=False)
##--------------------------------------------------------------------------------------
###---- Advert-granularity historical data
# Coupon-issuing prediction at high/mid/low advert bids; observed cvr and bias
sql_ad_fee0 = '''
select app_id,slotid,advert_id,
sum(charge_fees) cost,
...
...
@@ -215,7 +134,7 @@ avg(pre_cvr) pre_cvr,
avg(pre_cvr)/(sum(act_click_cnt)/sum(charge_cnt)) bias ,
avg(b.fee0) fee0
from
(select * from advert.dws_advert_order_wide_v4_level_3_di where dt>='{0}' and dt<='{1}') a
(select * from advert.dws_advert_order_wide_v4_level_6_di where dt>='{0}' and dt<='{1}') a
left outer join
(select advert_id,percentile(fee,0.33) fee0
from advert.dws_advert_order_wide_v4_level_3_di
...
...
@@ -370,7 +289,7 @@ stat_slotad_cvr_good_old.to_csv('stat_slotad_cvr_good_old2.csv', index=False)
for i in stat_slotad_cvr_good_old.index:
    key = stat_slotad_cvr_good_old.ix[i, 'key']
    value = stat_slotad_cvr_good_old.ix[i, 'value']
    pipe.set(key, value, ex=200)
    pipe.set(key, value, ex=600)
    if i % 2000 == 0:
        pipe.execute()
        print(i)
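
The write loops in this file and in candidate_set_temp.py all follow the same batching pattern: queue SET commands with a TTL on a Redis pipeline and flush every N rows. Below is a minimal sketch of that pattern; the local host, the sample key/value, and the final flush are assumptions for illustration, not code from this commit.

import redis

r = redis.Redis(host='localhost', port=6379, db=0)  # assumed local instance
pipe = r.pipeline(transaction=True)

rows = [("NZ_K76_2101_116833", '{"cvr":0.0123,"bias":1.05,"confidence":0}')]  # hypothetical data
for i, (key, value) in enumerate(rows):
    pipe.set(key, value, ex=600)   # queue the write with a 600 s TTL
    if i % 2000 == 0:
        pipe.execute()             # flush a full batch to the server
pipe.execute()                     # flush whatever remains after the loop
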
...
...
auto-spread/auto_manage/candidate_set_temp.py (view file @ f1f0bbbf)
...
...
@@ -179,7 +179,7 @@ for i in pre_slotad_cvr_good_old.index:
    key = pre_slotad_cvr_good_old.ix[i, 'key']
    value = pre_slotad_cvr_good_old.ix[i, 'value']
    pipe.set(key, value, ex=200)
    if i % 2000 == 0:
    if i % 5000 == 0:
        pipe.execute()
        print(i)
        time.sleep(0.5)
...
...
@@ -189,8 +189,8 @@ pipe.execute()
for i in pre_slotad_cvr_good.index:
    key = pre_slotad_cvr_good.ix[i, 'key']
    value = pre_slotad_cvr_good.ix[i, 'value']
    pipe.set(key, value, ex=90000)
    if i % 2000 == 0:
    pipe.set(key, value, ex=432000)
    if i % 5000 == 0:
        pipe.execute()
        print(i)
        time.sleep(0.5)
...
...
@@ -288,7 +288,7 @@ stat_slotad_cvr_good_old.to_csv('stat_slotad_cvr_good_old.csv', index=False)
for i in stat_slotad_cvr_good_old.index:
    key = stat_slotad_cvr_good_old.ix[i, 'key']
    value = stat_slotad_cvr_good_old.ix[i, 'value']
    pipe.set(key, value, ex=200)
    pipe.set(key, value, ex=600)
    if i % 2000 == 0:
        pipe.execute()
        print(i)
...
...
@@ -299,7 +299,7 @@ pipe.execute()
for i in stat_slotad_cvr_good.index:
    key = stat_slotad_cvr_good.ix[i, 'key']
    value = stat_slotad_cvr_good.ix[i, 'value']
    pipe.set(key, value, ex=187200)
    pipe.set(key, value, ex=432000)
    if i % 5000 == 0:
        pipe.execute()
        print(i)
...
...
auto-spread/auto_manage/params.py (view file @ f1f0bbbf)
import redis
import json

params_dict = {
    # recommend method
    'startFacter': 0.5,
    'cpaBiasRatioFacter': 1.0,
    'cpaOrientRatioFacter': 0.5,
    'cpaBiasThresholdFacter': 2.0,
    'cpcTargetRatioFacter': 0.5,
    'cpcOrientRatioFacter': 0.01,
    'cpcBiasThresholdFacter': 1.0,
    # circuit breaker
    'fuseOrientCostG1dFacter': 50000.0,
    'fuseOrientCostConvertbiasFacter': 2.0,
    # whitelist parameters
    'wSlotOrientationConfidenceFacter': 0.2,
    'wSlotOrientationCostConvertBiasFacter': 1.2,
    # high-confidence blacklist parameters
    'bOrientConfidenceFacter1': 1.0,
    'bOrientCostConvertbiasFacter1': 1.2,
    'bSlotOrientationConfidenceFacter1': 0.5,
    'bSlotOrientationCostConvertBiasFacter1': 3.5,
    # low-confidence blacklist parameters
    'bOrientConfidenceFacter2': 1.0,
    'bOrientCostConvertbiasFacter2': 1.5,
    'bSlotOrientationConfidenceFacter2': 0.5,
    'bSlotOrientRadioFacter2': 2.0,
    "feedBackParams": {
        "bOrientConfidenceFactor1": 1,
        "bOrientConfidenceFactor2": 1,
        "bOrientCostConvertBiasFactor1": 1.2,
        "bOrientCostConvertBiasFactor2": 1.5,
        "bSlotOrientRadioFactor2": 2,
        "bSlotOrientationConfidenceFactor1": 0.5,
        "bSlotOrientationConfidenceFactor2": 0.5,
        "bSlotOrientationCostConvertBiasFactor1": 3.5,
        "fuseOrientCostConvertBiasFactor": 2,
        "fuseOrientCostG1dFactor": 50000,
        "wSlotOrientationConfidenceFactor": 0.2,
        "wSlotOrientationCostConvertBiasFactor": 1.2
    },
    "slotRecommendParams": {
        "cpaBiasRatioFactor": 1,
        "cpaBiasThresholdFactor": 2,
        "cpaOrientRatioFactor": 0.6,
        "cpcBiasThresholdFactor": 1,
        "cpcOrientRatioFactor": 0.1,
        "cpcTargetRatioFactor": 0.5,
        "startFactor": 0.5
    }
}
params_key = "NZ_K??_auto_manage_params"
params_key = "NZ_K86_trusteeship_params"
params_value = json.dumps(params_dict)
pool = redis.ConnectionPool(host='r-bp18da0abeaddc94285.redis.rds.aliyuncs.com',
                            password='hteK73Zxx3ji9LGCy2jBAZDJ6', port=6379, db=0)
r = redis.Redis(connection_pool=pool)
r.set(params_key, params_value)
\ No newline at end of file
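
As a usage note, a consumer could read these parameters back from the same key and decode the JSON. The reader below is a hypothetical sketch reusing the connection settings of the writer above; it is not part of the repository.

import json
import redis

r = redis.Redis(host='r-bp18da0abeaddc94285.redis.rds.aliyuncs.com', port=6379, db=0,
                password='<same password as above>')
raw = r.get("NZ_K86_trusteeship_params")
if raw is not None:
    params = json.loads(raw)  # recovers the params_dict structure written by params.py
    print(params["slotRecommendParams"]["startFactor"])  # 0.5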