Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
hive-udf
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
黄贵华
hive-udf
Commits
9a1c87f7
Commit
9a1c87f7
authored
Feb 23, 2021
by
黄贵华
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
udf
parents
Changes
23
Show whitespace changes
Inline
Side-by-side
Showing
23 changed files
with
1027 additions
and
0 deletions
+1027
-0
.gitignore
.gitignore
+14
-0
README.md
README.md
+5
-0
build.gradle
build.gradle
+63
-0
settings.gradle
settings.gradle
+2
-0
AvailVectorUDF.java
src/main/java/cn/com/duiba/udf/AvailVectorUDF.java
+49
-0
CheckASCII.java
src/main/java/cn/com/duiba/udf/CheckASCII.java
+26
-0
DuplicateRemove.java
src/main/java/cn/com/duiba/udf/DuplicateRemove.java
+42
-0
GetUrlPara.java
src/main/java/cn/com/duiba/udf/GetUrlPara.java
+38
-0
HbaseRowkeyUDF.java
src/main/java/cn/com/duiba/udf/HbaseRowkeyUDF.java
+15
-0
IpConvertLong2.java
src/main/java/cn/com/duiba/udf/IpConvertLong2.java
+29
-0
IsDxmV5.java
src/main/java/cn/com/duiba/udf/IsDxmV5.java
+47
-0
IsImei.java
src/main/java/cn/com/duiba/udf/IsImei.java
+51
-0
LongConvertIp.java
src/main/java/cn/com/duiba/udf/LongConvertIp.java
+25
-0
MtDecrypt.java
src/main/java/cn/com/duiba/udf/MtDecrypt.java
+133
-0
MtEncrypt.java
src/main/java/cn/com/duiba/udf/MtEncrypt.java
+132
-0
ParseUaUDF.java
src/main/java/cn/com/duiba/udf/ParseUaUDF.java
+60
-0
StringDiffUDF.java
src/main/java/cn/com/duiba/udf/StringDiffUDF.java
+38
-0
StringIntersectionUDF.java
src/main/java/cn/com/duiba/udf/StringIntersectionUDF.java
+30
-0
TimeDiff.java
src/main/java/cn/com/duiba/udf/TimeDiff.java
+51
-0
UrlDecode.java
src/main/java/cn/com/duiba/udf/UrlDecode.java
+20
-0
VaildClickCount2.java
src/main/java/cn/com/duiba/udf/VaildClickCount2.java
+44
-0
VectorAvgUDF.java
src/main/java/cn/com/duiba/udf/VectorAvgUDF.java
+59
-0
GetJSONArrayUDTF.java
src/main/java/cn/com/duiba/udtf/GetJSONArrayUDTF.java
+54
-0
No files found.
.gitignore
0 → 100644
View file @
9a1c87f7
/target
.settings
.gradle
.idea
/gradle
/bin/
/build/
/out/
*.iml
gradlew
gradlew.bat
.classpath
.project
README.md
0 → 100644
View file @
9a1c87f7
负责人:徐贵锋
项目简介:
hive自定义函数库
\ No newline at end of file
build.gradle
0 → 100644
View file @
9a1c87f7
// Build-script classpath: the shadow plugin is resolved via the internal Nexus mirror.
buildscript {
    repositories {
        mavenLocal()
        maven { url "http://nexus.dui88.com:8081/nexus/content/groups/public/" }
        mavenCentral()
        jcenter()
    }
    dependencies {
        classpath("com.github.jengelman.gradle.plugins:shadow:2.0.1")
    }
}

apply plugin: 'java'
apply plugin: 'idea'
apply plugin: 'maven'
// Shadow produces the fat jar that is registered in Hive via ADD JAR.
apply plugin: 'com.github.johnrengelman.shadow'

group 'cn.com.duiba'
version '1.0-SNAPSHOT'

sourceCompatibility = 1.8
targetCompatibility = 1.8

repositories {
    mavenLocal()
    maven { url "http://nexus.dui88.com:8081/nexus/content/groups/public/" }
    maven { url 'http://repository.cloudera.com/artifactory/cloudera-repos' }
    mavenCentral()
}

dependencies {
    testCompile group: 'junit', name: 'junit', version: '4.12'
    compile('com.alibaba:fastjson:1.2.58')
    compile('ch.qos.logback:logback-classic:1.2.3')
    compile('org.slf4j:slf4j-api:1.7.25')
    // transitive = false: only the utility classes are needed, not their dependency trees.
    compile('cn.com.duiba:bigdata-common:0.0.18') { transitive = false }
    compile('com.google.guava:guava:18.0') { transitive = false }
    // compileOnly('org.apache.hive:hive-exec:1.1.0-cdh5.14.0')
    // compile group: 'org.apache.hive', name: 'hive-exec', version: '1.1.0'
    // Hive supplies hive-exec at runtime, so it must not be bundled in the fat jar.
    compileOnly('org.apache.hive:hive-exec:1.1.0')
    // compile group: 'cz.mallat.uasparser', name: 'uasparser', version: '0.6.2'
    compile group: 'nl.basjes.parse.useragent', name: 'yauaa', version: '5.21'
}

configurations {
    // Exclude log4j bindings everywhere: logback is the chosen logging backend.
    all*.exclude group: 'log4j', module: 'log4j'
    all*.exclude group: 'org.slf4j', module: 'slf4j-log4j12'
}

//shadowJar {
//    baseName = project.name
//    version = '0.1-SNAPSHOT'
//    classifier = null
//    configurations = [project.configurations.compile]
//}

// Sources contain UTF-8 (Chinese) comments; force the compiler encoding.
tasks.withType(JavaCompile) {
    options.encoding = "UTF-8"
}

//build.dependsOn shadowJar
settings.gradle
0 → 100644
View file @
9a1c87f7
// Project (and artifact) name; without this Gradle falls back to the directory name.
rootProject.name = 'hive-udf'
src/main/java/cn/com/duiba/udf/AvailVectorUDF.java
0 → 100644
View file @
9a1c87f7
package
cn
.
com
.
duiba
.
udf
;
import
com.google.common.collect.Lists
;
import
org.apache.commons.collections.CollectionUtils
;
import
org.apache.commons.lang3.ArrayUtils
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.hadoop.hive.ql.exec.UDF
;
import
java.util.*
;
/**
 * For each key listed in strA, returns the matching value from the
 * key/value entries in strB.
 */
public class AvailVectorUDF extends UDF {

    /**
     * @param strA      key list, e.g. "1,2,3,4"
     * @param strB      key/value entries, e.g. "1:2,3,4|2:2,3,5"
     * @param delimiter entry delimiter of strB, e.g. "|"
     * @param separator key separator of strA, e.g. ","
     * @param keySep    key/value separator inside a strB entry, e.g. ":"
     * @return matched values joined by {@code delimiter}, e.g. "2,3,4|2,3,5";
     *         null when mandatory input is blank or splits to nothing
     */
    public String evaluate(String strA, String strB, String delimiter, String separator, String keySep) {
        if (StringUtils.isBlank(strA) || StringUtils.isBlank(strB) || StringUtils.isBlank(separator)) {
            return null;
        }
        String[] strASplit = StringUtils.split(strA, separator);
        if (ArrayUtils.isEmpty(strASplit)) {
            return null;
        }
        String[] strBSplit = StringUtils.split(strB, delimiter);
        if (ArrayUtils.isEmpty(strBSplit)) {
            return null;
        }
        List<String> keyList = Arrays.asList(strASplit);
        List<String> vectorList = Lists.newArrayList();
        for (String kv : strBSplit) {
            String[] split = StringUtils.split(kv, keySep);
            // Bug fix: entries that do not contain a keySep-separated value
            // used to throw ArrayIndexOutOfBoundsException; skip them instead.
            if (split == null || split.length < 2) {
                continue;
            }
            if (keyList.contains(split[0])) {
                vectorList.add(split[1]);
            }
        }
        return StringUtils.join(vectorList, delimiter);
    }
}
src/main/java/cn/com/duiba/udf/CheckASCII.java
0 → 100644
View file @
9a1c87f7
package
cn
.
com
.
duiba
.
udf
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.hadoop.hive.ql.exec.UDF
;
/**
 * Returns the input unchanged when every character is printable ASCII
 * (code points 32..126, inclusive); otherwise returns null.
 * Blank input (null/empty/whitespace) is passed through as-is.
 */
public class CheckASCII extends UDF {

    public String evaluate(String str) {
        if (StringUtils.isBlank(str)) {
            return str;
        }
        for (char ch : str.toCharArray()) {
            boolean printableAscii = ch >= 32 && ch <= 126;
            if (!printableAscii) {
                return null;
            }
        }
        return str;
    }
}
src/main/java/cn/com/duiba/udf/DuplicateRemove.java
0 → 100644
View file @
9a1c87f7
package
cn
.
com
.
duiba
.
udf
;
import
org.apache.commons.lang.StringUtils
;
import
org.apache.hadoop.hive.ql.exec.UDF
;
import
java.util.Arrays
;
import
java.util.HashSet
;
import
java.util.Set
;
/**
 * Merges two separator-delimited strings and removes duplicate tokens.
 * Returns null when the separator is blank or both inputs are blank;
 * when exactly one input is blank, the other is returned untouched.
 */
public class DuplicateRemove extends UDF {

    public String evaluate(String str1, String str2, String split) {
        if (StringUtils.isBlank(split)) {
            return null;
        }
        boolean firstBlank = StringUtils.isBlank(str1);
        boolean secondBlank = StringUtils.isBlank(str2);
        if (firstBlank && secondBlank) {
            return null;
        }
        if (firstBlank) {
            return str2;
        }
        if (secondBlank) {
            return str1;
        }
        // NOTE(review): String.split treats `split` as a regex, exactly as the
        // original did — metacharacters like "|" will not behave literally.
        Set<String> merged = new HashSet<>(Arrays.asList(str1.split(split)));
        merged.addAll(Arrays.asList(str2.split(split)));
        StringBuilder joined = new StringBuilder();
        for (String token : merged) {
            joined.append(token).append(split);
        }
        // Trim the trailing separator appended by the loop.
        if (joined.length() >= split.length()) {
            joined.setLength(joined.length() - split.length());
        }
        return joined.toString();
    }
}
src/main/java/cn/com/duiba/udf/GetUrlPara.java
0 → 100644
View file @
9a1c87f7
package
cn
.
com
.
duiba
.
udf
;
import
org.apache.commons.lang.StringUtils
;
import
org.apache.hadoop.hive.ql.exec.UDF
;
/**
 * Extracts a parameter from a URL-ish string.
 * If the string contains '=', it is treated as a query string ("a=1&b=2")
 * and the value for {@code key} is returned. Otherwise the string is split
 * on '/' and the last segment is returned when it is numeric.
 */
public class GetUrlPara extends UDF {

    /**
     * @param str query string or slash-separated path
     * @param key parameter name to look up (query-string mode only)
     * @return the matched value, or null when absent or input is blank
     */
    public String evaluate(String str, String key) {
        if (StringUtils.isBlank(str)) {
            return null;
        }
        if (str.contains("=")) {
            for (String pair : str.split("&")) {
                String[] par = pair.split("=");
                // Bug fix: compare from par[0] so a null key yields null
                // instead of the NullPointerException key.equals(...) threw.
                if (par.length > 1 && par[0].equals(key)) {
                    return par[1];
                }
            }
        } else {
            String[] ids = str.split("/");
            // Path mode: only a numeric trailing segment counts as a value.
            if (ids.length > 1 && StringUtils.isNumeric(ids[ids.length - 1])) {
                return ids[ids.length - 1];
            }
        }
        return null;
    }
}
\ No newline at end of file
src/main/java/cn/com/duiba/udf/HbaseRowkeyUDF.java
0 → 100644
View file @
9a1c87f7
package
cn
.
com
.
duiba
.
udf
;
import
cn.com.duiba.bigdata.common.biz.utils.BigdataUtil
;
import
org.apache.hadoop.hive.ql.exec.UDF
;
/**
 * @author xugf 2019-11-28
 * Builds the MD5-based HBase row key for the given column values.
 */
public class HbaseRowkeyUDF extends UDF {

    // Delegates entirely to the shared big-data utility so every job derives
    // row keys the same way. NOTE(review): the exact key format is defined in
    // BigdataUtil.getMD5HbaseRowkey, which is not visible from this file.
    public String evaluate(Object... strs) {
        return BigdataUtil.getMD5HbaseRowkey(strs);
    }
}
src/main/java/cn/com/duiba/udf/IpConvertLong2.java
0 → 100644
View file @
9a1c87f7
package
cn
.
com
.
duiba
.
udf
;
import
org.apache.hadoop.hive.ql.exec.UDF
;
/**
 * Converts a dotted-quad IPv4 string to its numeric (long) representation.
 */
public class IpConvertLong2 extends UDF {

    /**
     * @param strip an IPv4 address such as "10.1.2.3"
     * @return the address as a long (ip1*2^24 + ip2*2^16 + ip3*2^8 + ip4),
     *         or null when the input is null or not a valid dotted quad
     */
    public Long evaluate(String strip) {
        // Bug fix: the original dereferenced strip before its try block,
        // so a null input threw a NullPointerException.
        if (strip == null) {
            return null;
        }
        String[] parts = strip.split("\\.");
        if (parts.length != 4) {
            return null;
        }
        try {
            long result = 0L;
            for (String part : parts) {
                // Accumulate octets: equivalent to (p0<<24)+(p1<<16)+(p2<<8)+p3.
                result = (result << 8) + Long.parseLong(part);
            }
            return result;
        } catch (Exception e) {
            // Non-numeric octets yield null, matching the original contract.
            return null;
        }
    }
}
src/main/java/cn/com/duiba/udf/IsDxmV5.java
0 → 100644
View file @
9a1c87f7
package
cn
.
com
.
duiba
.
udf
;
import
org.apache.commons.lang.StringUtils
;
import
org.apache.hadoop.hive.ql.exec.UDF
;
/**
 * Validates a "dxm v5" style identifier: only characters [0-9A-Za-z._-]
 * are allowed and there must be exactly three '.' separators.
 * Returns the input when valid, null otherwise; blank input passes through.
 */
public class IsDxmV5 extends UDF {

    public String evaluate(String str) {
        if (StringUtils.isBlank(str)) {
            return str;
        }
        int dotCount = 0;
        for (int idx = 0; idx < str.length(); idx++) {
            char ch = str.charAt(idx);
            if (ch == '.') {
                dotCount++;
            } else if (ch == '_' || ch == '-'
                    || (ch >= '0' && ch <= '9')
                    || (ch >= 'A' && ch <= 'Z')
                    || (ch >= 'a' && ch <= 'z')) {
                // allowed character — keep scanning
            } else {
                return null;
            }
        }
        // Valid identifiers carry exactly three '.' separators (four segments).
        return dotCount == 3 ? str : null;
    }
}
\ No newline at end of file
src/main/java/cn/com/duiba/udf/IsImei.java
0 → 100644
View file @
9a1c87f7
package
cn
.
com
.
duiba
.
udf
;
import
org.apache.commons.lang.StringUtils
;
import
org.apache.hadoop.hive.ql.exec.UDF
;
/**
 * Checks whether a string is a valid 15-digit IMEI by recomputing its
 * Luhn check digit and comparing with the supplied one.
 */
public class IsImei extends UDF {

    public Boolean evaluate(String str) {
        if (StringUtils.isBlank(str) || str.length() != 15 || !StringUtils.isNumeric(str)) {
            return false;
        }
        // First 14 digits are the payload; the 15th is the check digit.
        String body = str.substring(0, 14);
        int oddSum = 0;   // digits at odd positions (1-based)
        int evenSum = 0;  // digit-sum of doubled digits at even positions (1-based)
        for (int pos = 0; pos < body.length(); pos++) {
            int digit = body.charAt(pos) - '0';
            if (pos % 2 == 0) {
                oddSum += digit;
            } else {
                int doubled = digit * 2;
                // Digit-sum of `doubled`: when >= 10 the tens digit is always 1.
                evenSum += doubled < 10 ? doubled : doubled + 1 - 10;
            }
        }
        int total = oddSum + evenSum;
        // Check digit is 0 when total ends in 0, otherwise 10 minus the last digit.
        int check = total % 10 == 0 ? 0 : 10 - total % 10;
        return str.equals(body + check);
    }
}
\ No newline at end of file
src/main/java/cn/com/duiba/udf/LongConvertIp.java
0 → 100644
View file @
9a1c87f7
package
cn
.
com
.
duiba
.
udf
;
import
org.apache.hadoop.hive.ql.exec.UDF
;
/**
 * Converts a numeric IPv4 value back to dotted-quad notation.
 */
public class LongConvertIp extends UDF {

    /**
     * @param longIp the IPv4 address as an unsigned 32-bit value in a Long
     * @return "a.b.c.d", or null when the input is null
     */
    public String evaluate(Long longIp) {
        if (longIp == null) {
            return null;
        }
        long ip = longIp;
        // StringBuilder replaces StringBuffer: a method-local buffer needs
        // no synchronization.
        StringBuilder sb = new StringBuilder();
        sb.append(ip >>> 24);
        sb.append('.');
        sb.append((ip & 0x00FFFFFF) >>> 16);
        sb.append('.');
        sb.append((ip & 0x0000FFFF) >>> 8);
        sb.append('.');
        sb.append(ip & 0x000000FF);
        return sb.toString();
    }
}
src/main/java/cn/com/duiba/udf/MtDecrypt.java
0 → 100644
View file @
9a1c87f7
package
cn
.
com
.
duiba
.
udf
;
import
com.google.common.collect.Maps
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.hadoop.hive.ql.exec.UDF
;
import
javax.crypto.Cipher
;
import
javax.crypto.spec.SecretKeySpec
;
import
java.nio.charset.StandardCharsets
;
import
java.util.Map
;
/**
 * Created by clemac on 19/7/15.
 * Hive UDF that decrypts Meituan (waimai ad) tracking parameters that were
 * AES-encrypted and hex-encoded; also exposes the matching helper methods.
 */
public class MtDecrypt extends UDF {

    /**
     * Secret key material used for AES encryption/decryption.
     */
    private final static String AES_KEY = "waimaiad_aes_key";

    /**
     * Reusable AES key spec.
     */
    private static SecretKeySpec KEY = null;

    /*
     * AES_KEY never changes, so the key spec is built exactly once.
     */
    static {
        // Bug fix: explicit UTF-8 instead of the platform default charset so
        // the key bytes are identical on every JVM.
        KEY = new SecretKeySpec(AES_KEY.getBytes(StandardCharsets.UTF_8), "AES");
    }

    /**
     * Encrypts plain text with AES and returns an upper-case hex string.
     * NOTE(review): "AES" defaults to ECB mode with PKCS5 padding, which is
     * cryptographically weak; retained for compatibility with existing data.
     */
    public static String encrypt(String content) throws Exception {
        Cipher cipher = Cipher.getInstance("AES");
        byte[] byteContent = content.getBytes(StandardCharsets.UTF_8);
        cipher.init(Cipher.ENCRYPT_MODE, KEY);
        byte[] result = cipher.doFinal(byteContent);
        return parseByte2HexStr(result);
    }

    /**
     * Decrypts a hex-encoded AES cipher text back to plain text.
     * Returns null when the hex input is empty.
     */
    public static String decrypt(String encryptContent) throws Exception {
        byte[] content = parseHexStr2Byte(encryptContent);
        if (content == null) {
            return null;
        }
        Cipher cipher = Cipher.getInstance("AES");
        cipher.init(Cipher.DECRYPT_MODE, KEY);
        byte[] result = cipher.doFinal(content);
        // Bug fix: decode with UTF-8 to mirror encrypt(); the original used
        // the platform default charset, breaking round-trips on non-UTF-8 JVMs.
        return new String(result, StandardCharsets.UTF_8);
    }

    /**
     * Converts a byte array to an upper-case hex string, two chars per byte.
     *
     * @param buf bytes to encode
     * @return upper-case hex representation
     */
    public static String parseByte2HexStr(byte[] buf) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < buf.length; i++) {
            String hex = Integer.toHexString(buf[i] & 0xFF);
            if (hex.length() == 1) {
                // Pad to two hex chars per byte.
                hex = '0' + hex;
            }
            sb.append(hex.toUpperCase());
        }
        return sb.toString();
    }

    /**
     * Converts a hex string back to bytes; a trailing odd character is ignored.
     *
     * @param hexStr hex text to decode
     * @return decoded bytes, or null for an empty string
     */
    public static byte[] parseHexStr2Byte(String hexStr) {
        if (hexStr.length() < 1) {
            return null;
        }
        byte[] result = new byte[hexStr.length() / 2];
        for (int i = 0; i < hexStr.length() / 2; i++) {
            int num = Integer.parseInt(hexStr.substring(i * 2, i * 2 + 2), 16);
            result[i] = (byte) num;
        }
        return result;
    }

    /**
     * Decrypts and parses Meituan parameters of the form "k1=v1|k2=v2|...".
     * NOTE(review): an entry without '=' throws ArrayIndexOutOfBoundsException,
     * matching the original behavior.
     */
    public static Map<String, Object> analysisMeituanParams(String params) throws Exception {
        if (StringUtils.isBlank(params)) {
            return null;
        }
        String decrypted = decrypt(params);
        Map<String, Object> map = Maps.newHashMap();
        for (String pair : decrypted.split("\\|")) {
            String[] kv = pair.split("=");
            map.put(kv[0], kv[1]);
        }
        return map;
    }

    /**
     * UDF entry point: decrypts a hex/AES value. Blank input passes through
     * unchanged; any decryption failure yields null.
     */
    public String evaluate(String code) {
        if (StringUtils.isBlank(code)) {
            return code;
        }
        try {
            return decrypt(code);
        } catch (Exception e) {
            return null;
        }
    }
}
src/main/java/cn/com/duiba/udf/MtEncrypt.java
0 → 100644
View file @
9a1c87f7
package
cn
.
com
.
duiba
.
udf
;
import
com.google.common.collect.Maps
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.hadoop.hive.ql.exec.UDF
;
import
javax.crypto.Cipher
;
import
javax.crypto.spec.SecretKeySpec
;
import
java.nio.charset.StandardCharsets
;
import
java.util.Map
;
/**
 * Created by clemac on 19/7/15.
 * Hive UDF that AES-encrypts Meituan (waimai ad) parameters and hex-encodes
 * the result; also exposes the matching decrypt/parse helper methods.
 */
public class MtEncrypt extends UDF {

    /**
     * Secret key material used for AES encryption/decryption.
     */
    private final static String AES_KEY = "waimaiad_aes_key";

    /**
     * Reusable AES key spec.
     */
    private static SecretKeySpec KEY = null;

    /*
     * AES_KEY never changes, so the key spec is built exactly once.
     */
    static {
        // Bug fix: explicit UTF-8 instead of the platform default charset so
        // the key bytes are identical on every JVM.
        KEY = new SecretKeySpec(AES_KEY.getBytes(StandardCharsets.UTF_8), "AES");
    }

    /**
     * Encrypts plain text with AES and returns an upper-case hex string.
     * NOTE(review): "AES" defaults to ECB mode with PKCS5 padding, which is
     * cryptographically weak; retained for compatibility with existing data.
     */
    public static String encrypt(String content) throws Exception {
        Cipher cipher = Cipher.getInstance("AES");
        byte[] byteContent = content.getBytes(StandardCharsets.UTF_8);
        cipher.init(Cipher.ENCRYPT_MODE, KEY);
        byte[] result = cipher.doFinal(byteContent);
        return parseByte2HexStr(result);
    }

    /**
     * Decrypts a hex-encoded AES cipher text back to plain text.
     * Returns null when the hex input is empty.
     */
    public static String decrypt(String encryptContent) throws Exception {
        byte[] content = parseHexStr2Byte(encryptContent);
        if (content == null) {
            return null;
        }
        Cipher cipher = Cipher.getInstance("AES");
        cipher.init(Cipher.DECRYPT_MODE, KEY);
        byte[] result = cipher.doFinal(content);
        // Bug fix: decode with UTF-8 to mirror encrypt(); the original used
        // the platform default charset, breaking round-trips on non-UTF-8 JVMs.
        return new String(result, StandardCharsets.UTF_8);
    }

    /**
     * Converts a byte array to an upper-case hex string, two chars per byte.
     *
     * @param buf bytes to encode
     * @return upper-case hex representation
     */
    public static String parseByte2HexStr(byte[] buf) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < buf.length; i++) {
            String hex = Integer.toHexString(buf[i] & 0xFF);
            if (hex.length() == 1) {
                // Pad to two hex chars per byte.
                hex = '0' + hex;
            }
            sb.append(hex.toUpperCase());
        }
        return sb.toString();
    }

    /**
     * Converts a hex string back to bytes; a trailing odd character is ignored.
     *
     * @param hexStr hex text to decode
     * @return decoded bytes, or null for an empty string
     */
    public static byte[] parseHexStr2Byte(String hexStr) {
        if (hexStr.length() < 1) {
            return null;
        }
        byte[] result = new byte[hexStr.length() / 2];
        for (int i = 0; i < hexStr.length() / 2; i++) {
            int num = Integer.parseInt(hexStr.substring(i * 2, i * 2 + 2), 16);
            result[i] = (byte) num;
        }
        return result;
    }

    /**
     * Decrypts and parses Meituan parameters of the form "k1=v1|k2=v2|...".
     * NOTE(review): an entry without '=' throws ArrayIndexOutOfBoundsException,
     * matching the original behavior.
     */
    public static Map<String, Object> analysisMeituanParams(String params) throws Exception {
        if (StringUtils.isBlank(params)) {
            return null;
        }
        String decrypted = decrypt(params);
        Map<String, Object> map = Maps.newHashMap();
        for (String pair : decrypted.split("\\|")) {
            String[] kv = pair.split("=");
            map.put(kv[0], kv[1]);
        }
        return map;
    }

    /**
     * UDF entry point: encrypts a plain-text value. Blank input passes through
     * unchanged; any encryption failure yields null.
     */
    public String evaluate(String code) {
        if (StringUtils.isBlank(code)) {
            return code;
        }
        try {
            return encrypt(code);
        } catch (Exception e) {
            return null;
        }
    }
}
src/main/java/cn/com/duiba/udf/ParseUaUDF.java
0 → 100644
View file @
9a1c87f7
package
cn
.
com
.
duiba
.
udf
;
import
nl.basjes.parse.useragent.UserAgent
;
import
nl.basjes.parse.useragent.UserAgentAnalyzer
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.hadoop.hive.ql.exec.UDF
;
import
org.json.JSONObject
;
import
java.util.HashMap
;
import
java.util.Map
;
/**
 * Parses a User-Agent string with yauaa and returns the device/OS fields
 * as a JSON object string.
 */
public class ParseUaUDF extends UDF {

    private static UserAgentAnalyzer userAgentAnalyzer;

    static {
        // Built once in a static block: constructing the analyzer per call
        // would be prohibitively slow.
        userAgentAnalyzer = UserAgentAnalyzer
                .newBuilder()
                .hideMatcherLoadStats()
                .delayInitialization()
                .build();
    }

    /**
     * @param strUa raw User-Agent header value
     * @return JSON string with device/OS fields, or null for blank/too-short
     *         input or any parse failure
     */
    public String evaluate(String strUa) {
        try {
            final int uaLengthLimit = 5;
            // Bug fix: the original joined these checks with '&&', so a blank
            // UA caused an NPE and short UAs were still parsed. Any single
            // failing condition should short-circuit to null.
            if (StringUtils.isBlank(strUa) || strUa.length() <= uaLengthLimit) {
                return null;
            }
            UserAgent userAgent = userAgentAnalyzer.parse(strUa);
            // Parameterized map replaces the raw-typed HashMap of the original.
            Map<String, String> fields = new HashMap<>();
            fields.put("device_class", userAgent.getValue("DeviceClass"));
            fields.put("device_brand", userAgent.getValue("DeviceBrand"));
            fields.put("device_name", userAgent.getValue("DeviceName"));
            fields.put("operating_system_class", userAgent.getValue("OperatingSystemClass"));
            fields.put("operating_system_name", userAgent.getValue("OperatingSystemName"));
            fields.put("operating_system_version", userAgent.getValue("OperatingSystemVersion"));
            // Network type cannot be derived from a UA string, so it is omitted.
            // fields.put("network_type", userAgent.getValue("NETWORK_TYPE"));
            return new JSONObject(fields).toString();
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }
}
src/main/java/cn/com/duiba/udf/StringDiffUDF.java
0 → 100644
View file @
9a1c87f7
package
cn
.
com
.
duiba
.
udf
;
import
com.google.common.collect.Sets
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.hadoop.hive.ql.exec.UDF
;
import
java.util.*
;
/**
 * Computes the symmetric difference of two delimited strings.
 * Output format: "&lt;tokens only in a&gt;|&lt;tokens only in b&gt;".
 */
public class StringDiffUDF extends UDF {

    /**
     * @param stra      first delimited string
     * @param strb      second delimited string
     * @param delimiter token delimiter; '|' is rejected because it is the output separator
     * @return the two one-sided differences joined by '|', or null when both
     *         inputs are blank or the delimiter is '|'
     */
    public String evaluate(String stra, String strb, String delimiter) {
        if ((StringUtils.isBlank(stra) && StringUtils.isBlank(strb)) || "|".equals(delimiter)) {
            return null;
        }
        // With exactly one blank side, the non-blank side is the whole diff.
        if (StringUtils.isBlank(stra) || StringUtils.isBlank(strb)) {
            return (stra == null ? "" : stra) + "|" + (strb == null ? "" : strb);
        }
        Set<String> setA = Sets.newHashSet(StringUtils.split(stra, delimiter));
        Set<String> setB = Sets.newHashSet(StringUtils.split(strb, delimiter));
        // Copies keep the originals intact for the opposite removeAll.
        // (Fixes the raw-typed `new HashSet(setA)` of the original.)
        Set<String> onlyInA = new HashSet<>(setA);
        Set<String> onlyInB = new HashSet<>(setB);
        onlyInA.removeAll(setB);
        onlyInB.removeAll(setA);
        return StringUtils.join(onlyInA, delimiter) + "|" + StringUtils.join(onlyInB, delimiter);
    }
}
src/main/java/cn/com/duiba/udf/StringIntersectionUDF.java
0 → 100644
View file @
9a1c87f7
package
cn
.
com
.
duiba
.
udf
;
import
com.google.common.collect.Sets
;
import
org.apache.commons.collections.CollectionUtils
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.hadoop.hive.ql.exec.UDF
;
import
java.util.Set
;
/**
 * Computes the intersection of two delimited strings.
 * Output format: "&lt;common tokens joined by delimiter&gt;|&lt;count&gt;",
 * or null when either input is blank, the delimiter is '|', or nothing matches.
 */
public class StringIntersectionUDF extends UDF {

    public String evaluate(String stra, String strb, String delimiter) {
        // '|' is reserved for the output format, so reject it as a delimiter.
        boolean unusable = StringUtils.isBlank(stra)
                || StringUtils.isBlank(strb)
                || "|".equals(delimiter);
        if (unusable) {
            return null;
        }
        Set<String> common = Sets.newHashSet(StringUtils.split(stra, delimiter));
        Set<String> other = Sets.newHashSet(StringUtils.split(strb, delimiter));
        // retainAll keeps only tokens that appear in both inputs.
        common.retainAll(other);
        if (common.isEmpty()) {
            return null;
        }
        return StringUtils.join(common, delimiter) + "|" + common.size();
    }
}
src/main/java/cn/com/duiba/udf/TimeDiff.java
0 → 100644
View file @
9a1c87f7
package
cn
.
com
.
duiba
.
udf
;
import
org.apache.commons.lang.StringUtils
;
import
org.apache.hadoop.hive.ql.exec.UDF
;
import
java.text.SimpleDateFormat
;
import
java.util.Date
;
/**
 * Returns the difference between two "yyyy-MM-dd HH:mm:ss" timestamps in the
 * requested unit: "ss" seconds, "mm" minutes, "hh" hours, "dd" days.
 * Returns null for blank input, negative spans, unknown units, or parse errors.
 */
public class TimeDiff extends UDF {

    public Long evaluate(String sTimes, String eTimes, String type) {
        try {
            if (StringUtils.isBlank(sTimes) || StringUtils.isBlank(eTimes) || StringUtils.isBlank(type)) {
                return null;
            }
            // SimpleDateFormat is not thread-safe, so a fresh instance per call.
            SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            long millis = fmt.parse(eTimes).getTime() - fmt.parse(sTimes).getTime();
            if (millis < 0) {
                return null;
            }
            switch (type) {
                case "ss":
                    return millis / (1000);
                case "mm":
                    return millis / (1000 * 60);
                case "hh":
                    return millis / (1000 * 60 * 60);
                case "dd":
                    return millis / (1000 * 60 * 60 * 24);
                default:
                    return null;
            }
        } catch (Exception e) {
            // Unparseable timestamps map to null by design.
            return null;
        }
    }
}
\ No newline at end of file
src/main/java/cn/com/duiba/udf/UrlDecode.java
0 → 100644
View file @
9a1c87f7
package
cn
.
com
.
duiba
.
udf
;
import
org.apache.hadoop.hive.ql.exec.UDF
;
import
java.net.URLDecoder
;
/**
 * URL-decodes a string using the given character encoding name.
 * Returns null when decoding fails (null input, unsupported encoding,
 * or malformed percent-escapes).
 */
public class UrlDecode extends UDF {

    public String evaluate(String str, String code) {
        try {
            return URLDecoder.decode(str, code);
        } catch (Exception e) {
            // Any failure — including NPE on null input — maps to null by design.
            return null;
        }
    }
}
src/main/java/cn/com/duiba/udf/VaildClickCount2.java
0 → 100644
View file @
9a1c87f7
package
cn
.
com
.
duiba
.
udf
;
import
org.apache.commons.lang.StringUtils
;
import
org.apache.hadoop.hive.ql.exec.UDF
;
import
java.text.SimpleDateFormat
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.List
;
/**
 * Filters a comma-separated, ascending list of "yyyy-MM-dd HH:mm:ss" click
 * times so that each kept click is more than `interval` minutes after the
 * previously kept one. Returns the kept times joined by ',', "" for blank
 * input, or null on any parse error.
 */
public class VaildClickCount2 extends UDF {

    public String evaluate(String strTimes, Long interval) {
        try {
            if (StringUtils.isBlank(strTimes)) {
                return "";
            }
            SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            String[] clickTimes = strTimes.split(",");
            if (clickTimes.length <= 0) {
                return "";
            }
            List<String> kept = new ArrayList<String>();
            // The first click is always kept and seeds the exclusion window.
            Date windowStart = fmt.parse(clickTimes[0]);
            kept.add(clickTimes[0]);
            Date windowEnd = new Date(windowStart.getTime() + 60000 * interval);
            for (String clickTime : clickTimes) {
                // Lexicographic comparison is valid because the timestamp
                // format is fixed-width.
                if (clickTime.compareTo(fmt.format(windowEnd)) > 0) {
                    windowStart = fmt.parse(clickTime);
                    windowEnd.setTime(windowStart.getTime() + 60000 * interval);
                    kept.add(clickTime);
                }
            }
            return StringUtils.join(kept, ',');
        } catch (Exception e) {
            return null;
        }
    }
}
\ No newline at end of file
src/main/java/cn/com/duiba/udf/VectorAvgUDF.java
0 → 100644
View file @
9a1c87f7
package
cn
.
com
.
duiba
.
udf
;
import
com.google.common.collect.Lists
;
import
org.apache.commons.collections.CollectionUtils
;
import
org.apache.commons.lang3.ArrayUtils
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.hadoop.hive.ql.exec.UDF
;
import
java.util.List
;
/**
* 求向量的平均
*/
public
class
VectorAvgUDF
extends
UDF
{
public
String
evaluate
(
String
str
,
String
delimiter
,
String
separator
)
{
if
(
StringUtils
.
isBlank
(
str
)
||
StringUtils
.
isBlank
(
delimiter
)
||
StringUtils
.
isBlank
(
separator
))
{
return
null
;
}
String
[]
strArray
=
StringUtils
.
split
(
str
,
delimiter
);
if
(
ArrayUtils
.
isEmpty
(
strArray
))
{
return
null
;
}
long
maxLength
=
0L
;
//分割向量
List
<
String
[]>
vectorList
=
Lists
.
newArrayList
();
for
(
String
s
:
strArray
)
{
String
[]
split
=
StringUtils
.
split
(
s
,
separator
);
maxLength
=
getMaxLength
(
maxLength
,
split
);
vectorList
.
add
(
split
);
}
if
(
vectorList
.
size
()
==
0
)
{
return
null
;
}
//计算平均值
List
<
Double
>
avgList
=
Lists
.
newArrayList
();
for
(
int
i
=
0
;
i
<
maxLength
;
i
++)
{
Double
sum
=
0
D
;
for
(
String
[]
vector
:
vectorList
)
{
if
(
vector
!=
null
&&
vector
.
length
>
i
)
{
sum
+=
Double
.
valueOf
(
vector
[
i
]);
}
}
avgList
.
add
(
sum
/
vectorList
.
size
());
}
return
StringUtils
.
join
(
avgList
,
separator
);
}
private
long
getMaxLength
(
long
maxLength
,
String
[]
split
)
{
if
(
split
==
null
)
{
return
maxLength
;
}
return
split
.
length
>
maxLength
?
split
.
length
:
maxLength
;
}
}
src/main/java/cn/com/duiba/udtf/GetJSONArrayUDTF.java
0 → 100644
View file @
9a1c87f7
package
cn
.
com
.
duiba
.
udtf
;
import
com.alibaba.fastjson.JSON
;
import
com.alibaba.fastjson.JSONArray
;
import
org.apache.hadoop.hive.ql.exec.UDFArgumentException
;
import
org.apache.hadoop.hive.ql.metadata.HiveException
;
import
org.apache.hadoop.hive.ql.udf.generic.GenericUDF
;
import
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector
;
import
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory
;
import
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector
;
import
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory
;
import
java.util.ArrayList
;
import
java.util.List
;
/**
 * @author xugf
 * Expands a JSON array string into a List of its element strings.
 * (Despite the "UDTF" name, this is implemented as a GenericUDF that
 * returns a Hive list&lt;string&gt;.)
 */
public class GetJSONArrayUDTF extends GenericUDF {

    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        // Exactly one argument is accepted: the JSON array string.
        if (arguments.length != 1) {
            throw new UDFArgumentException("arguments.length != 1, and must be jsonArray String.");
        }
        // Declare list<string> as the return type.
        ObjectInspector elementOi = PrimitiveObjectInspectorFactory
                .getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
        return ObjectInspectorFactory.getStandardListObjectInspector(elementOi);
    }

    @Override
    public Object evaluate(DeferredObject[] arguments) throws HiveException {
        Object arg = arguments[0].get();
        if (arg == null) {
            return null;
        }
        JSONArray jsonArray = JSON.parseArray(arg.toString());
        List<String> elements = new ArrayList<>();
        for (int i = 0; i < jsonArray.size(); i++) {
            elements.add(jsonArray.getString(i));
        }
        return elements;
    }

    @Override
    public String getDisplayString(String[] children) {
        return "Usage:GetJSONArray(String str), return ArrayList<String> ";
    }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment