Commit 9a1c87f7 authored by 黄贵华

udf

/target
.settings
.gradle
.idea
/gradle
/bin/
/build/
/out/
*.iml
gradlew
gradlew.bat
.classpath
.project
Owner: 徐贵锋
Project overview:
A library of custom Hive functions (UDFs).
buildscript {
repositories {
mavenLocal()
maven { url "http://nexus.dui88.com:8081/nexus/content/groups/public/" }
mavenCentral()
jcenter()
}
dependencies {
classpath("com.github.jengelman.gradle.plugins:shadow:2.0.1")
}
}
apply plugin: 'java'
apply plugin: 'idea'
apply plugin: 'maven'
apply plugin: 'com.github.johnrengelman.shadow'
group 'cn.com.duiba'
version '1.0-SNAPSHOT'
sourceCompatibility = 1.8
targetCompatibility = 1.8
repositories {
mavenLocal()
maven { url "http://nexus.dui88.com:8081/nexus/content/groups/public/" }
maven { url 'http://repository.cloudera.com/artifactory/cloudera-repos' }
mavenCentral()
}
dependencies {
testCompile group: 'junit', name: 'junit', version: '4.12'
compile('com.alibaba:fastjson:1.2.58')
compile('ch.qos.logback:logback-classic:1.2.3')
compile('org.slf4j:slf4j-api:1.7.25')
compile('cn.com.duiba:bigdata-common:0.0.18') { transitive = false }
compile('com.google.guava:guava:18.0') { transitive = false }
// compileOnly('org.apache.hive:hive-exec:1.1.0-cdh5.14.0')
// compile group: 'org.apache.hive', name: 'hive-exec', version: '1.1.0'
compileOnly('org.apache.hive:hive-exec:1.1.0')
// compile group: 'cz.mallat.uasparser', name: 'uasparser', version: '0.6.2'
compile group: 'nl.basjes.parse.useragent', name: 'yauaa', version: '5.21'
}
configurations {
all*.exclude group: 'log4j', module: 'log4j'
all*.exclude group: 'org.slf4j', module: 'slf4j-log4j12'
}
//shadowJar {
// baseName = project.name
// version = '0.1-SNAPSHOT'
// classifier = null
// configurations = [project.configurations.compile]
//}
tasks.withType(JavaCompile) {
options.encoding = "UTF-8"
}
//build.dependsOn shadowJar
rootProject.name = 'hive-udf'
package cn.com.duiba.udf;
import com.google.common.collect.Lists;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDF;
import java.util.*;
/**
 * For each key:value entry in strB, returns the values whose keys appear in strA.
 */
public class AvailVectorUDF extends UDF {
/**
 * @param strA      1,2,3,4
 * @param strB      1:2,3,4|2:2,3,5
 * @param delimiter |
 * @param separator ,
 * @param keySep    :
 * @return 2,3,4|2,3,5
 */
public String evaluate(String strA, String strB, String delimiter, String separator, String keySep) {
if (StringUtils.isBlank(strA) || StringUtils.isBlank(strB) || StringUtils.isBlank(separator)) {
return null;
}
String[] strASplit = StringUtils.split(strA, separator);
if (ArrayUtils.isEmpty(strASplit)) {
return null;
}
String[] strBSplit = StringUtils.split(strB, delimiter);
if (ArrayUtils.isEmpty(strBSplit)) {
return null;
}
List<String> keyList = Arrays.asList(strASplit);
List<String> vectorList = Lists.newArrayList();
for (String kv : strBSplit) {
String[] split = StringUtils.split(kv, keySep);
// guard against malformed pairs that have no value part
if (split.length > 1 && keyList.contains(split[0])) {
vectorList.add(split[1]);
}
}
return StringUtils.join(vectorList, delimiter);
}
}
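As a quick illustration, here is a minimal JUnit 4 sketch (a hypothetical test class, not part of this commit) that exercises the exact inputs documented in the Javadoc above:

package cn.com.duiba.udf;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
public class AvailVectorUDFTest {
    @Test
    public void returnsValuesForMatchingKeys() {
        AvailVectorUDF udf = new AvailVectorUDF();
        // keys "1" and "2" from strA appear in strB, so both vectors are returned
        assertEquals("2,3,4|2,3,5",
                udf.evaluate("1,2,3,4", "1:2,3,4|2:2,3,5", "|", ",", ":"));
    }
}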
package cn.com.duiba.udf;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
* Created by ZhihuiGe on 2017/5/25.
*/
public class CheckASCII extends UDF {
public String evaluate(String str) {
if (StringUtils.isBlank(str)) {
return str;
}
// return null if any character falls outside printable ASCII (32-126)
for (int i = 0; i < str.length(); i++) {
char c = str.charAt(i);
if (c > 126 || c < 32) {
return null;
}
}
return str;
}
}
package cn.com.duiba.udf;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDF;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
/**
* Created by clemac on 17/3/7.
*/
public class DuplicateRemove extends UDF {
public String evaluate(String str1, String str2, String split) {
if (StringUtils.isBlank(split)) {
return null;
}
if (StringUtils.isBlank(str1) && StringUtils.isBlank(str2)) {
return null;
} else if (StringUtils.isBlank(str1) && StringUtils.isNotBlank(str2)) {
return str2;
} else if (StringUtils.isNotBlank(str1) && StringUtils.isBlank(str2)) {
return str1;
} else {
// note: String.split treats the delimiter as a regex, so metacharacters like "." or "|" need escaping by the caller
Set<String> mySet = new HashSet<>(Arrays.asList(str1.split(split)));
mySet.addAll(Arrays.asList(str2.split(split)));
StringBuilder csvBuilder = new StringBuilder();
for (String str : mySet) {
csvBuilder.append(str);
csvBuilder.append(split);
}
String csv = csvBuilder.toString();
if(csv.length() - split.length() < 0){
return csv;
}
csv = csv.substring(0, csv.length() - split.length());
return csv;
}
}
}
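A hedged JUnit 4 sketch (hypothetical test, not in the commit) showing the merge-and-dedup behavior; since HashSet iteration order is unspecified, the merged result is compared as a set rather than as a string:

package cn.com.duiba.udf;
import java.util.Arrays;
import java.util.HashSet;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
public class DuplicateRemoveTest {
    @Test
    public void mergesAndDeduplicates() {
        DuplicateRemove udf = new DuplicateRemove();
        String merged = udf.evaluate("a,b", "b,c", ",");
        // element order is unspecified, so compare as sets
        assertEquals(new HashSet<>(Arrays.asList("a", "b", "c")),
                new HashSet<>(Arrays.asList(merged.split(","))));
        // a blank side falls through to the other input unchanged
        assertEquals("a,b", udf.evaluate("a,b", null, ","));
    }
}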
package cn.com.duiba.udf;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
* Created by clemac on 17/1/6.
*/
public class GetUrlPara extends UDF {
public String evaluate(String str, String key) {
if (StringUtils.isBlank(str) || StringUtils.isBlank(key)) {
return null;
}
if (str.contains("=")) {
// query-string style: a=1&b=2
String[] rs = str.split("&");
for (String tmp : rs) {
String[] par = tmp.split("=");
if (par.length > 1 && key.equals(par[0])) {
return par[1];
}
}
} else {
// path style: fall back to the trailing numeric path segment
String[] ids = str.split("/");
if (ids.length > 1 && StringUtils.isNumeric(ids[ids.length - 1])) {
return ids[ids.length - 1];
}
}
return null;
}
}
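A minimal JUnit 4 sketch (hypothetical test, not part of the commit) covering both lookup styles described above:

package cn.com.duiba.udf;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
public class GetUrlParaTest {
    @Test
    public void extractsQueryParameterOrTrailingId() {
        GetUrlPara udf = new GetUrlPara();
        // query-string style: look the key up directly
        assertEquals("2", udf.evaluate("a=1&b=2", "b"));
        // path style (no '='): fall back to the trailing numeric segment
        assertEquals("123", udf.evaluate("/item/123", "id"));
    }
}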
package cn.com.duiba.udf;
import cn.com.duiba.bigdata.common.biz.utils.BigdataUtil;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
 * @author xugf 2019-11-28
 * Builds the MD5-based HBase rowkey for the given column values.
 */
public class HbaseRowkeyUDF extends UDF {
public String evaluate(Object... strs) {
return BigdataUtil.getMD5HbaseRowkey(strs);
}
}
package cn.com.duiba.udf;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
* Created by clemac on 17/2/16.
*/
public class IpConvertLong2 extends UDF {
public Long evaluate(String strip) {
// guard against null input before splitting
if (strip == null || strip.split("\\.").length != 4) {
return null;
}
try {
long[] ip = new long[4];
int position1 = strip.indexOf(".");
int position2 = strip.indexOf(".", position1 + 1);
int position3 = strip.indexOf(".", position2 + 1);
ip[0] = Long.parseLong(strip.substring(0, position1));
ip[1] = Long.parseLong(strip.substring(position1 + 1, position2));
ip[2] = Long.parseLong(strip.substring(position2 + 1, position3));
ip[3] = Long.parseLong(strip.substring(position3 + 1));
// equivalent to ip1*256^3 + ip2*256^2 + ip3*256 + ip4
return (ip[0] << 24) + (ip[1] << 16) + (ip[2] << 8) + ip[3];
} catch (Exception e) {
return null;
}
}
}
package cn.com.duiba.udf;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
* Created by clemac on 17/2/16.
*/
public class IsDxmV5 extends UDF {
public String evaluate(String str) {
if (StringUtils.isBlank(str)) {
return str;
}
int pointCount = 0;
for (int i = 0; i < str.length(); i++) {
int chr = str.charAt(i);
if (chr == 46) { // '.'
pointCount++;
continue;
}
if (chr == 95) { // '_'
continue;
}
if (chr == 45) { // '-'
continue;
}
if (chr >= 48 && chr <= 57) { // 0-9
continue;
}
if (chr >= 65 && chr <= 90) { // A-Z
continue;
}
if (chr >= 97 && chr <= 122) { // a-z
continue;
}
return null;
}
// a valid value contains exactly three dots
if (pointCount == 3) {
return str;
} else {
return null;
}
}
}
package cn.com.duiba.udf;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
* Created by clemac on 17/1/6.
*/
public class IsImei extends UDF {
public Boolean evaluate(String str) {
if (StringUtils.isBlank(str) || str.length() != 15 || !StringUtils.isNumeric(str)) {
return false;
}
// Luhn check: recompute the 15th (check) digit from the first 14 digits
String data = str.substring(0, 14);
int sum1 = 0;
int sum2 = 0;
for (int i = 0; i < data.length(); i++) {
int num = data.charAt(i) - '0'; // ASCII digit to int
if (i % 2 == 0) {
// (1) sum the digits in odd positions (counting from 1)
sum1 = sum1 + num;
} else {
// (2) double the digits in even positions and sum the digits of each product
int temp = num * 2;
if (temp < 10) {
sum2 = sum2 + temp;
} else {
sum2 = sum2 + temp + 1 - 10;
}
}
}
int total = sum1 + sum2;
// if the total ends in 0, the check digit is 0; otherwise it is 10 minus the last digit
String resultStr;
if (total % 10 == 0) {
resultStr = data + "0";
} else {
resultStr = data + (10 - (total % 10));
}
return str.equals(resultStr);
}
}
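To make the Luhn arithmetic concrete, here is a JUnit 4 sketch (hypothetical test, not part of the commit) with a check digit worked out by hand from the algorithm above:

package cn.com.duiba.udf;
import org.junit.Test;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
public class IsImeiTest {
    @Test
    public void validatesLuhnCheckDigit() {
        IsImei udf = new IsImei();
        // for 49015420323751: odd-position sum = 22, doubled even-position digit sum = 30,
        // total = 52, so the check digit is 10 - 2 = 8
        assertTrue(udf.evaluate("490154203237518"));
        // any other final digit fails the check
        assertFalse(udf.evaluate("490154203237519"));
    }
}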
package cn.com.duiba.udf;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
* Created by ZhihuiGe on 2017/5/27.
*/
public class LongConvertIp extends UDF {
public String evaluate(Long longIp) {
if (longIp == null) {
return null;
}
// peel off one octet at a time, from the highest byte down
StringBuilder sb = new StringBuilder();
sb.append(longIp >>> 24);
sb.append(".");
sb.append((longIp & 0x00FFFFFF) >>> 16);
sb.append(".");
sb.append((longIp & 0x0000FFFF) >>> 8);
sb.append(".");
sb.append(longIp & 0x000000FF);
return sb.toString();
}
}
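LongConvertIp is the inverse of IpConvertLong2, so a round trip should be lossless. A JUnit 4 sketch (hypothetical test, not in the commit):

package cn.com.duiba.udf;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
public class IpRoundTripTest {
    @Test
    public void longAndDottedFormsRoundTrip() {
        // 192.168.1.1 -> 192*2^24 + 168*2^16 + 1*2^8 + 1 = 3232235777
        Long packed = new IpConvertLong2().evaluate("192.168.1.1");
        assertEquals(Long.valueOf(3232235777L), packed);
        assertEquals("192.168.1.1", new LongConvertIp().evaluate(packed));
    }
}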
package cn.com.duiba.udf;
import com.google.common.collect.Maps;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDF;
import javax.crypto.Cipher;
import javax.crypto.spec.SecretKeySpec;
import java.nio.charset.StandardCharsets;
import java.util.Map;
/**
* Created by clemac on 19/7/15.
*/
public class MtDecrypt extends UDF {
/**
 * key used for AES encryption
 */
private static final String AES_KEY = "waimaiad_aes_key";
/**
 * the AES SecretKeySpec built from AES_KEY
 */
private static SecretKeySpec KEY = null;
/*
 * KEY never changes as long as AES_KEY stays the same, so build it once
 */
static {
// create the AES key spec
KEY = new SecretKeySpec(AES_KEY.getBytes(), "AES");
}
/**
 * Encrypts the given text.
 */
public static String encrypt(String content) throws Exception {
Cipher cipher = Cipher.getInstance("AES");
byte[] byteContent = content.getBytes(StandardCharsets.UTF_8);
cipher.init(Cipher.ENCRYPT_MODE, KEY);
// perform the AES encryption
byte[] result = cipher.doFinal(byteContent);
// convert the byte array to a hex string
return parseByte2HexStr(result);
}
/**
 * Decrypts the given text.
 */
public static String decrypt(String encryptContent) throws Exception {
// convert the hex-encoded ciphertext back to a byte array
byte[] content = parseHexStr2Byte(encryptContent);
if (content == null) {
return null;
}
Cipher cipher = Cipher.getInstance("AES");
cipher.init(Cipher.DECRYPT_MODE, KEY);
byte[] result = cipher.doFinal(content);
// decode with the same charset used for encryption
return new String(result, StandardCharsets.UTF_8);
}
/**
 * Converts a byte array to a hex string.
 *
 * @param buf the bytes to convert
 * @return the hex string
 */
public static String parseByte2HexStr(byte[] buf) {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < buf.length; i++) {
// convert each byte to hex
String hex = Integer.toHexString(buf[i] & 0xFF);
if (hex.length() == 1) {
// left-pad to two hex digits per byte for a uniform format
hex = '0' + hex;
}
sb.append(hex.toUpperCase());
}
return sb.toString();
}
/**
 * Converts a hex string to a byte array.
 *
 * @param hexStr the hex string to convert
 * @return the byte array
 */
public static byte[] parseHexStr2Byte(String hexStr) {
if (hexStr.length() < 1) {
return null;
}
byte[] result = new byte[hexStr.length() / 2];
for (int i = 0; i < hexStr.length() / 2; i++) {
int num = Integer.parseInt(hexStr.substring(i * 2, i * 2 + 2), 16);
result[i] = (byte) num;
}
return result;
}
/**
 * Parses the decrypted Meituan parameter string into a map.
 */
public static Map<String, Object> analysisMeituanParams(String params) throws Exception {
if (StringUtils.isBlank(params)) {
return null;
}
String decrypted = decrypt(params);
String[] split = decrypted.split("\\|");
Map<String, Object> map = Maps.newHashMap();
for (String pa : split) {
String[] split1 = pa.split("=");
// skip malformed entries without a value part
if (split1.length > 1) {
map.put(split1[0], split1[1]);
}
}
return map;
}
public String evaluate(String code) {
if (StringUtils.isBlank(code)) {
return code;
}
try {
return decrypt(code);
} catch (Exception e) {
return null;
}
}
}
package cn.com.duiba.udf;
import com.google.common.collect.Maps;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDF;
import javax.crypto.Cipher;
import javax.crypto.spec.SecretKeySpec;
import java.nio.charset.StandardCharsets;
import java.util.Map;
/**
* Created by clemac on 19/7/15.
*/
public class MtEncrypt extends UDF {
/**
 * key used for AES encryption
 */
private static final String AES_KEY = "waimaiad_aes_key";
/**
 * the AES SecretKeySpec built from AES_KEY
 */
private static SecretKeySpec KEY = null;
/*
 * KEY never changes as long as AES_KEY stays the same, so build it once
 */
static {
// create the AES key spec
KEY = new SecretKeySpec(AES_KEY.getBytes(), "AES");
}
/**
 * Encrypts the given text.
 */
public static String encrypt(String content) throws Exception {
Cipher cipher = Cipher.getInstance("AES");
byte[] byteContent = content.getBytes(StandardCharsets.UTF_8);
cipher.init(Cipher.ENCRYPT_MODE, KEY);
// perform the AES encryption
byte[] result = cipher.doFinal(byteContent);
// convert the byte array to a hex string
return parseByte2HexStr(result);
}
/**
 * Decrypts the given text.
 */
public static String decrypt(String encryptContent) throws Exception {
// convert the hex-encoded ciphertext back to a byte array
byte[] content = parseHexStr2Byte(encryptContent);
if (content == null) {
return null;
}
Cipher cipher = Cipher.getInstance("AES");
cipher.init(Cipher.DECRYPT_MODE, KEY);
byte[] result = cipher.doFinal(content);
// decode with the same charset used for encryption
return new String(result, StandardCharsets.UTF_8);
}
/**
 * Converts a byte array to a hex string.
 *
 * @param buf the bytes to convert
 * @return the hex string
 */
public static String parseByte2HexStr(byte[] buf) {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < buf.length; i++) {
// convert each byte to hex
String hex = Integer.toHexString(buf[i] & 0xFF);
if (hex.length() == 1) {
// left-pad to two hex digits per byte for a uniform format
hex = '0' + hex;
}
sb.append(hex.toUpperCase());
}
return sb.toString();
}
/**
 * Converts a hex string to a byte array.
 *
 * @param hexStr the hex string to convert
 * @return the byte array
 */
public static byte[] parseHexStr2Byte(String hexStr) {
if (hexStr.length() < 1) {
return null;
}
byte[] result = new byte[hexStr.length() / 2];
for (int i = 0; i < hexStr.length() / 2; i++) {
int num = Integer.parseInt(hexStr.substring(i * 2, i * 2 + 2), 16);
result[i] = (byte) num;
}
return result;
}
/**
 * Parses the decrypted Meituan parameter string into a map.
 */
public static Map<String, Object> analysisMeituanParams(String params) throws Exception {
if (StringUtils.isBlank(params)) {
return null;
}
String decrypted = decrypt(params);
String[] split = decrypted.split("\\|");
Map<String, Object> map = Maps.newHashMap();
for (String pa : split) {
String[] split1 = pa.split("=");
// skip malformed entries without a value part
if (split1.length > 1) {
map.put(split1[0], split1[1]);
}
}
return map;
}
public String evaluate(String code) {
if (StringUtils.isBlank(code)) {
return code;
}
try {
return encrypt(code);
} catch (Exception e) {
return null;
}
}
}
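MtEncrypt and MtDecrypt are identical except for the direction of evaluate, and they share the same hard-coded AES key, so the hex output of one should feed straight into the other. A JUnit 4 round-trip sketch (hypothetical test, not in the commit; the sample payload is made up):

package cn.com.duiba.udf;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
public class MtCryptoRoundTripTest {
    @Test
    public void encryptThenDecryptReturnsOriginal() {
        // encrypt to an uppercase hex string, then decrypt it back
        String cipherHex = new MtEncrypt().evaluate("poi_id=123|city=hz");
        assertEquals("poi_id=123|city=hz", new MtDecrypt().evaluate(cipherHex));
    }
}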
package cn.com.duiba.udf;
import nl.basjes.parse.useragent.UserAgent;
import nl.basjes.parse.useragent.UserAgentAnalyzer;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.json.JSONObject;
import java.util.HashMap;
import java.util.Map;
public class ParseUaUDF extends UDF {
private static UserAgentAnalyzer userAgentAnalyzer;
static {
// initialize once in a static block; building the analyzer on every call would be very slow
userAgentAnalyzer = UserAgentAnalyzer
.newBuilder()
.hideMatcherLoadStats()
.delayInitialization()
.build();
}
public String evaluate(String strUa) {
try {
int uaLengthLimit = 5;
// reject blank or implausibly short UA strings
if (StringUtils.isBlank(strUa) || strUa.length() <= uaLengthLimit) {
return null;
}
UserAgent userAgent = userAgentAnalyzer.parse(strUa);
String deviceClass = userAgent.getValue("DeviceClass");
String deviceBrand = userAgent.getValue("DeviceBrand");
String deviceName = userAgent.getValue("DeviceName");
String operatingSystemClass = userAgent.getValue("OperatingSystemClass");
String operatingSystemName = userAgent.getValue("OperatingSystemName");
String operatingSystemVersion = userAgent.getValue("OperatingSystemVersion");
// the basic network type cannot be derived from the UA string
// String networkType = userAgent.getValue("NETWORK_TYPE");
Map<String, String> map = new HashMap<>();
map.put("device_class", deviceClass);
map.put("device_brand", deviceBrand);
map.put("device_name", deviceName);
map.put("operating_system_class", operatingSystemClass);
map.put("operating_system_name", operatingSystemName);
map.put("operating_system_version", operatingSystemVersion);
// map.put("network_type", networkType);
JSONObject json = new JSONObject(map);
return json.toString();
} catch (Exception e) {
e.printStackTrace();
return null;
}
}
}
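Because the parsed values depend on the yauaa rule set bundled at build time, a test can only safely assert on the JSON structure. A JUnit 4 sketch (hypothetical test, not in the commit; the UA string is an ordinary desktop Chrome example):

package cn.com.duiba.udf;
import org.junit.Test;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
public class ParseUaUDFTest {
    @Test
    public void returnsJsonWithDeviceAndOsFields() {
        String ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                + "(KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36";
        String json = new ParseUaUDF().evaluate(ua);
        // exact values vary with the yauaa version, so only check the keys exist
        assertNotNull(json);
        assertTrue(json.contains("operating_system_name"));
        assertTrue(json.contains("device_class"));
    }
}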
package cn.com.duiba.udf;
import com.google.common.collect.Sets;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDF;
import java.util.*;
/**
 * Computes the symmetric difference of two delimited strings.
 */
public class StringDiffUDF extends UDF {
public String evaluate(String stra, String strb, String delimiter) {
// "|" is reserved as the output separator, so it cannot be the input delimiter
if ((StringUtils.isBlank(stra) && StringUtils.isBlank(strb)) || "|".equals(delimiter)) {
return null;
}
if (StringUtils.isBlank(stra) || StringUtils.isBlank(strb)) {
return (stra == null ? "" : stra) + "|" + (strb == null ? "" : strb);
}
Set<String> setA = Sets.newHashSet(StringUtils.split(stra, delimiter));
Set<String> setB = Sets.newHashSet(StringUtils.split(strb, delimiter));
// copy each side so the originals survive the removeAll calls
Set<String> resultA = new HashSet<>(setA);
Set<String> resultB = new HashSet<>(setB);
// set difference in both directions
resultA.removeAll(setB);
resultB.removeAll(setA);
String resultStrA = StringUtils.join(resultA, delimiter);
String resultStrB = StringUtils.join(resultB, delimiter);
return resultStrA + "|" + resultStrB;
}
}
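A JUnit 4 sketch (hypothetical test, not in the commit) where each directed difference has a single element, keeping the joined output deterministic:

package cn.com.duiba.udf;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
public class StringDiffUDFTest {
    @Test
    public void returnsBothDirectedDifferences() {
        StringDiffUDF udf = new StringDiffUDF();
        // "a" is only in the left input, "d" only in the right
        assertEquals("a|d", udf.evaluate("a,b,c", "b,c,d", ","));
    }
}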
package cn.com.duiba.udf;
import com.google.common.collect.Sets;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDF;
import java.util.Set;
/**
 * Computes the intersection of two delimited strings.
 */
public class StringIntersectionUDF extends UDF {
// params: delimited strings a and b; single-element (no-delimiter) inputs may need extra testing
public String evaluate(String stra, String strb, String delimiter) {
// both inputs must be non-blank; "|" is reserved as the output separator
if (StringUtils.isBlank(stra) || StringUtils.isBlank(strb) || "|".equals(delimiter)) {
return null;
}
Set<String> setA = Sets.newHashSet(StringUtils.split(stra, delimiter));
Set<String> setB = Sets.newHashSet(StringUtils.split(strb, delimiter));
// keep only the elements common to both sets
setA.retainAll(setB);
return setA.isEmpty() ? null : StringUtils.join(setA, delimiter) + "|" + setA.size();
}
}
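A companion JUnit 4 sketch (hypothetical test, not in the commit); a single shared element keeps the joined output deterministic despite HashSet ordering:

package cn.com.duiba.udf;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
public class StringIntersectionUDFTest {
    @Test
    public void returnsCommonElementsAndTheirCount() {
        StringIntersectionUDF udf = new StringIntersectionUDF();
        // "b" is the only shared element, so the output is the element plus its count
        assertEquals("b|1", udf.evaluate("a,b", "b,c", ","));
        // an empty intersection yields null
        assertNull(udf.evaluate("a", "b", ","));
    }
}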
package cn.com.duiba.udf;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDF;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
* Created by clemac on 17/2/16.
*/
public class TimeDiff extends UDF {
public Long evaluate(String sTimes, String eTimes, String type){
try {
if(StringUtils.isBlank(sTimes) || StringUtils.isBlank(eTimes) || StringUtils.isBlank(type)){
return null;
}
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
Date sdate = sdf.parse(sTimes);
Date edate = sdf.parse(eTimes);
long from = sdate.getTime();
long to = edate.getTime();
if (to - from < 0){
return null;
}
if("ss".equals(type)){
return (to - from)/(1000);
}
if("mm".equals(type)){
return (to - from)/(1000 * 60);
}
if("hh".equals(type)){
return (to - from)/(1000 * 60 * 60);
}
if("dd".equals(type)){
return (to - from)/(1000 * 60 * 60 * 24);
}
return null;
} catch (Exception e) {
return null;
}
}
}
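A JUnit 4 sketch (hypothetical test, not in the commit) covering the supported units; note the integer division truncates toward zero:

package cn.com.duiba.udf;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
public class TimeDiffTest {
    @Test
    public void computesDifferencesInTheRequestedUnit() {
        TimeDiff udf = new TimeDiff();
        // 90 minutes between the two timestamps; "hh" truncates to whole hours
        assertEquals(Long.valueOf(90), udf.evaluate("2020-01-01 00:00:00", "2020-01-01 01:30:00", "mm"));
        assertEquals(Long.valueOf(1), udf.evaluate("2020-01-01 00:00:00", "2020-01-01 01:30:00", "hh"));
        // a negative interval returns null
        assertNull(udf.evaluate("2020-01-02 00:00:00", "2020-01-01 00:00:00", "ss"));
    }
}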
package cn.com.duiba.udf;
import org.apache.hadoop.hive.ql.exec.UDF;
import java.net.URLDecoder;
/**
* Created by clemac on 17/3/7.
*/
public class UrlDecode extends UDF {
public String evaluate(String str, String code) {
try {
return URLDecoder.decode(str, code);
} catch (Exception e) {
return null;
}
}
}
package cn.com.duiba.udf;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDF;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
/**
* Created by clemac on 17/2/16.
*/
public class VaildClickCount2 extends UDF {
/**
 * Keeps only the clicks that are at least `interval` minutes apart, assuming strTimes
 * is a comma-separated, ascending list of "yyyy-MM-dd HH:mm:ss" timestamps.
 */
public String evaluate(String strTimes, Long interval) {
try {
if (StringUtils.isBlank(strTimes)) {
return "";
}
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
List<String> result = new ArrayList<String>();
String[] times = strTimes.split(",");
if (times.length <= 0) {
return "";
}
Date now = sdf.parse(times[0]);
result.add(times[0]);
Date after = new Date(now.getTime() + 60000 * interval);
for (String tstr : times) {
// keep a timestamp only once it falls past the current window
if (tstr.compareTo(sdf.format(after)) > 0) {
now = sdf.parse(tstr);
after.setTime(now.getTime() + 60000 * interval);
result.add(tstr);
}
}
return StringUtils.join(result, ',');
} catch (Exception e) {
return null;
}
}
}
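A JUnit 4 sketch (hypothetical test, not in the commit) tracing the windowing behavior documented above:

package cn.com.duiba.udf;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
public class VaildClickCount2Test {
    @Test
    public void dropsClicksInsideTheIntervalWindow() {
        VaildClickCount2 udf = new VaildClickCount2();
        // 10:05 falls inside the 30-minute window opened at 10:00 and is dropped;
        // 10:40 falls outside and opens a new window
        assertEquals("2020-01-01 10:00:00,2020-01-01 10:40:00",
                udf.evaluate("2020-01-01 10:00:00,2020-01-01 10:05:00,2020-01-01 10:40:00", 30L));
    }
}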
package cn.com.duiba.udf;
import com.google.common.collect.Lists;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDF;
import java.util.List;
/**
 * Computes the element-wise average of a list of vectors.
 */
public class VectorAvgUDF extends UDF {
public String evaluate(String str, String delimiter, String separator) {
if (StringUtils.isBlank(str) || StringUtils.isBlank(delimiter) || StringUtils.isBlank(separator)) {
return null;
}
String[] strArray = StringUtils.split(str, delimiter);
if (ArrayUtils.isEmpty(strArray)) {
return null;
}
long maxLength = 0L;
// split out the individual vectors
List<String[]> vectorList = Lists.newArrayList();
for (String s : strArray) {
String[] split = StringUtils.split(s, separator);
maxLength = getMaxLength(maxLength, split);
vectorList.add(split);
}
if (vectorList.isEmpty()) {
return null;
}
// average each component across all vectors
List<Double> avgList = Lists.newArrayList();
for (int i = 0; i < maxLength; i++) {
double sum = 0D;
for (String[] vector : vectorList) {
if (vector != null && vector.length > i) {
sum += Double.parseDouble(vector[i]);
}
}
avgList.add(sum / vectorList.size());
}
return StringUtils.join(avgList, separator);
}
private long getMaxLength(long maxLength, String[] split) {
if (split == null) {
return maxLength;
}
return split.length > maxLength ? split.length : maxLength;
}
}
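A JUnit 4 sketch (hypothetical test, not in the commit) averaging two three-component vectors:

package cn.com.duiba.udf;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
public class VectorAvgUDFTest {
    @Test
    public void averagesVectorsComponentWise() {
        VectorAvgUDF udf = new VectorAvgUDF();
        // mean of (1,2,3) and (3,4,5) is (2,3,4)
        assertEquals("2.0;3.0;4.0", udf.evaluate("1;2;3|3;4;5", "|", ";"));
    }
}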
package cn.com.duiba.udtf;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import java.util.ArrayList;
import java.util.List;
/**
 * @author xugf
 * Parses a JSON-array string into a list of element strings.
 */
public class GetJSONArrayUDTF extends GenericUDF {
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
// validate the arguments
if(arguments.length != 1) {
throw new UDFArgumentException("arguments.length != 1, and must be jsonArray String.");
}
// declare the return type: a list of strings
ObjectInspector returnOi = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
return ObjectInspectorFactory.getStandardListObjectInspector(returnOi);
}
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
Object obj = arguments[0].get();
if(obj == null) {
return null;
}
List<String> list = new ArrayList<>();
String str = obj.toString();
JSONArray jsonArray = JSON.parseArray(str);
for(int i = 0; i < jsonArray.size(); i++) {
list.add(jsonArray.getString(i));
}
return list;
}
@Override
public String getDisplayString(String[] children) {
return "Usage:GetJSONArray(String str), return ArrayList<String> ";
}
}
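Despite living in the udtf package, this class is a GenericUDF, so it can be driven directly in a test. A JUnit 4 sketch (hypothetical test, not in the commit; it assumes Hive's GenericUDF.DeferredJavaObject helper, which hive-exec 1.1 ships):

package cn.com.duiba.udtf;
import java.util.Arrays;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
public class GetJSONArrayUDTFTest {
    @Test
    public void explodesAJsonArrayIntoStrings() throws Exception {
        GetJSONArrayUDTF udf = new GetJSONArrayUDTF();
        // initialize with a single string argument, as Hive would
        udf.initialize(new ObjectInspector[]{PrimitiveObjectInspectorFactory.javaStringObjectInspector});
        Object result = udf.evaluate(new GenericUDF.DeferredObject[]{
                new GenericUDF.DeferredJavaObject("[\"a\",\"b\"]")});
        assertEquals(Arrays.asList("a", "b"), result);
    }
}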