package cn.com.duiba.udtf;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

import java.util.ArrayList;
import java.util.List;

/**
 * Hive generic UDF that parses a JSON array string into a list of strings.
 *
 * <p>Despite the "UDTF" suffix in its name, this class extends {@link GenericUDF}:
 * {@link #evaluate(DeferredObject[])} returns a single list value and does not
 * forward rows itself.
 *
 * @author xugf
 */
public class GetJSONArrayUDTF extends GenericUDF {

    /**
     * Validates the call signature and declares the return type.
     *
     * @param arguments object inspectors of the call arguments; exactly one is required
     * @return a standard list object inspector whose elements are Java strings
     * @throws UDFArgumentException if the number of arguments is not exactly one
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        // Argument validation: only the argument count is checked here, not its type.
        if (arguments.length != 1) {
            throw new UDFArgumentException("arguments.length != 1, and must be jsonArray String.");
        }

        // Declare the return type: a list of Java strings.
        ObjectInspector returnOi = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
        return ObjectInspectorFactory.getStandardListObjectInspector(returnOi);
    }

    /**
     * Parses the single argument as a JSON array and returns its elements as strings.
     *
     * <p>The argument is converted with {@code toString()} before parsing. Each element is
     * read with {@link JSONArray#getString(int)}, so the resulting list holds whatever string
     * form fastjson produces for that element.
     *
     * <p>Exceptions raised by the JSON parser for malformed input are not caught here and
     * propagate to the caller.
     *
     * @param arguments deferred call arguments; only the first one is read
     * @return the array elements as a {@code List<String>}, or {@code null} when the argument
     *         is {@code null} or the parser yields no array for the text
     * @throws HiveException if the deferred argument cannot be evaluated
     */
    @Override
    public Object evaluate(DeferredObject[] arguments) throws HiveException {
        Object obj = arguments[0].get();
        if (obj == null) {
            return null;
        }

        JSONArray jsonArray = JSON.parseArray(obj.toString());
        // parseArray can return null instead of an empty array (e.g. for blank text or the
        // JSON literal null); map that to SQL NULL rather than failing with an NPE below.
        if (jsonArray == null) {
            return null;
        }

        List<String> list = new ArrayList<>(jsonArray.size());
        for (int i = 0; i < jsonArray.size(); i++) {
            list.add(jsonArray.getString(i));
        }
        return list;
    }

    /**
     * Returns the fixed text Hive uses when it needs a display string for this function.
     *
     * @param children display strings of the argument expressions; not used
     * @return a constant usage description
     */
    @Override
    public String getDisplayString(String[] children) {
        return "Usage:GetJSONArray(String str), return ArrayList<String> ";
    }
}
