package cn.com.duiba.udf;

import com.google.common.collect.Sets;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDF;

import java.util.*;

/**
 * Hive UDF that computes the two-sided difference of two delimited strings.
 *
 * <p>Each input is split into tokens on the supplied delimiter. The result has the form
 * {@code "<tokens only in stra>|<tokens only in strb>"}, where each half is re-joined with the
 * same delimiter. Duplicate tokens are collapsed and tokens keep the order of their first
 * appearance in the corresponding input.
 */
public class StringDiffUDF extends UDF {

    /** Separator placed between the two halves of the result. */
    private static final String RESULT_SEPARATOR = "|";

    /**
     * Returns the tokens unique to each input, formatted as {@code "onlyInA|onlyInB"}.
     *
     * @param stra      first delimited string; a null or blank value is treated as having no tokens
     * @param strb      second delimited string; a null or blank value is treated as having no tokens
     * @param delimiter token separator used both to split the inputs and to join each half of the
     *                  result; must not be {@code "|"} because that character separates the halves
     * @return {@code "onlyInA|onlyInB"}, or {@code null} when both inputs are blank or the
     *         delimiter is {@code "|"}
     */
    public String evaluate(String stra, String strb, String delimiter) {
        boolean bothBlank = StringUtils.isBlank(stra) && StringUtils.isBlank(strb);
        if (bothBlank || RESULT_SEPARATOR.equals(delimiter)) {
            return null;
        }

        // A blank side simply contributes an empty token set, so one-sided inputs go through the
        // same de-duplication and joining logic as two-sided ones.
        Set<String> tokensA = toTokenSet(stra, delimiter);
        Set<String> tokensB = toTokenSet(strb, delimiter);

        // Sets.difference returns a view that iterates in the order of its first argument,
        // so each half of the result follows the token order of its own input.
        String onlyInA = StringUtils.join(Sets.difference(tokensA, tokensB), delimiter);
        String onlyInB = StringUtils.join(Sets.difference(tokensB, tokensA), delimiter);

        return onlyInA + RESULT_SEPARATOR + onlyInB;
    }

    /** Splits {@code value} on {@code delimiter} into an insertion-ordered set of non-empty tokens. */
    private static Set<String> toTokenSet(String value, String delimiter) {
        Set<String> tokens = new LinkedHashSet<>();
        if (StringUtils.isBlank(value)) {
            return tokens;
        }

        // StringUtils.split interprets its second argument as a set of separator characters,
        // while StringUtils.join uses it as a whole string. For a multi-character delimiter,
        // split on the whole string so that splitting and joining agree.
        String[] parts = (delimiter != null && delimiter.length() > 1)
                ? StringUtils.splitByWholeSeparator(value, delimiter)
                : StringUtils.split(value, delimiter);

        for (String part : parts) {
            // Guard against empty tokens produced by leading/trailing delimiters.
            if (!part.isEmpty()) {
                tokens.add(part);
            }
        }
        return tokens;
    }

}
