package streaming.dsl.mmlib.algs.feature;

import breeze.linalg.DenseVector;
import breeze.linalg.DenseVector$;
import org.apache.spark.ml.feature.StandardScaler;
import org.apache.spark.ml.feature.VectorAssembler;
import org.apache.spark.ml.linalg.SQLDataTypes$;
import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.ml.linalg.Vectors$;
import org.apache.spark.mllib.feature.Normalizer;
import org.apache.spark.mllib.feature.StandardScalerModel;
import org.apache.spark.mllib.stat.MultivariateStatisticalSummary;
import org.apache.spark.mllib.stat.Statistics$;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.SparkSession$implicits$;
import org.apache.spark.sql.expressions.UserDefinedFunction;
import org.apache.spark.sql.expressions.UserDefinedFunction$;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.ArrayType;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DoubleType$;
import org.apache.spark.sql.types.StructField;
import scala.Array$;
import scala.Function1;
import scala.Function2;
import scala.MatchError;
import scala.Predef$;
import scala.Some;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableOnce;
import scala.collection.immutable.List$;
import scala.collection.immutable.Map;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayBuffer;
import scala.reflect.ClassTag$;
import scala.reflect.api.Mirror;
import scala.reflect.api.TypeCreator;
import scala.reflect.api.TypeTags;
import scala.reflect.api.Types;
import scala.reflect.api.Universe;
import scala.reflect.runtime.package$;
import scala.runtime.BoxesRunTime;
import scala.runtime.ObjectRef;
import streaming.dsl.mmlib.algs.MetaConst$;
import streaming.dsl.mmlib.algs.feature.BaseFeatureFunctions;
import streaming.dsl.mmlib.algs.meta.MinMaxValueMeta;
import streaming.dsl.mmlib.algs.meta.OutlierValueMeta;
import streaming.dsl.mmlib.algs.meta.StandardScalerValueMeta;

/* compiled from: DoubleFeature.scala */
/* loaded from: input_file:streaming/dsl/mmlib/algs/feature/DoubleFeature$.class */
public final class DoubleFeature$ implements BaseFeatureFunctions {
    // Singleton instance holder for this compiled Scala object. It is declared with a
    // null initializer here; the private constructor assigns MODULE$ = this.
    public static final DoubleFeature$ MODULE$ = null;

    // Creates the single instance when the class is initialized; the constructor
    // stores that instance into MODULE$.
    static {
        new DoubleFeature$();
    }

    /**
     * Forwards to the trait implementation {@code BaseFeatureFunctions.Cclass.replaceColumn},
     * passing this singleton as the receiver.
     *
     * @param dataset             dataset handed to the trait implementation
     * @param str                 string argument forwarded as-is ({@code scale} passes its temporary column name here)
     * @param userDefinedFunction UDF forwarded as-is
     * @return the dataset produced by the trait implementation
     */
    @Override // streaming.dsl.mmlib.algs.feature.BaseFeatureFunctions
    public Dataset<Row> replaceColumn(Dataset<Row> dataset, String str, UserDefinedFunction userDefinedFunction) {
        final Dataset<Row> replaced = BaseFeatureFunctions.Cclass.replaceColumn(this, dataset, str, userDefinedFunction);
        return replaced;
    }

    /**
     * Forwards to the trait implementation
     * {@code BaseFeatureFunctions.Cclass.killSingleColumnOutlierValue}, passing this singleton
     * as the receiver.
     *
     * @param dataset dataset handed to the trait implementation
     * @param str     string argument forwarded as-is (presumably the column name; confirm against the trait)
     * @return the (dataset, {@code OutlierValueMeta}) pair produced by the trait implementation
     */
    @Override // streaming.dsl.mmlib.algs.feature.BaseFeatureFunctions
    public Tuple2<Dataset<Row>, OutlierValueMeta> killSingleColumnOutlierValue(Dataset<Row> dataset, String str) {
        final Tuple2<Dataset<Row>, OutlierValueMeta> datasetAndMeta = BaseFeatureFunctions.Cclass.killSingleColumnOutlierValue(this, dataset, str);
        return datasetAndMeta;
    }

    /**
     * Forwards to the trait implementation {@code BaseFeatureFunctions.Cclass.asBreeze},
     * passing this singleton as the receiver.
     *
     * @param vector Spark ML vector handed to the trait implementation
     * @return the Breeze {@code DenseVector} produced by the trait implementation
     */
    @Override // streaming.dsl.mmlib.algs.feature.BaseFeatureFunctions
    public DenseVector<Object> asBreeze(Vector vector) {
        final DenseVector<Object> breezeVector = BaseFeatureFunctions.Cclass.asBreeze(this, vector);
        return breezeVector;
    }

    /**
     * Forwards to the trait implementation {@code BaseFeatureFunctions.Cclass.getTempCol},
     * passing this singleton as the receiver.
     *
     * @return the string produced by the trait implementation; {@code scale} and
     *         {@code normalize} use it as a temporary column name
     */
    @Override // streaming.dsl.mmlib.algs.feature.BaseFeatureFunctions
    public String getTempCol() {
        final String tempColumn = BaseFeatureFunctions.Cclass.getTempCol(this);
        return tempColumn;
    }

    /**
     * Forwards to the trait implementation {@code BaseFeatureFunctions.Cclass.getFieldGroupName},
     * passing this singleton as the receiver.
     *
     * @param seq sequence of strings forwarded as-is (callers in this class pass their field-name sequence)
     * @return the string produced by the trait implementation; callers in this class pass it
     *         to the {@code MetaConst$} path builders
     */
    @Override // streaming.dsl.mmlib.algs.feature.BaseFeatureFunctions
    public String getFieldGroupName(Seq<String> seq) {
        final String groupName = BaseFeatureFunctions.Cclass.getFieldGroupName(this, seq);
        return groupName;
    }

    /**
     * Forwards to the trait implementation
     * {@code BaseFeatureFunctions.Cclass.expandColumnsFromVector}, passing this singleton as
     * the receiver.
     *
     * @param dataset dataset handed to the trait implementation
     * @param seq     sequence of strings forwarded as-is (callers in this class pass their field names)
     * @param str     string argument forwarded as-is (callers in this class pass a temporary vector column name)
     * @return the dataset produced by the trait implementation
     */
    @Override // streaming.dsl.mmlib.algs.feature.BaseFeatureFunctions
    public Dataset<Row> expandColumnsFromVector(Dataset<Row> dataset, Seq<String> seq, String str) {
        final Dataset<Row> expanded = BaseFeatureFunctions.Cclass.expandColumnsFromVector(this, dataset, seq, str);
        return expanded;
    }

    /**
     * Runs the closure {@code DoubleFeature$$anonfun$killOutlierValue$1} once for every entry
     * of {@code seq}, then writes the buffer that closure was given as a parquet dataset of
     * {@code OutlierValueMeta} (overwrite mode) and returns the dataset held in the mutable
     * reference.
     *
     * NOTE(review): the closure body is not visible here; presumably it calls
     * {@code killSingleColumnOutlierValue} per column, updates the dataset reference and
     * appends the resulting meta to the buffer. Confirm against the closure class.
     *
     * @param dataset starting dataset; also supplies the {@code SparkSession}
     * @param str     first argument to {@code MetaConst$.OUTLIER_VALUE_PATH} (presumably a base path; confirm)
     * @param seq     entries to iterate over; also used to derive the field group name for the path
     * @return the dataset stored in the mutable reference after the iteration
     */
    public Dataset<Row> killOutlierValue(Dataset<Row> dataset, String str, Seq<String> seq) {
        // Mutable holder initialised with the input dataset, shared with the per-entry closure.
        ObjectRef create = ObjectRef.create(dataset);
        // Buffer shared with the per-entry closure; its contents are persisted below.
        ArrayBuffer arrayBuffer = new ArrayBuffer();
        seq.foreach(new DoubleFeature$$anonfun$killOutlierValue$1(create, arrayBuffer));
        SparkSession sparkSession = dataset.sparkSession();
        SparkSession$implicits$ implicits = sparkSession.implicits();
        TypeTags universe = package$.MODULE$.universe();
        // Build a product encoder for OutlierValueMeta via a runtime TypeTag, then write the
        // buffer as parquet to OUTLIER_VALUE_PATH(str, fieldGroupName), replacing existing data.
        sparkSession.createDataset(arrayBuffer, implicits.newProductEncoder(universe.TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.DoubleFeature$$typecreator4$2
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                mirror.universe();
                return mirror.staticClass("streaming.dsl.mmlib.algs.meta.OutlierValueMeta").asType().toTypeConstructor();
            }
        }))).write().mode(SaveMode.Overwrite).parquet(MetaConst$.MODULE$.OUTLIER_VALUE_PATH(str, getFieldGroupName(seq)));
        return (Dataset) create.elem;
    }

    /**
     * Loads the {@code OutlierValueMeta} records stored as parquet at
     * {@code MetaConst$.OUTLIER_VALUE_PATH(str, getFieldGroupName(seq))}, collects them,
     * turns them into an immutable map via {@code DoubleFeature$$anonfun$1}, broadcasts that
     * map, and returns a {@code DoubleFeature$$anonfun$2} wrapping the broadcast.
     *
     * NOTE(review): what the returned two-argument function computes is defined in
     * {@code DoubleFeature$$anonfun$2}, which is not visible here.
     *
     * @param sparkSession session used to read the parquet data and to broadcast the map
     * @param str          first argument to {@code OUTLIER_VALUE_PATH} (presumably a base path; confirm)
     * @param seq          used to derive the field group name for the path
     * @param map          not referenced anywhere in this method body
     * @return a function object built around the broadcast meta map
     */
    public Function2<Object, String, Object> getModelOutlierValueForPredict(SparkSession sparkSession, String str, Seq<String> seq, Map<String, String> map) {
        Predef$ predef$ = Predef$.MODULE$;
        Predef$ predef$2 = Predef$.MODULE$;
        Dataset parquet = sparkSession.read().parquet(MetaConst$.MODULE$.OUTLIER_VALUE_PATH(str, getFieldGroupName(seq)));
        SparkSession$implicits$ implicits = sparkSession.implicits();
        TypeTags universe = package$.MODULE$.universe();
        // Decode rows as OutlierValueMeta, collect to the driver, map each record to a pair,
        // build a Map from the pairs, and broadcast it for use inside the returned function.
        return new DoubleFeature$$anonfun$2(sparkSession.sparkContext().broadcast(predef$.refArrayOps((Object[]) predef$2.refArrayOps((Object[]) parquet.as(implicits.newProductEncoder(universe.TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.DoubleFeature$$typecreator17$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                mirror.universe();
                return mirror.staticClass("streaming.dsl.mmlib.algs.meta.OutlierValueMeta").asType().toTypeConstructor();
            }
        }))).collect()).map(new DoubleFeature$$anonfun$1(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)))).toMap(Predef$.MODULE$.$conforms()), ClassTag$.MODULE$.apply(Map.class)));
    }

    /**
     * Assembles the columns named in {@code seq} into a single vector column named
     * {@code str2} using Spark ML's {@code VectorAssembler}.
     *
     * @param dataset dataset to transform
     * @param str     not referenced in this method body
     * @param seq     names of the input columns
     * @param str2    name of the output vector column
     * @return the dataset returned by {@code VectorAssembler.transform}
     */
    public Dataset<Row> vectorize(Dataset<Row> dataset, String str, Seq<String> seq, String str2) {
        final VectorAssembler assembler = new VectorAssembler();
        final String[] inputColumns = (String[]) seq.toArray(ClassTag$.MODULE$.apply(String.class));
        return assembler.setInputCols(inputColumns).setOutputCol(str2).transform(dataset);
    }

    /**
     * Returns the constant {@code "_features_"}. The method name follows the Scala compiler's
     * naming scheme for the default value of {@code vectorize}'s fourth parameter (the output
     * column name).
     *
     * @return the literal {@code "_features_"}
     */
    public String vectorize$default$4() {
        final String defaultOutputColumn = "_features_";
        return defaultOutputColumn;
    }

    /**
     * Rebuilds the min-max rescaling function from {@code MinMaxValueMeta} records stored as
     * parquet at {@code MetaConst$.MIN_MAX_PATH(str, getFieldGroupName(seq))}.
     *
     * The records are collected into a map (via {@code DoubleFeature$$anonfun$3}); two
     * per-field double arrays are derived from it, one by {@code DoubleFeature$$anonfun$4} and
     * one by {@code DoubleFeature$$anonfun$5}; their element-wise difference and the first
     * array are passed to {@link #minMaxFunc} together with the target range and target
     * minimum parsed from {@code map}.
     *
     * NOTE(review): by analogy with {@code scale}, anonfun$4 presumably yields each field's
     * stored minimum and anonfun$5 its stored maximum. Confirm against those closure classes.
     * The fallback values used when "min"/"max" are absent from {@code map} live in
     * anonfun$6/anonfun$7 and are not visible here.
     *
     * @param sparkSession session used to read the parquet data
     * @param seq          field names, in the order the arrays are built
     * @param str          first argument to {@code MIN_MAX_PATH} (presumably a base path; confirm)
     * @param map          options; the "min" and "max" entries are parsed as doubles
     * @return the function produced by {@code minMaxFunc}
     */
    public Function1<Vector, Vector> getMinMaxModelForPredict(SparkSession sparkSession, Seq<String> seq, String str, Map<String, String> map) {
        // Read and decode the stored MinMaxValueMeta rows, collect them, and index them as a Map.
        Map map2 = Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps((Object[]) sparkSession.read().parquet(MetaConst$.MODULE$.MIN_MAX_PATH(str, getFieldGroupName(seq))).as(sparkSession.implicits().newProductEncoder(package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.DoubleFeature$$typecreator8$2
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                mirror.universe();
                return mirror.staticClass("streaming.dsl.mmlib.algs.meta.MinMaxValueMeta").asType().toTypeConstructor();
            }
        }))).collect()).map(new DoubleFeature$$anonfun$3(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)))).toMap(Predef$.MODULE$.$conforms());
        // One double per field, looked up through anonfun$4.
        double[] dArr = (double[]) ((TraversableOnce) seq.map(new DoubleFeature$$anonfun$4(map2), Seq$.MODULE$.canBuildFrom())).toArray(ClassTag$.MODULE$.Double());
        // Element-wise (anonfun$5 values) - (anonfun$4 values), computed with Breeze dense vectors.
        double[] array$mcD$sp = ((DenseVector) asBreeze(Vectors$.MODULE$.dense((double[]) ((TraversableOnce) seq.map(new DoubleFeature$$anonfun$5(map2), Seq$.MODULE$.canBuildFrom())).toArray(ClassTag$.MODULE$.Double()))).$minus(asBreeze(Vectors$.MODULE$.dense(dArr)), DenseVector$.MODULE$.canSubD())).toArray$mcD$sp(ClassTag$.MODULE$.Double());
        // Target minimum from the "min" option.
        double d = new StringOps(Predef$.MODULE$.augmentString((String) map.getOrElse("min", new DoubleFeature$$anonfun$6()))).toDouble();
        // Target range: "max" option minus the target minimum.
        double d2 = new StringOps(Predef$.MODULE$.augmentString((String) map.getOrElse("max", new DoubleFeature$$anonfun$7()))).toDouble() - d;
        // Prints the derived parameters to standard output.
        Predef$.MODULE$.println(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"predict: ", " ", " ", " ", ""})).s(Predef$.MODULE$.genericWrapArray(new Object[]{Predef$.MODULE$.doubleArrayOps(array$mcD$sp).mkString(","), Predef$.MODULE$.doubleArrayOps(dArr).mkString(","), BoxesRunTime.boxToDouble(d2), BoxesRunTime.boxToDouble(d)})));
        return minMaxFunc(array$mcD$sp, dArr, d2, d);
    }

    /**
     * Wraps the four arguments in a {@code DoubleFeature$$anonfun$minMaxFunc$1} closure.
     *
     * Callers in this class pass, in order: the per-column (max - min) differences, the
     * per-column minimums, the target range and the target minimum. The arithmetic itself is
     * implemented in the closure class, which is not visible here.
     *
     * @param dArr  first array argument, stored in the closure
     * @param dArr2 second array argument, stored in the closure
     * @param d     first scalar argument, stored in the closure
     * @param d2    second scalar argument, stored in the closure
     * @return the vector-to-vector function object
     */
    public Function1<Vector, Vector> minMaxFunc(double[] dArr, double[] dArr2, double d, double d2) {
        final Function1<Vector, Vector> rescale = new DoubleFeature$$anonfun$minMaxFunc$1(dArr, dArr2, d, d2);
        return rescale;
    }

    /**
     * Wraps {@code function1} in a {@code DoubleFeature$$anonfun$baseRescaleFunc$1} closure.
     *
     * NOTE(review): presumably the closure applies {@code function1} to each element of the
     * input vector. Confirm against the closure class, which is not visible here.
     *
     * @param function1 function captured by the closure
     * @return the vector-to-vector function object
     */
    public Function1<Vector, Vector> baseRescaleFunc(Function1<Object, Object> function1) {
        final Function1<Vector, Vector> vectorFunc = new DoubleFeature$$anonfun$baseRescaleFunc$1(function1);
        return vectorFunc;
    }

    /**
     * Rescales the columns named in {@code seq}: they are assembled into a temporary vector
     * column, a vector-to-vector function chosen by {@code str2} is applied through a UDF,
     * and the vector is expanded back into the original columns.
     *
     * For {@code "min-max"} the per-column minimum and maximum are computed with
     * {@code Statistics.colStats}, persisted as {@code MinMaxValueMeta} parquet (overwrite
     * mode) at {@code MetaConst$.MIN_MAX_PATH(str, getFieldGroupName(seq))}, and passed to
     * {@link #minMaxFunc} with the target range taken from the "min"/"max" entries of
     * {@code map}. For {@code "log2"}, {@code "logn"}, {@code "log10"}, {@code "sqrt"} and
     * {@code "abs"} a dedicated closure is used; any other value falls through to
     * {@code DoubleFeature$$anonfun$18}.
     *
     * NOTE(review): the bodies of the closures (anonfun$8 to anonfun$18) are not visible
     * here, so the exact arithmetic of the non-min-max methods and the fallback values for
     * "min"/"max" should be confirmed against those classes.
     *
     * @param dataset input dataset
     * @param str     first argument to {@code MIN_MAX_PATH}; also forwarded to {@code vectorize}
     * @param seq     names of the columns to rescale
     * @param str2    rescaling method name
     * @param map     options; "min" and "max" are parsed as doubles in the min-max branch
     * @return dataset with the rescaled values expanded back into the columns of {@code seq}
     */
    public Dataset<Row> scale(Dataset<Row> dataset, String str, Seq<String> seq, String str2, Map<String, String> map) {
        Function1<Vector, Vector> rescaleFunc;
        String tempCol = getTempCol();
        Dataset<Row> vectorize = vectorize(dataset, str, seq, tempCol);
        SparkSession sparkSession = dataset.sparkSession();
        // Shared function object handed to each of the non-min-max closures below.
        DoubleFeature$$anonfun$8 doubleFeature$$anonfun$8 = new DoubleFeature$$anonfun$8();
        if ("min-max".equals(str2)) {
            MultivariateStatisticalSummary colStats = Statistics$.MODULE$.colStats(vectorize.select(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col(tempCol)})).rdd().map(new DoubleFeature$$anonfun$9(), ClassTag$.MODULE$.apply(org.apache.spark.mllib.linalg.Vector.class)));
            // Per-column minimums and maximums, read directly from the summary. The previous
            // tuple wrap/unwrap and its null check could never fail and have been removed.
            double[] dArr = colStats.min().toArray();
            double[] dArr2 = colStats.max().toArray();
            // Persist one MinMaxValueMeta per column so the same scaling can be rebuilt at predict time.
            sparkSession.createDataset(Predef$.MODULE$.wrapRefArray((MinMaxValueMeta[]) Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.doubleArrayOps(dArr).zipWithIndex(Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)))).map(new DoubleFeature$$anonfun$10(seq, dArr, dArr2), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(MinMaxValueMeta.class)))), sparkSession.implicits().newProductEncoder(package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.DoubleFeature$$typecreator4$1
                public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                    mirror.universe();
                    return mirror.staticClass("streaming.dsl.mmlib.algs.meta.MinMaxValueMeta").asType().toTypeConstructor();
                }
            }))).write().mode(SaveMode.Overwrite).parquet(MetaConst$.MODULE$.MIN_MAX_PATH(str, getFieldGroupName(seq)));
            // Element-wise (max - min) per column.
            double[] array$mcD$sp = ((DenseVector) asBreeze(Vectors$.MODULE$.dense(dArr2)).$minus(asBreeze(Vectors$.MODULE$.dense(dArr)), DenseVector$.MODULE$.canSubD())).toArray$mcD$sp(ClassTag$.MODULE$.Double());
            // Target minimum, and target range ("max" option minus target minimum).
            double d = new StringOps(Predef$.MODULE$.augmentString((String) map.getOrElse("min", new DoubleFeature$$anonfun$11()))).toDouble();
            double d2 = new StringOps(Predef$.MODULE$.augmentString((String) map.getOrElse("max", new DoubleFeature$$anonfun$12()))).toDouble() - d;
            Predef$.MODULE$.println(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"train: ", " ", " ", " ", ""})).s(Predef$.MODULE$.genericWrapArray(new Object[]{Predef$.MODULE$.doubleArrayOps(array$mcD$sp).mkString(","), Predef$.MODULE$.doubleArrayOps(dArr).mkString(","), BoxesRunTime.boxToDouble(d2), BoxesRunTime.boxToDouble(d)})));
            rescaleFunc = minMaxFunc(array$mcD$sp, dArr, d2, d);
        } else {
            rescaleFunc = "log2".equals(str2) ? new DoubleFeature$$anonfun$13(doubleFeature$$anonfun$8) : "logn".equals(str2) ? new DoubleFeature$$anonfun$14(doubleFeature$$anonfun$8) : "log10".equals(str2) ? new DoubleFeature$$anonfun$15(doubleFeature$$anonfun$8) : "sqrt".equals(str2) ? new DoubleFeature$$anonfun$16(doubleFeature$$anonfun$8) : "abs".equals(str2) ? new DoubleFeature$$anonfun$17(doubleFeature$$anonfun$8) : new DoubleFeature$$anonfun$18(doubleFeature$$anonfun$8);
        }
        // Apply the chosen function to the temporary vector column, then split it back into the named columns.
        return expandColumnsFromVector(replaceColumn(vectorize, tempCol, UserDefinedFunction$.MODULE$.apply(rescaleFunc, SQLDataTypes$.MODULE$.VectorType(), new Some(Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new DataType[]{SQLDataTypes$.MODULE$.VectorType()}))))), seq, tempCol);
    }

    /**
     * Builds the vector-to-vector normalization function used at predict time.
     *
     * For {@code "standard"} the first {@code StandardScalerValueMeta} record stored as
     * parquet at {@code MetaConst$.STANDARD_SCALER_PATH(str, getFieldGroupName(seq))} is
     * loaded, an MLlib {@code StandardScalerModel} (withStd and withMean both true) is built
     * from its std and mean arrays and broadcast, and a {@code DoubleFeature$$anonfun$19}
     * wrapping that broadcast is returned. For {@code "p-norm"} the function from
     * {@link #getPNormFunc()} is returned.
     *
     * The result is handled through the {@code Function1<Vector, Vector>} interface in both
     * branches; the p-norm function is not an instance of the standard-scaler closure class.
     *
     * @param sparkSession session used to read the parquet data and broadcast the model
     * @param str          first argument to {@code STANDARD_SCALER_PATH} (presumably a base path; confirm)
     * @param seq          used to derive the field group name for the path
     * @param str2         normalization method: {@code "standard"} or {@code "p-norm"}
     * @param map          not referenced in this method body
     * @return the normalization function for the requested method
     * @throws MatchError if {@code str2} is neither {@code "standard"} nor {@code "p-norm"}
     */
    public Function1<Vector, Vector> getModelNormalizeForPredict(SparkSession sparkSession, String str, Seq<String> seq, String str2, Map<String, String> map) {
        if ("standard".equals(str2)) {
            Predef$ predef$ = Predef$.MODULE$;
            Dataset parquet = sparkSession.read().parquet(MetaConst$.MODULE$.STANDARD_SCALER_PATH(str, getFieldGroupName(seq)));
            SparkSession$implicits$ implicits = sparkSession.implicits();
            TypeTags universe = package$.MODULE$.universe();
            // Decode the stored rows and take the first record.
            StandardScalerValueMeta standardScalerValueMeta = (StandardScalerValueMeta) predef$.refArrayOps((Object[]) parquet.as(implicits.newProductEncoder(universe.TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.DoubleFeature$$typecreator13$1
                public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                    mirror.universe();
                    return mirror.staticClass("streaming.dsl.mmlib.algs.meta.StandardScalerValueMeta").asType().toTypeConstructor();
                }
            }))).collect()).head();
            // Rebuild the MLlib model (std, mean, withStd = true, withMean = true) and broadcast it.
            return new DoubleFeature$$anonfun$19(sparkSession.sparkContext().broadcast(new StandardScalerModel(org.apache.spark.mllib.linalg.Vectors$.MODULE$.dense(standardScalerValueMeta.std()), org.apache.spark.mllib.linalg.Vectors$.MODULE$.dense(standardScalerValueMeta.mean()), true, true), ClassTag$.MODULE$.apply(StandardScalerModel.class)));
        }
        if ("p-norm".equals(str2)) {
            return getPNormFunc();
        }
        throw new MatchError(str2);
    }

    /**
     * Normalizes the columns named in {@code seq} and expands the result back into them.
     *
     * Input preparation: when {@code seq} has exactly one entry and that column's type is an
     * array of doubles, the column is converted into a temporary vector column through a UDF
     * built from {@code DoubleFeature$$anonfun$20}; otherwise the columns are assembled with
     * {@link #vectorize}.
     *
     * Methods: {@code "standard"} fits a {@code StandardScaler} (withStd and withMean both
     * true), persists its mean and std as a {@code StandardScalerValueMeta} parquet record
     * (overwrite mode) at {@code MetaConst$.STANDARD_SCALER_PATH(str, getFieldGroupName(seq))}
     * and applies the fitted model. {@code "p-norm"} applies the function from
     * {@link #getPNormFunc()} through a UDF.
     *
     * @param dataset input dataset
     * @param str     first argument to {@code STANDARD_SCALER_PATH}; also forwarded to {@code vectorize}
     * @param seq     names of the columns to normalize; its first entry is always looked up in the schema
     * @param str2    normalization method: {@code "standard"} or {@code "p-norm"}
     * @param map     not referenced in this method body
     * @return dataset with normalized values expanded back into the columns of {@code seq}
     * @throws MatchError if the single array column's element type is not double, or if
     *                    {@code str2} is neither {@code "standard"} nor {@code "p-norm"}
     */
    public Dataset<Row> normalize(Dataset<Row> dataset, String str, Seq<String> seq, String str2, Map<String, String> map) {
        Dataset<Row> vectorized;
        String tempCol = getTempCol();
        StructField apply = dataset.schema().apply((String) seq.head());
        SparkSession sparkSession = dataset.sparkSession();
        DataType dataType = apply.dataType();
        if (seq.size() == 1 && (dataType instanceof ArrayType)) {
            // The guard above already established ArrayType, so only the element type needs checking.
            if (!DoubleType$.MODULE$.equals(((ArrayType) dataType).elementType())) {
                throw new MatchError(dataType);
            }
            // Convert the array<double> column into a vector column under the temporary name.
            vectorized = dataset.withColumn(tempCol, functions$.MODULE$.udf(new DoubleFeature$$anonfun$20(), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.DoubleFeature$$typecreator1$1
                public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                    mirror.universe();
                    return mirror.staticClass("org.apache.spark.ml.linalg.Vector").asType().toTypeConstructor();
                }
            }), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.DoubleFeature$$typecreator2$1
                public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                    Universe universe = mirror.universe();
                    return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala.collection.mutable").asModule().moduleClass()), mirror.staticClass("scala.collection.mutable.WrappedArray"), List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Types.TypeApi[]{mirror.staticClass("scala.Double").asType().toTypeConstructor()})));
                }
            })).apply(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col((String) seq.head())})));
        } else {
            vectorized = vectorize(dataset, str, seq, tempCol);
        }
        // Second temporary column that receives the normalized vector.
        String tempCol2 = getTempCol();
        if ("standard".equals(str2)) {
            org.apache.spark.ml.feature.StandardScalerModel fit = new StandardScaler().setInputCol(tempCol).setOutputCol(tempCol2).setWithStd(true).setWithMean(true).fit(vectorized);
            // Read the fitted statistics directly. The previous tuple wrap/unwrap and its null
            // check could never fail and have been removed.
            double[] mean = fit.mean().toArray();
            double[] std = fit.std().toArray();
            // Persist the statistics so the same scaling can be rebuilt at predict time.
            sparkSession.createDataset(Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new StandardScalerValueMeta[]{new StandardScalerValueMeta(seq.mkString("_"), mean, std)})), sparkSession.implicits().newProductEncoder(package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.DoubleFeature$$typecreator6$1
                public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                    mirror.universe();
                    return mirror.staticClass("streaming.dsl.mmlib.algs.meta.StandardScalerValueMeta").asType().toTypeConstructor();
                }
            }))).write().mode(SaveMode.Overwrite).parquet(MetaConst$.MODULE$.STANDARD_SCALER_PATH(str, getFieldGroupName(seq)));
            return expandColumnsFromVector(fit.transform(vectorized).drop(tempCol), seq, tempCol2);
        }
        if (!"p-norm".equals(str2)) {
            throw new MatchError(str2);
        }
        return expandColumnsFromVector(vectorized.withColumn(tempCol2, functions$.MODULE$.udf(getPNormFunc(), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.DoubleFeature$$typecreator8$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                mirror.universe();
                return mirror.staticClass("org.apache.spark.ml.linalg.Vector").asType().toTypeConstructor();
            }
        }), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: streaming.dsl.mmlib.algs.feature.DoubleFeature$$typecreator9$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                mirror.universe();
                return mirror.staticClass("org.apache.spark.ml.linalg.Vector").asType().toTypeConstructor();
            }
        })).apply(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col(tempCol)}))).drop(tempCol), seq, tempCol2);
    }

    /**
     * Creates an MLlib {@code Normalizer} constructed with {@code 1.0} and wraps it in a
     * {@code DoubleFeature$$anonfun$21} closure.
     *
     * NOTE(review): presumably the closure applies the normalizer to each input vector.
     * Confirm against the closure class, which is not visible here.
     *
     * @return the vector-to-vector function object
     */
    public Function1<Vector, Vector> getPNormFunc() {
        final Normalizer normalizer = new Normalizer(1.0d);
        return new DoubleFeature$$anonfun$21(normalizer);
    }

    // Private constructor: this class is a singleton, instantiated only from its static initializer.
    private DoubleFeature$() {
        // Publish the instance before running the trait initializer.
        MODULE$ = this;
        // Run the BaseFeatureFunctions trait's initialization code for this instance.
        BaseFeatureFunctions.Cclass.$init$(this);
    }
}
