package org.opencb.oskar.spark.variant.transformers;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.ml.param.Param;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.expressions.MutableAggregationBuffer;
import org.apache.spark.sql.expressions.UserDefinedAggregateFunction;
import org.apache.spark.sql.functions;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.opencb.biodata.models.clinical.pedigree.Member;
import org.opencb.biodata.models.clinical.pedigree.Pedigree;
import org.opencb.biodata.models.variant.Genotype;
import org.opencb.biodata.models.variant.metadata.IndelLength;
import org.opencb.biodata.models.variant.metadata.SampleVariantStats;
import org.opencb.biodata.models.variant.stats.VariantStats;
import org.opencb.biodata.tools.pedigree.MendelianError;
import org.opencb.oskar.core.exceptions.OskarException;
import org.opencb.oskar.spark.variant.VariantMetadataManager;
import org.opencb.oskar.spark.variant.converters.VariantToRowConverter;
import org.opencb.oskar.spark.variant.transformers.params.HasStudyId;
import scala.Option;
import scala.Tuple2;
import scala.collection.Iterator;
import scala.collection.JavaConversions;
import scala.collection.JavaConverters;
import scala.collection.Seq;
import scala.collection.mutable.HashMap;

/* loaded from: input_file:org/opencb/oskar/spark/variant/transformers/SampleVariantStatsTransformer.class */
public class SampleVariantStatsTransformer extends AbstractTransformer implements HasStudyId {
    /** Samples to compute statistics for; when the list is empty, {@code transform} takes the samples from the dataset metadata. */
    private final Param<List<String>> samplesParam;
    /** Created on first call to {@link #fileIdParam()}, which is why it cannot be final. */
    private Param<String> fileIdParam;

    /* loaded from: input_file:org/opencb/oskar/spark/variant/transformers/SampleVariantStatsTransformer$BufferUtils.class */
    private static class BufferUtils {
        public static final String STATS_COLNAME = "stats";
        public static final int SAMPLE_INDEX = 0;
        public static final int NUM_VARIANTS_INDEX = 1;
        public static final int CHROMOSOME_COUNT_INDEX = 2;
        public static final int TYPE_COUNT_INDEX = 3;
        public static final int GENOTYPE_COUNT_INDEX = 4;
        public static final int INDEL_LENGTH_COUNT_INDEX = 5;
        public static final int NUM_PASS_INDEX = 6;
        public static final int TRANSITIONS_INDEX = 7;
        public static final int TRANSVERSIONS_INDEX = 8;
        public static final int TI_TV_RATIO_INDEX = 9;
        public static final int QUALITY_COUNT_INDEX = 10;
        public static final int QUALITY_SUM_INDEX = 11;
        public static final int QUALITY_SUMSQ_INDEX = 12;
        public static final int MEAN_QUALITY_INDEX = 13;
        public static final int STD_DEV_QUALITY_INDEX = 14;
        public static final int MISSING_POSITIONS_INDEX = 15;
        public static final int HETEROZIGOSITY_RATE_INDEX = 16;
        public static final int MENDELIAN_ERROR_COUNT_INDEX = 17;
        public static final int CONSEQUENCE_TYPE_COUNT_INDEX = 18;
        public static final int BIOTYPE_COUNT_INDEX = 19;
        public static final String SAMPLE_COLNAME = "sample";
        public static final String NUM_VARIANTS_COLNAME = "numVariants";
        public static final String CHROMOSOME_COUNT_COLNAME = "chromosomeCount";
        public static final String TYPE_COUNT_COLNAME = "typeCount";
        public static final String GENOTYPE_COUNT_COLNAME = "genotypeCount";
        public static final String INDEL_LENGTH_COUNT_COLNAME = "indelLengthCount";
        public static final String NUM_PASS_COLNAME = "numPass";
        public static final String TRANSITIONS_COLNAME = "transitions";
        public static final String TRANSVERSIONS_COLNAME = "transversions";
        public static final String TI_TV_RATIO_COLNAME = "tiTvRatio";
        public static final String QUALITY_COUNT_COLNAME = "qualityCount";
        public static final String QUALITY_SUM_COLNAME = "qualitySum";
        public static final String QUALITY_SUMSQ_COLNAME = "qualitySumSq";
        public static final String MEAN_QUALITY_COLNAME = "meanQuality";
        public static final String STD_DEV_QUALITY_COLNAME = "stdDevQuality";
        public static final String MISSING_POSITIONS_COLNAME = "missingPositions";
        public static final String HETEROZIGOSITY_RATE_COLNAME = "heterozigosityRate";
        public static final String MENDELIAN_ERROR_COUNT_COLNAME = "mendelianErrorCount";
        public static final String BIOTYPE_COUNT_COLNAME = "biotypeCount";
        public static final String CONSEQUENCE_TYPE_COUNT_COLNAME = "consequenceTypeCount";
        static final StructType SAMPLE_VARIANT_STATS_SCHEMA = DataTypes.createStructType(new StructField[]{DataTypes.createStructField(SAMPLE_COLNAME, DataTypes.StringType, false), DataTypes.createStructField(NUM_VARIANTS_COLNAME, DataTypes.IntegerType, false), DataTypes.createStructField(CHROMOSOME_COUNT_COLNAME, DataTypes.createMapType(DataTypes.StringType, DataTypes.IntegerType, false), false), DataTypes.createStructField(TYPE_COUNT_COLNAME, DataTypes.createMapType(DataTypes.StringType, DataTypes.IntegerType, false), false), DataTypes.createStructField(GENOTYPE_COUNT_COLNAME, DataTypes.createMapType(DataTypes.StringType, DataTypes.IntegerType, false), false), DataTypes.createStructField(INDEL_LENGTH_COUNT_COLNAME, DataTypes.createMapType(DataTypes.IntegerType, DataTypes.IntegerType, false), false), DataTypes.createStructField(NUM_PASS_COLNAME, DataTypes.IntegerType, false), DataTypes.createStructField(TRANSITIONS_COLNAME, DataTypes.IntegerType, false), DataTypes.createStructField(TRANSVERSIONS_COLNAME, DataTypes.IntegerType, false), DataTypes.createStructField(TI_TV_RATIO_COLNAME, DataTypes.DoubleType, false), DataTypes.createStructField(QUALITY_COUNT_COLNAME, DataTypes.IntegerType, false), DataTypes.createStructField(QUALITY_SUM_COLNAME, DataTypes.DoubleType, false), DataTypes.createStructField(QUALITY_SUMSQ_COLNAME, DataTypes.DoubleType, false), DataTypes.createStructField(MEAN_QUALITY_COLNAME, DataTypes.DoubleType, false), DataTypes.createStructField(STD_DEV_QUALITY_COLNAME, DataTypes.DoubleType, false), DataTypes.createStructField(MISSING_POSITIONS_COLNAME, DataTypes.IntegerType, false), DataTypes.createStructField(HETEROZIGOSITY_RATE_COLNAME, DataTypes.DoubleType, false), DataTypes.createStructField(MENDELIAN_ERROR_COUNT_COLNAME, DataTypes.createMapType(DataTypes.IntegerType, DataTypes.IntegerType, false), false), DataTypes.createStructField(BIOTYPE_COUNT_COLNAME, DataTypes.createMapType(DataTypes.StringType, DataTypes.IntegerType, false), false), 
DataTypes.createStructField(CONSEQUENCE_TYPE_COUNT_COLNAME, DataTypes.createMapType(DataTypes.StringType, DataTypes.IntegerType, false), false)});
        static final StructType VARIANT_SAMPLE_STATS_BUFFER_SCHEMA = DataTypes.createStructType(new StructField[]{DataTypes.createStructField("stats", DataTypes.createArrayType(SAMPLE_VARIANT_STATS_SCHEMA, false), false)});

        private BufferUtils() {
        }

        public static void initialize(MutableAggregationBuffer mutableAggregationBuffer, int i) {
            LinkedList linkedList = new LinkedList();
            for (int i2 = 0; i2 < i; i2++) {
                linkedList.add(RowFactory.create(new Object[]{"", 0, new HashMap(), new HashMap(), new HashMap(), new HashMap(), 0, 0, 0, Double.valueOf(0.0d), 0, Double.valueOf(0.0d), Double.valueOf(0.0d), Double.valueOf(0.0d), Double.valueOf(0.0d), 0, Double.valueOf(0.0d), new HashMap(), new HashMap(), new HashMap()}));
            }
            mutableAggregationBuffer.update(0, ((Iterator) JavaConverters.asScalaIteratorConverter(linkedList.iterator()).asScala()).toSeq());
        }

        public static void merge(MutableAggregationBuffer mutableAggregationBuffer, Row row, int i) {
            LinkedList linkedList = new LinkedList();
            for (int i2 = 0; i2 < i; i2++) {
                Row row2 = (Row) mutableAggregationBuffer.getList(0).get(i2);
                Row row3 = (Row) row.getList(0).get(i2);
                linkedList.add(RowFactory.create(new Object[]{row2.getString(0), Integer.valueOf(row2.getInt(1) + row3.getInt(1)), row2.getMap(2).$plus$plus(row3.getMap(2)), row2.getMap(3).$plus$plus(row3.getMap(3)), row2.getMap(4).$plus$plus(row3.getMap(4)), row2.getMap(5).$plus$plus(row3.getMap(5)), Integer.valueOf(row2.getInt(6) + row3.getInt(6)), Integer.valueOf(row2.getInt(7) + row3.getInt(7)), Integer.valueOf(row2.getInt(8) + row3.getInt(8)), Double.valueOf(0.0d), Integer.valueOf(row2.getInt(10) + row3.getInt(10)), Double.valueOf(row2.getDouble(11) + row3.getDouble(11)), Double.valueOf(row2.getDouble(12) + row3.getDouble(12)), Double.valueOf(0.0d), Double.valueOf(0.0d), Integer.valueOf(row2.getInt(15) + row3.getInt(15)), Double.valueOf(0.0d), row2.getMap(17).$plus$plus(row3.getMap(17)), row2.getMap(18).$plus$plus(row3.getMap(18)), row2.getMap(19).$plus$plus(row3.getMap(19))}));
            }
            mutableAggregationBuffer.update(0, ((Iterator) JavaConverters.asScalaIteratorConverter(linkedList.iterator()).asScala()).toSeq());
        }
    }

    /* loaded from: input_file:org/opencb/oskar/spark/variant/transformers/SampleVariantStatsTransformer$SampleVariantStatsFunction.class */
    private static class SampleVariantStatsFunction extends UserDefinedAggregateFunction {
        /** Study to read from each variant; {@code null} when none was given. */
        private final String studyId;
        /** File whose attributes are inspected; {@code null} when none was given. */
        private final String fileId;
        /** Samples to aggregate, in output order. */
        private final List<String> samples;
        /** Position of each sample within {@link #samples}. */
        private final Map<String, Integer> samplesPos;
        /** Child sample id mapped to its (father id, mother id); either id may be {@code null}. */
        private final Map<String, Tuple2<String, String>> validChildren;

        // Scratch counters for the sample currently being processed; reloaded from the
        // aggregation buffer by initCounters for every sample of every input row.
        private String sampleId;
        private Integer numVariants;
        private scala.collection.Map<String, Integer> chromosomeCount;
        private scala.collection.Map<String, Integer> typeCount;
        private scala.collection.Map<String, Integer> genotypeCount;
        private scala.collection.Map<Integer, Integer> indelLengthCount;
        private Integer numPass;
        private Integer transitions;
        private Integer transversions;
        private Integer qualityCount;
        private Double qualitySum;
        private Double qualitySumSq;
        private Integer missingPositions;
        private scala.collection.Map<Integer, Integer> mendelianErrorCount;
        private scala.collection.Map<String, Integer> consequenceTypeCount;
        private scala.collection.Map<String, Integer> biotypeCount;

        /**
         * Builds the aggregation function.
         *
         * @param study     study id; {@code null} or empty is stored as {@code null}
         * @param file      file id; {@code null} or empty is stored as {@code null}
         * @param sampleIds samples to aggregate
         * @param pedigrees pedigrees used to find members with at least one known parent; may be {@code null}
         */
        SampleVariantStatsFunction(String study, String file, List<String> sampleIds, List<Pedigree> pedigrees) {
            this.studyId = StringUtils.isEmpty(study) ? null : study;
            this.fileId = StringUtils.isEmpty(file) ? null : file;
            this.samples = sampleIds;

            this.samplesPos = new LinkedHashMap<>();
            for (String sample : sampleIds) {
                // The map size before insertion is the running position of the sample.
                this.samplesPos.put(sample, Integer.valueOf(this.samplesPos.size()));
            }

            this.validChildren = new java.util.HashMap<>();
            if (pedigrees == null) {
                return;
            }
            for (Pedigree pedigree : pedigrees) {
                for (Member member : pedigree.getMembers()) {
                    if (member.getFather() == null && member.getMother() == null) {
                        continue;
                    }
                    String fatherId = member.getFather() == null ? null : member.getFather().getId();
                    String motherId = member.getMother() == null ? null : member.getMother().getId();
                    this.validChildren.put(member.getId(), new Tuple2<>(fatherId, motherId));
                }
            }
        }

        /**
         * Declares the columns this function consumes, in the positional order that {@code update}
         * reads them.
         *
         * <p>NOTE(review): eight fields are declared, but {@code transform} applies the function to
         * seven columns and passes nothing for {@code sampleIndices} — confirm this is intended.
         */
        @Override
        public StructType inputSchema() {
            StructField[] fields = {
                    DataTypes.createStructField("chromosome", DataTypes.StringType, false),
                    DataTypes.createStructField("reference", DataTypes.StringType, false),
                    DataTypes.createStructField("alternate", DataTypes.StringType, false),
                    DataTypes.createStructField("type", DataTypes.StringType, false),
                    DataTypes.createStructField("length", DataTypes.IntegerType, false),
                    DataTypes.createStructField("studies",
                            DataTypes.createArrayType(VariantToRowConverter.STUDY_DATA_TYPE), false),
                    DataTypes.createStructField("annotation", VariantToRowConverter.ANNOTATION_DATA_TYPE, true),
                    DataTypes.createStructField("sampleIndices",
                            DataTypes.createArrayType(DataTypes.IntegerType), false)
            };
            return DataTypes.createStructType(fields);
        }

        /** Returns the schema of the intermediate aggregation buffer. */
        @Override
        public StructType bufferSchema() {
            return BufferUtils.VARIANT_SAMPLE_STATS_BUFFER_SCHEMA;
        }

        /** Returns the result type, which is the same structure as the aggregation buffer. */
        @Override
        public DataType dataType() {
            return BufferUtils.VARIANT_SAMPLE_STATS_BUFFER_SCHEMA;
        }

        /** Always reports the function as deterministic. */
        @Override
        public boolean deterministic() {
            return true;
        }

        /** Resets the buffer to one zeroed statistics row per configured sample. */
        @Override
        public void initialize(MutableAggregationBuffer mutableAggregationBuffer) {
            int numSamples = this.samples.size();
            BufferUtils.initialize(mutableAggregationBuffer, numSamples);
        }

        /**
         * Folds one variant row into the per-sample statistics held in the aggregation buffer.
         *
         * <p>For every configured sample the current counters are loaded from the buffer, updated
         * from the sample's genotype in the selected study, and written back. Variant-level counters
         * (type, chromosome, Ti/Tv, file FILTER/QUAL, annotation) only change when the genotype has
         * the main alternate allele; missing-genotype and Mendelian-error counters are updated for
         * every genotype.
         *
         * <p>NOTE(review): genotypes are looked up in {@code samplesData} by the sample's position in
         * the configured sample list. This presumably requires that list to be in the same order as
         * the study's samples — confirm for user-supplied subsets.
         *
         * @param mutableAggregationBuffer buffer holding one statistics row per sample
         * @param row input columns in the order declared by {@code inputSchema}
         * @throws IllegalArgumentException if the configured study is absent, or no study is
         *         configured and the variant does not have exactly one study
         */
        @Override
        public void update(MutableAggregationBuffer mutableAggregationBuffer, Row row) {
            String chromosome = row.getString(0);
            String reference = row.getString(1);
            String alternate = row.getString(2);
            String variantType = row.getString(3);
            int length = row.getInt(4);
            Seq<Row> studies = row.getSeq(5);
            Row annotation = row.getStruct(6);

            Row study = selectStudy(studies);
            Seq<Seq<String>> samplesData = study.getSeq(study.fieldIndex("samplesData"));

            List<Row> updatedStats = new ArrayList<>(this.samples.size());
            for (int sampleIdx = 0; sampleIdx < this.samples.size(); sampleIdx++) {
                this.sampleId = this.samples.get(sampleIdx);
                initCounters(mutableAggregationBuffer, sampleIdx);

                int position = this.samplesPos.get(this.sampleId);
                // The genotype is the first value of the sample's data.
                String genotype = samplesData.apply(position).apply(0);
                if (genotype.contains(".")) {
                    this.missingPositions = this.missingPositions + 1;
                }

                Tuple2<String, String> parents = this.validChildren.get(this.sampleId);
                if (parents != null) {
                    int errorCode = MendelianError.compute(
                            parentGenotype(samplesData, parents._1()),
                            parentGenotype(samplesData, parents._2()),
                            new Genotype(genotype),
                            chromosome);
                    if (errorCode > 0) {
                        this.mendelianErrorCount = updated(this.mendelianErrorCount, Integer.valueOf(errorCode));
                    }
                }

                if (Genotype.hasMainAlternate(genotype)) {
                    this.numVariants = this.numVariants + 1;
                    this.genotypeCount = updated(this.genotypeCount, genotype);
                    this.chromosomeCount = updated(this.chromosomeCount, chromosome);
                    this.typeCount = updated(this.typeCount, variantType);
                    if ("INDEL".equals(variantType) && length > 0) {
                        this.indelLengthCount = updated(this.indelLengthCount, Integer.valueOf(length));
                    }
                    if (VariantStats.isTransition(reference, alternate)) {
                        this.transitions = this.transitions + 1;
                    }
                    if (VariantStats.isTransversion(reference, alternate)) {
                        this.transversions = this.transversions + 1;
                    }
                    countFileAttributes(study);
                    countAnnotation(annotation);
                }

                updatedStats.add(RowFactory.create(
                        this.sampleId,
                        this.numVariants,
                        this.chromosomeCount,
                        this.typeCount,
                        this.genotypeCount,
                        this.indelLengthCount,
                        this.numPass,
                        this.transitions,
                        this.transversions,
                        0.0d,                   // tiTvRatio, derived when the result is evaluated
                        this.qualityCount,
                        this.qualitySum,        // must be persisted, otherwise the running sums are lost
                        this.qualitySumSq,
                        0.0d,                   // meanQuality, derived when the result is evaluated
                        0.0d,                   // stdDevQuality, derived when the result is evaluated
                        this.missingPositions,
                        0.0d,                   // heterozigosityRate, derived when the result is evaluated
                        this.mendelianErrorCount,
                        this.consequenceTypeCount,
                        this.biotypeCount));
            }
            mutableAggregationBuffer.update(0,
                    JavaConverters.asScalaIteratorConverter(updatedStats.iterator()).asScala().toSeq());
        }

        /** Picks the configured study, or the only study when none is configured. */
        private Row selectStudy(Seq<Row> studies) {
            if (StringUtils.isEmpty(this.studyId)) {
                if (studies.length() != 1) {
                    throw new IllegalArgumentException("Only 1 study expected. Found " + studies.length());
                }
                return studies.apply(0);
            }
            Row selected = null;
            for (int i = 0; i < studies.length(); i++) {
                Row candidate = studies.apply(i);
                if (this.studyId.equals(candidate.getString(candidate.fieldIndex("studyId")))) {
                    selected = candidate;
                }
            }
            if (selected == null) {
                throw new IllegalArgumentException("Study not found: " + this.studyId);
            }
            return selected;
        }

        /** Returns the genotype of a parent, or {@code null} when the parent is not among the samples. */
        private Genotype parentGenotype(Seq<Seq<String>> samplesData, String parentId) {
            Integer position = this.samplesPos.get(parentId);
            if (position == null) {
                return null;
            }
            return new Genotype(samplesData.apply(position.intValue()).apply(0));
        }

        /** Updates the PASS and quality counters from the attributes of the configured file, if present. */
        private void countFileAttributes(Row study) {
            if (this.fileId == null) {
                return;
            }
            Seq<Row> files = study.getSeq(study.fieldIndex("files"));
            if (files == null) {
                return;
            }
            for (int i = 0; i < files.length(); i++) {
                Row file = files.apply(i);
                if (!this.fileId.equals(file.getString(file.fieldIndex("fileId")))) {
                    continue;
                }
                scala.collection.Map<String, String> attributes = file.getMap(file.fieldIndex("attributes"));
                Option<String> filter = attributes.get("FILTER");
                if (filter.isDefined() && "PASS".equals(filter.get())) {
                    this.numPass = this.numPass + 1;
                }
                Option<String> qual = attributes.get("QUAL");
                if (qual.isDefined() && !".".equals(qual.get())) {
                    double quality = Double.parseDouble(qual.get());
                    this.qualityCount = this.qualityCount + 1;
                    this.qualitySum = this.qualitySum + quality;
                    this.qualitySumSq = this.qualitySumSq + (quality * quality);
                }
                // Stop at the matching file; without this the scan would never advance past it.
                break;
            }
        }

        /** Counts each distinct biotype and sequence-ontology term of the annotation once. */
        private void countAnnotation(Row annotation) {
            if (annotation == null) {
                return;
            }
            HashSet<String> biotypes = new HashSet<>();
            HashSet<String> soTerms = new HashSet<>();
            Seq<Row> consequenceTypes = annotation.getSeq(annotation.fieldIndex("consequenceTypes"));
            if (consequenceTypes != null) {
                for (int i = 0; i < consequenceTypes.length(); i++) {
                    Row consequenceType = consequenceTypes.apply(i);
                    String biotype = consequenceType.getString(consequenceType.fieldIndex("biotype"));
                    if (StringUtils.isNotEmpty(biotype)) {
                        biotypes.add(biotype);
                    }
                    Seq<Row> terms = consequenceType.getSeq(consequenceType.fieldIndex("sequenceOntologyTerms"));
                    if (terms != null) {
                        for (int j = 0; j < terms.length(); j++) {
                            soTerms.add(terms.apply(j).getString(1));
                        }
                    }
                }
            }
            for (String biotype : biotypes) {
                this.biotypeCount = updated(this.biotypeCount, biotype);
            }
            for (String soTerm : soTerms) {
                this.consequenceTypeCount = updated(this.consequenceTypeCount, soTerm);
            }
        }

        /**
         * Loads the scratch counters from the statistics row stored in the buffer for the sample at
         * position {@code i}. Derived columns (ratios, mean, standard deviation) are not read.
         */
        private void initCounters(MutableAggregationBuffer mutableAggregationBuffer, int i) {
            Row stored = mutableAggregationBuffer.<Row>getList(0).get(i);

            this.numVariants = stored.getInt(BufferUtils.NUM_VARIANTS_INDEX);
            this.numPass = stored.getInt(BufferUtils.NUM_PASS_INDEX);
            this.transitions = stored.getInt(BufferUtils.TRANSITIONS_INDEX);
            this.transversions = stored.getInt(BufferUtils.TRANSVERSIONS_INDEX);
            this.missingPositions = stored.getInt(BufferUtils.MISSING_POSITIONS_INDEX);

            this.qualityCount = stored.getInt(BufferUtils.QUALITY_COUNT_INDEX);
            this.qualitySum = stored.getDouble(BufferUtils.QUALITY_SUM_INDEX);
            this.qualitySumSq = stored.getDouble(BufferUtils.QUALITY_SUMSQ_INDEX);

            this.chromosomeCount = stored.getMap(BufferUtils.CHROMOSOME_COUNT_INDEX);
            this.typeCount = stored.getMap(BufferUtils.TYPE_COUNT_INDEX);
            this.genotypeCount = stored.getMap(BufferUtils.GENOTYPE_COUNT_INDEX);
            this.indelLengthCount = stored.getMap(BufferUtils.INDEL_LENGTH_COUNT_INDEX);
            this.mendelianErrorCount = stored.getMap(BufferUtils.MENDELIAN_ERROR_COUNT_INDEX);
            this.consequenceTypeCount = stored.getMap(BufferUtils.CONSEQUENCE_TYPE_COUNT_INDEX);
            this.biotypeCount = stored.getMap(BufferUtils.BIOTYPE_COUNT_INDEX);
        }

        /**
         * Returns a map in which the count for {@code t} is one higher than in {@code map}; a key that
         * is not present yet starts at 1.
         */
        private <T> scala.collection.Map<T, Integer> updated(scala.collection.Map<T, Integer> map, T t) {
            if (map.contains(t)) {
                int next = map.apply(t).intValue() + 1;
                return map.updated(t, Integer.valueOf(next));
            }
            return map.$plus(new Tuple2<>(t, 1));
        }

        /** Merges {@code row} into the buffer, sample by sample, via {@code BufferUtils.merge}. */
        @Override
        public void merge(MutableAggregationBuffer mutableAggregationBuffer, Row row) {
            int numSamples = this.samples.size();
            BufferUtils.merge(mutableAggregationBuffer, row, numSamples);
        }

        /**
         * Produces the final per-sample rows from the aggregated buffer, filling in the derived
         * columns: Ti/Tv ratio, mean and standard deviation of quality, and heterozygosity rate
         * (heterozygous genotype count divided by the number of variants).
         *
         * <p>The ratios are plain {@code double} divisions, so a zero denominator yields
         * {@code NaN} or {@code Infinity} rather than an exception.
         *
         * @param row aggregated buffer with one statistics row per sample
         * @return a single-column row holding the list of completed per-sample rows
         */
        /* renamed from: evaluate, reason: merged with bridge method [inline-methods] */
        public Row m17evaluate(Row row) {
            List<Row> aggregated = row.getList(0);
            List<Row> results = new LinkedList<>();
            for (int i = 0; i < this.samples.size(); i++) {
                Row stats = aggregated.get(i);

                int variantCount = stats.getInt(BufferUtils.NUM_VARIANTS_INDEX);
                scala.collection.Map<String, Integer> genotypes = stats.getMap(BufferUtils.GENOTYPE_COUNT_INDEX);
                int tiCount = stats.getInt(BufferUtils.TRANSITIONS_INDEX);
                int tvCount = stats.getInt(BufferUtils.TRANSVERSIONS_INDEX);
                double tiTvRatio = (1.0d * tiCount) / tvCount;

                int qualCount = stats.getInt(BufferUtils.QUALITY_COUNT_INDEX);
                double qualSum = stats.getDouble(BufferUtils.QUALITY_SUM_INDEX);
                double qualSumSq = stats.getDouble(BufferUtils.QUALITY_SUMSQ_INDEX);
                double meanQuality = qualSum / qualCount;
                // E[x^2] - mean^2 can come out slightly negative through rounding (e.g. when all
                // qualities are equal); clamp it so the square root does not turn into NaN.
                double variance = (qualSumSq / qualCount) - (meanQuality * meanQuality);
                double stdDevQuality = Math.sqrt(Math.max(0.0d, variance));

                int hetCount = 0;
                Iterator<Tuple2<String, Integer>> genotypeIt = genotypes.toIterator();
                while (genotypeIt.hasNext()) {
                    Tuple2<String, Integer> entry = genotypeIt.next();
                    if (Genotype.isHet(entry._1())) {
                        hetCount += entry._2().intValue();
                    }
                }

                results.add(RowFactory.create(
                        stats.getString(BufferUtils.SAMPLE_INDEX),
                        variantCount,
                        stats.getMap(BufferUtils.CHROMOSOME_COUNT_INDEX),
                        stats.getMap(BufferUtils.TYPE_COUNT_INDEX),
                        genotypes,
                        stats.getMap(BufferUtils.INDEL_LENGTH_COUNT_INDEX),
                        stats.getInt(BufferUtils.NUM_PASS_INDEX),
                        tiCount,
                        tvCount,
                        tiTvRatio,
                        qualCount,
                        qualSum,
                        qualSumSq,
                        meanQuality,
                        stdDevQuality,
                        stats.getInt(BufferUtils.MISSING_POSITIONS_INDEX),
                        (1.0d * hetCount) / variantCount,
                        stats.getMap(BufferUtils.MENDELIAN_ERROR_COUNT_INDEX),
                        stats.getMap(BufferUtils.CONSEQUENCE_TYPE_COUNT_INDEX),
                        stats.getMap(BufferUtils.BIOTYPE_COUNT_INDEX)));
            }
            // Cast so the list becomes the single column instead of being spread as varargs.
            return RowFactory.create((Object) results);
        }
    }

    /** Creates a transformer, passing {@code null} to the {@code String} constructor. */
    public SampleVariantStatsTransformer() {
        this(null);
    }

    /**
     * Creates a transformer and registers its parameter defaults: no samples, empty study id and
     * empty file id.
     *
     * @param str value handed to the superclass constructor
     */
    public SampleVariantStatsTransformer(String str) {
        super(str);
        // The param must exist before setDefault(samplesParam(), ...) below can reference it.
        this.samplesParam = new Param<>(this, "samplesIds", "List of samplesIds");
        setDefault(samplesParam(), Collections.emptyList());
        setDefault(studyIdParam(), "");
        setDefault(fileIdParam(), "");
    }

    /**
     * Collects the dataset and converts every row into a {@link SampleVariantStats}.
     *
     * <p>Columns are read by position. Indel lengths are grouped into the buckets
     * {@code <5}, {@code <10}, {@code <15}, {@code <20} and {@code >=20}.
     *
     * @param dataset rows laid out like the output of {@code transform}
     * @return one stats object per collected row, in collection order
     */
    public static List<SampleVariantStats> toSampleVariantStats(Dataset<Row> dataset) {
        List<SampleVariantStats> result = new ArrayList<>();
        for (Row row : dataset.collectAsList()) {
            SampleVariantStats stats = new SampleVariantStats();
            stats.setId(row.getString(BufferUtils.SAMPLE_INDEX));
            stats.setVariantCount(Integer.valueOf(row.getInt(BufferUtils.NUM_VARIANTS_INDEX)));
            stats.setChromosomeCount(JavaConversions.mapAsJavaMap(row.getMap(BufferUtils.CHROMOSOME_COUNT_INDEX)));
            stats.setTypeCount(JavaConversions.mapAsJavaMap(row.getMap(BufferUtils.TYPE_COUNT_INDEX)));
            stats.setGenotypeCount(JavaConversions.mapAsJavaMap(row.getMap(BufferUtils.GENOTYPE_COUNT_INDEX)));

            Map<Integer, Integer> indelCounts =
                    JavaConversions.mapAsJavaMap(row.<Integer, Integer>getMap(BufferUtils.INDEL_LENGTH_COUNT_INDEX));
            stats.setIndelLengthCount(bucketIndelLengths(indelCounts));

            stats.setTiTvRatio(Float.valueOf((float) row.getDouble(BufferUtils.TI_TV_RATIO_INDEX)));
            stats.setQualityAvg(Float.valueOf((float) row.getDouble(BufferUtils.MEAN_QUALITY_INDEX)));
            stats.setQualityStdDev(Float.valueOf((float) row.getDouble(BufferUtils.STD_DEV_QUALITY_INDEX)));
            stats.setConsequenceTypeCount(
                    JavaConversions.mapAsJavaMap(row.getMap(BufferUtils.CONSEQUENCE_TYPE_COUNT_INDEX)));
            stats.setBiotypeCount(JavaConversions.mapAsJavaMap(row.getMap(BufferUtils.BIOTYPE_COUNT_INDEX)));
            result.add(stats);
        }
        return result;
    }

    /** Sums the per-length indel counts into the five length buckets of {@link IndelLength}. */
    private static IndelLength bucketIndelLengths(Map<Integer, Integer> countsByLength) {
        IndelLength buckets = new IndelLength(0, 0, 0, 0, 0);
        for (Map.Entry<Integer, Integer> entry : countsByLength.entrySet()) {
            int length = entry.getKey().intValue();
            int count = entry.getValue().intValue();
            if (length >= 20) {
                buckets.setGte20(Integer.valueOf(buckets.getGte20().intValue() + count));
            } else if (length >= 15) {
                buckets.setLt20(Integer.valueOf(buckets.getLt20().intValue() + count));
            } else if (length >= 10) {
                buckets.setLt15(Integer.valueOf(buckets.getLt15().intValue() + count));
            } else if (length >= 5) {
                buckets.setLt10(Integer.valueOf(buckets.getLt10().intValue() + count));
            } else {
                buckets.setLt5(Integer.valueOf(buckets.getLt5().intValue() + count));
            }
        }
        return buckets;
    }

    /** Returns the parameter that holds the list of sample ids. */
    public Param<List<String>> samplesParam() {
        return this.samplesParam;
    }

    /**
     * Sets the samples to compute statistics for.
     *
     * @param list sample ids
     * @return this transformer, for chaining
     */
    public SampleVariantStatsTransformer setSamples(List<String> list) {
        set(this.samplesParam, list);
        return this;
    }

    /**
     * Varargs variant of {@link #setSamples(List)}; the array is wrapped with {@code Arrays.asList}.
     *
     * @param strArr sample ids
     * @return this transformer, for chaining
     */
    public SampleVariantStatsTransformer setSamples(String... strArr) {
        set(this.samplesParam, Arrays.asList(strArr));
        return this;
    }

    /** Returns the configured sample ids, or the parameter default when none were set. */
    public List<String> getSamples() {
        return (List) getOrDefault(this.samplesParam);
    }

    /**
     * Sets the study to read from.
     *
     * @param str study id
     * @return this transformer, for chaining
     */
    @Override // org.opencb.oskar.spark.variant.transformers.params.HasStudyId
    public SampleVariantStatsTransformer setStudyId(String str) {
        set(studyIdParam(), str);
        return this;
    }

    /** Returns the file id parameter, creating and caching it on first use. */
    public Param<String> fileIdParam() {
        if (this.fileIdParam == null) {
            this.fileIdParam = new Param<>(this, "fileId", "");
        }
        return this.fileIdParam;
    }

    /**
     * Sets the file id parameter.
     *
     * @param str file id
     * @return this transformer, for chaining
     */
    public SampleVariantStatsTransformer setFileId(String str) {
        set(fileIdParam(), str);
        return this;
    }

    /** Returns the configured file id, or the parameter default when none was set. */
    public String getFileId() {
        return (String) getOrDefault(fileIdParam());
    }

    /**
     * Aggregates the whole variant dataset into one row of statistics per sample.
     *
     * <p>When no study id is configured the dataset must contain exactly one study, which is then
     * used. When no samples are configured they are taken from the dataset metadata for that study.
     *
     * @param dataset variant dataset
     * @return one row per sample with the statistics columns
     * @throws OskarException if the study cannot be resolved to a single one or no samples are available
     */
    public Dataset<Row> transform(Dataset<?> dataset) {
        List<String> sampleIds = getSamples();
        VariantMetadataManager metadataManager = new VariantMetadataManager();

        String study = getStudyId();
        if (StringUtils.isEmpty(study)) {
            List<String> studies = metadataManager.studies((Dataset<Row>) dataset);
            if (studies.size() != 1) {
                throw OskarException.missingStudy(studies);
            }
            study = studies.get(0);
        }

        if (sampleIds.isEmpty()) {
            sampleIds = metadataManager.samples(dataset, study);
            if (sampleIds.isEmpty()) {
                throw OskarException.missingParam(this.samplesParam.name());
            }
        }

        SampleVariantStatsFunction statsFunction = new SampleVariantStatsFunction(
                study, getFileId(), sampleIds, metadataManager.pedigrees(dataset, study));
        Column[] inputColumns = {
                functions.col("chromosome"),
                functions.col("reference"),
                functions.col("alternate"),
                functions.col("type"),
                functions.col("length"),
                functions.col("studies"),
                functions.col("annotation")
        };
        Column aggregated = statsFunction.apply(inputColumns).alias("stats");

        // The aggregate is a struct wrapping an array of per-sample structs: unwrap the struct,
        // explode the array into one row per sample, then flatten each sample struct into columns.
        Dataset<Row> wrapped = dataset.agg(aggregated, new Column[0]);
        Dataset<Row> statsArray = wrapped.selectExpr(new String[]{"stats.*"});
        Dataset<Row> perSample = statsArray.withColumn("stats", functions.explode(functions.col("stats")));
        return perSample.selectExpr(new String[]{"stats.*"});
    }
}
