package org.apache.tez.analyzer.plugins;

import com.google.common.collect.Lists;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.Objects;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;
import org.apache.tez.analyzer.Analyzer;
import org.apache.tez.analyzer.CSVResult;
import org.apache.tez.common.counters.TaskCounter;
import org.apache.tez.common.counters.TezCounter;
import org.apache.tez.dag.api.TezException;
import org.apache.tez.history.parser.datamodel.DagInfo;
import org.apache.tez.history.parser.datamodel.TaskAttemptInfo;
import org.apache.tez.history.parser.datamodel.VertexInfo;

/* loaded from: input_file:org/apache/tez/analyzer/plugins/SkewAnalyzer.class */
/**
 * Analyzer that inspects the per-source reducer counters of every task attempt in a DAG
 * ({@code REDUCE_INPUT_GROUPS}, {@code REDUCE_INPUT_RECORDS} and {@code SHUFFLE_BYTES}) and
 * appends one CSV row per suspicious (attempt, counter group) pair to its {@link CSVResult}.
 *
 * <p>Three independent checks are run for each task attempt:
 * <ol>
 *   <li>group skew: shuffle bytes above the configured limit and a groups/records ratio below
 *       the configured minimum ratio;</li>
 *   <li>record skew: in a vertex with more than one task, an attempt whose share of the
 *       vertex-level input records exceeds both the configured maximum ratio and
 *       {@link #RECORD_SKEW_FRACTION};</li>
 *   <li>parallelism: shuffle bytes above the configured limit and a groups/records ratio
 *       strictly between the configured minimum and maximum ratios.</li>
 * </ol>
 *
 * <p>An attempt can match more than one check and will then contribute more than one row.
 */
public class SkewAnalyzer extends TezAnalyzerBase implements Analyzer {
    /** Configuration key: shuffle-bytes threshold per attempt per source. */
    private static final String SHUFFLE_BYTES_PER_ATTEMPT_PER_SOURCE = "tez.skew-analyzer.shuffle.bytes.per.source";
    /** Default shuffle-bytes threshold: 900 MiB (943718400 bytes). */
    private static final long SHUFFLE_BYTES_PER_ATTEMPT_PER_SOURCE_DEFAULT = 900L * 1024 * 1024;
    /** Configuration key: lower bound for the groups/records ratio. */
    private static final String ATTEMPT_SHUFFLE_KEY_GROUP_MIN_RATIO = "tez.skew-analyzer.shuffle.key.group.min.ratio";
    private static final float ATTEMPT_SHUFFLE_KEY_GROUP_MIN_RATIO_DEFAULT = 0.2f;
    /** Configuration key: upper bound for the ratio checks. */
    private static final String ATTEMPT_SHUFFLE_KEY_GROUP_MAX_RATIO = "tez.skew-analyzer.shuffle.key.group.max.ratio";
    private static final float ATTEMPT_SHUFFLE_KEY_GROUP_MAX_RATIO_DEFAULT = 0.4f;
    /**
     * Fraction of the vertex-level input records that a single attempt must exceed before it is
     * reported as record skew (matches the "&gt; 60%" wording of the emitted observation).
     */
    private static final float RECORD_SKEW_FRACTION = 0.6f;
    /** Counter-group name that is skipped by every check; only the other groups are analyzed. */
    private static final String TASK_COUNTER_GROUP = TaskCounter.class.getName();
    /** Column names of the CSV result; {@link #addRecord} emits values in exactly this order. */
    private static final String[] headers = {"vertexName", "taskAttemptId", "counterGroup", "node", "REDUCE_INPUT_GROUPS", "REDUCE_INPUT_RECORDS", "ratio", "SHUFFLE_BYTES", "timeTaken", "observation"};

    private final CSVResult csvResult;
    private final float minRatio;
    private final float maxRatio;
    private final long maxShuffleBytesPerSource;

    /**
     * Creates the analyzer and reads its thresholds from the given configuration, falling back
     * to the built-in defaults for keys that are not set.
     *
     * @param configuration configuration supplying the optional {@code tez.skew-analyzer.*} keys
     */
    public SkewAnalyzer(Configuration configuration) {
        super(configuration);
        this.csvResult = new CSVResult(headers);
        this.maxRatio = configuration.getFloat(ATTEMPT_SHUFFLE_KEY_GROUP_MAX_RATIO, ATTEMPT_SHUFFLE_KEY_GROUP_MAX_RATIO_DEFAULT);
        this.minRatio = configuration.getFloat(ATTEMPT_SHUFFLE_KEY_GROUP_MIN_RATIO, ATTEMPT_SHUFFLE_KEY_GROUP_MIN_RATIO_DEFAULT);
        this.maxShuffleBytesPerSource = configuration.getLong(SHUFFLE_BYTES_PER_ATTEMPT_PER_SOURCE, SHUFFLE_BYTES_PER_ATTEMPT_PER_SOURCE_DEFAULT);
    }

    /**
     * Runs all skew checks over every task attempt of every vertex in the DAG.
     *
     * @param dagInfo the DAG to analyze; must not be {@code null}
     * @throws NullPointerException if {@code dagInfo} is {@code null}
     * @throws TezException declared by the {@link Analyzer} contract
     */
    @Override // org.apache.tez.analyzer.Analyzer
    public void analyze(DagInfo dagInfo) throws TezException {
        analyzeReducers(Objects.requireNonNull(dagInfo, "DAG can't be null"));
    }

    /** Applies the three per-attempt checks to each task attempt of each vertex. */
    private void analyzeReducers(DagInfo dagInfo) {
        for (VertexInfo vertexInfo : dagInfo.getVertices()) {
            for (TaskAttemptInfo taskAttemptInfo : vertexInfo.getTaskAttempts()) {
                analyzeGroupSkewPerSource(taskAttemptInfo);
                analyzeRecordSkewPerSource(taskAttemptInfo);
                analyzeForParallelism(taskAttemptInfo);
            }
        }
    }

    /**
     * Reports counter groups whose shuffle bytes exceed the configured limit while the
     * groups/records ratio is below the configured minimum ratio.
     */
    private void analyzeGroupSkewPerSource(TaskAttemptInfo taskAttemptInfo) {
        Map<String, TezCounter> groupCounters = taskAttemptInfo.getCounter(TaskCounter.REDUCE_INPUT_GROUPS.toString());
        Map<String, TezCounter> recordCounters = taskAttemptInfo.getCounter(TaskCounter.REDUCE_INPUT_RECORDS.toString());
        Map<String, TezCounter> shuffleCounters = taskAttemptInfo.getCounter(TaskCounter.SHUFFLE_BYTES.toString());
        for (Map.Entry<String, TezCounter> entry : groupCounters.entrySet()) {
            String source = entry.getKey();
            if (TASK_COUNTER_GROUP.equals(source)) {
                continue;
            }
            long inputGroups = entry.getValue().getValue();
            long inputRecords = counterValue(recordCounters, source);
            if (inputRecords == 0) {
                // No records for this source: the ratio would be NaN/Infinity and can never be
                // below minRatio, so there is nothing to report.
                continue;
            }
            long shuffleBytes = counterValue(shuffleCounters, source);
            float ratio = ((float) inputGroups) / ((float) inputRecords);
            if (shuffleBytes > this.maxShuffleBytesPerSource && ratio < this.minRatio) {
                addRecord(taskAttemptInfo, source, inputGroups, inputRecords, ratio, shuffleBytes,
                    "Please check partitioning. Otherwise consider increasing memLimit");
            }
        }
    }

    /**
     * Reports counter groups for which this attempt received a disproportionate share of the
     * vertex-level reduce input records. Only vertices with more than one task are considered.
     */
    private void analyzeRecordSkewPerSource(TaskAttemptInfo taskAttemptInfo) {
        VertexInfo vertexInfo = taskAttemptInfo.getTaskInfo().getVertexInfo();
        if (vertexInfo.getNumTasks() <= 1) {
            // A single task trivially receives all records; that is not skew.
            return;
        }
        Map<String, TezCounter> vertexRecordCounters = vertexInfo.getCounter(TaskCounter.REDUCE_INPUT_RECORDS.toString());
        Map<String, TezCounter> groupCounters = taskAttemptInfo.getCounter(TaskCounter.REDUCE_INPUT_GROUPS.toString());
        Map<String, TezCounter> recordCounters = taskAttemptInfo.getCounter(TaskCounter.REDUCE_INPUT_RECORDS.toString());
        Map<String, TezCounter> shuffleCounters = taskAttemptInfo.getCounter(TaskCounter.SHUFFLE_BYTES.toString());
        for (Map.Entry<String, TezCounter> entry : groupCounters.entrySet()) {
            String source = entry.getKey();
            if (TASK_COUNTER_GROUP.equals(source)) {
                continue;
            }
            long vertexRecords = counterValue(vertexRecordCounters, source);
            if (vertexRecords == 0) {
                // Without a vertex-level total the share is undefined; dividing anyway would
                // yield Infinity and produce a bogus "skew" row.
                continue;
            }
            long inputGroups = entry.getValue().getValue();
            long inputRecords = counterValue(recordCounters, source);
            long shuffleBytes = counterValue(shuffleCounters, source);
            float ratio = ((float) inputRecords) / ((float) vertexRecords);
            if (ratio > this.maxRatio && ((float) inputRecords) > ((float) vertexRecords) * RECORD_SKEW_FRACTION) {
                addRecord(taskAttemptInfo, source, inputGroups, inputRecords, ratio, shuffleBytes,
                    "Some task attempts are getting > 60% of reduce input records. Consider adjusting parallelism & check partition logic");
            }
        }
    }

    /**
     * Reports counter groups whose shuffle bytes exceed the configured limit while the
     * groups/records ratio lies strictly between the configured minimum and maximum ratios.
     */
    private void analyzeForParallelism(TaskAttemptInfo taskAttemptInfo) {
        Map<String, TezCounter> groupCounters = taskAttemptInfo.getCounter(TaskCounter.REDUCE_INPUT_GROUPS.toString());
        Map<String, TezCounter> recordCounters = taskAttemptInfo.getCounter(TaskCounter.REDUCE_INPUT_RECORDS.toString());
        Map<String, TezCounter> shuffleCounters = taskAttemptInfo.getCounter(TaskCounter.SHUFFLE_BYTES.toString());
        for (Map.Entry<String, TezCounter> entry : groupCounters.entrySet()) {
            String source = entry.getKey();
            if (TASK_COUNTER_GROUP.equals(source)) {
                continue;
            }
            long inputGroups = entry.getValue().getValue();
            long inputRecords = counterValue(recordCounters, source);
            if (inputRecords == 0) {
                // Ratio would be NaN/Infinity, which never falls inside (minRatio, maxRatio).
                continue;
            }
            long shuffleBytes = counterValue(shuffleCounters, source);
            float ratio = ((float) inputGroups) / ((float) inputRecords);
            // Use the configured threshold (not the compile-time default) so that
            // tez.skew-analyzer.shuffle.bytes.per.source is honored by this check as well.
            if (shuffleBytes > this.maxShuffleBytesPerSource && ratio > this.minRatio && ratio < this.maxRatio) {
                addRecord(taskAttemptInfo, source, inputGroups, inputRecords, ratio, shuffleBytes,
                    "Consider increasing parallelism.");
            }
        }
    }

    /**
     * Returns the value of the counter stored under {@code source}, or {@code 0} when the map
     * has no entry for that counter group.
     */
    private static long counterValue(Map<String, TezCounter> counters, String source) {
        TezCounter counter = counters.get(source);
        return counter != null ? counter.getValue() : 0L;
    }

    /** Appends one row to the CSV result, with values in the same order as {@link #headers}. */
    private void addRecord(TaskAttemptInfo taskAttemptInfo, String source, long inputGroups,
            long inputRecords, float ratio, long shuffleBytes, String observation) {
        this.csvResult.addRecord(new String[] {
            taskAttemptInfo.getTaskInfo().getVertexInfo().getVertexName(),
            taskAttemptInfo.getTaskAttemptId(),
            source,
            taskAttemptInfo.getNodeId(),
            String.valueOf(inputGroups),
            String.valueOf(inputRecords),
            String.valueOf(ratio),
            String.valueOf(shuffleBytes),
            String.valueOf(taskAttemptInfo.getTimeTaken()),
            observation
        });
    }

    /**
     * Returns the rows accumulated by all {@link #analyze(DagInfo)} calls made on this instance.
     *
     * @return the CSV result owned by this analyzer (not a copy)
     */
    @Override // org.apache.tez.analyzer.Analyzer
    public CSVResult getResult() throws TezException {
        return this.csvResult;
    }

    /** Returns the display name of this analyzer. */
    @Override // org.apache.tez.analyzer.Analyzer
    public String getName() {
        return "Skew Analyzer";
    }

    /** Returns a one-line description of what this analyzer does. */
    @Override // org.apache.tez.analyzer.Analyzer
    public String getDescription() {
        return "Analyze reducer skews by mining reducer task counters";
    }

    /**
     * Command-line entry point: runs the analyzer through {@link ToolRunner}, prints the
     * results, and exits the JVM with the code returned by the run.
     *
     * @param strArr command-line arguments forwarded to {@link ToolRunner#run}
     * @throws Exception if the tool run fails
     */
    public static void main(String[] strArr) throws Exception {
        Configuration configuration = new Configuration();
        SkewAnalyzer skewAnalyzer = new SkewAnalyzer(configuration);
        int run = ToolRunner.run(configuration, skewAnalyzer, strArr);
        skewAnalyzer.printResults();
        System.exit(run);
    }
}
