package com.google.cloud.dataflow.sdk.io.hdfs;

import com.google.cloud.dataflow.sdk.coders.AvroCoder;
import com.google.cloud.dataflow.sdk.coders.Coder;
import com.google.cloud.dataflow.sdk.coders.CoderException;
import com.google.cloud.dataflow.sdk.coders.KvCoder;
import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
import com.google.cloud.dataflow.sdk.coders.VoidCoder;
import com.google.cloud.dataflow.sdk.io.BoundedSource;
import com.google.cloud.dataflow.sdk.options.PipelineOptions;
import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
import com.google.cloud.dataflow.sdk.util.CoderUtils;
import com.google.cloud.dataflow.sdk.values.KV;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.NoSuchElementException;
import javax.annotation.Nullable;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

/* loaded from: input_file:com/google/cloud/dataflow/sdk/io/hdfs/HDFSFileSource.class */
/**
 * A {@link BoundedSource} that reads records from files through a Hadoop
 * {@link FileInputFormat}.
 *
 * <p>Every key/value pair produced by the input format is handed to an input converter, whose
 * result is the element emitted by the source. All fields are final; each {@code withX} method
 * returns a modified copy instead of mutating the receiver.
 *
 * @param <T> element type emitted by this source
 * @param <K> key type produced by the input format
 * @param <V> value type produced by the input format
 */
public class HDFSFileSource<T, K, V> extends BoundedSource<T> {
    private static final long serialVersionUID = 0;

    /** Path or glob pattern that is registered as the input path of the Hadoop job. */
    private final String filepattern;
    /** Input format class; instantiated reflectively through its no-argument constructor. */
    private final Class<? extends FileInputFormat<K, V>> formatClass;
    /** Coder returned by {@link #getDefaultOutputCoder()}. */
    private final Coder<T> coder;
    /** Turns one key/value pair read from the input format into an output element. */
    private final SerializableFunction<KV<K, V>, T> inputConverter;
    /** Extra Hadoop settings copied into every job this source creates; may be null. */
    private final SerializableConfiguration serializableConfiguration;
    /** When non-null, the single split this source is restricted to. */
    private final SerializableSplit serializableSplit;
    /** Whether {@link #validate()} checks that the file pattern matches at least one file. */
    private final boolean validate;

    /**
     * Reader that walks the input splits one after another and emits the converted records.
     */
    private static class HDFSFileReader<T, K, V> extends BoundedSource.BoundedReader<T> {
        private final HDFSFileSource<T, K, V> source;
        private final String filepattern;
        private final Class<? extends FileInputFormat<K, V>> formatClass;
        private final Job job;
        private List<InputSplit> splits;
        private ListIterator<InputSplit> splitsIterator;
        private Configuration conf;
        private FileInputFormat<K, V> format;
        private TaskAttemptContext attemptContext;
        private RecordReader<K, V> currentReader;
        private KV<K, V> currentPair;
        /** Set once {@link #advance()} has run out of splits. */
        private boolean finished;

        /**
         * Creates a reader for {@code source}.
         *
         * @param source the source being read; also supplies the Hadoop configuration
         * @param filepattern path or glob pattern to read
         * @param formatClass input format class to instantiate in {@link #start()}
         * @param serializableSplit when non-null, the only split that is read; otherwise the
         *     splits are computed in {@link #start()}
         * @throws IOException if the Hadoop job cannot be created
         */
        HDFSFileReader(HDFSFileSource<T, K, V> source, String filepattern, Class<? extends FileInputFormat<K, V>> formatClass, SerializableSplit serializableSplit) throws IOException {
            this.source = source;
            this.filepattern = filepattern;
            this.formatClass = formatClass;
            this.job = source.newConfiguredJob();
            if (serializableSplit != null) {
                this.splits = ImmutableList.of(serializableSplit.getSplit());
                this.splitsIterator = this.splits.listIterator();
            }
        }

        /**
         * Instantiates the input format, computes the splits unless one was supplied to the
         * constructor, and moves to the first record.
         *
         * @return true if a first record is available
         * @throws IOException if the input format cannot be instantiated or reading fails
         */
        public boolean start() throws IOException {
            FileInputFormat.addInputPath(job, new Path(filepattern));
            conf = job.getConfiguration();
            try {
                format = formatClass.newInstance();
            } catch (IllegalAccessException | InstantiationException e) {
                throw new IOException("Cannot instantiate file input format " + formatClass, e);
            }
            attemptContext = new TaskAttemptContextImpl(conf, new TaskAttemptID());
            if (splitsIterator == null) {
                splits = format.getSplits(job);
                splitsIterator = splits.listIterator();
            }
            return advance();
        }

        /**
         * Moves to the next record, opening the record reader of the next split whenever the
         * current one is exhausted.
         *
         * @return true if a record is available, false once every split has been consumed
         * @throws IOException if reading fails or the thread is interrupted
         */
        public boolean advance() throws IOException {
            try {
                if (currentReader != null && currentReader.nextKeyValue()) {
                    currentPair = nextPair();
                    return true;
                }
                while (splitsIterator.hasNext()) {
                    InputSplit nextSplit = splitsIterator.next();
                    RecordReader<K, V> nextReader = format.createRecordReader(nextSplit, attemptContext);
                    if (currentReader != null) {
                        currentReader.close();
                    }
                    // Assign before initialize() so that close() releases the reader even if
                    // initialization fails.
                    currentReader = nextReader;
                    currentReader.initialize(nextSplit, attemptContext);
                    if (currentReader.nextKeyValue()) {
                        currentPair = nextPair();
                        return true;
                    }
                    // Empty split: release its reader and try the next one.
                    currentReader.close();
                    currentReader = null;
                }
                currentPair = null;
                finished = true;
                return false;
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new IOException(e);
            }
        }

        /**
         * Returns the current record after applying the source's input converter.
         *
         * @throws NoSuchElementException if there is no current record
         */
        public T getCurrent() throws NoSuchElementException {
            if (currentPair == null) {
                throw new NoSuchElementException();
            }
            return source.inputConverter.apply(currentPair);
        }

        /** Closes the record reader that is currently open, if any, and drops the current record. */
        public void close() throws IOException {
            if (currentReader != null) {
                currentReader.close();
                currentReader = null;
            }
            currentPair = null;
        }

        /** Returns the source this reader was created from. */
        public BoundedSource<T> getCurrentSource() {
            return source;
        }

        /**
         * Alias of {@link #getCurrentSource()} kept under the name the decompiler assigned to it.
         */
        public BoundedSource<T> m4getCurrentSource() {
            return getCurrentSource();
        }

        /**
         * Reads the current key and value from the record reader. {@link Writable} instances
         * are copied so that the stored pair does not alias an object the record reader may
         * overwrite on its next call.
         */
        @SuppressWarnings("unchecked") // a clone of a K (or V) instance is itself a K (or V)
        private KV<K, V> nextPair() throws IOException, InterruptedException {
            K key = currentReader.getCurrentKey();
            V value = currentReader.getCurrentValue();
            if (key instanceof Writable) {
                key = (K) WritableUtils.clone((Writable) key, conf);
            }
            if (value instanceof Writable) {
                value = (V) WritableUtils.clone((Writable) value, conf);
            }
            return KV.of(key, value);
        }

        /**
         * Estimates the consumed fraction of the input in the range [0, 1].
         *
         * <p>Each split is given an equal share; the share of the split being read is scaled by
         * the progress its record reader reports.
         *
         * @return 1.0 once all splits are exhausted, 0.0 while no record reader is open,
         *     otherwise the estimate described above
         */
        public Double getFractionConsumed() {
            if (finished) {
                return 1.0d;
            }
            if (currentReader == null) {
                return 0.0d;
            }
            // A record reader is open, so at least one split has been taken from the iterator
            // and previousIndex() is the index of the split being read.
            int index = splitsIterator.previousIndex();
            int total = splits.size();
            double before = (1.0d * index) / total;
            double after = (1.0d * (index + 1)) / total;
            Double progress = getProgress();
            if (progress == null) {
                return before;
            }
            return before + (progress * (after - before));
        }

        /** Returns the progress of the open record reader, or null if it cannot be obtained. */
        private Double getProgress() {
            try {
                return (double) currentReader.getProgress();
            } catch (IOException e) {
                // Progress is only an estimate; fall back to the split-level fraction.
                return null;
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return null;
            }
        }
    }

    /**
     * Wraps an {@link InputSplit} that is also a {@link Writable} so it can be serialized with
     * Java serialization: the class name is written first, followed by the split's own
     * {@code Writable} payload.
     */
    public static class SerializableSplit implements Externalizable {
        private static final long serialVersionUID = 0;
        private InputSplit split;

        /** No-argument constructor required by {@link Externalizable}. */
        public SerializableSplit() {
        }

        /**
         * @param inputSplit the split to wrap
         * @throws IllegalArgumentException if {@code inputSplit} is not a {@link Writable}
         */
        public SerializableSplit(InputSplit inputSplit) {
            Preconditions.checkArgument(inputSplit instanceof Writable, "Split is not writable: %s", inputSplit);
            this.split = inputSplit;
        }

        /** Returns the wrapped split. */
        public InputSplit getSplit() {
            return this.split;
        }

        @Override // java.io.Externalizable
        public void writeExternal(ObjectOutput objectOutput) throws IOException {
            // Write the binary name: it is what Class.forName expects in readExternal. The
            // canonical name differs for nested classes and is null for local/anonymous ones.
            objectOutput.writeUTF(this.split.getClass().getName());
            ((Writable) this.split).write(objectOutput);
        }

        @Override // java.io.Externalizable
        public void readExternal(ObjectInput objectInput) throws IOException, ClassNotFoundException {
            String className = objectInput.readUTF();
            try {
                // NOTE: the class name comes from the stream; asSubclass rejects anything that
                // is not an InputSplit before an instance is created.
                this.split = Class.forName(className).asSubclass(InputSplit.class).newInstance();
                ((Writable) this.split).readFields(objectInput);
            } catch (IllegalAccessException | InstantiationException e) {
                throw new IOException("Cannot instantiate input split " + className, e);
            }
        }
    }

    private HDFSFileSource(String filepattern, Class<? extends FileInputFormat<?, ?>> formatClass, Coder<T> coder, SerializableFunction<KV<K, V>, T> inputConverter, SerializableConfiguration serializableConfiguration, SerializableSplit serializableSplit, boolean validate) {
        this.filepattern = filepattern;
        this.formatClass = castClass(formatClass);
        this.coder = coder;
        this.inputConverter = inputConverter;
        this.serializableConfiguration = serializableConfiguration;
        this.serializableSplit = serializableSplit;
        this.validate = validate;
    }

    /**
     * Creates a source that reads {@code str} with the given input format and converts every
     * key/value pair with {@code serializableFunction}. Validation is enabled.
     *
     * @param str path or glob pattern to read
     * @param cls input format class
     * @param coder coder for the emitted elements
     * @param serializableFunction converts a key/value pair into an element
     */
    public static <T, K, V, F extends FileInputFormat<K, V>> HDFSFileSource<T, K, V> from(String str, Class<F> cls, Coder<T> coder, SerializableFunction<KV<K, V>, T> serializableFunction) {
        return new HDFSFileSource<>(str, cls, coder, serializableFunction, null, null, true);
    }

    /**
     * Creates a source that emits the key/value pairs of the input format unchanged, using a
     * {@link KvCoder} built from the default coders of the key and value classes.
     *
     * @param str path or glob pattern to read
     * @param cls input format class
     * @param cls2 key class
     * @param cls3 value class
     * @throws IllegalStateException if no default coder is known for the key or value class
     */
    public static <K, V, F extends FileInputFormat<K, V>> HDFSFileSource<KV<K, V>, K, V> from(String str, Class<F> cls, Class<K> cls2, Class<V> cls3) {
        return new HDFSFileSource<>(str, cls, KvCoder.of(getDefaultCoder(cls2), getDefaultCoder(cls3)), new SerializableFunction<KV<K, V>, KV<K, V>>() {
            public KV<K, V> apply(KV<K, V> kv) {
                return kv;
            }
        }, null, null, true);
    }

    /**
     * Creates a source that reads {@code str} with {@link TextInputFormat} and emits the value
     * of each record as a string.
     *
     * @param str path or glob pattern to read
     */
    public static HDFSFileSource<String, LongWritable, Text> fromText(String str) {
        return from(str, TextInputFormat.class, StringUtf8Coder.of(), new SerializableFunction<KV<LongWritable, Text>, String>() {
            public String apply(KV<LongWritable, Text> kv) {
                return kv.getValue().toString();
            }
        });
    }

    /**
     * Creates a source that reads Avro files with {@link AvroKeyInputFormat} and emits a copy
     * of each datum made with {@code avroCoder}. The coder's schema is stored in the Hadoop
     * configuration under {@code avro.schema.input.key}.
     *
     * @param str path or glob pattern to read
     * @param avroCoder coder for the emitted elements; also supplies the schema
     */
    public static <T> HDFSFileSource<T, AvroKey<T>, NullWritable> fromAvro(String str, final AvroCoder<T> avroCoder) {
        Class<AvroKeyInputFormat<T>> avroFormatClass = castClass(AvroKeyInputFormat.class);
        SerializableFunction<KV<AvroKey<T>, NullWritable>, T> datumCopier = new SerializableFunction<KV<AvroKey<T>, NullWritable>, T>() {
            public T apply(KV<AvroKey<T>, NullWritable> kv) {
                try {
                    return CoderUtils.clone(avroCoder, kv.getKey().datum());
                } catch (CoderException e) {
                    throw new RuntimeException(e);
                }
            }
        };
        Configuration configuration = new Configuration();
        configuration.set("avro.schema.input.key", avroCoder.getSchema().toString());
        return from(str, avroFormatClass, avroCoder, datumCopier).withConfiguration(configuration);
    }

    /** Same as {@link #fromAvro(String, AvroCoder)} with a coder created from {@code schema}. */
    public static HDFSFileSource<GenericRecord, AvroKey<GenericRecord>, NullWritable> fromAvro(String str, Schema schema) {
        return fromAvro(str, AvroCoder.of(schema));
    }

    /** Same as {@link #fromAvro(String, AvroCoder)} with a coder created from {@code cls}. */
    public static <T> HDFSFileSource<T, AvroKey<T>, NullWritable> fromAvro(String str, Class<T> cls) {
        return fromAvro(str, AvroCoder.of(cls));
    }

    /** Returns a copy of this source that uses {@code coder} as its output coder. */
    public HDFSFileSource<T, K, V> withCoder(Coder<T> coder) {
        return new HDFSFileSource<>(this.filepattern, this.formatClass, coder, this.inputConverter, this.serializableConfiguration, this.serializableSplit, this.validate);
    }

    /** Returns a copy of this source that converts records with {@code serializableFunction}. */
    public HDFSFileSource<T, K, V> withInputConverter(SerializableFunction<KV<K, V>, T> serializableFunction) {
        return new HDFSFileSource<>(this.filepattern, this.formatClass, this.coder, serializableFunction, this.serializableConfiguration, this.serializableSplit, this.validate);
    }

    /**
     * Returns a copy of this source whose Hadoop jobs receive the entries of
     * {@code configuration}. A previously set configuration is replaced, not merged.
     */
    public HDFSFileSource<T, K, V> withConfiguration(Configuration configuration) {
        return new HDFSFileSource<>(this.filepattern, this.formatClass, this.coder, this.inputConverter, new SerializableConfiguration(configuration), this.serializableSplit, this.validate);
    }

    /**
     * Returns a copy of this source restricted to {@code inputSplit}.
     *
     * @throws IllegalArgumentException if {@code inputSplit} is not a {@link Writable}
     */
    public HDFSFileSource<T, K, V> withInputSplit(InputSplit inputSplit) {
        return new HDFSFileSource<>(this.filepattern, this.formatClass, this.coder, this.inputConverter, this.serializableConfiguration, new SerializableSplit(inputSplit), this.validate);
    }

    /** Returns a copy of this source for which {@link #validate()} does nothing. */
    public HDFSFileSource<T, K, V> withoutValidation() {
        return new HDFSFileSource<>(this.filepattern, this.formatClass, this.coder, this.inputConverter, this.serializableConfiguration, this.serializableSplit, false);
    }

    /**
     * Splits this source into one source per Hadoop input split.
     *
     * @param j desired bundle size in bytes; used as both the minimum and the maximum input
     *     split size
     * @param pipelineOptions not used
     * @return this source alone if it is already bound to a split, otherwise one source per
     *     computed split
     */
    public List<? extends BoundedSource<T>> splitIntoBundles(long j, PipelineOptions pipelineOptions) throws Exception {
        if (this.serializableSplit != null) {
            return ImmutableList.of(this);
        }
        // Build the list once rather than returning a lazy view that would create a fresh
        // source on every access.
        ImmutableList.Builder<HDFSFileSource<T, K, V>> bundles = ImmutableList.builder();
        for (InputSplit split : computeSplits(j)) {
            bundles.add(new HDFSFileSource<T, K, V>(this.filepattern, this.formatClass, this.coder, this.inputConverter, this.serializableConfiguration, new SerializableSplit(split), this.validate));
        }
        return bundles.build();
    }

    /**
     * Returns the summed length of all files the input format lists for the file pattern.
     *
     * @param pipelineOptions not used
     */
    public long getEstimatedSizeBytes(PipelineOptions pipelineOptions) throws Exception {
        Job job = newConfiguredJob();
        long totalBytes = 0;
        for (FileStatus status : listStatus(createFormat(job), job)) {
            totalBytes += status.getLen();
        }
        return totalBytes;
    }

    /** Always returns false. */
    public boolean producesSortedKeys(PipelineOptions pipelineOptions) throws Exception {
        return false;
    }

    /**
     * Validates this source and creates a reader for it.
     *
     * @param pipelineOptions not used
     * @throws IOException if the reader's Hadoop job cannot be created
     */
    public BoundedSource.BoundedReader<T> createReader(PipelineOptions pipelineOptions) throws IOException {
        validate();
        return new HDFSFileReader<T, K, V>(this, this.filepattern, this.formatClass, this.serializableSplit);
    }

    /**
     * Checks that the file pattern matches at least one file, unless validation was disabled
     * with {@link #withoutValidation()}.
     *
     * @throws IllegalStateException if no file matches
     * @throws RuntimeException if the file system cannot be queried
     */
    public void validate() {
        if (!this.validate) {
            return;
        }
        try {
            // Resolve the file system through Path rather than java.net.URI: glob patterns may
            // contain characters such as '{' and '}' that the URI parser rejects.
            Path pattern = new Path(this.filepattern);
            FileSystem fileSystem = pattern.getFileSystem(newConfiguredJob().getConfiguration());
            FileStatus[] matches = fileSystem.globStatus(pattern);
            Preconditions.checkState(matches != null && matches.length > 0, "Unable to find any files matching %s", this.filepattern);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Returns the coder this source was configured with. */
    public Coder<T> getDefaultOutputCoder() {
        return this.coder;
    }

    /**
     * Creates a Hadoop job whose configuration contains every entry of the configuration given
     * to {@link #withConfiguration(Configuration)}, if any.
     */
    private Job newConfiguredJob() throws IOException {
        Job job = Job.getInstance();
        if (this.serializableConfiguration != null) {
            for (Map.Entry<String, String> entry : this.serializableConfiguration.get()) {
                job.getConfiguration().set(entry.getKey(), entry.getValue());
            }
        }
        return job;
    }

    /** Asks the input format for splits whose minimum and maximum size are both the given size. */
    private List<InputSplit> computeSplits(long desiredBundleSizeBytes) throws IOException, IllegalAccessException, InstantiationException {
        Job job = newConfiguredJob();
        // Applied after the user configuration so that the requested bundle size wins.
        FileInputFormat.setMinInputSplitSize(job, desiredBundleSizeBytes);
        FileInputFormat.setMaxInputSplitSize(job, desiredBundleSizeBytes);
        return createFormat(job).getSplits(job);
    }

    /** Registers the file pattern as an input path of {@code job} and instantiates the input format. */
    private FileInputFormat<K, V> createFormat(Job job) throws IOException, IllegalAccessException, InstantiationException {
        FileInputFormat.addInputPath(job, new Path(this.filepattern));
        return this.formatClass.newInstance();
    }

    /**
     * Invokes {@code FileInputFormat.listStatus(JobContext)} reflectively, after making the
     * method accessible.
     */
    @SuppressWarnings("unchecked") // the method is looked up by name; its result is cast to a list of FileStatus
    private List<FileStatus> listStatus(FileInputFormat<K, V> format, Job job) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
        Method listStatusMethod = FileInputFormat.class.getDeclaredMethod("listStatus", JobContext.class);
        listStatusMethod.setAccessible(true);
        return (List<FileStatus>) listStatusMethod.invoke(format, job);
    }

    /**
     * Picks a coder for {@code cls}: a {@code WritableCoder} for {@link Writable} types and a
     * {@link VoidCoder} for {@link Void}.
     *
     * @throws IllegalStateException for any other class
     */
    @SuppressWarnings("unchecked") // each branch is guarded by a runtime check on cls
    private static <T> Coder<T> getDefaultCoder(Class<T> cls) {
        if (Writable.class.isAssignableFrom(cls)) {
            Class<? extends Writable> writableClass = cls.asSubclass(Writable.class);
            return (Coder<T>) WritableCoder.of(writableClass);
        }
        if (Void.class.equals(cls)) {
            return (Coder<T>) VoidCoder.of();
        }
        throw new IllegalStateException("Cannot find coder for " + cls);
    }

    /** Re-types a class token; the caller is responsible for the cast being correct. */
    @SuppressWarnings("unchecked")
    private static <T> Class<T> castClass(Class<?> cls) {
        return (Class<T>) cls;
    }
}
