package ws.palladian.extraction.location.scope.evaluation;

import java.io.File;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang3.Validate;
import ws.palladian.extraction.location.ImmutableLocation;
import ws.palladian.extraction.location.LocationBuilder;
import ws.palladian.extraction.location.LocationType;
import ws.palladian.extraction.location.evaluation.LocationDocument;
import ws.palladian.helper.ProgressMonitor;
import ws.palladian.helper.ProgressReporter;
import ws.palladian.helper.collection.AbstractIterator;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.geo.GeoCoordinate;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.helper.io.LineIterator;
import ws.palladian.helper.nlp.StringHelper;

/* loaded from: input_file:ws/palladian/extraction/location/scope/evaluation/GeoTextDatasetReader.class */
public final class GeoTextDatasetReader implements Iterable<LocationDocument> {
    /** The dataset file handed to the constructor; each iterator created by {@link #iterator()} reads it line by line. */
    private final File fullTextFile;
    /** Line count of {@link #fullTextFile}, determined once in the constructor and used as the progress total. */
    private final int numTotalEntries;
    /** Optional fold filter; {@code null} means that no entry is filtered out. */
    private final SubSet subSet;
    /** Decides whether entries are returned one by one or merged per user; never {@code null}. */
    private final Combination combination;

    /* loaded from: input_file:ws/palladian/extraction/location/scope/evaluation/GeoTextDatasetReader$Combination.class */
    /**
     * Controls how the entries of the dataset are delivered by the reader's iterator.
     */
    public enum Combination {
        /** Every entry read from the file is returned as its own document. */
        SINGLE,
        /** Consecutive entries are merged into one document per user (the iterator is wrapped in a combining iterator). */
        USER
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:ws/palladian/extraction/location/scope/evaluation/GeoTextDatasetReader$CombininingIterator.class */
    /**
     * Iterator decorator which merges runs of consecutive documents that belong to the
     * same user into a single document.
     *
     * <p>The user is the part of a document's file name before the first {@code '#'}.
     * The texts of a run are joined with line breaks; the coordinate of the merged
     * document is the one captured when the run was started.
     */
    public static final class CombininingIterator extends AbstractIterator<LocationDocument> {
        /** Source of the individual (non-merged) documents. */
        private final Iterator<LocationDocument> wrapped;
        /** Accumulates the texts of the run currently being collected, one text per line. */
        private StringBuilder buffer = new StringBuilder();
        /** User of the run currently being collected; {@code null} while no run is open. */
        private String userName;
        /** Coordinate captured for the current run; {@code null} while no run is open. */
        private GeoCoordinate coordinate;

        /**
         * @param it The iterator delivering the documents to merge.
         */
        CombininingIterator(Iterator<LocationDocument> it) {
            this.wrapped = it;
        }

        /**
         * Delivers the next merged document. (The decompiler renamed this method from
         * {@code getNext}; it was merged with its bridge method.)
         *
         * @return The merged document of the run which was just completed.
         * @throws AbstractIterator.Finished When the wrapped iterator is exhausted and
         *         nothing is left in the buffer.
         */
        public LocationDocument m25getNext() throws AbstractIterator.Finished {
            while (wrapped.hasNext()) {
                final LocationDocument current = wrapped.next();
                final String currentUser = current.getFileName().split("#")[0];

                // A different user than the one being collected closes the open run.
                final boolean userChanged = userName != null && !currentUser.equals(userName);
                final LocationDocument completed = userChanged ? createAndClear() : null;

                // No coordinate yet means that a new run starts with the current document.
                if (coordinate == null) {
                    coordinate = current.getMainLocation().getCoordinate();
                    userName = currentUser;
                }
                buffer.append(current.getText()).append('\n');

                if (completed != null) {
                    return completed;
                }
            }
            // Input exhausted: flush the last run, if there is one.
            if (buffer.length() == 0) {
                throw FINISHED;
            }
            return createAndClear();
        }

        /**
         * Builds the merged document from the collected state and resets that state, so
         * that the next run starts from scratch.
         *
         * @return The document for the run collected so far; its file name is the user
         *         name and its text is the trimmed buffer content.
         */
        private LocationDocument createAndClear() {
            // Take a snapshot first, then reset, so the state is clean whatever happens below.
            final String mergedName = userName;
            final String mergedText = buffer.toString().trim();
            final GeoCoordinate mergedCoordinate = coordinate;
            buffer = new StringBuilder();
            userName = null;
            coordinate = null;

            final LocationBuilder builder = new LocationBuilder();
            builder.setPrimaryName("undetermined");
            builder.setCoordinate(mergedCoordinate);
            return new LocationDocument(mergedName, mergedText, (List) null, builder.create());
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:ws/palladian/extraction/location/scope/evaluation/GeoTextDatasetReader$DatasetIterator.class */
    /**
     * Iterator which reads the dataset file line by line and converts every accepted
     * line into a {@link LocationDocument}.
     *
     * <p>A line must consist of exactly six tab-separated columns. Column 0 is the user
     * identifier, columns 3 and 4 are parsed as the two coordinate values, column 5 is
     * the text. The fold of a line is derived from the user identifier and is used for
     * filtering when a {@link SubSet} is given.
     */
    public static final class DatasetIterator extends AbstractIterator<LocationDocument> {
        /** Delivers the raw lines of the dataset file. */
        final LineIterator lineIterator;
        /** Progress reporting; incremented once per line read, including filtered lines. */
        final ProgressReporter progress = new ProgressMonitor();
        /** Fold filter, or {@code null} to accept every line. */
        final SubSet subSet;

        /**
         * @param file The dataset file to read.
         * @param i The total number of steps announced to the progress reporter.
         * @param subSet The subset to restrict to, or {@code null} for no restriction.
         */
        DatasetIterator(File file, int i, SubSet subSet) {
            this.lineIterator = new LineIterator(file);
            String taskLabel = "Reading GeoText";
            if (subSet != null) {
                taskLabel = taskLabel + " " + subSet;
            }
            this.progress.startTask(taskLabel, i);
            this.subSet = subSet;
        }

        /**
         * Reads lines until one passes the subset filter and converts it. (The decompiler
         * renamed this method from {@code getNext}; it was merged with its bridge method.)
         *
         * @return The document for the next accepted line. Its file name is the user
         *         identifier, a {@code '#'} and the SHA-1 of the text.
         * @throws AbstractIterator.Finished When no more lines are available.
         * @throws IllegalStateException When a line does not have exactly six columns.
         */
        public LocationDocument m26getNext() throws AbstractIterator.Finished {
            while (lineIterator.hasNext()) {
                progress.increment();
                final String line = (String) lineIterator.next();
                final String[] columns = line.split("\\t");
                if (columns.length != 6) {
                    throw new IllegalStateException("Illegal format: '" + line + "', expected 6 columns, got " + columns.length + ".");
                }
                final String userId = columns[0];
                // The identifier is parsed as a hexadecimal number once the "USER_" marker is removed.
                final int fold = getFold(Long.parseLong(userId.replace("USER_", ""), 16));
                if (subSet != null && !subSet.contains(fold)) {
                    continue; // line belongs to a fold outside the requested subset
                }
                // presumably latitude and longitude, in the order GeoCoordinate.from expects -- TODO confirm
                final double coordFirst = Double.parseDouble(columns[3]);
                final double coordSecond = Double.parseDouble(columns[4]);
                final String text = columns[5];
                final String documentName = userId + "#" + StringHelper.sha1(text);
                return new LocationDocument(documentName, text, (List) null, new ImmutableLocation(-1, "undetermined", LocationType.UNDETERMINED, GeoCoordinate.from(coordFirst, coordSecond), (Long) null));
            }
            throw FINISHED;
        }

        /**
         * Maps a numeric user identifier to a fold number.
         *
         * @param j The numeric user identifier.
         * @return The remainder of {@code j} divided by five, with a remainder of zero
         *         reported as fold 5.
         */
        private static int getFold(long j) {
            final long remainder = j % 5;
            return remainder == 0 ? 5 : (int) remainder;
        }
    }

    /* loaded from: input_file:ws/palladian/extraction/location/scope/evaluation/GeoTextDatasetReader$SubSet.class */
    /**
     * Named subsets of the dataset, each defined by the fold numbers it covers.
     */
    public enum SubSet {
        /** Covers folds 1, 2 and 3. */
        TRAIN(1, 2, 3),
        /** Covers fold 4. */
        DEV(4),
        /** Covers fold 5. */
        TEST(5);

        /** The fold numbers which belong to this subset. */
        int[] folds;

        /**
         * @param iArr The fold numbers which make up the subset.
         */
        SubSet(int... iArr) {
            this.folds = iArr;
        }

        /**
         * Checks whether a fold is part of this subset.
         *
         * @param i The fold number to look up.
         * @return {@code true} if the fold belongs to this subset, {@code false} otherwise.
         */
        public boolean contains(int i) {
            final int[] candidates = this.folds;
            for (int idx = 0; idx < candidates.length; idx++) {
                if (candidates[idx] == i) {
                    return true;
                }
            }
            return false;
        }
    }

    /**
     * Creates a reader for the given dataset file. The file's lines are counted here,
     * once, so that later iterations can report their progress.
     *
     * @param file The dataset file, not {@code null}.
     * @param subSet The subset to restrict the iteration to, or {@code null} to read all entries.
     * @param combination How entries are combined into documents, not {@code null}.
     * @throws NullPointerException If {@code file} or {@code combination} is {@code null}
     *         (raised by {@code Validate.notNull}).
     */
    public GeoTextDatasetReader(File file, SubSet subSet, Combination combination) {
        Validate.notNull(file, "fullTextFile must not be null");
        Validate.notNull(combination, "combination must not be null");
        this.combination = combination;
        this.subSet = subSet;
        this.fullTextFile = file;
        this.numTotalEntries = FileHelper.getNumberOfLines(file);
    }

    /**
     * Creates a fresh iterator over the dataset file.
     *
     * @return An iterator delivering one document per accepted line, or, for
     *         {@link Combination#USER}, one merged document per run of lines of the same user.
     */
    @Override // java.lang.Iterable
    public Iterator<LocationDocument> iterator() {
        final Iterator<LocationDocument> entries = new DatasetIterator(this.fullTextFile, this.numTotalEntries, this.subSet);
        if (this.combination != Combination.USER) {
            return entries;
        }
        // Per-user mode: decorate the line-wise iterator with the merging one.
        return new CombininingIterator(entries);
    }

    /**
     * Describes this reader by its file and, if one is set, its subset.
     *
     * @return A description of the form
     *         {@code GeoTextDatasetScopeIterator [fullTextFile=..., subSet=...]}; the
     *         {@code subSet} part is left out when no subset is set.
     */
    @Override
    public String toString() {
        // NOTE(review): the label differs from the class name (GeoTextDatasetReader);
        // it is kept unchanged because the text may show up in logs or result files.
        String description = "GeoTextDatasetScopeIterator [fullTextFile=" + this.fullTextFile;
        if (this.subSet != null) {
            description += ", subSet=" + this.subSet;
        }
        return description + "]";
    }

    /**
     * Small command line check: counts the per-user documents of a dataset file and
     * prints the count to standard output.
     *
     * <p>The loop which used to follow {@code System.exit(0)} was removed: it could never
     * run, and it would have iterated an iterator already exhausted by the counting.
     *
     * @param strArr Command line arguments; the first one, if present, is the path of the
     *        dataset file. Without arguments the previously hard-coded path is used.
     */
    public static void main(String[] strArr) {
        String path = "/Users/pk/Desktop/GeoText.2010-10-12/full_text.txt";
        if (strArr != null && strArr.length > 0) {
            path = strArr[0];
        }
        GeoTextDatasetReader reader = new GeoTextDatasetReader(new File(path), null, Combination.USER);
        System.out.println(CollectionHelper.count(reader.iterator()));
        // Explicit exit kept from the original code.
        System.exit(0);
    }
}
