package com.yahoo.language.process;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

/* loaded from: input_file:com/yahoo/language/process/GramSplitter.class */
public class GramSplitter {
    private final CharacterClasses characterClasses;

    /* loaded from: input_file:com/yahoo/language/process/GramSplitter$Gram.class */
    /**
     * An immutable reference to a gram inside some text: where it starts and how many
     * code points it covers. The gram text itself is not stored; use one of the
     * {@code extractFrom} methods with the text the gram was produced from.
     */
    public static final class Gram {
        /** Index into the text at which this gram starts (used as a {@code String} index when extracting). */
        private final int start;
        /** Length of this gram, counted in Unicode code points. */
        private final int codePointCount;

        /**
         * Creates a gram reference.
         *
         * @param i  the start index of the gram in the text
         * @param i2 the number of code points in the gram
         */
        public Gram(int i, int i2) {
            start = i;
            codePointCount = i2;
        }

        /** Returns the start index of this gram. */
        public int getStart() {
            return start;
        }

        /** Returns the number of code points in this gram. */
        public int getCodePointCount() {
            return codePointCount;
        }

        /**
         * Returns the text of this gram, taken from the given string.
         *
         * @param str the text this gram refers into
         * @return the gram text
         */
        public String extractFrom(String str) {
            UnicodeString wrapped = new UnicodeString(str);
            return extractFrom(wrapped);
        }

        /**
         * Returns the text of this gram, taken from the given unicode string.
         *
         * @param unicodeString the text this gram refers into
         * @return the gram text
         */
        public String extractFrom(UnicodeString unicodeString) {
            UnicodeString piece = unicodeString.substring(start, codePointCount);
            return piece.toString();
        }

        /** Two grams are equal when they have the same start and the same code point count. */
        @Override
        public boolean equals(Object obj) {
            if (obj == this) {
                return true;
            }
            if (obj instanceof Gram) {
                Gram other = (Gram) obj;
                return start == other.start && codePointCount == other.codePointCount;
            }
            return false;
        }

        @Override
        public int hashCode() {
            int result = 31 * start;
            result += codePointCount;
            return result;
        }
    }

    /* loaded from: input_file:com/yahoo/language/process/GramSplitter$GramSplitterIterator.class */
    /**
     * A read-only iterator which lazily produces the grams of an input string.
     *
     * <p>Code points for which {@code CharacterClasses.isLetterOrDigit} returns false act as
     * separators and are never part of a gram. Within a run of word characters, grams of
     * {@code n} code points are produced at each successive position. A run shorter than
     * {@code n} produces a single shorter gram covering the whole run.
     */
    public static class GramSplitterIterator implements Iterator<Gram> {
        private final CharacterClasses characterClasses;
        private final UnicodeString input;
        /** The requested gram size, in code points. */
        private final int n;
        /** Index into the input at which the search for the next gram starts. */
        private int i = 0;
        /** True until a gram has been produced from the current run of word characters. */
        private boolean isFirstAfterSeparator = true;
        /** Gram found by {@link #hasNext()} but not yet handed out by {@link #next()}, or null. */
        private Gram nextGram = null;

        /**
         * Creates an iterator over the grams of a string.
         *
         * @param str              the text to split
         * @param i                the gram size, in code points
         * @param characterClasses decides which code points are word characters
         */
        public GramSplitterIterator(String str, int i, CharacterClasses characterClasses) {
            this.input = new UnicodeString(str);
            this.n = i;
            this.characterClasses = characterClasses;
        }

        /** Returns whether there is another gram, looking it up and caching it if necessary. */
        @Override
        public boolean hasNext() {
            if (this.nextGram != null) {
                return true;
            }
            this.nextGram = findNext();
            return this.nextGram != null;
        }

        /**
         * Returns the next gram.
         *
         * @throws NoSuchElementException if there are no more grams
         */
        @Override
        public Gram next() {
            Gram gram = this.nextGram;
            if (gram == null) {
                gram = findNext();
            }
            if (gram == null) {
                throw new NoSuchElementException("No next gram at position " + this.i);
            }
            this.nextGram = null;
            return gram;
        }

        /**
         * Advances through the input and returns the next gram, or null when the input is exhausted.
         */
        private Gram findNext() {
            // Skip separators until the next word character.
            while (this.i < this.input.length()
                    && !this.characterClasses.isLetterOrDigit(this.input.codePointAt(this.i))) {
                this.i = this.input.next(this.i);
                this.isFirstAfterSeparator = true;
            }
            if (this.i >= this.input.length()) {
                return null;
            }

            // Take up to n code points, then cut at the first separator inside that window.
            UnicodeString candidate = this.input.substring(this.i, this.n);
            int nonWordIndex = indexOfNonWordCodepoint(candidate);
            if (nonWordIndex == 0) {
                // Cannot happen: the loop above stopped on a word character.
                throw new RuntimeException("Programming error");
            }
            if (nonWordIndex > 0) {
                candidate = new UnicodeString(candidate.toString().substring(0, nonWordIndex));
            }

            int candidateLength = candidate.codePointCount();
            // Emit a full-length gram, or a short one if it is the only gram of its run.
            if (candidateLength == this.n || this.isFirstAfterSeparator) {
                Gram gram = new Gram(this.i, candidateLength);
                this.i = this.input.next(this.i);
                this.isFirstAfterSeparator = false;
                return gram;
            }

            // A short tail of a run that already produced grams: skip it and the code point after it.
            this.i = this.input.skip(candidateLength + 1, this.i);
            this.isFirstAfterSeparator = true;
            return findNext();
        }

        /**
         * Returns the index of the first code point in the given text which is not a letter or digit,
         * or -1 if there is none.
         */
        private int indexOfNonWordCodepoint(UnicodeString text) {
            for (int index = 0; index < text.length(); index = text.next(index)) {
                if (!this.characterClasses.isLetterOrDigit(text.codePointAt(index))) {
                    return index;
                }
            }
            return -1;
        }

        /** Not supported.
         *
         * @throws UnsupportedOperationException always
         */
        @Override
        public void remove() {
            throw new UnsupportedOperationException("This iterator is read only");
        }

        /**
         * Consumes the remaining grams of this iterator and returns their text.
         *
         * @return an unmodifiable list of the remaining grams, as strings, in iteration order
         */
        public List<String> toExtractedList() {
            List<String> grams = new ArrayList<String>();
            while (hasNext()) {
                grams.add(next().extractFrom(this.input));
            }
            return Collections.unmodifiableList(grams);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:com/yahoo/language/process/GramSplitter$UnicodeString.class */
    /**
     * A thin wrapper around a {@code String} with helpers for moving through it by code point.
     * Positions are {@code String} (UTF-16 char) indices; counts are numbers of code points.
     */
    public static class UnicodeString {
        private final String s;

        public UnicodeString(String str) {
            this.s = str;
        }

        /**
         * Returns the substring starting at the given index and spanning the given number of
         * code points, or fewer if the string ends first.
         *
         * @param i  the char index to start at
         * @param i2 the maximum number of code points to include
         * @return the substring, as a new unicode string
         */
        public UnicodeString substring(int i, int i2) {
            int remainingChars = this.s.length() - i;
            int codePoints;
            // Fast path: with at least two chars available per requested code point the request
            // always fits, so the rest of the string need not be counted. The product is computed
            // as a long because i2 * 2 overflows int for counts above 2^30, which would wrongly
            // take this path and make offsetByCodePoints throw.
            if ((long) i2 * 2 <= remainingChars) {
                codePoints = i2;
            } else {
                codePoints = Math.min(i2, this.s.codePointCount(i, this.s.length()));
            }
            int end = this.s.offsetByCodePoints(i, codePoints);
            return new UnicodeString(this.s.substring(i, end));
        }

        /**
         * Returns the index reached by advancing the given number of code points from a start index.
         * The result may be past the end of the string.
         *
         * @param i  the number of code points to advance
         * @param i2 the char index to start from
         */
        public int skip(int i, int i2) {
            int index = i2;
            for (int skipped = 0; skipped < i; skipped++) {
                index = next(index);
                if (index > this.s.length()) {
                    break;
                }
            }
            return index;
        }

        /**
         * Returns the index following the code point at the given index.
         * The result may be past the end of the string.
         */
        public int next(int i) {
            int following = i + 1;
            // Step over the second half of a surrogate pair.
            if (following < this.s.length() && Character.isLowSurrogate(this.s.charAt(following))) {
                following++;
            }
            return following;
        }

        /** Returns the length of the wrapped string, in chars. */
        public int length() {
            return this.s.length();
        }

        /** Returns the number of code points in the wrapped string. */
        public int codePointCount() {
            return this.s.codePointCount(0, this.s.length());
        }

        /** Returns the code point at the given char index. */
        public int codePointAt(int i) {
            return this.s.codePointAt(i);
        }

        /** Returns the wrapped string. */
        @Override
        public String toString() {
            return this.s;
        }
    }

    /**
     * Creates a gram splitter.
     *
     * @param characterClasses stored and passed on to every iterator created by {@code split},
     *                         which uses its {@code isLetterOrDigit} to tell word characters
     *                         from separators
     */
    public GramSplitter(CharacterClasses characterClasses) {
        this.characterClasses = characterClasses;
    }

    /**
     * Returns an iterator over the grams of the given text.
     *
     * @param str the text to split
     * @param i   the gram size, which must be at least 1
     * @return a new iterator over the grams of {@code str}
     * @throws NullPointerException     if {@code str} is null
     * @throws IllegalArgumentException if {@code i} is smaller than 1
     */
    public GramSplitterIterator split(String str, int i) {
        java.util.Objects.requireNonNull(str, "input cannot be null");
        if (i >= 1) {
            return new GramSplitterIterator(str, i, this.characterClasses);
        }
        throw new IllegalArgumentException("n (gram size) cannot be smaller than 1, was " + i);
    }
}
