package ai.platon.pulsar.boilerpipe.filters.simple;

import ai.platon.pulsar.boilerpipe.document.TextBlock;
import ai.platon.pulsar.boilerpipe.document.TextDocument;
import ai.platon.pulsar.boilerpipe.filters.TextBlockFilter;
import ai.platon.pulsar.boilerpipe.utils.ProcessingException;
import ai.platon.pulsar.boilerpipe.utils.ScentUtils;
import com.google.common.collect.ListMultimap;
import java.util.Map;
import java.util.Objects;

/* loaded from: input_file:ai/platon/pulsar/boilerpipe/filters/simple/RegexFieldExtractorFilter.class */
/**
 * A {@link TextBlockFilter} that runs a set of regex field rules over the text of every
 * content block of a {@link TextDocument} and stores each extracted key/value pair on the
 * document via {@code TextDocument.setField}.
 *
 * <p>Blocks whose text is longer than {@code maxTextLength} are not scanned in full: only the
 * first {@code maxTextLength} characters and the last {@code maxTextLength} characters are
 * passed to the extractor.
 */
public final class RegexFieldExtractorFilter implements TextBlockFilter {
    // Passed through to ScentUtils.extract; presumably the regex capture-group indices that
    // hold the field name and the field value — confirm against ScentUtils.
    private final int keyGroup = 1;
    private final int valueGroup = 2;
    private final int maxTextLength;
    private final ListMultimap<String, String> regexFieldRules;

    /**
     * Creates a filter for the given rules.
     *
     * @param listMultimap the rules handed unchanged to {@code ScentUtils.extract}
     * @param i the maximum number of characters scanned at each end of a content block; a
     *     negative value makes {@link #process} fail with
     *     {@link StringIndexOutOfBoundsException} on the first content block
     */
    public RegexFieldExtractorFilter(ListMultimap<String, String> listMultimap, int i) {
        this.regexFieldRules = listMultimap;
        this.maxTextLength = i;
    }

    /**
     * Applies the field rules to every content block of the document.
     *
     * @param textDocument the document whose content blocks are scanned and whose fields are set
     * @return {@code true} if at least one field was extracted from any content block
     * @throws ProcessingException declared by the filter contract; not thrown directly here
     */
    @Override // ai.platon.pulsar.boilerpipe.filters.TextBlockFilter
    public boolean process(TextDocument textDocument) throws ProcessingException {
        boolean changed = false;
        for (TextBlock textBlock : textDocument.getTextBlocks()) {
            if (!textBlock.isContent()) {
                continue;
            }
            String text = textBlock.getText();
            if (text.length() <= this.maxTextLength) {
                // Short enough to scan in one pass (including the exact-length case, which
                // would otherwise scan the identical string twice).
                changed |= tryExtractText(textDocument, text);
            } else {
                // Scan only the head and the tail of an over-long block. Both extractions
                // must run, so the results are accumulated without short-circuiting.
                String head = text.substring(0, this.maxTextLength);
                String tail = text.substring(text.length() - this.maxTextLength);
                changed |= tryExtractText(textDocument, head);
                changed |= tryExtractText(textDocument, tail);
            }
        }
        // Accumulated across all blocks: a later block with no matches must not hide the
        // fact that an earlier block already modified the document.
        return changed;
    }

    /**
     * Runs the rules over {@code str} and copies every extracted entry onto the document.
     *
     * @return {@code true} if the extractor produced at least one entry
     */
    private boolean tryExtractText(TextDocument textDocument, String str) {
        Map<String, String> extract = ScentUtils.extract(str, this.regexFieldRules, this.keyGroup, this.valueGroup);
        Objects.requireNonNull(textDocument);
        extract.forEach(textDocument::setField);
        return !extract.isEmpty();
    }
}
