package ai.platon.pulsar.boilerpipe.filters.heuristics;

import ai.platon.pulsar.boilerpipe.document.BlockLabels;
import ai.platon.pulsar.boilerpipe.document.TextBlock;
import ai.platon.pulsar.boilerpipe.document.TextDocument;
import ai.platon.pulsar.boilerpipe.filters.TextBlockFilter;
import ai.platon.pulsar.boilerpipe.utils.BoiConstants;
import ai.platon.pulsar.boilerpipe.utils.ProcessingException;
import ai.platon.pulsar.common.DateTimeDetector;
import java.time.Duration;
import java.time.Instant;
import java.time.OffsetDateTime;
import java.time.ZoneId;
import java.util.regex.Pattern;

/* loaded from: input_file:ai/platon/pulsar/boilerpipe/filters/heuristics/ArticleMetadataFilter.class */
/**
 * Text-block filter that scans a document for date/time strings and short
 * metadata-like blocks.
 *
 * <p>While iterating over the blocks of a {@link TextDocument} it:
 * <ul>
 *   <li>advances the document's modified time whenever a block yields a detected
 *       date/time later than the current modified time;</li>
 *   <li>sets the document's publish time from the first qualifying block that
 *       appears before any long block, and labels that block
 *       {@link BlockLabels#ARTICLE_METADATA};</li>
 *   <li>labels blocks with few words that match any of
 *       {@link BoiConstants#PATTERNS_SHORT} as {@link BlockLabels#ARTICLE_METADATA}.</li>
 * </ul>
 */
public class ArticleMetadataFilter implements TextBlockFilter {
    public static final ArticleMetadataFilter INSTANCE = new ArticleMetadataFilter();

    /** Detected dates this many days (or more) older than "now" are rejected. */
    private static final long MAX_DATE_AGE_DAYS = 5 * 365;

    /** Once a block with more characters than this has been seen, no publish time is picked any more. */
    private static final int LONG_BLOCK_TEXT_LENGTH = 200;

    /** A block must have more characters than this to be used as the publish-time source. */
    private static final int PUBLISH_TIME_TEXT_LENGTH = 15;

    /** Only blocks with fewer words than this are tested against the short metadata patterns. */
    private static final int SHORT_BLOCK_WORD_LIMIT = 10;

    private final DateTimeDetector dateTimeDetector = new DateTimeDetector();

    /** Creates a filter whose detector keeps its default configuration. */
    public ArticleMetadataFilter() {
    }

    /**
     * Creates a filter whose detector is configured with the given zone.
     *
     * @param zoneId zone passed to {@code DateTimeDetector.setZoneId}
     */
    public ArticleMetadataFilter(ZoneId zoneId) {
        this.dateTimeDetector.setZoneId(zoneId);
    }

    /**
     * Scans every text block for date/time information and short metadata patterns,
     * updating the document's modified time, publish time and date-time count.
     *
     * @param textDocument the document to inspect and update
     * @return {@code true} if at least one block was marked as content and labeled
     *         {@link BlockLabels#ARTICLE_METADATA}; {@code false} otherwise
     * @throws ProcessingException declared by the {@link TextBlockFilter} contract
     */
    @Override // ai.platon.pulsar.boilerpipe.filters.TextBlockFilter
    public boolean process(TextDocument textDocument) throws ProcessingException {
        Instant now = Instant.now();
        // Counts how many times the modified time was advanced, not how many dates were seen.
        int modifiedTimeUpdates = 0;
        boolean changed = false;
        boolean longBlockSeen = false;
        boolean publishTimeFound = false;

        for (TextBlock textBlock : textDocument.getTextBlocks()) {
            String text = textBlock.getText();
            if (text.length() > LONG_BLOCK_TEXT_LENGTH) {
                // Sticky: this block and every later one is excluded from publish-time detection.
                longBlockSeen = true;
            }

            // Never null: Instant.EPOCH stands in for "nothing usable detected".
            // NOTE(review): assumes getModifiedTime()/getPublishTime() never return null - confirm.
            Instant detected = sniffValidDateTime(text, now);
            if (detected.isAfter(textDocument.getModifiedTime())) {
                modifiedTimeUpdates++;
                textDocument.setModifiedTime(detected);
            }

            if (!longBlockSeen
                    && text.length() > PUBLISH_TIME_TEXT_LENGTH
                    && !publishTimeFound
                    && detected.isAfter(textDocument.getPublishTime())) {
                publishTimeFound = true;
                textDocument.setPublishTime(detected);
                markAsMetadata(textBlock);
                changed = true;
            }

            if (textBlock.getNumWords() < SHORT_BLOCK_WORD_LIMIT && matchesAnyShortPattern(text)) {
                markAsMetadata(textBlock);
                changed = true;
            }
        }

        textDocument.setDateTimeCount(modifiedTimeUpdates);
        return changed;
    }

    /** Marks the block as content and tags it with the article-metadata label. */
    private static void markAsMetadata(TextBlock textBlock) {
        textBlock.setIsContent(true);
        textBlock.addLabel(BlockLabels.ARTICLE_METADATA);
    }

    /** Returns whether any pattern in {@link BoiConstants#PATTERNS_SHORT} is found in the text; stops at the first hit. */
    private static boolean matchesAnyShortPattern(String text) {
        for (Pattern pattern : BoiConstants.PATTERNS_SHORT) {
            if (pattern.matcher(text).find()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Detects a date/time in the given text and returns it if it is not too old.
     *
     * @param str     text to scan
     * @param instant reference "now" used for the age check
     * @return the detected instant, or {@link Instant#EPOCH} when nothing was detected
     *         or the detected instant is {@code MAX_DATE_AGE_DAYS} days or more before {@code instant}
     */
    private Instant sniffValidDateTime(String str, Instant instant) {
        OffsetDateTime detected = this.dateTimeDetector.detectDateTimeLeniently(str);
        if (detected == null) {
            return Instant.EPOCH;
        }
        Instant candidate = detected.toInstant();
        // NOTE(review): the duration is negative for instants after "now", so future dates
        // pass this check - confirm whether that leniency is intended.
        if (Duration.between(candidate, instant).toDays() < MAX_DATE_AGE_DAYS) {
            return candidate;
        }
        return Instant.EPOCH;
    }
}
