package com.xuxueli.crawler;

import com.xuxueli.crawler.conf.XxlCrawlerConf;
import com.xuxueli.crawler.parser.PageParser;
import com.xuxueli.crawler.thread.CrawlerThread;
import com.xuxueli.crawler.util.RegexUtil;
import com.xuxueli.crawler.util.UrlUtil;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/xuxueli/crawler/XxlCrawler.class */
/**
 * Multi-threaded crawler facade: owns the queue of URLs still to visit, the set of URLs already
 * handed out, the crawl settings, and the pool of {@link CrawlerThread} workers.
 *
 * <p>Instances are configured and obtained through {@link Builder}; crawling begins with
 * {@link #start(boolean)}.
 */
public class XxlCrawler {
    private static final Logger logger = LoggerFactory.getLogger(XxlCrawler.class);

    /** Optional white-list regexes; when null or empty every valid URL is accepted. */
    private Set<String> whiteUrlRegexs;
    /** Request parameters exposed through {@link #getParamMap()}; may be null. */
    private volatile Map<String, String> paramMap;
    /** Cookies exposed through {@link #getCookieMap()}; may be null. */
    private volatile Map<String, String> cookieMap;
    /** Parser exposed through {@link #getPageParser()}; must be set before {@link #start(boolean)}. */
    private PageParser pageParser;
    /** URLs accepted by {@link #addUrl(String)} and not yet handed out by {@link #takeUrl()}. */
    private volatile LinkedBlockingQueue<String> unVisitedUrlQueue = new LinkedBlockingQueue<>();
    /** URLs already handed out by {@link #takeUrl()}. */
    private volatile Set<String> visitedUrlSet = Collections.synchronizedSet(new HashSet<String>());
    private volatile boolean allowSpread = true;
    private volatile boolean ifPost = false;
    private volatile String userAgent = XxlCrawlerConf.USER_AGENT_SAMPLE;
    private volatile int timeoutMillis = XxlCrawlerConf.TIMEOUT_MILLIS_DEFAULT;
    private volatile int pauseMillis = 0;
    /** Number of worker threads created by {@link #start(boolean)}; must be within 1..1000. */
    private int threadCount = 1;
    private ExecutorService crawlers = Executors.newCachedThreadPool();
    private List<CrawlerThread> crawlerThreads = new CopyOnWriteArrayList<>();

    /**
     * Fluent configurator for a single {@link XxlCrawler} instance.
     *
     * <p>Every setter writes straight into the one crawler created with the builder, and
     * {@link #build()} returns that same instance each time it is called.
     */
    public static class Builder {
        private XxlCrawler crawler = new XxlCrawler();

        /**
         * Queues every URL of the given set through {@link XxlCrawler#addUrl(String)}.
         *
         * @param set seed URLs; a null or empty set is ignored
         * @return this builder
         */
        public Builder setUrls(Set<String> set) {
            if (set != null && set.size() > 0) {
                for (String url : set) {
                    this.crawler.addUrl(url);
                }
            }
            return this;
        }

        /**
         * Sets the flag returned by {@link XxlCrawler#getAllowSpread()}.
         *
         * @param z new flag value
         * @return this builder
         */
        public Builder setAllowSpread(boolean z) {
            this.crawler.allowSpread = z;
            return this;
        }

        /**
         * Sets the white-list regexes consulted by {@link XxlCrawler#validWhiteUrl(String)}.
         * The set is stored as given, without copying.
         *
         * @param set regexes; null or empty disables white-list filtering
         * @return this builder
         */
        public Builder setWhiteUrlRegexs(Set<String> set) {
            this.crawler.whiteUrlRegexs = set;
            return this;
        }

        // NOTE(review): the four setters below are private, so code outside XxlCrawler cannot
        // call them - confirm whether that is intentional before widening their visibility.

        /** Sets the flag returned by {@link XxlCrawler#getIfPost()}. */
        private Builder setIfPost(boolean z) {
            this.crawler.ifPost = z;
            return this;
        }

        /** Sets the value returned by {@link XxlCrawler#getUserAgent()}. */
        private Builder setUserAgent(String str) {
            this.crawler.userAgent = str;
            return this;
        }

        /** Sets the map returned by {@link XxlCrawler#getParamMap()}; stored without copying. */
        private Builder setParamMap(Map<String, String> map) {
            this.crawler.paramMap = map;
            return this;
        }

        /** Sets the map returned by {@link XxlCrawler#getCookieMap()}; stored without copying. */
        private Builder setCookieMap(Map<String, String> map) {
            this.crawler.cookieMap = map;
            return this;
        }

        /**
         * Sets the value returned by {@link XxlCrawler#getTimeoutMillis()}.
         *
         * @param i timeout in milliseconds
         * @return this builder
         */
        public Builder setTimeoutMillis(int i) {
            this.crawler.timeoutMillis = i;
            return this;
        }

        /**
         * Sets the value returned by {@link XxlCrawler#getPauseMillis()}.
         *
         * @param i pause in milliseconds
         * @return this builder
         */
        public Builder setPauseMillis(int i) {
            this.crawler.pauseMillis = i;
            return this;
        }

        /**
         * Sets how many worker threads {@link XxlCrawler#start(boolean)} creates. The value is
         * not validated here; {@code start} rejects anything outside 1..1000.
         *
         * @param i worker thread count
         * @return this builder
         */
        public Builder setThreadCount(int i) {
            this.crawler.threadCount = i;
            return this;
        }

        /**
         * Sets the parser returned by {@link XxlCrawler#getPageParser()}.
         *
         * @param pageParser parser to use; {@link XxlCrawler#start(boolean)} fails when it is null
         * @return this builder
         */
        public Builder setPageParser(PageParser pageParser) {
            this.crawler.pageParser = pageParser;
            return this;
        }

        /**
         * Returns the crawler configured so far.
         *
         * @return the single crawler instance owned by this builder
         */
        public XxlCrawler build() {
            return this.crawler;
        }
    }

    /** @return the configured "ifPost" flag (default {@code false}) */
    public boolean getIfPost() {
        return this.ifPost;
    }

    /** @return the configured "allowSpread" flag (default {@code true}) */
    public boolean getAllowSpread() {
        return this.allowSpread;
    }

    /** @return the configured user agent (default {@code XxlCrawlerConf.USER_AGENT_SAMPLE}) */
    public String getUserAgent() {
        return this.userAgent;
    }

    /** @return the configured parameter map, or null when none was set */
    public Map<String, String> getParamMap() {
        return this.paramMap;
    }

    /** @return the configured cookie map, or null when none was set */
    public Map<String, String> getCookieMap() {
        return this.cookieMap;
    }

    /** @return the configured page parser, or null when none was set */
    public PageParser getPageParser() {
        return this.pageParser;
    }

    /** @return the configured timeout (default {@code XxlCrawlerConf.TIMEOUT_MILLIS_DEFAULT}) */
    public int getTimeoutMillis() {
        return this.timeoutMillis;
    }

    /** @return the configured pause in milliseconds (default 0) */
    public int getPauseMillis() {
        return this.pauseMillis;
    }

    /**
     * Tells whether a URL passes the white-list.
     *
     * @param str URL to check
     * @return {@code false} when {@code UrlUtil.isUrl} rejects the value; {@code true} when no
     *     white-list regexes are configured; otherwise whether {@code RegexUtil.matches} reports
     *     a match for at least one configured regex
     */
    public boolean validWhiteUrl(String str) {
        if (!UrlUtil.isUrl(str)) {
            return false;
        }
        if (this.whiteUrlRegexs == null || this.whiteUrlRegexs.size() <= 0) {
            return true;
        }
        for (String regex : this.whiteUrlRegexs) {
            if (RegexUtil.matches(regex, str)) {
                // One match is sufficient; no need to evaluate the remaining regexes.
                return true;
            }
        }
        return false;
    }

    /**
     * Queues a URL for crawling unless it is invalid, already handed out, or already queued.
     *
     * <p>The duplicate checks and the insertion are separate steps and are not performed
     * atomically.
     *
     * @param str URL to queue
     * @return {@code true} when the URL was added to the un-visited queue, {@code false} otherwise
     */
    public boolean addUrl(String str) {
        if (!UrlUtil.isUrl(str)) {
            logger.debug(">>>>>>>>>>> xxl-crawler addUrl fail, link not valid: {}", str);
            return false;
        }
        if (this.visitedUrlSet.contains(str)) {
            // Already handed out by takeUrl().
            logger.debug(">>>>>>>>>>> xxl-crawler addUrl fail, link visited: {}", str);
            return false;
        }
        if (this.unVisitedUrlQueue.contains(str)) {
            // Already waiting in the queue.
            logger.debug(">>>>>>>>>>> xxl-crawler addUrl fail, link repeate: {}", str);
            return false;
        }
        this.unVisitedUrlQueue.add(str);
        logger.info(">>>>>>>>>>> xxl-crawler addUrl success, link: {}", str);
        return true;
    }

    /**
     * Removes the next URL from the un-visited queue, blocking until one is available, and
     * records it in the visited set.
     *
     * @return the next URL to crawl
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    public String takeUrl() throws InterruptedException {
        // BlockingQueue.take() never returns null, so no null check is needed.
        String url = this.unVisitedUrlQueue.take();
        this.visitedUrlSet.add(url);
        return url;
    }

    /**
     * Validates the configuration, creates {@code threadCount} {@link CrawlerThread} workers and
     * submits them to the executor.
     *
     * @param z when {@code true}, block until the executor has terminated (logging every five
     *     seconds while waiting); when {@code false}, return right after submitting the workers
     * @throws RuntimeException if no URL is queued, if {@code threadCount} is outside 1..1000,
     *     or if no page parser is configured
     */
    public void start(boolean z) {
        if (this.unVisitedUrlQueue.size() < 1) {
            throw new RuntimeException("xxl crawler indexUrl can not be empty.");
        }
        if (this.threadCount < 1 || this.threadCount > 1000) {
            throw new RuntimeException("xxl crawler threadCount invalid, threadCount : " + this.threadCount);
        }
        if (this.pageParser == null) {
            throw new RuntimeException("xxl crawler pageParser can not be null.");
        }
        logger.info(">>>>>>>>>>> xxl crawler start ...");
        for (int i = 0; i < this.threadCount; i++) {
            this.crawlerThreads.add(new CrawlerThread(this));
        }
        for (CrawlerThread crawlerThread : this.crawlerThreads) {
            this.crawlers.execute(crawlerThread);
        }
        // Accept no further tasks; the workers submitted above keep running.
        this.crawlers.shutdown();
        if (z) {
            // The try must enclose the whole loop: awaitTermination is the call that throws.
            try {
                while (!this.crawlers.awaitTermination(5L, TimeUnit.SECONDS)) {
                    logger.info(">>>>>>>>>>> xxl crawler still running ...");
                }
            } catch (InterruptedException e) {
                logger.error(e.getMessage(), e);
                // Preserve the interrupt status for the caller.
                Thread.currentThread().interrupt();
            }
        }
    }

    /**
     * Stops the crawler when the un-visited queue is empty and no worker reports
     * {@code isRunning()}; otherwise does nothing.
     */
    public void tryFinish() {
        boolean anyRunning = false;
        for (CrawlerThread crawlerThread : this.crawlerThreads) {
            if (crawlerThread.isRunning()) {
                anyRunning = true;
                break;
            }
        }
        if (this.unVisitedUrlQueue.size() == 0 && !anyRunning) {
            logger.info(">>>>>>>>>>> xxl crawler is finished.");
            stop();
        }
    }

    /**
     * Calls {@code toStop()} on every worker and then shuts the executor down immediately via
     * {@link ExecutorService#shutdownNow()}.
     */
    public void stop() {
        for (CrawlerThread crawlerThread : this.crawlerThreads) {
            crawlerThread.toStop();
        }
        this.crawlers.shutdownNow();
        logger.info(">>>>>>>>>>> xxl crawler stop.");
    }
}
