/*
 * Decompiled with CFR 0.152.
 */
package net.sf.regain.crawler;

import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashSet;
import net.sf.regain.RegainException;
import net.sf.regain.RegainToolkit;
import net.sf.regain.crawler.config.StartUrl;
import net.sf.regain.crawler.config.UrlMatcher;
import net.sf.regain.crawler.config.UrlMatcherResult;
import net.sf.regain.crawler.config.WhiteListEntry;
import org.apache.log4j.Logger;

/**
 * Decides whether URLs are accepted by the configured white list and black list and
 * keeps track of the URLs that were already accepted or ignored.
 * <p>
 * URLs starting with {@code file://} are never stored in the accepted or ignored sets;
 * they are re-evaluated on demand instead (see {@link #shouldBeKeptInIndex(String)}).
 */
public class UrlChecker {
    /** Prefix identifying URLs that point into the local file system. */
    private static final String FILE_PROTOCOL_PREFIX = "file://";

    private static final Logger mLog = Logger.getLogger(UrlChecker.class);

    /** All non-file URLs passed to {@link #setAccepted(String)}. */
    private final HashSet<String> mAcceptedUrlSet = new HashSet<String>();
    /** All non-file URLs passed to {@link #setIgnored(String)}. */
    private final HashSet<String> mIgnoredUrlSet = new HashSet<String>();
    /** Number of {@link #setIgnored(String)} calls, file URLs included. */
    private int mIgnoredCount = 0;
    private final WhiteListEntry[] mWhiteListEntryArr;
    private final UrlMatcher[] mBlackListArr;

    /**
     * Creates a new checker.
     *
     * @param whiteList the white list entries a URL must match to be accepted
     * @param blackList the matchers that veto a URL accepted by the white list
     */
    public UrlChecker(WhiteListEntry[] whiteList, UrlMatcher[] blackList) {
        this.mWhiteListEntryArr = whiteList;
        this.mBlackListArr = blackList;
    }

    /**
     * Removes every {@code file://} start URL that is already covered by another start URL.
     * <p>
     * A URL counts as covered when the other URL is identical to it or is a prefix of it
     * that ends on a path boundary, so {@code file:///data/foo} covers
     * {@code file:///data/foo/bar} but not {@code file:///data/foobar}. Of several
     * identical URLs exactly one is kept. Non-file URLs are never removed.
     *
     * @param urlArr the start URLs to normalize; this array is not modified
     * @return {@code urlArr} itself when nothing had to be removed, otherwise a new array
     *         holding the remaining start URLs in their original order
     */
    public StartUrl[] normalizeStartUrls(StartUrl[] urlArr) {
        // Work on a copy so the caller's array never ends up containing null entries.
        StartUrl[] remaining = urlArr.clone();
        boolean foundPrefix = false;
        for (int i = 0; i < remaining.length; ++i) {
            String currUrl = remaining[i].getUrl();
            if (!currUrl.startsWith(FILE_PROTOCOL_PREFIX)) {
                continue;
            }
            for (int j = 0; j < remaining.length; ++j) {
                // Entries that were already dropped (null) must not cover anything,
                // otherwise two identical URLs would eliminate each other.
                if (i == j || remaining[j] == null) {
                    continue;
                }
                String coveringUrl = remaining[j].getUrl();
                if (!isCoveredBy(currUrl, coveringUrl)) {
                    continue;
                }
                mLog.info("Ignoring start URL '" + currUrl + "', because it is "
                        + "covered by start URL '" + coveringUrl + "'");
                remaining[i] = null;
                foundPrefix = true;
                break;
            }
        }
        if (!foundPrefix) {
            return urlArr;
        }
        ArrayList<StartUrl> list = new ArrayList<StartUrl>(remaining.length);
        for (StartUrl startUrl : remaining) {
            if (startUrl != null) {
                list.add(startUrl);
            }
        }
        return list.toArray(new StartUrl[list.size()]);
    }

    /**
     * Checks whether {@code parentUrl} is a prefix of {@code url} that ends on a path
     * boundary, i.e. both are equal, the prefix ends with a slash, or the character
     * following the prefix in {@code url} is a slash.
     */
    private static boolean isCoveredBy(String url, String parentUrl) {
        if (!url.startsWith(parentUrl)) {
            return false;
        }
        return url.length() == parentUrl.length()
                || parentUrl.endsWith("/")
                || url.charAt(parentUrl.length()) == '/';
    }

    /**
     * Checks the path of a URL for repeated path parts.
     * <p>
     * The path is split at {@code /}. The URL is reported as cyclic when at least one part
     * occurs more than once and the number of distinct parts is at most
     * {@code (number of parts - maxCycles)}.
     *
     * @param url       the URL to check
     * @param maxCycles the number of repeated parts from which on the URL is rejected
     * @return {@code false} if the path repeats parts as described above; {@code true}
     *         otherwise, including when the URL is malformed or its path is shorter than
     *         two characters
     */
    public boolean hasNoCycles(String url, int maxCycles) {
        String path;
        try {
            path = new URL(url).getPath();
        }
        catch (MalformedURLException ex) {
            // A URL that cannot be parsed cannot be analysed; it is treated as cycle-free.
            return true;
        }
        if (path.length() < 2) {
            return true;
        }
        String[] parts = RegainToolkit.splitString(path, "/");
        HashSet<String> uniqueParts = new HashSet<String>();
        for (String part : parts) {
            if (mLog.isDebugEnabled()) {
                mLog.debug("Add part: '" + part + "'");
            }
            uniqueParts.add(part);
        }
        if (mLog.isDebugEnabled()) {
            mLog.debug("uniqueParts.size(): " + uniqueParts.size());
            mLog.debug("mParts.length: " + parts.length);
            mLog.debug("maxCycles: " + maxCycles);
        }
        boolean hasRepeatedParts = uniqueParts.size() != parts.length;
        boolean reachesCycleLimit = uniqueParts.size() <= parts.length - maxCycles;
        return !(hasRepeatedParts && reachesCycleLimit);
    }

    /**
     * Evaluates a URL against the white list and the black list.
     * <p>
     * The parse/index flags are taken from the first white list entry that should be
     * updated and whose matcher matches the URL. If any black list matcher matches as
     * well, both flags are reset to {@code false}.
     *
     * @param url the URL to check
     * @return a result whose {@code getShouldBeParsed()} / {@code getShouldBeIndexed()}
     *         flags tell how the URL has to be treated; both are {@code false} when the
     *         URL is not accepted
     */
    public UrlMatcher isUrlAccepted(String url) {
        UrlMatcherResult urlMatchResult = new UrlMatcherResult(false, false);
        boolean debug = mLog.isDebugEnabled();
        if (debug) {
            mLog.debug("isUrlAccepted for url: " + url);
        }
        for (int i = 0; i < this.mWhiteListEntryArr.length; ++i) {
            WhiteListEntry entry = this.mWhiteListEntryArr[i];
            if (!entry.shouldBeUpdated()) {
                continue;
            }
            UrlMatcher matcher = entry.getUrlMatcher();
            if (!matcher.matches(url)) {
                continue;
            }
            urlMatchResult.setShouldBeParsed(matcher.getShouldBeParsed());
            urlMatchResult.setShouldBeIndexed(matcher.getShouldBeIndexed());
            if (debug) {
                mLog.debug("Whitelist matches for url: " + url);
            }
            break;
        }
        if (urlMatchResult.getShouldBeParsed() || urlMatchResult.getShouldBeIndexed()) {
            for (int i = 0; i < this.mBlackListArr.length; ++i) {
                if (!this.mBlackListArr[i].matches(url)) {
                    continue;
                }
                urlMatchResult.setShouldBeParsed(false);
                urlMatchResult.setShouldBeIndexed(false);
                if (debug) {
                    mLog.debug("Blacklist matches for url: " + url);
                }
                // One black list hit is final; further matchers cannot change the result.
                break;
            }
        }
        return urlMatchResult;
    }

    /**
     * Collects the URL matchers of all white list entries that should not be updated.
     *
     * @return the matchers of those entries, in white list order; empty if there are none
     */
    public UrlMatcher[] createPreserveUrlMatcherArr() {
        ArrayList<UrlMatcher> list = new ArrayList<UrlMatcher>();
        for (WhiteListEntry entry : this.mWhiteListEntryArr) {
            if (!entry.shouldBeUpdated()) {
                list.add(entry.getUrlMatcher());
            }
        }
        return list.toArray(new UrlMatcher[list.size()]);
    }

    /**
     * Tells whether a URL was already registered via {@link #setAccepted(String)}.
     *
     * @param url the URL to check
     * @return {@code true} if the URL is in the accepted set; always {@code false} for
     *         {@code file://} URLs, which are never stored
     */
    public boolean wasAlreadyAccepted(String url) {
        if (url.startsWith(FILE_PROTOCOL_PREFIX)) {
            return false;
        }
        return this.getmAcceptedUrlSet().contains(url);
    }

    /**
     * Tells whether a URL was already registered via {@link #setIgnored(String)}.
     *
     * @param url the URL to check
     * @return {@code true} if the URL is in the ignored set; always {@code false} for
     *         {@code file://} URLs, which are never stored
     */
    public boolean wasAlreadyIgnored(String url) {
        if (url.startsWith(FILE_PROTOCOL_PREFIX)) {
            return false;
        }
        return this.mIgnoredUrlSet.contains(url);
    }

    /**
     * Decides whether the index entry of a URL should be kept.
     * <p>
     * A {@code file://} URL is kept when {@link #isUrlAccepted(String)} marks it as to be
     * indexed and the file it refers to exists. Any other URL is kept when it is in the
     * accepted set.
     *
     * @param url the URL to check
     * @return {@code true} if the entry should be kept
     * @throws RegainException declared because of {@code RegainToolkit.urlToFile}
     */
    public boolean shouldBeKeptInIndex(String url) throws RegainException {
        if (!url.startsWith(FILE_PROTOCOL_PREFIX)) {
            return this.getmAcceptedUrlSet().contains(url);
        }
        // File URLs are not recorded in the accepted set, so they are re-evaluated here.
        UrlMatcher urlMatch = this.isUrlAccepted(url);
        if (!urlMatch.getShouldBeIndexed()) {
            return false;
        }
        File file = RegainToolkit.urlToFile(url);
        return file.exists();
    }

    /**
     * Records a URL as accepted. {@code file://} URLs are not recorded.
     *
     * @param url the accepted URL
     */
    public void setAccepted(String url) {
        if (!url.startsWith(FILE_PROTOCOL_PREFIX)) {
            this.getmAcceptedUrlSet().add(url);
        }
    }

    /**
     * Records a URL as ignored and increments the ignored counter. The counter is
     * incremented for every call, while {@code file://} URLs are not added to the set.
     *
     * @param url the ignored URL
     */
    public void setIgnored(String url) {
        ++this.mIgnoredCount;
        if (!url.startsWith(FILE_PROTOCOL_PREFIX)) {
            this.mIgnoredUrlSet.add(url);
        }
    }

    /**
     * Gets the number of {@link #setIgnored(String)} calls made so far.
     *
     * @return the number of ignored URLs
     */
    public int getIgnoredCount() {
        return this.mIgnoredCount;
    }

    /**
     * Gets the set of accepted non-file URLs.
     *
     * @return the live internal set (not a copy); changes to it affect this checker
     */
    public HashSet<String> getmAcceptedUrlSet() {
        return this.mAcceptedUrlSet;
    }
}

