/*
 * Decompiled with CFR 0.152.
 */
package net.sf.regain.crawler;

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.Authenticator;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.PasswordAuthentication;
import java.net.URL;
import java.net.URLConnection;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.regain.RegainException;
import net.sf.regain.RegainToolkit;
import net.sf.regain.crawler.HttpStreamException;
import net.sf.regain.crawler.RedirectException;
import net.sf.regain.crawler.access.AccountPasswordEntry;
import net.sf.regain.crawler.config.CrawlerConfig;
import net.sf.regain.util.io.HtmlEntities;
import org.apache.log4j.Logger;

public class CrawlerToolkit {
    /** Logger shared by all helpers of this class. */
    private static final Logger mLog = Logger.getLogger(CrawlerToolkit.class);

    /**
     * Matches a URL and captures its left part in group 1: a run of word characters, the literal
     * {@code ://}, then any word characters, dots, colons, digits and hyphens, followed by one
     * character that is not a slash. Everything after that is matched but not captured.
     */
    private static final Pattern urlPatternLeft = Pattern.compile("([\\w]*://[\\w\\.:\\d-]*[^/]).*");

    /**
     * Assembles a URL prefix of the form {@code protocol://host[:port]/} from the given parts.
     * <p>
     * {@code parts[0]} becomes the protocol, {@code parts[1]} up to {@code parts[length - 3]} are
     * joined with dots, and {@code parts[length - 2]} is attached with a colon when it consists
     * solely of digits (or is empty) and with a dot otherwise. The last element is not used.
     *
     * @param parts the parts to assemble; at least four are required
     * @return the assembled URL ending in a slash, or an empty string (after logging an error)
     *         when fewer than four parts were given
     */
    public static String createURLFromProps(String[] parts) {
        final int count = parts.length;
        if (count < 4) {
            mLog.error("This is not a valid authentication entry: " + Arrays.toString(parts));
            return "";
        }

        StringBuilder url = new StringBuilder(32);
        url.append(parts[0]).append("://");

        // Join the leading host labels, putting a dot between (not after) them.
        final int lastLabelIdx = count - 3;
        for (int idx = 1; idx <= lastLabelIdx; idx++) {
            if (idx > 1) {
                url.append(".");
            }
            url.append(parts[idx]);
        }

        // A purely numeric second-to-last part is attached as a port, anything else as a label.
        String secondToLast = parts[count - 2];
        url.append(Pattern.matches("^\\d*$", secondToLast) ? ":" : ".");
        url.append(secondToLast).append("/");
        return url.toString();
    }

    /**
     * Joins the elements of a command array into one string, separated by single spaces.
     *
     * @param commandArr the command and its arguments
     * @return the space-separated command line; empty for an empty array
     */
    private static String toCommand(String[] commandArr) {
        StringBuffer joined = new StringBuffer();
        String separator = "";
        for (String part : commandArr) {
            joined.append(separator).append(part);
            separator = " ";
        }
        return joined.toString();
    }

    /**
     * Runs a native command, waits for it to finish and returns the lines it wrote to its
     * standard output.
     * <p>
     * NOTE(review): only standard output is read. A command that writes a large amount to
     * standard error may block because that stream is not drained while the command runs.
     *
     * @param commandArr the command followed by its arguments
     * @return the lines of the command's standard output, in order
     * @throws RegainException if the command could not be started, its output could not be read,
     *         waiting for it was interrupted, or it exited with a non-zero exit code
     */
    public static String[] executeNativeCommand(String[] commandArr) throws RegainException {
        Process proc = null;
        InputStream in = null;
        try {
            long startTime = -1L;
            if (mLog.isDebugEnabled()) {
                startTime = System.currentTimeMillis();
            }

            proc = Runtime.getRuntime().exec(commandArr);
            in = proc.getInputStream();
            BufferedReader reader = new BufferedReader(new InputStreamReader(in));
            ArrayList<String> list = new ArrayList<String>();
            String line;
            while ((line = reader.readLine()) != null) {
                if (mLog.isDebugEnabled()) {
                    mLog.debug("  Got line: '" + line + "'");
                }
                list.add(line);
            }

            int exitCode;
            try {
                exitCode = proc.waitFor();
            }
            catch (InterruptedException exc) {
                // Restore the interrupt flag so callers further up can still react to it.
                Thread.currentThread().interrupt();
                throw new RegainException("Waiting for termination of process failed: " + commandArr[0], exc);
            }

            if (mLog.isDebugEnabled()) {
                double duration = (double)(System.currentTimeMillis() - startTime) / 1000.0;
                NumberFormat format = NumberFormat.getInstance();
                format.setMinimumFractionDigits(2);
                format.setMaximumFractionDigits(2);
                mLog.debug("..." + CrawlerToolkit.toCommand(commandArr) + " finished (" + format.format(duration) + " secs)");
            }
            if (exitCode != 0) {
                throw new RegainException("Native command exited with exit code " + exitCode + ": '" + CrawlerToolkit.toCommand(commandArr) + "'");
            }
            return list.toArray(new String[list.size()]);
        }
        catch (IOException exc) {
            throw new RegainException("Executing native command failed: '" + CrawlerToolkit.toCommand(commandArr) + "'", exc);
        }
        finally {
            closeQuietly(in);
            if (proc != null) {
                // Release the remaining pipes of the child process and make sure it does not
                // outlive a failed call. destroy() has no effect on a process that already ended.
                closeQuietly(proc.getErrorStream());
                closeQuietly(proc.getOutputStream());
                proc.destroy();
            }
        }
    }

    /** Closes the given resource if it is not null, ignoring any failure to close it. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        }
        catch (IOException ignored) {
            // Nothing sensible can be done when cleanup itself fails.
        }
    }

    /**
     * Opens a connection to the given URL and returns its input stream.
     * <p>
     * If the URL carries credentials ({@code user:password@}) they are installed as the default
     * {@link Authenticator}, which affects all subsequent connections of this JVM. For HTTP
     * connections redirects are not followed automatically; a response code in the range 300-399
     * is reported as a {@link RedirectException} carrying the resolved target URL.
     *
     * @param url the URL to open
     * @return the input stream of the opened connection
     * @throws RedirectException if the server answered with a redirect that has a
     *         {@code Location} header
     * @throws HttpStreamException if the connection could not be established for any other reason
     */
    public static InputStream getHttpStream(URL url) throws RedirectException, HttpStreamException {
        URLConnection conn = null;
        try {
            String userPassword = CrawlerToolkit.extractCredentialsFromProtocolHostFragment(CrawlerToolkit.createURLWithoutPath(url.toExternalForm()));
            if (userPassword != null && userPassword.length() > 0) {
                // Split at the FIRST colon only: the password itself may contain colons, and
                // credentials without any colon are treated as a user name with empty password.
                int separatorPos = userPassword.indexOf(':');
                final String user = separatorPos == -1 ? userPassword : userPassword.substring(0, separatorPos);
                final String password = separatorPos == -1 ? "" : userPassword.substring(separatorPos + 1);
                Authenticator.setDefault(new Authenticator(){

                    @Override
                    protected PasswordAuthentication getPasswordAuthentication() {
                        return new PasswordAuthentication(user, password.toCharArray());
                    }
                });
            }

            conn = url.openConnection();
            if (conn instanceof HttpURLConnection) {
                HttpURLConnection hconn = (HttpURLConnection)conn;
                // Redirects are handled here so the caller learns about the new location.
                hconn.setInstanceFollowRedirects(false);
                String charset = RegainToolkit.getSystemDefaultEncoding() + ",utf-8,*";
                hconn.setRequestProperty("Accept-Charset", charset);

                int response = hconn.getResponseCode();
                boolean redirect = response >= 300 && response <= 399;
                if (redirect) {
                    String loc = conn.getHeaderField("Location");
                    if (loc != null) {
                        // A relative Location is resolved against the requested URL.
                        String redirectUrl = loc.startsWith("http") ? new URL(loc).toString() : new URL(url, loc).toString();
                        throw new RedirectException("Redirect '" + url + "' -> '" + redirectUrl + "'", redirectUrl);
                    }
                    throw new IOException("Redirect did not provide a 'Location' header");
                }
            }
            return conn.getInputStream();
        }
        catch (RedirectException thr) {
            throw thr;
        }
        catch (Throwable thr) {
            throw HttpStreamException.createInstance("Could not get HTTP connection to " + url.toString(), thr, conn);
        }
    }

    /**
     * Downloads the document at the given URL and returns its content as a byte array.
     *
     * @param url the URL of the document
     * @return the bytes read from the URL
     * @throws RegainException if the URL is malformed or reading fails; a
     *         {@link RedirectException} raised while opening the stream is passed on unchanged
     */
    public static byte[] loadHttpDocument(String url) throws RegainException {
        InputStream httpIn = null;
        ByteArrayOutputStream buffer = null;
        try {
            URL target = new URL(url);
            httpIn = CrawlerToolkit.getHttpStream(target);
            buffer = new ByteArrayOutputStream();
            RegainToolkit.pipe(httpIn, buffer);
            buffer.close();
            return buffer.toByteArray();
        }
        catch (RedirectException redirect) {
            // Redirects are not a load failure; let the caller deal with them.
            throw redirect;
        }
        catch (IOException ioExc) {
            throw new RegainException("Could not load Document with HTTP", ioExc);
        }
        finally {
            if (httpIn != null) {
                try {
                    httpIn.close();
                }
                catch (Exception ignored) {}
            }
            if (buffer != null) {
                try {
                    buffer.close();
                }
                catch (Exception ignored) {}
            }
        }
    }

    /**
     * Reads a whole file into a byte array.
     *
     * @param file the file to read
     * @return the content of the file
     * @throws RegainException if {@code file} is a directory, is too large to fit into a single
     *         byte array, or could not be read
     */
    public static byte[] loadFile(File file) throws RegainException {
        if (file.isDirectory()) {
            throw new RegainException("Can't load a directory: " + file.getAbsolutePath());
        }
        // Guard the narrowing cast below: a length above Integer.MAX_VALUE would wrap around to
        // a negative or far too small buffer size.
        long fileLength = file.length();
        if (fileLength > Integer.MAX_VALUE) {
            throw new RegainException("Loading file failed, it is too large (" + fileLength + " bytes): " + file.getAbsolutePath());
        }
        FileInputStream in = null;
        ByteArrayOutputStream out = null;
        try {
            in = new FileInputStream(file);
            out = new ByteArrayOutputStream((int)fileLength);
            RegainToolkit.pipe(in, out);
            return out.toByteArray();
        }
        catch (IOException exc) {
            throw new RegainException("Loading file failed " + file.getAbsolutePath(), exc);
        }
        finally {
            if (out != null) {
                try {
                    out.close();
                }
                catch (IOException ignored) {}
            }
            if (in != null) {
                try {
                    in.close();
                }
                catch (IOException ignored) {}
            }
        }
    }

    /**
     * Reads the given stream into a byte array.
     * <p>
     * This method itself does not close {@code inputStream}.
     *
     * @param inputStream the stream to read from
     * @param length the expected number of bytes, used only as the initial buffer size; a
     *        negative value (for example an unknown length reported as -1) selects the default
     *        buffer size
     * @return the bytes read from the stream
     * @throws RegainException if reading the stream fails
     */
    public static byte[] loadFileFromStream(InputStream inputStream, int length) throws RegainException {
        ByteArrayOutputStream out = null;
        try {
            // ByteArrayOutputStream rejects negative sizes with an IllegalArgumentException.
            out = length < 0 ? new ByteArrayOutputStream() : new ByteArrayOutputStream(length);
            RegainToolkit.pipe(inputStream, out);
            return out.toByteArray();
        }
        catch (IOException exc) {
            throw new RegainException("Loading inputstream failed ", exc);
        }
        finally {
            if (out != null) {
                try {
                    out.close();
                }
                catch (IOException ignored) {}
            }
        }
    }

    /**
     * Turns a possibly relative URL into an absolute one and normalises {@code /./} and
     * {@code /../} path segments.
     * <p>
     * URLs starting with {@code http://}, {@code https://} or {@code file://} are regarded as
     * absolute. A URL starting with a slash is resolved against the host of an HTTP(S) parent;
     * any other relative URL is resolved against the directory of the parent URL.
     *
     * @param url the URL to make absolute
     * @param parentUrl the URL of the document in which {@code url} was found
     * @return the absolute, normalised URL
     * @throws IllegalArgumentException if a {@code /..} segment has no preceding slash to
     *         climb up to
     */
    public static String toAbsoluteUrl(String url, String parentUrl) {
        boolean isAbsolute = url.startsWith("http://") || url.startsWith("https://") || url.startsWith("file://");
        if (!isAbsolute) {
            // Length of the parent's "scheme://" prefix, or -1 if the parent is not HTTP(S).
            int parentSchemeLength = -1;
            if (parentUrl.startsWith("http://")) {
                parentSchemeLength = 7;
            } else if (parentUrl.startsWith("https://")) {
                parentSchemeLength = 8;
            }

            if (parentSchemeLength != -1 && url.startsWith("/")) {
                // Root-relative link: keep only scheme and host of the parent.
                int firstSlashPos = parentUrl.indexOf('/', parentSchemeLength);
                if (firstSlashPos != -1) {
                    String domain = parentUrl.substring(0, firstSlashPos);
                    url = domain + url;
                } else {
                    url = parentUrl + url;
                }
            } else {
                // Path-relative link: replace everything after the parent's last slash. A last
                // slash at index 7 or lower belongs to the "//" after the scheme, so the parent
                // has no path at all.
                int lastSlashPos = parentUrl.lastIndexOf('/');
                if (lastSlashPos > 7) {
                    String domainWithPath = parentUrl.substring(0, lastSlashPos + 1);
                    url = domainWithPath + url;
                } else {
                    url = parentUrl + "/" + url;
                }
            }
        }

        url = RegainToolkit.replace(url, "/./", "/");
        if (url.endsWith("/.")) {
            url = url.substring(0, url.length() - 2);
        }

        // Collapse every "<segment>/.." pair.
        int updirIdx = 0;
        while ((updirIdx = url.indexOf("/..", updirIdx)) != -1) {
            int slashAfterIdx = updirIdx + 3;
            // Only a complete ".." segment counts, not e.g. "/..foo".
            if (slashAfterIdx >= url.length() || url.charAt(slashAfterIdx) == '/') {
                int slashBeforeIdx = url.lastIndexOf('/', updirIdx - 1);
                if (slashBeforeIdx != -1) {
                    url = url.substring(0, slashBeforeIdx) + url.substring(slashAfterIdx);
                    // Continue at the cut so that chained "../.." sequences are handled too.
                    updirIdx = slashBeforeIdx;
                    continue;
                }
                throw new IllegalArgumentException("Illegal URL: " + url + ". (parent URL: " + parentUrl + ") Contains a .. with no / before");
            }
            updirIdx += 3;
        }
        return url;
    }

    /**
     * Removes a dangling trailing {@code ?} from URLs whose path looks like a directory.
     * <p>
     * The question mark is stripped only if the URL has no (or an empty) query, its path is not
     * empty, contains no dot and does not end with a slash. URLs that cannot be parsed are
     * returned unchanged.
     *
     * @param url the URL to check
     * @return the URL without the trailing {@code ?}, or the unchanged URL
     */
    public static String completeDirectory(String url) {
        URL parsed;
        try {
            parsed = new URL(url);
        }
        catch (MalformedURLException ignored) {
            // Not a parsable URL: leave it untouched.
            return url;
        }

        String path = parsed.getPath();
        String query = parsed.getQuery();

        boolean hasQuery = query != null && query.length() != 0;
        if (hasQuery) {
            return url;
        }
        boolean directoryLikePath = path != null
                && path.length() > 0
                && !path.contains(".")
                && !path.endsWith("/");
        if (directoryLikePath && url.endsWith("?")) {
            return url.substring(0, url.length() - 1);
        }
        return url;
    }

    /**
     * Cuts off the anchor of a URL, i.e. everything from the first {@code #} on.
     *
     * @param url the URL to strip
     * @return the URL without its anchor, or the unchanged URL if it contains no {@code #}
     */
    public static String removeAnchor(String url) {
        int hashPos = url.indexOf('#');
        return hashPos == -1 ? url : url.substring(0, hashPos);
    }

    /**
     * Prints the names of the active threads in the current thread's group to
     * {@code System.out} as one comma-separated line prefixed with {@code "active threads: "}.
     */
    public static void printActiveThreads() {
        ThreadGroup group = Thread.currentThread().getThreadGroup();
        Thread[] activeArr = new Thread[group.activeCount()];
        // activeCount() is only an estimate, so enumerate() may fill fewer slots than the array
        // has. Iterate over what was actually copied to avoid hitting null entries.
        int copied = group.enumerate(activeArr);

        System.out.print("active threads: ");
        for (int i = 0; i < copied; ++i) {
            if (i != 0) {
                System.out.print(", ");
            }
            System.out.print(activeArr[i].getName());
        }
        System.out.println();
    }

    /**
     * Transfers the proxy and user agent settings of the crawler configuration into the
     * corresponding {@code http.*} system properties and logs what was set.
     * <p>
     * Settings that are {@code null} in the configuration leave their system property untouched.
     * The proxy password itself is never logged, only its length.
     *
     * @param config the crawler configuration to read the settings from
     */
    public static void initHttpClient(CrawlerConfig config) {
        String proxyHost = config.getProxyHost();
        String proxyPort = config.getProxyPort();
        String proxyUser = config.getProxyUser();
        String proxyPassword = config.getProxyPassword();

        StringBuilder summary = new StringBuilder();
        if (proxyHost != null) {
            System.setProperty("http.proxyHost", proxyHost);
            summary.append(" host: ").append(proxyHost);
        }
        if (proxyPort != null) {
            System.setProperty("http.proxyPort", proxyPort);
            summary.append(" port: ").append(proxyPort);
        }
        if (proxyUser != null) {
            System.setProperty("http.proxyUser", proxyUser);
            summary.append(" user: ").append(proxyUser);
        }
        if (proxyPassword != null) {
            System.setProperty("http.proxyPassword", proxyPassword);
            summary.append(" password: (").append(proxyPassword.length()).append(" characters)");
        }

        if (summary.length() == 0) {
            mLog.info("Using no proxy");
        } else {
            mLog.info("Using proxy:" + summary);
        }

        String userAgent = config.getUserAgent();
        if (userAgent == null) {
            return;
        }
        System.setProperty("http.agent", userAgent);
        mLog.info("Using HTTP user agent:" + userAgent);
    }

    /**
     * Replaces the HTML entities ({@code &...;}) in a text with whatever
     * {@code HtmlEntities.decode} returns for them.
     * <p>
     * An entity whose decoding throws is kept verbatim. An ampersand that is not followed by any
     * semicolon is not an entity; it and the remaining text are kept unchanged.
     *
     * @param text the text that may contain HTML entities
     * @return the text with its entities replaced
     */
    public static String replaceHtmlEntities(String text) {
        StringBuilder clean = new StringBuilder(text.length());
        int offset = 0;
        int entityStart;
        while ((entityStart = text.indexOf('&', offset)) != -1) {
            // Copy the plain text in front of the ampersand.
            clean.append(text, offset, entityStart);

            int entityEnd = text.indexOf(';', entityStart);
            if (entityEnd == -1) {
                // No terminating ';' anywhere: keep the rest (including the '&') as literal
                // text instead of silently dropping it.
                offset = entityStart;
                break;
            }

            String entity = text.substring(entityStart, entityEnd + 1);
            String decoded;
            try {
                decoded = HtmlEntities.decode(entity);
            }
            catch (Throwable thr) {
                // Decoding failed: fall back to the raw entity text.
                decoded = entity;
            }
            clean.append(decoded);
            offset = entityEnd + 1;
        }
        if (offset < text.length()) {
            clean.append(text, offset, text.length());
        }
        return clean.toString();
    }

    /**
     * Strips the HTML tags from a text and replaces the HTML entities in what remains.
     * <p>
     * Each non-empty piece of text found in front of a tag is trimmed, entity-decoded and
     * appended followed by a single space. If a tag is opened but never closed, everything from
     * that tag on is discarded. The text after the last tag is trimmed, entity-decoded and
     * appended without a trailing space.
     *
     * @param text the text containing HTML markup
     * @return the text without tags
     */
    public static String cleanFromHtmlTags(String text) {
        StringBuffer result = new StringBuffer(text.length());
        int pos = 0;
        int openIdx = text.indexOf('<', pos);
        while (openIdx != -1) {
            String chunk = text.substring(pos, openIdx);
            // A '>' in front of the next '<' is a stray tag remainder: skip up to and including it.
            int strayCloseIdx = chunk.indexOf('>');
            if (strayCloseIdx != -1) {
                chunk = chunk.substring(strayCloseIdx + 1);
            }
            chunk = chunk.trim();
            if (chunk.length() > 0) {
                result.append(CrawlerToolkit.replaceHtmlEntities(chunk));
                result.append(" ");
            }

            int closeIdx = text.indexOf('>', openIdx);
            if (closeIdx == -1) {
                // The tag never ends: give up on the remaining text.
                pos = text.length();
                break;
            }
            pos = closeIdx + 1;
            openIdx = text.indexOf('<', pos);
        }

        if (pos < text.length()) {
            String tail = text.substring(pos, text.length()).trim();
            result.append(CrawlerToolkit.replaceHtmlEntities(tail));
        }
        return result.toString();
    }

    /**
     * Looks up the authentication entry stored for a URL.
     * <p>
     * The lookup key is the URL reduced by {@link #createURLWithoutPath(String)}.
     *
     * @param url the URL to find an authentication entry for
     * @param authMap the authentication store, keyed by the reduced URL
     * @return the entry stored under the key, or {@code null} if the store has no such key
     * @throws RegainException if reducing the URL fails
     */
    public static AccountPasswordEntry findAuthenticationValuesForURL(String url, Map<String, AccountPasswordEntry> authMap) throws RegainException {
        String lookupKey = CrawlerToolkit.createURLWithoutPath(url);
        mLog.debug("search for >" + lookupKey + "< in authentication store.");

        if (!authMap.containsKey(lookupKey)) {
            mLog.debug("Didn't find an authentication entry for " + lookupKey);
            return null;
        }
        mLog.debug("Found an authentication entry for " + lookupKey);
        return authMap.get(lookupKey);
    }

    /**
     * Inserts the account name and password of an authentication entry into a URL, directly
     * after its {@code ://}, in the form {@code account:password@}.
     *
     * @param url the URL to extend; must contain {@code ://} when {@code entry} is not null
     * @param entry the credentials to insert, or {@code null} to leave the URL unchanged
     * @return the URL with the credentials inserted, or the unchanged URL if {@code entry} is null
     */
    public static String replaceAuthenticationValuesInURL(String url, AccountPasswordEntry entry) {
        if (entry == null) {
            return url;
        }
        int schemeEnd = url.indexOf("://");
        String scheme = url.substring(0, schemeEnd);
        return scheme + "://" + entry.getAccountName() + ":" + entry.getPassword() + "@"
                + url.substring(schemeEnd + 3);
    }

    /**
     * Reduces a URL to its left part as captured by {@code urlPatternLeft}, followed by a
     * single slash.
     * <p>
     * The pattern is searched for anywhere in the given string. If it is not found, an empty
     * string is returned.
     *
     * @param completeUrl the URL to reduce
     * @return the left part of the URL with a trailing slash, or an empty string if the URL does
     *         not match the pattern
     * @throws RegainException never thrown by this implementation; the declaration is kept so
     *         that existing callers stay source-compatible
     */
    public static String createURLWithoutPath(String completeUrl) throws RegainException {
        Matcher matcher = urlPatternLeft.matcher(completeUrl);
        // Ask the matcher directly whether it found something instead of provoking an
        // IllegalStateException from group() on a failed match.
        if (matcher.find()) {
            return matcher.group(1) + "/";
        }
        return "";
    }

    /**
     * Removes every match of the given regular expressions from a URL and tidies up the query
     * separators afterwards.
     * <p>
     * After the removals each {@code &&} is replaced by {@code &}, then one trailing {@code &}
     * and one trailing {@code ?} are cut off.
     *
     * @param url the URL to clean
     * @param urlCleaners the regular expressions whose matches are removed, applied in order
     * @return the cleaned URL
     */
    public static String cleanURL(String url, String[] urlCleaners) {
        String cleaned = url;
        for (int i = 0; i < urlCleaners.length; i++) {
            String regex = urlCleaners[i];
            cleaned = cleaned.replaceAll(regex, "");
            mLog.debug("Remove " + regex + " from URL: " + url);
        }

        cleaned = cleaned.replaceAll("&&", "&");
        if (cleaned.endsWith("&")) {
            cleaned = cleaned.substring(0, cleaned.length() - 1);
        }
        if (cleaned.endsWith("?")) {
            cleaned = cleaned.substring(0, cleaned.length() - 1);
        }

        mLog.debug("Resulting Url after replacement: " + cleaned);
        return cleaned;
    }

    /**
     * Extracts the text between {@code //} and the first {@code @} of a URL fragment.
     * <p>
     * An empty string is returned if the fragment contains no {@code @} or no colon, if the
     * extracted text would be shorter than three characters, or if it starts or ends with a
     * colon.
     *
     * @param urlFragment the protocol and host part of a URL
     * @return the extracted credentials text, or an empty string
     */
    public static String extractCredentialsFromProtocolHostFragment(String urlFragment) {
        if (!urlFragment.contains("@") || !urlFragment.contains(":")) {
            return "";
        }

        int from = urlFragment.indexOf("//") + 2;
        int to = urlFragment.indexOf("@");
        if (to <= from + 2) {
            return "";
        }

        String candidate = urlFragment.substring(from, to);
        if (candidate.startsWith(":") || candidate.endsWith(":")) {
            return "";
        }
        return candidate;
    }
}

