/*
 * Decompiled with CFR 0.152.
 */
package org.semanticdesktop.aperture.crawler.base;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.Stack;
import org.ontoware.aifbcommons.collection.ClosableIterator;
import org.semanticdesktop.aperture.accessor.AccessData;
import org.semanticdesktop.aperture.accessor.DataAccessorRegistry;
import org.semanticdesktop.aperture.accessor.DataObject;
import org.semanticdesktop.aperture.accessor.RDFContainerFactory;
import org.semanticdesktop.aperture.crawler.CrawlReport;
import org.semanticdesktop.aperture.crawler.Crawler;
import org.semanticdesktop.aperture.crawler.CrawlerHandler;
import org.semanticdesktop.aperture.crawler.ExitCode;
import org.semanticdesktop.aperture.crawler.base.CrawlReportBase;
import org.semanticdesktop.aperture.datasource.DataSource;
import org.semanticdesktop.aperture.datasource.DataSourceConfigurationException;
import org.semanticdesktop.aperture.datasource.config.ConfigurationUtil;
import org.semanticdesktop.aperture.datasource.config.DomainBoundaries;
import org.semanticdesktop.aperture.subcrawler.SubCrawler;
import org.semanticdesktop.aperture.subcrawler.SubCrawlerException;
import org.semanticdesktop.aperture.subcrawler.SubCrawlerHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Skeleton implementation of {@link Crawler}. It keeps the crawler's collaborators (data source,
 * accessor registry, access data, handler), drives the overall crawl/clear life cycle, maintains
 * the crawl report and offers {@code report*} helpers that subclasses call while they enumerate
 * objects in {@link #crawlObjects()}.
 *
 * <p>Stop requests: {@link #stop()} may be invoked from a thread other than the one that is
 * crawling. The {@code stopRequested} flag is volatile so that lock-free readers (subclasses
 * polling the protected field, {@link #clear()}) observe the request, and the stack of running
 * sub-crawlers is guarded by a private monitor.
 */
public abstract class CrawlerBase
implements Crawler {
    private final Logger logger = LoggerFactory.getLogger(this.getClass());

    /** The data source being crawled; may be null until {@link #setDataSource} is called. */
    protected DataSource source;

    /** Registry handed to this crawler via {@link #setDataAccessorRegistry}. */
    protected DataAccessorRegistry accessorRegistry;

    /** Optional store of information about previous crawls; null disables incremental bookkeeping. */
    protected AccessData accessData;

    /** Optional file the crawl report is written to and lazily read from. */
    protected File crawlReportFile;

    private CrawlReportBase crawlReport;
    private CrawlerHandler handler;

    /**
     * Set by {@link #stop()}, reset at the start of {@link #crawl()}. Volatile because it is
     * written by the stopping thread and read without a lock by the crawling thread.
     */
    protected volatile boolean stopRequested = false;

    /** Domain boundaries derived from the data source configuration at the start of a crawl. */
    private DomainBoundaries domain;

    /** Guards {@link #subCrawlerStack} and makes "check stop flag + push" atomic w.r.t. stop(). */
    private final Object subCrawlerMonitor = new Object();

    /** Sub-crawlers currently running on behalf of this crawler, innermost on top. */
    private final Stack<SubCrawler> subCrawlerStack = new Stack<SubCrawler>();

    public void setDataSource(DataSource source) {
        this.source = source;
    }

    public DataSource getDataSource() {
        return this.source;
    }

    public void setDataAccessorRegistry(DataAccessorRegistry accessorRegistry) {
        this.accessorRegistry = accessorRegistry;
    }

    public DataAccessorRegistry getDataAccessorRegistry() {
        return this.accessorRegistry;
    }

    public void setAccessData(AccessData accessData) {
        this.accessData = accessData;
    }

    public AccessData getAccessData() {
        return this.accessData;
    }

    public void setCrawlerHandler(CrawlerHandler handler) {
        this.handler = handler;
    }

    public CrawlerHandler getCrawlerHandler() {
        return this.handler;
    }

    /**
     * Runs one complete crawl: creates a fresh crawl report, resets the stop flag, notifies the
     * handler, initializes the access data (if any), delegates to {@link #crawlObjects()},
     * reports untouched objects as removed when the crawl completed, stores the access data,
     * writes the crawl report and finally notifies the handler with the exit code.
     *
     * <p>Any exception thrown between access-data initialization and storage is recorded as the
     * fatal error cause and turns the exit code into {@link ExitCode#FATAL_ERROR}.
     *
     * <p>A crawler handler must have been set; it is dereferenced unconditionally.
     */
    public synchronized void crawl() {
        this.crawlReport = new CrawlReportBase();
        this.crawlReport.setCrawlStarted(System.currentTimeMillis());
        if (this.source != null) {
            this.domain = ConfigurationUtil.getDomainBoundaries(this.source.getConfiguration());
        }
        this.stopRequested = false;
        ExitCode exitCode = null;
        this.handler.crawlStarted(this);
        try {
            if (this.accessData != null) {
                this.accessData.initialize();
            }
            exitCode = this.crawlObjects();
            // Only a fully completed crawl may conclude that untouched objects were removed.
            if (exitCode.equals(ExitCode.COMPLETED) && this.accessData != null) {
                this.reportUntouched();
            }
            if (this.accessData != null) {
                this.accessData.store();
            }
        }
        catch (Exception e) {
            this.reportFatalErrorCause(e);
            exitCode = ExitCode.FATAL_ERROR;
        }
        this.crawlReport.setExitCode(exitCode);
        this.crawlReport.setCrawlStopped(System.currentTimeMillis());
        this.storeCrawlReport();
        this.handler.crawlStopped(this, exitCode);
    }

    /**
     * Performs the actual enumeration of objects. Called by {@link #crawl()}.
     *
     * @return the exit code describing how the crawl ended
     */
    protected abstract ExitCode crawlObjects();

    /**
     * Requests the crawl (or clear) in progress to stop and forwards the request to every
     * sub-crawler that is currently registered as running.
     */
    public void stop() {
        synchronized (this.subCrawlerMonitor) {
            this.stopRequested = true;
            for (SubCrawler subCrawler : this.subCrawlerStack) {
                subCrawler.stopSubCrawler();
            }
        }
    }

    /**
     * @return true if {@link #stop()} has been called since the last crawl started
     */
    public boolean isStopRequested() {
        synchronized (this.subCrawlerMonitor) {
            return this.stopRequested;
        }
    }

    /**
     * Clears all information stored in the access data: every stored ID is passed to
     * {@link #clear(String)} (until a stop is requested), after which the access data itself is
     * cleared. The handler is notified at start and end. Does nothing but the notifications when
     * no access data is set.
     */
    public void clear() {
        this.handler.clearStarted(this);
        ExitCode exitCode = ExitCode.COMPLETED;
        try {
            if (this.accessData != null) {
                this.accessData.initialize();
                Iterator<?> storedIds = this.accessData.getStoredIDs().iterator();
                while (!this.stopRequested && storedIds.hasNext()) {
                    this.clear((String) storedIds.next());
                }
                this.accessData.clear();
                if (this.stopRequested) {
                    exitCode = ExitCode.STOP_REQUESTED;
                }
            }
        }
        catch (IOException e) {
            this.reportFatalErrorCause("IOException while accessing AccessData", e);
            exitCode = ExitCode.FATAL_ERROR;
        }
        this.handler.clearFinished(this, exitCode);
    }

    /**
     * Clears a single object; this implementation only notifies the handler.
     *
     * @param url the ID of the object being cleared
     */
    protected void clear(String url) {
        this.handler.clearingObject(this, url);
    }

    public void setCrawlReportFile(File file) {
        this.crawlReportFile = file;
    }

    public File getCrawlReportFile() {
        return this.crawlReportFile;
    }

    /**
     * Returns the current crawl report. When no report is held in memory yet and a crawl report
     * file is set and exists, the report is loaded from that file first; a failure to load is
     * logged and leaves the report unset.
     *
     * @return the crawl report, or null if none is available
     */
    public CrawlReport getCrawlReport() {
        if (this.crawlReport == null && this.crawlReportFile != null && this.crawlReportFile.exists()) {
            try {
                CrawlReportBase loaded = new CrawlReportBase();
                InputStream stream = new BufferedInputStream(new FileInputStream(this.crawlReportFile));
                try {
                    loaded.read(stream);
                    // Publish only after a successful read so a partial report is never exposed.
                    this.crawlReport = loaded;
                }
                finally {
                    stream.close();
                }
            }
            catch (IOException e) {
                this.logger.error("Unable to load crawl report file", e);
            }
        }
        return this.crawlReport;
    }

    /**
     * Notifies the handler, if one is set, that the given object is about to be accessed.
     *
     * @param url the ID of the object being accessed
     */
    protected void reportAccessingObject(String url) {
        if (this.handler != null) {
            this.handler.accessingObject(this, url);
        }
    }

    /**
     * Marks the object as touched, counts it as new and passes it to the handler.
     *
     * @param object the newly found data object
     */
    protected void reportNewDataObject(DataObject object) {
        this.touchObject(object.getID().toString());
        this.crawlReport.increaseNewCount();
        this.handler.objectNew(this, object);
    }

    /**
     * Marks the given ID as touched in the access data; a no-op when no access data is set.
     *
     * @param string the ID to touch
     */
    protected void touchObject(String string) {
        if (this.accessData != null) {
            this.accessData.touch(string);
        }
    }

    /**
     * Marks the object as touched, counts it as changed and passes it to the handler.
     *
     * @param object the modified data object
     */
    protected void reportModifiedDataObject(DataObject object) {
        this.touchObject(object.getID().toString());
        this.crawlReport.increaseChangedCount();
        this.handler.objectChanged(this, object);
    }

    /**
     * Touches the given ID recursively and reports every ID in its aggregated closure as
     * unmodified. Requires access data to be set: it is dereferenced unconditionally.
     *
     * @param url the ID of the unmodified object
     */
    protected void reportUnmodifiedDataObject(String url) {
        this.accessData.touchRecursively(url);
        ClosableIterator iter = this.accessData.getAggregatedIDsClosure(url);
        try {
            while (iter.hasNext()) {
                this.crawlReport.increaseUnchangedCount();
                this.handler.objectNotModified(this, iter.next().toString());
            }
        }
        finally {
            iter.close();
        }
    }

    /**
     * Reports every ID in the aggregated closure of the given ID as removed and then removes the
     * ID from the access data. Requires access data to be set.
     *
     * @param url the ID of the deleted object
     */
    protected void reportDeletedDataObject(String url) {
        ClosableIterator iter = this.accessData.getAggregatedIDsClosure(url);
        try {
            while (iter.hasNext()) {
                String urlToReport = (String) iter.next();
                this.handler.objectRemoved(this, urlToReport);
                this.crawlReport.increaseRemovedCount();
            }
        }
        finally {
            // Release the iterator before the access data is modified below.
            iter.close();
        }
        this.accessData.remove(url);
    }

    /**
     * Reports every ID that was not touched as removed and drops those IDs from the access data.
     * Requires access data to be set.
     */
    protected void reportUntouched() {
        ClosableIterator iter = this.accessData.getUntouchedIDsIterator();
        try {
            while (iter.hasNext()) {
                this.handler.objectRemoved(this, iter.next().toString());
                this.crawlReport.increaseRemovedCount();
            }
        }
        finally {
            iter.close();
        }
        this.accessData.removeUntouchedIDs();
    }

    /**
     * Records a fatal error described by a message only.
     *
     * @param msg description of the error
     * @return always {@link ExitCode#FATAL_ERROR}
     */
    protected ExitCode reportFatalErrorCause(String msg) {
        return this.recordFatalErrorCause(new DataSourceConfigurationException(msg));
    }

    /**
     * Records a fatal error described by a message and an underlying cause.
     *
     * @param msg description of the error
     * @param cause the underlying throwable
     * @return always {@link ExitCode#FATAL_ERROR}
     */
    protected ExitCode reportFatalErrorCause(String msg, Throwable cause) {
        return this.recordFatalErrorCause(new DataSourceConfigurationException(msg, cause));
    }

    /**
     * Records the given throwable as the fatal error cause.
     *
     * @param t the throwable that aborted the operation
     * @return always {@link ExitCode#FATAL_ERROR}
     */
    protected ExitCode reportFatalErrorCause(Throwable t) {
        return this.recordFatalErrorCause(t);
    }

    /**
     * Stores the cause in the crawl report. When no report exists (e.g. clear() failing before
     * any crawl() has run) the cause is logged instead, so the real error is not masked by a
     * NullPointerException.
     */
    private ExitCode recordFatalErrorCause(Throwable cause) {
        CrawlReportBase report = this.crawlReport;
        if (report != null) {
            report.setFatalErrorCause(cause);
        } else {
            this.logger.error("Fatal error reported while no crawl report is available", cause);
        }
        return ExitCode.FATAL_ERROR;
    }

    /**
     * Asks the handler for the RDF container factory to use for the given object.
     *
     * @param url the ID of the object
     * @return the factory supplied by the handler
     */
    protected RDFContainerFactory getRDFContainerFactory(String url) {
        return this.handler.getRDFContainerFactory(this, url);
    }

    /**
     * Writes the crawl report to the crawl report file. Does nothing when either is missing;
     * an I/O failure is logged rather than propagated.
     */
    protected void storeCrawlReport() {
        if (this.crawlReport != null && this.crawlReportFile != null) {
            try {
                OutputStream stream = new BufferedOutputStream(new FileOutputStream(this.crawlReportFile));
                try {
                    this.crawlReport.write(stream);
                }
                finally {
                    stream.close();
                }
            }
            catch (IOException e) {
                this.logger.error("Unable to write crawl report file", e);
            }
        }
    }

    /**
     * Tests the URI against the domain boundaries obtained at the start of the last crawl.
     * The boundaries are only set by {@link #crawl()} when a data source is present; calling
     * this earlier fails with a NullPointerException.
     *
     * @param uri the URI to test
     * @return the verdict of the domain boundaries
     */
    protected boolean inDomain(String uri) {
        return this.domain.inDomain(uri);
    }

    /**
     * Runs the given sub-crawler on the given data object, unless a stop has been requested, in
     * which case the call returns immediately without starting it. While it runs, the
     * sub-crawler is registered so that {@link #stop()} can forward the stop request to it.
     *
     * @param localSubCrawler the sub-crawler to run
     * @param object the data object to be sub-crawled
     * @param stream the content of the data object
     * @param charset passed through to the sub-crawler
     * @param mimeType passed through to the sub-crawler
     * @throws SubCrawlerException if the sub-crawler fails, or if the registration stack is found
     *             to be out of sync when the sub-crawler is unregistered
     */
    public void runSubCrawler(SubCrawler localSubCrawler, DataObject object, InputStream stream, Charset charset, String mimeType) throws SubCrawlerException {
        // The check and the push happen outside the try block on purpose: when the crawler has
        // been stopped nothing was pushed, so the finally block below must not pop.
        synchronized (this.subCrawlerMonitor) {
            if (this.stopRequested) {
                this.logger.debug("Not starting the subCrawler, the crawler has been requested to stop");
                return;
            }
            this.subCrawlerStack.push(localSubCrawler);
        }
        try {
            localSubCrawler.subCrawl(object.getID(), stream, new DefaultSubCrawlerHandler(this, object), this.source, this.accessData, charset, mimeType, object.getMetadata());
        }
        finally {
            synchronized (this.subCrawlerMonitor) {
                SubCrawler stackSubCrawler = this.subCrawlerStack.pop();
                if (stackSubCrawler != localSubCrawler) {
                    this.logger.error("SubCrawler stack error");
                    throw new SubCrawlerException("SubCrawlerStack error push/pop got desynchronized");
                }
            }
        }
    }

    /**
     * Handler given to sub-crawlers. It registers every reported object as aggregated under the
     * sub-crawled parent object (when access data is available) and then forwards the event to
     * the enclosing crawler's report methods.
     */
    private static class DefaultSubCrawlerHandler
    implements SubCrawlerHandler {
        private final CrawlerBase crawlerBase;
        private final String subCrawledObjectId;

        public DefaultSubCrawlerHandler(CrawlerBase innerCrawler, DataObject object) {
            this.crawlerBase = innerCrawler;
            this.subCrawledObjectId = object.getID().toString();
        }

        public RDFContainerFactory getRDFContainerFactory(String url) {
            return this.crawlerBase.handler.getRDFContainerFactory(this.crawlerBase, url);
        }

        public void objectChanged(DataObject object) {
            this.registerAggregated(object.getID().toString());
            this.crawlerBase.reportModifiedDataObject(object);
        }

        public void objectNew(DataObject object) {
            this.registerAggregated(object.getID().toString());
            this.crawlerBase.reportNewDataObject(object);
        }

        public void objectNotModified(String url) {
            this.registerAggregated(url);
            this.crawlerBase.reportUnmodifiedDataObject(url);
        }

        /** Records the child ID under the sub-crawled parent; a no-op without access data. */
        private void registerAggregated(String childId) {
            AccessData data = this.crawlerBase.accessData;
            if (data != null) {
                data.putAggregatedID(this.subCrawledObjectId, childId);
            }
        }
    }
}

