/*
 * Decompiled with CFR 0.152.
 */
package org.semanticdesktop.aperture.extractor.pdf;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Calendar;
import java.util.List;
import java.util.StringTokenizer;
import org.apache.jempbox.xmp.XMPMetadata;
import org.apache.jempbox.xmp.XMPSchemaDublinCore;
import org.apache.pdfbox.exceptions.CryptographyException;
import org.apache.pdfbox.exceptions.InvalidPasswordException;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.pdfbox.util.PDFTextStripper;
import org.ontoware.rdf2go.model.Model;
import org.ontoware.rdf2go.model.node.Resource;
import org.ontoware.rdf2go.model.node.URI;
import org.ontoware.rdf2go.vocabulary.RDF;
import org.semanticdesktop.aperture.extractor.Extractor;
import org.semanticdesktop.aperture.extractor.ExtractorException;
import org.semanticdesktop.aperture.rdf.RDFContainer;
import org.semanticdesktop.aperture.rdf.util.ModelUtil;
import org.semanticdesktop.aperture.vocabulary.NCO;
import org.semanticdesktop.aperture.vocabulary.NFO;
import org.semanticdesktop.aperture.vocabulary.NIE;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * {@link Extractor} implementation for PDF documents.
 *
 * <p>The input stream is parsed with PDFBox; the full text, the entries of the PDF document
 * information dictionary, the page count and the Dublin Core creators/contributors found in the
 * XMP metadata are added to the supplied {@link RDFContainer}.
 *
 * <p>The most recently parsed {@link PDDocument} is kept in an instance field (see
 * {@link #getPDDocument()}), so concurrent {@code extract} calls on the same instance would
 * overwrite each other's document.
 */
public class PdfExtractor
implements Extractor {
    private final Logger logger = LoggerFactory.getLogger(this.getClass());

    /** Document parsed by the most recent {@code extract} call, or {@code null} if there is none. */
    private PDDocument document = null;

    /** Whether {@code extract} closes the parsed document before returning. */
    private final boolean closeDocument;

    /**
     * Creates an extractor that closes the parsed document at the end of every extraction.
     */
    public PdfExtractor() {
        this.closeDocument = true;
    }

    /**
     * Creates an extractor.
     *
     * @param closeDocument {@code true} to close the parsed document at the end of every
     *            extraction; {@code false} to leave it open (it stays reachable through
     *            {@link #getPDDocument()}; presumably the caller is then expected to close it)
     */
    public PdfExtractor(boolean closeDocument) {
        this.closeDocument = closeDocument;
    }

    /**
     * Parses the given stream as a PDF document and adds the extracted text and metadata to
     * {@code result}.
     *
     * @param id identifier of the resource being processed; only used in log messages here
     * @param stream stream holding the PDF data
     * @param charset not used by this extractor
     * @param mimeType not used by this extractor
     * @param result container receiving the extracted statements
     * @throws ExtractorException if the stream cannot be parsed, if decryption fails with an
     *             I/O error, or if an otherwise successful extraction cannot close the document
     */
    @Override
    public void extract(URI id, InputStream stream, Charset charset, String mimeType, RDFContainer result) throws ExtractorException {
        this.document = null;
        // Tracks whether the body completed, so that a failing close() never hides the
        // exception that made the extraction fail in the first place.
        boolean succeeded = false;
        try {
            try {
                PDFParser parser = new PDFParser(stream);
                parser.parse();
                this.document = parser.getPDDocument();
            }
            catch (IOException e) {
                throw new ExtractorException(e);
            }
            this.processDocument(id, this.document, result);
            succeeded = true;
        }
        finally {
            if (this.document != null && this.closeDocument) {
                try {
                    this.document.close();
                }
                catch (IOException e) {
                    if (succeeded) {
                        throw new ExtractorException(e);
                    }
                    // An exception is already propagating; report the close failure without
                    // replacing it.
                    this.logger.warn("IOException while closing PDF document of " + id, e);
                }
            }
        }
    }

    /**
     * Returns the document parsed by the most recent {@code extract} call.
     *
     * @return the parsed document, or {@code null} if no document has been parsed (successfully)
     *         yet; when this extractor was created with {@code closeDocument == true} the
     *         returned document has already been closed
     */
    public PDDocument getPDDocument() {
        return this.document;
    }

    /**
     * Decrypts the document with an empty password if necessary and runs all extraction steps.
     *
     * <p>A {@link CryptographyException} during decryption marks the resource as encrypted in
     * {@code result} and skips all further extraction. An {@link InvalidPasswordException} is
     * only logged; the extraction steps are still attempted afterwards.
     *
     * @throws ExtractorException if decryption fails with an {@link IOException}
     */
    private void processDocument(URI id, PDDocument document, RDFContainer result) throws ExtractorException {
        if (document.isEncrypted()) {
            try {
                this.logger.info("Trying to decrypt " + id);
                document.decrypt("");
                this.logger.info("Decryption succeeded");
            }
            catch (CryptographyException e) {
                result.add(NFO.encryptionStatus, NFO.encryptedStatus);
                return;
            }
            catch (IOException e) {
                throw new ExtractorException(e);
            }
            catch (InvalidPasswordException e) {
                // Deliberately not fatal: fall through and extract whatever is still readable.
                this.logger.info("Decryption failed", e);
            }
        }
        this.extractFullText(id, document, result);
        this.extractNormalMetadata(id, document, result);
        this.extractXMPMetadata(id, document, result);
    }

    /**
     * Adds the document's plain text as {@code NIE.plainTextContent}. An {@link IOException}
     * raised while stripping the text is logged and otherwise ignored.
     */
    private void extractFullText(URI id, PDDocument document, RDFContainer result) {
        try {
            PDFTextStripper stripper = new PDFTextStripper();
            String text = stripper.getText(document);
            if (text != null) {
                result.add(NIE.plainTextContent, text);
            }
        }
        catch (IOException e) {
            this.logger.warn("IOException while extracting full-text of " + id, e);
        }
    }

    /**
     * Adds the entries of the PDF document information dictionary (author, title, subject,
     * creator, producer, creation/modification date, keywords) and the page count.
     *
     * <p>Each property is read inside its own try/catch so that a failure on one property is
     * logged without preventing the remaining properties from being extracted.
     */
    private void extractNormalMetadata(URI id, PDDocument document, RDFContainer result) {
        PDDocumentInformation metadata = document.getDocumentInformation();
        try {
            this.addContactStatement(NCO.creator, metadata.getAuthor(), result);
        }
        catch (Exception e) {
            this.logger.warn("Exception while extracting author of " + id, e);
        }
        try {
            this.addStringMetadata(NIE.title, metadata.getTitle(), result);
        }
        catch (Exception e) {
            this.logger.warn("Exception while extracting title of " + id, e);
        }
        try {
            this.addStringMetadata(NIE.subject, metadata.getSubject(), result);
        }
        catch (Exception e) {
            this.logger.warn("Exception while extracting subject of " + id, e);
        }
        // Both the "creator" and the "producer" entry are recorded as NIE.generator.
        try {
            this.addStringMetadata(NIE.generator, metadata.getCreator(), result);
        }
        catch (Exception e) {
            this.logger.warn("Exception while extracting creator of " + id, e);
        }
        try {
            this.addStringMetadata(NIE.generator, metadata.getProducer(), result);
        }
        catch (Exception e) {
            this.logger.warn("Exception while extracting producer of " + id, e);
        }
        try {
            this.addCalendarMetadata(NIE.contentCreated, metadata.getCreationDate(), result);
        }
        catch (Exception e) {
            this.logger.warn("Exception while extracting creation date of " + id, e);
        }
        try {
            this.addCalendarMetadata(NIE.contentLastModified, metadata.getModificationDate(), result);
        }
        catch (Exception e) {
            this.logger.warn("Exception while extracting modification date of " + id, e);
        }
        try {
            int nrPages = document.getNumberOfPages();
            if (nrPages >= 0) {
                result.add(RDF.type, NFO.PaginatedTextDocument);
                result.add(NFO.pageCount, nrPages);
            }
        }
        catch (Exception e) {
            this.logger.warn("Exception while extracting number of pages of " + id, e);
        }
        try {
            String keywords = metadata.getKeywords();
            if (keywords != null) {
                // Every run of characters between the listed delimiters becomes one keyword.
                StringTokenizer tokenizer = new StringTokenizer(keywords, " \t,;'\"|", false);
                while (tokenizer.hasMoreTokens()) {
                    result.add(NIE.keyword, tokenizer.nextToken());
                }
            }
        }
        catch (Exception e) {
            this.logger.warn("Exception while extracting keywords of " + id, e);
        }
    }

    /**
     * Adds the Dublin Core creators and contributors found in the document's XMP metadata.
     *
     * <p>A creator equal to the author of the document information dictionary is omitted, since
     * that author is already added by {@code extractNormalMetadata}. Any failure is logged and
     * otherwise ignored. The XMP input stream is always closed before returning.
     */
    private void extractXMPMetadata(URI id, PDDocument document, RDFContainer result) {
        InputStream xmpStream = null;
        try {
            PDDocumentInformation pddi = document.getDocumentInformation();
            PDMetadata md = document.getDocumentCatalog().getMetadata();
            if (md == null) {
                return;
            }
            xmpStream = md.createInputStream();
            XMPMetadata xmpmd = XMPMetadata.load(xmpStream);
            XMPSchemaDublinCore dcschema = xmpmd.getDublinCoreSchema();
            if (dcschema == null) {
                this.logger.debug("No dcschema data found for " + id);
                return;
            }
            String creator = null;
            try {
                creator = pddi.getAuthor();
            }
            catch (Exception e) {
                // Without the author no creator can be recognised as a duplicate; carry on.
                this.logger.debug("Exception while reading author for XMP comparison of " + id, e);
            }
            try {
                this.addContactListMetadata(NCO.creator, dcschema.getCreators(), creator, result);
            }
            catch (Exception e) {
                this.logger.warn("Exception while extracting XMP dublincore-creators of " + id, e);
            }
            try {
                this.addContactListMetadata(NCO.contributor, dcschema.getContributors(), null, result);
            }
            catch (Exception e) {
                this.logger.warn("Exception while extracting XMP dublincore-contributors of " + id, e);
            }
        }
        catch (Exception e) {
            this.logger.warn("Exception while extracting XMP metadata of " + id, e);
        }
        finally {
            if (xmpStream != null) {
                try {
                    xmpStream.close();
                }
                catch (IOException e) {
                    this.logger.debug("IOException while closing XMP metadata stream of " + id, e);
                }
            }
        }
    }

    /**
     * Adds {@code value} for {@code property} unless the value is {@code null}.
     */
    private void addStringMetadata(URI property, String value, RDFContainer result) {
        if (value != null) {
            result.add(property, value);
        }
    }

    /**
     * Adds {@code value} for {@code property} unless the value is {@code null}.
     */
    private void addCalendarMetadata(URI property, Calendar value, RDFContainer result) {
        if (value != null) {
            result.add(property, value);
        }
    }

    /**
     * Creates a new {@code NCO.Contact} resource carrying {@code fullname} as its
     * {@code NCO.fullname} and links it to the container through {@code uri}. Does nothing when
     * {@code fullname} is {@code null}.
     */
    private void addContactStatement(URI uri, String fullname, RDFContainer container) {
        if (fullname != null) {
            Model model = container.getModel();
            Resource contactResource = ModelUtil.generateRandomResource(model);
            model.addStatement(contactResource, RDF.type, NCO.Contact);
            model.addStatement(contactResource, NCO.fullname, fullname);
            container.add(uri, contactResource);
        }
    }

    /**
     * Adds one contact statement per entry of {@code values}.
     *
     * @param property property linking the container to each generated contact
     * @param values names to add; may be {@code null}, {@code null} entries are skipped
     * @param omitValue name that must not be added, or {@code null} to add every entry
     * @param result container receiving the statements
     */
    private void addContactListMetadata(URI property, List<String> values, String omitValue, RDFContainer result) {
        if (values == null) {
            return;
        }
        for (String value : values) {
            // A null entry must not abort the remaining entries; equals(null) is simply false,
            // so a null omitValue filters nothing.
            if (value == null || value.equals(omitValue)) {
                continue;
            }
            this.addContactStatement(property, value, result);
        }
    }
}

