/*
 * Decompiled with CFR 0.152.
 */
package org.semanticdesktop.aperture.extractor.publisher;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import org.ontoware.rdf2go.model.node.URI;
import org.ontoware.rdf2go.vocabulary.RDF;
import org.semanticdesktop.aperture.extractor.Extractor;
import org.semanticdesktop.aperture.extractor.ExtractorException;
import org.semanticdesktop.aperture.extractor.microsoft.util.PoiUtil;
import org.semanticdesktop.aperture.rdf.RDFContainer;
import org.semanticdesktop.aperture.util.StringExtractor;
import org.semanticdesktop.aperture.vocabulary.NFO;
import org.semanticdesktop.aperture.vocabulary.NIE;

/**
 * {@link Extractor} implementation living in the {@code publisher} package.
 *
 * <p>Extraction runs in two steps: the incoming stream is first handed to
 * {@link PoiUtil#extractMetadata}, and the stream returned by that call is then
 * scanned for text by a {@link StringExtractor} subclass that discards a fixed
 * set of lines (see {@link #EXCLUDE_LINES}).
 *
 * <p>NOTE(review): judging by the package and class name this targets Microsoft
 * Publisher documents - confirm against the extractor registry.
 */
public class PublisherExtractor implements Extractor {

    /**
     * Lines that are dropped from the extracted text when a (lower-cased) line is
     * exactly equal to one of these entries.
     */
    private static final String[] EXCLUDE_LINES = {
        "fdpc",
        "syid",
        "syidz",
        "chnkink",
        "btep",
        "btec",
        "font",
        "fontj",
        "mcld",
        "ontd",
        "quill96 story group class"
    };

    /**
     * Extracts metadata and plain-text content from {@code stream} into {@code result}.
     *
     * <p>If the trimmed text is non-empty it is stored under
     * {@link NIE#plainTextContent}. The container is typed as
     * {@link NFO#TextDocument} whether or not any text was found.
     *
     * @param id       not read by this implementation
     * @param stream   the document data to process
     * @param charset  not read by this implementation
     * @param mimeType not read by this implementation
     * @param result   container that receives the extracted statements
     * @throws ExtractorException wrapping any {@link IOException} raised while
     *                            reading metadata or text
     */
    public void extract(URI id, InputStream stream, Charset charset, String mimeType, RDFContainer result) throws ExtractorException {
        try {
            // The text scan must run on the stream handed back by PoiUtil, not on the
            // original argument. (The meaning of the 'true' flag is defined by PoiUtil
            // and is not visible here - TODO confirm.)
            InputStream contentStream = PoiUtil.extractMetadata(stream, true, result);

            FullTextExtractor textScanner = new FullTextExtractor();
            String fullText = textScanner.extract(contentStream).trim();

            boolean hasText = fullText.length() > 0;
            if (hasText) {
                result.add(NIE.plainTextContent, fullText);
            }
            result.add(RDF.type, NFO.TextDocument);
        }
        catch (IOException ioFailure) {
            throw new ExtractorException(ioFailure);
        }
    }

    /**
     * {@link StringExtractor} variant that additionally rejects every line listed
     * in {@link PublisherExtractor#EXCLUDE_LINES}.
     */
    private static class FullTextExtractor extends StringExtractor {

        private FullTextExtractor() {
        }

        /**
         * Rejects lines that exactly match an excluded entry and defers every other
         * decision to the superclass.
         *
         * @param lineLowerCase the candidate line; presumably already lower-cased by
         *                      the caller, as the name suggests - verify in
         *                      {@code StringExtractor}
         * @return {@code false} for an excluded line, otherwise whatever
         *         {@code super.isValidLine} returns
         */
        protected boolean isValidLine(String lineLowerCase) {
            for (String excluded : EXCLUDE_LINES) {
                if (lineLowerCase.equals(excluded)) {
                    return false;
                }
            }
            return super.isValidLine(lineLowerCase);
        }
    }
}

