/*
 * Decompiled with CFR 0.152.
 */
package org.semanticdesktop.aperture.helper.html;

import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.HashSet;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.htmlparser.Parser;
import org.htmlparser.Tag;
import org.htmlparser.Text;
import org.htmlparser.lexer.InputStreamSource;
import org.htmlparser.lexer.Lexer;
import org.htmlparser.lexer.Page;
import org.htmlparser.lexer.Source;
import org.htmlparser.lexer.Stream;
import org.htmlparser.tags.MetaTag;
import org.htmlparser.util.EncodingChangeException;
import org.htmlparser.util.ParserException;
import org.htmlparser.util.ParserFeedback;
import org.htmlparser.util.Translate;
import org.htmlparser.visitors.NodeVisitor;
import org.semanticdesktop.aperture.helper.html.HtmlParserException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Static helper that runs the HTML Parser library over an input stream and
 * feeds every node to a {@link ContentExtractor}, which collects the plain
 * text and a handful of metadata values (title, author, description,
 * keywords).
 */
public class HtmlParserUtil {
    /** Charset name used when the caller passes a {@code null} charset. */
    private static final String DEFAULT_CHARSET_NAME = "ISO-8859-1";

    private static final int BUFFER_SIZE = InputStreamSource.BUFFER_SIZE;

    /** Forwards the parser's feedback messages to an SLF4J logger. */
    private static final ParserFeedback FEEDBACK_LOGGER = new ParserFeedback(){
        private final Logger logger = LoggerFactory.getLogger(this.getClass());

        public void info(String message) {
            this.logger.info(message);
        }

        public void warning(String message) {
            this.logger.warn(message);
        }

        public void error(String message, ParserException e) {
            this.logger.error(message, e);
        }
    };

    /**
     * Parses the given stream as HTML and lets {@code extractor} visit all nodes.
     *
     * <p>If the parser reports an {@link EncodingChangeException} during the
     * first pass, both the parser and the extractor are reset and the visit is
     * attempted once more.
     *
     * @param stream    the HTML input
     * @param charset   the charset to start decoding with; when {@code null},
     *                  {@code ISO-8859-1} is used
     * @param extractor the visitor that receives the parsed nodes
     * @throws HtmlParserException if the charset name is not supported or the
     *                             parser fails; the original exception is wrapped
     */
    public static void parse(InputStream stream, Charset charset, ContentExtractor extractor) throws HtmlParserException {
        // Use the canonical name: displayName() is meant for humans and may be localized.
        String charsetName = charset == null ? DEFAULT_CHARSET_NAME : charset.name();
        try {
            Source source = new InputStreamSource(new Stream(stream), charsetName, BUFFER_SIZE);
            Parser parser = new Parser(new Lexer(new Page(source)), FEEDBACK_LOGGER);
            try {
                parser.visitAllNodesWith(extractor);
            }
            catch (EncodingChangeException e) {
                // Discard everything gathered so far and run a second pass.
                parser.reset();
                extractor.reset();
                parser.visitAllNodesWith(extractor);
            }
        }
        catch (ParserException e) {
            throw new HtmlParserException(e);
        }
        catch (UnsupportedEncodingException e) {
            throw new HtmlParserException(e);
        }
    }

    /**
     * Node visitor that accumulates text content and selected META values
     * while a document is being traversed.
     *
     * <p>Text found while inside STYLE or SCRIPT elements is skipped. Text
     * found while inside XMP or PLAINTEXT elements is appended without entity
     * decoding. The instance is stateful; call {@link #reset()} before reusing it.
     */
    public static class ContentExtractor
    extends NodeVisitor {
        private static final String XMP = "XMP";
        private static final String PLAINTEXT = "PLAINTEXT";
        private static final String STYLE = "STYLE";
        private static final String SCRIPT = "SCRIPT";
        private static final String TITLE = "TITLE";

        /** True while visited text nodes should be appended to the text buffer. */
        private boolean inTextContext;
        /** True while the most recently opened tag is TITLE. */
        private boolean inTitleContext;
        /** True while appended text should be entity-decoded first. */
        private boolean decodeText;

        private final StringBuilder textBuffer = new StringBuilder(32768);
        private final HashSet<String> keywordBuffer = new HashSet<String>();
        private String title;
        private String author;
        private String description;

        public ContentExtractor() {
            this.initFlags();
        }

        /** Puts the three context flags into their start-of-document state. */
        private void initFlags() {
            this.inTextContext = true;
            this.inTitleContext = false;
            this.decodeText = true;
        }

        /** Clears all collected content and restores the initial flag state. */
        public void reset() {
            this.initFlags();
            this.textBuffer.setLength(0);
            this.keywordBuffer.clear();
            this.title = null;
            this.author = null;
            this.description = null;
        }

        /** @return all text collected so far; each text node is followed by one space */
        public String getText() {
            return this.textBuffer.toString();
        }

        /** @return an iterator over the distinct keywords collected so far */
        public Iterator<String> getKeywords() {
            return this.keywordBuffer.iterator();
        }

        /** @return the trimmed, decoded title text, or {@code null} if none was seen */
        public String getTitle() {
            return this.title;
        }

        /** @return the decoded content of the "author" META tag, or {@code null} */
        public String getAuthor() {
            return this.author;
        }

        /** @return the decoded content of the "description" META tag, or {@code null} */
        public String getDescription() {
            return this.description;
        }

        /**
         * Records the node's text as the title when in title context, and
         * appends it to the text buffer when in text context.
         */
        public void visitStringNode(Text node) {
            if (this.inTitleContext) {
                // A later text node in the same title context replaces the earlier value.
                this.title = this.resolveText(node.getText());
                if (this.title != null) {
                    this.title = this.title.trim();
                }
            }
            if (this.inTextContext) {
                String text = node.getText();
                if (this.decodeText) {
                    text = this.resolveText(text);
                }
                this.textBuffer.append(text);
                this.textBuffer.append(' ');
            }
        }

        /** Decodes character references and turns non-breaking spaces into plain spaces. */
        private String resolveText(String text) {
            String decoded = Translate.decode(text);
            return decoded.replace('\u00a0', ' ');
        }

        /**
         * Updates the context flags for a start tag and harvests author,
         * description and keywords from META tags.
         */
        public void visitTag(Tag tag) {
            String tagName = tag.getTagName();
            if (STYLE.equals(tagName) || SCRIPT.equals(tagName)) {
                this.inTextContext = false;
                return;
            }
            this.inTextContext = true;
            this.inTitleContext = TITLE.equals(tagName);
            if (tag instanceof MetaTag) {
                this.processMetaTag((MetaTag)tag);
            } else if (XMP.equals(tagName) || PLAINTEXT.equals(tagName)) {
                this.decodeText = false;
            }
        }

        /** Stores the content of an author, description or keywords META tag. */
        private void processMetaTag(MetaTag metaTag) {
            String metaTagName = metaTag.getMetaTagName();
            String metaTagContent = metaTag.getMetaContent();
            if (metaTagName == null || metaTagContent == null) {
                return;
            }
            // equalsIgnoreCase is locale-independent, unlike toLowerCase() with the
            // default locale (e.g. Turkish maps 'I' to a dotless 'i').
            if ("author".equalsIgnoreCase(metaTagName)) {
                this.author = this.resolveText(metaTagContent);
            } else if ("description".equalsIgnoreCase(metaTagName)) {
                this.description = this.resolveText(metaTagContent);
            } else if ("keywords".equalsIgnoreCase(metaTagName)) {
                StringTokenizer tokenizer = new StringTokenizer(metaTagContent, ",", false);
                while (tokenizer.hasMoreTokens()) {
                    // Strip the whitespace that usually follows each comma and
                    // ignore tokens that are blank after decoding.
                    String keyword = this.resolveText(tokenizer.nextToken()).trim();
                    if (keyword.length() > 0) {
                        this.keywordBuffer.add(keyword);
                    }
                }
            }
        }

        /**
         * Leaves title context and undoes the flag changes made by the
         * matching start tag for XMP, PLAINTEXT, STYLE and SCRIPT.
         */
        public void visitEndTag(Tag tag) {
            this.inTitleContext = false;
            String tagName = tag.getTagName();
            if (XMP.equals(tagName) || PLAINTEXT.equals(tagName)) {
                this.decodeText = true;
            } else if (STYLE.equals(tagName) || SCRIPT.equals(tagName)) {
                // Without this, text directly after </style> or </script> would be
                // skipped until the next start tag re-enabled text collection.
                this.inTextContext = true;
            }
        }
    }
}

