package org.apache.pdfbox.tools;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.batik.constants.XMLConstants;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.font.PDFontDescriptor;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;

/* loaded from: input_file:META-INF/lib/pdfbox-app-3.0.5.jar:org/apache/pdfbox/tools/PDFText2HTML.class */
/**
 * Text stripper that writes the extracted text as a simple HTML page.
 *
 * <p>The document is wrapped in an HTML 4.01 Transitional skeleton, every page and article is
 * emitted as a {@code <div>}, every paragraph as a {@code <p>}, and bold / italic font changes
 * are rendered with {@code <b>} / {@code <i>} tags. All text taken from the PDF is HTML-escaped
 * before it is written.
 */
public class PDFText2HTML extends PDFTextStripper {
    /** Initial capacity of the buffer used to assemble the HTML header. */
    private static final int INITIAL_PDF_TO_HTML_BYTES = 8192;

    /** Inline font tags that are currently open in the paragraph being written. */
    private final FontState fontState = new FontState();

    /**
     * Keeps track of the inline font tags ({@code b}, {@code i}) that are currently open so that
     * tags are opened and closed in a properly nested order.
     *
     * <p>{@code stateList} and {@code stateSet} always hold the same tag names: the list records
     * the nesting order (innermost tag last), the set is used for membership checks.
     */
    private static class FontState {
        private final List<String> stateList;
        private final Set<String> stateSet;

        private FontState() {
            this.stateList = new ArrayList<>();
            this.stateSet = new HashSet<>();
        }

        /**
         * Converts a piece of text into escaped HTML, opening or closing font tags as needed.
         *
         * <p>When there is exactly one {@link TextPosition} per character, each character is
         * styled from its own position. Otherwise only the first character is styled (from the
         * first position) and the remainder is escaped without further font changes.
         *
         * @param text the text to write
         * @param textPositions the positions the text was built from
         * @return the HTML fragment for {@code text}; empty when {@code text} is empty
         */
        public String push(String text, List<TextPosition> textPositions) {
            if (text.isEmpty()) {
                return "";
            }
            if (textPositions.isEmpty()) {
                // No font information available. The result is written verbatim into the HTML
                // output, so it must still be escaped.
                return PDFText2HTML.escape(text);
            }

            StringBuilder html = new StringBuilder();
            if (text.length() == textPositions.size()) {
                for (int i = 0; i < text.length(); i++) {
                    push(html, text.charAt(i), textPositions.get(i));
                }
            } else {
                // Character count and position count differ: take the font of the first
                // position and leave the font state untouched for the rest of the text.
                push(html, text.charAt(0), textPositions.get(0));
                html.append(PDFText2HTML.escape(text.substring(1)));
            }
            return html.toString();
        }

        /**
         * Closes every tag that is still open and resets the state.
         *
         * @return the closing tags, innermost first; empty when nothing was open
         */
        public String clear() {
            StringBuilder html = new StringBuilder();
            closeUntil(html, null);
            this.stateList.clear();
            this.stateSet.clear();
            return html.toString();
        }

        /**
         * Appends one escaped character to {@code buffer}, preceded by whatever tags are needed
         * to switch bold / italic to the style of {@code textPosition}'s font.
         *
         * <p>A font without a descriptor is treated as neither bold nor italic.
         */
        private void push(StringBuilder buffer, char character, TextPosition textPosition) {
            boolean bold = false;
            boolean italic = false;
            PDFontDescriptor descriptor = textPosition.getFont().getFontDescriptor();
            if (descriptor != null) {
                bold = isBold(descriptor);
                italic = isItalic(descriptor);
            }
            buffer.append(bold ? open("b") : close("b"));
            buffer.append(italic ? open("i") : close("i"));
            PDFText2HTML.appendEscaped(buffer, character);
        }

        /**
         * Marks {@code tag} as open.
         *
         * @return the opening tag, or an empty string if the tag was already open
         */
        private String open(String tag) {
            if (this.stateSet.contains(tag)) {
                return "";
            }
            this.stateList.add(tag);
            this.stateSet.add(tag);
            return openTag(tag);
        }

        /**
         * Closes {@code tag} while keeping the markup properly nested: every tag opened after it
         * is closed first and re-opened afterwards.
         *
         * @return the required closing and re-opening tags, or an empty string if the tag was
         *         not open
         */
        private String close(String tag) {
            if (!this.stateSet.contains(tag)) {
                return "";
            }
            StringBuilder html = new StringBuilder();
            int index = closeUntil(html, tag);
            this.stateList.remove(index);
            this.stateSet.remove(tag);
            // Tags that were nested inside the removed one have shifted down to 'index'.
            for (int i = index; i < this.stateList.size(); i++) {
                html.append(openTag(this.stateList.get(i)));
            }
            return html.toString();
        }

        /**
         * Appends closing tags, innermost first, up to and including {@code endTag}. Neither
         * collection is modified.
         *
         * @param endTag the tag to stop at, or {@code null} to close everything
         * @return the list index of {@code endTag}, or -1 if it was not found (always the case
         *         for {@code null})
         */
        private int closeUntil(StringBuilder html, String endTag) {
            for (int i = this.stateList.size() - 1; i >= 0; i--) {
                String tag = this.stateList.get(i);
                html.append(closeTag(tag));
                if (tag.equals(endTag)) {
                    return i;
                }
            }
            return -1;
        }

        /** Builds the opening markup for {@code tag} from the {@link XMLConstants} delimiters. */
        private String openTag(String tag) {
            return XMLConstants.XML_OPEN_TAG_START + tag + XMLConstants.XML_CLOSE_TAG_END;
        }

        /** Builds the closing markup for {@code tag} from the {@link XMLConstants} delimiters. */
        private String closeTag(String tag) {
            return XMLConstants.XML_CLOSE_TAG_START + tag + XMLConstants.XML_CLOSE_TAG_END;
        }

        /** A font is bold if its descriptor forces bold or its name contains "Bold". */
        private boolean isBold(PDFontDescriptor descriptor) {
            return descriptor.isForceBold() || fontNameContains(descriptor, "Bold");
        }

        /** A font is italic if its descriptor says so or its name contains "Italic". */
        private boolean isItalic(PDFontDescriptor descriptor) {
            return descriptor.isItalic() || fontNameContains(descriptor, "Italic");
        }

        /**
         * Null-safe check of the descriptor's font name; a descriptor is not guaranteed to
         * carry a font name, in which case the answer is {@code false}.
         */
        private static boolean fontNameContains(PDFontDescriptor descriptor, String marker) {
            String fontName = descriptor.getFontName();
            return fontName != null && fontName.contains(marker);
        }
    }

    /**
     * Creates the stripper and configures the separators and the paragraph, page and article
     * delimiters used for the HTML output.
     *
     * @throws IOException declared for compatibility with the superclass construction
     */
    public PDFText2HTML() throws IOException {
        setLineSeparator(LINE_SEPARATOR);
        setParagraphStart("<p>");
        setParagraphEnd("</p>" + LINE_SEPARATOR);
        setPageStart("<div style=\"page-break-before:always; page-break-after:always\">");
        setPageEnd("</div>" + LINE_SEPARATOR);
        setArticleStart(LINE_SEPARATOR);
        setArticleEnd(LINE_SEPARATOR);
    }

    /**
     * Writes the HTML header: doctype, {@code <head>} with the escaped title and the UTF-8
     * content-type declaration, and the opening {@code <body>} tag.
     *
     * @param pdDocument the document being processed (not used directly)
     * @throws IOException if writing to the output fails
     */
    @Override
    protected void startDocument(PDDocument pdDocument) throws IOException {
        StringBuilder header = new StringBuilder(INITIAL_PDF_TO_HTML_BYTES);
        header.append("<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"\n\"http://www.w3.org/TR/html4/loose.dtd\">\n");
        header.append("<html><head>");
        header.append("<title>").append(escape(getTitle())).append("</title>\n");
        header.append("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">\n");
        header.append("</head>\n");
        header.append("<body>\n");
        // Already valid markup: bypass the escaping writeString(String) override.
        super.writeString(header.toString());
    }

    /**
     * Writes the closing {@code </body></html>} tags.
     *
     * @param pdDocument the document being processed (not used directly)
     * @throws IOException if writing to the output fails
     */
    @Override
    public void endDocument(PDDocument pdDocument) throws IOException {
        super.writeString("</body></html>");
    }

    /**
     * Determines the title for the HTML page.
     *
     * <p>The title from the document information is used when it is present and non-empty.
     * Otherwise the title is guessed from the characters collected so far: the first run of
     * characters sharing one font size above 13 is taken, and the run is cut off once it is
     * longer than 64 characters.
     *
     * @return the title, or an empty string if none could be determined
     */
    protected String getTitle() {
        String title = this.document.getDocumentInformation().getTitle();
        if (title != null && !title.isEmpty()) {
            return title;
        }

        float lastFontSize = -1.0f;
        StringBuilder guess = new StringBuilder();
        for (List<TextPosition> article : getCharactersByArticle()) {
            for (TextPosition position : article) {
                float fontSize = position.getFontSize();
                // A change of font size, or more than 64 collected characters, ends the run.
                if (Float.compare(fontSize, lastFontSize) != 0 || guess.length() > 64) {
                    if (guess.length() > 0) {
                        return guess.toString();
                    }
                    lastFontSize = fontSize;
                }
                // Only text larger than 13 is considered heading material.
                if (fontSize > 13.0f) {
                    guess.append(position.getUnicode());
                }
            }
        }
        // The text ended while a candidate run was still being collected; keep it instead of
        // discarding it. This is empty when nothing qualified.
        return guess.toString();
    }

    /**
     * Opens the {@code <div>} of an article, marking it as right-to-left when required.
     *
     * @param isLtr {@code true} for a plain {@code <div>}, {@code false} for one with
     *              {@code dir="RTL"}
     * @throws IOException if writing to the output fails
     */
    @Override
    protected void startArticle(boolean isLtr) throws IOException {
        super.writeString(isLtr ? "<div>" : "<div dir=\"RTL\">");
    }

    /**
     * Ends the article and closes its {@code <div>}.
     *
     * @throws IOException if writing to the output fails
     */
    @Override
    public void endArticle() throws IOException {
        super.endArticle();
        super.writeString("</div>");
    }

    /**
     * Writes text together with the font tags derived from its positions.
     *
     * @param text the text to write
     * @param textPositions the positions the text was built from
     * @throws IOException if writing to the output fails
     */
    @Override
    protected void writeString(String text, List<TextPosition> textPositions) throws IOException {
        // FontState.push already returns escaped markup, so write it through unchanged.
        super.writeString(this.fontState.push(text, textPositions));
    }

    /**
     * Writes text after HTML-escaping it.
     *
     * @param text the unescaped text to write
     * @throws IOException if writing to the output fails
     */
    @Override
    public void writeString(String text) throws IOException {
        super.writeString(escape(text));
    }

    /**
     * Closes any font tags that are still open, then ends the paragraph.
     *
     * @throws IOException if writing to the output fails
     */
    @Override
    public void writeParagraphEnd() throws IOException {
        super.writeString(this.fontState.clear());
        super.writeParagraphEnd();
    }

    /**
     * HTML-escapes a string character by character.
     *
     * @param text the text to escape; must not be {@code null}
     * @return the escaped text
     * @see #appendEscaped(StringBuilder, char)
     */
    public static String escape(String text) {
        StringBuilder escaped = new StringBuilder(text.length());
        for (int i = 0; i < text.length(); i++) {
            appendEscaped(escaped, text.charAt(i));
        }
        return escaped.toString();
    }

    /**
     * Appends a single character to {@code builder} in HTML-safe form.
     *
     * <p>Characters outside the printable ASCII range (below space or above {@code '~'}) are
     * written as decimal numeric character references; the double quote, ampersand, less-than
     * and greater-than characters are written as the corresponding {@link XMLConstants}
     * entities; everything else is appended unchanged.
     *
     * <p>NOTE(review): the method works on single {@code char} values, so a supplementary
     * character reaches it as two surrogates and is written as two separate references.
     *
     * @param builder the buffer to append to
     * @param character the character to escape
     */
    public static void appendEscaped(StringBuilder builder, char character) {
        if (character < ' ' || character > '~') {
            builder.append("&#").append((int) character).append(';');
            return;
        }
        switch (character) {
            case '\"':
                builder.append(XMLConstants.XML_ENTITY_QUOT);
                return;
            case '&':
                builder.append(XMLConstants.XML_ENTITY_AMP);
                return;
            case '<':
                builder.append(XMLConstants.XML_ENTITY_LT);
                return;
            case '>':
                builder.append(XMLConstants.XML_ENTITY_GT);
                return;
            default:
                builder.append(character);
                return;
        }
    }
}
