package org.jpedal.tika;

import com.lowagie.text.ElementTags;
import com.lowagie.text.html.HtmlTags;
import com.lowagie.text.html.Markup;
import com.lowagie.text.xml.xmp.PdfSchema;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.util.Collections;
import java.util.Set;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.jpedal.examples.images.ExtractImages;
import org.jpedal.examples.text.ExtractStructuredText;
import org.jpedal.examples.text.ExtractTextInRectangle;
import org.jpedal.exception.PdfException;
import org.jpedal.objects.PdfImageData;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

/* loaded from: input_file:resources/public/jpedal.jar:org/jpedal/tika/PDFParser.class */
/**
 * Tika {@link Parser} that delegates PDF extraction to the JPedal example extractors.
 *
 * <p>Each instance is configured with one {@link Ability} that selects which kind of
 * content {@link #parse} emits. The PDF is opened by file path, so the stream handed to
 * {@link #parse} must be a {@link TikaInputStream}. An optional document password is
 * read from the {@link Metadata} entry named {@link #PASSWORD}.
 */
public class PDFParser implements Parser {
    /** Metadata key whose value is passed to the extractor as the PDF password. */
    public static final String PASSWORD = "com.idrsolutions.tika.password";

    /** Message used whenever an extractor reports that it could not open the file. */
    private static final String OPEN_FAILURE = "Problem opening PDF file";

    private final Ability ability;
    private final Set<MediaType> supportedTypes;

    /* loaded from: input_file:resources/public/jpedal.jar:org/jpedal/tika/PDFParser$Ability.class */
    /** Selects what {@link PDFParser#parse} extracts from the document. */
    public enum Ability {
        /** One paragraph element of plain text per page. */
        UNSTRUCTURED_TEXT,
        /** The structured-text DOM, serialised and emitted as character data. */
        STRUCTURED_TEXT,
        /** Per-page, per-image bounding-box tables. */
        IMAGE_METADATA
    }

    /**
     * Creates a parser with the given extraction mode.
     *
     * @param ability the extraction mode; a {@code null} value is not rejected here but
     *                makes {@link #parse} fail with a {@link NullPointerException} when it
     *                switches on the mode
     */
    public PDFParser(Ability ability) {
        // NOTE(review): PdfSchema.DEFAULT_XPATH_ID is presumably the "pdf" subtype and looks like
        // a decompiler constant substitution — confirm before replacing with a literal.
        this.supportedTypes = Collections.singleton(MediaType.application(PdfSchema.DEFAULT_XPATH_ID));
        this.ability = ability;
    }

    /** Creates a parser that extracts {@link Ability#UNSTRUCTURED_TEXT}. */
    public PDFParser() {
        this(Ability.UNSTRUCTURED_TEXT);
    }

    /**
     * Returns the single media type this parser was constructed with.
     *
     * @param parseContext unused
     * @return the supported media types
     */
    @Override
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return this.supportedTypes;
    }

    /**
     * Extracts content from the PDF behind {@code inputStream} according to the configured
     * {@link Ability} and writes it to {@code contentHandler} as XHTML SAX events.
     *
     * @param inputStream    must be a {@link TikaInputStream}; its backing file is opened by path
     * @param contentHandler receiver of the generated XHTML events
     * @param metadata       document metadata; the {@link #PASSWORD} entry, if any, is used as
     *                       the PDF password
     * @param parseContext   unused
     * @throws UnsupportedOperationException if {@code inputStream} is not a {@link TikaInputStream}
     * @throws IOException   if the backing file cannot be obtained or the PDF cannot be opened
     * @throws SAXException  if the content handler rejects an event
     * @throws TikaException if the underlying extractor fails
     */
    @Override
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        TikaInputStream tikaStream = TikaInputStream.cast(inputStream);
        if (tikaStream == null) {
            throw new UnsupportedOperationException("File path should be passed as a TikaInputStream");
        }
        XHTMLContentHandler xhtml = new XHTMLContentHandler(contentHandler, metadata);
        xhtml.startDocument();
        switch (this.ability) {
            case UNSTRUCTURED_TEXT:
                unstructuredText(tikaStream, xhtml, metadata);
                break;
            case STRUCTURED_TEXT:
                structuredText(tikaStream, xhtml, metadata);
                break;
            case IMAGE_METADATA:
                imageMetadata(tikaStream, xhtml, metadata);
                break;
        }
        xhtml.endDocument();
    }

    /**
     * Emits one paragraph element per page containing that page's text.
     *
     * @throws IOException   if the PDF cannot be opened
     * @throws SAXException  if the handler rejects an event
     * @throws TikaException wrapping any {@link PdfException} raised by the extractor
     */
    private static void unstructuredText(TikaInputStream tikaInputStream, XHTMLContentHandler xHTMLContentHandler, Metadata metadata) throws IOException, SAXException, TikaException {
        ExtractTextInRectangle extractor = new ExtractTextInRectangle(tikaInputStream.getFile().getAbsolutePath());
        extractor.setOutputFormat(ExtractTextInRectangle.OUTPUT_FORMAT.TXT);
        extractor.setPassword(metadata.get(PASSWORD));
        try {
            if (!extractor.openPDFFile()) {
                throw new IOException(OPEN_FAILURE);
            }
            // Page numbers are 1-based and run up to getPageCount() inclusive.
            for (int page = 1; page <= extractor.getPageCount(); page++) {
                xHTMLContentHandler.startElement(HtmlTags.PARAGRAPH);
                xHTMLContentHandler.characters(extractor.getTextOnPage(page));
                xHTMLContentHandler.endElement(HtmlTags.PARAGRAPH);
            }
        } catch (PdfException e) {
            // Keep the original exception as the cause so its stack trace is not lost.
            throw new TikaException(e.getMessage(), e);
        } finally {
            extractor.closePDFfile();
        }
    }

    /**
     * Serialises the extractor's structured-text DOM with a default (identity) transformer
     * and emits the resulting markup as character data.
     *
     * @throws IOException   if the PDF cannot be opened
     * @throws SAXException  if the handler rejects an event
     * @throws TikaException wrapping any {@link PdfException} or {@link TransformerException}
     */
    private static void structuredText(TikaInputStream tikaInputStream, XHTMLContentHandler xHTMLContentHandler, Metadata metadata) throws IOException, SAXException, TikaException {
        ExtractStructuredText extractor = new ExtractStructuredText(tikaInputStream.getFile().getAbsolutePath());
        extractor.setPassword(metadata.get(PASSWORD));
        try {
            if (!extractor.openPDFFile()) {
                throw new IOException(OPEN_FAILURE);
            }
            DOMSource source = new DOMSource(extractor.getStructuredTextContent());
            StringWriter serialised = new StringWriter();
            TransformerFactory.newInstance().newTransformer().transform(source, new StreamResult(serialised));
            xHTMLContentHandler.characters(serialised.toString());
        } catch (TransformerException e) {
            // Keep the original exception as the cause so its stack trace is not lost.
            throw new TikaException(e.getMessage(), e);
        } catch (PdfException e) {
            throw new TikaException(e.getMessage(), e);
        } finally {
            extractor.closePDFfile();
        }
    }

    /**
     * Emits a {@code fileSource} meta element carrying the PDF's absolute path, then, for each
     * page that has at least one image, a page {@code div} containing one {@code div} per image
     * with a two-row table of the image's bounding box.
     *
     * @throws IOException   if the PDF cannot be opened
     * @throws SAXException  if the handler rejects an event
     * @throws TikaException wrapping any {@link PdfException} raised by the extractor
     */
    private static void imageMetadata(TikaInputStream tikaInputStream, XHTMLContentHandler xHTMLContentHandler, Metadata metadata) throws IOException, SAXException, TikaException {
        String pdfPath = tikaInputStream.getFile().getAbsolutePath();
        ExtractImages extractor = new ExtractImages(pdfPath);
        extractor.setPassword(metadata.get(PASSWORD));
        try {
            if (!extractor.openPDFFile()) {
                throw new IOException(OPEN_FAILURE);
            }
            AttributesImpl sourceAttributes = new AttributesImpl();
            sourceAttributes.addAttribute("", "name", "name", "CDATA", "fileSource");
            sourceAttributes.addAttribute("", "content", "content", "CDATA", pdfPath);
            xHTMLContentHandler.startElement(HtmlTags.META, sourceAttributes);
            xHTMLContentHandler.endElement(HtmlTags.META);

            for (int page = 1; page <= extractor.getPageCount(); page++) {
                int imageCount = extractor.getImageCount(page);
                if (imageCount == 0) {
                    // Pages without images produce no output at all.
                    continue;
                }
                xHTMLContentHandler.startElement("div", divAttributes("page", "data-page-number", page));
                PdfImageData imageData = extractor.getImageData(page);
                for (int index = 0; index < imageCount; index++) {
                    xHTMLContentHandler.startElement("div", divAttributes(ElementTags.IMAGE, "data-image-index", index));
                    xHTMLContentHandler.startElement("table");

                    xHTMLContentHandler.startElement(HtmlTags.ROW);
                    writeTextElement(xHTMLContentHandler, HtmlTags.HEADERCELL, "x1");
                    writeTextElement(xHTMLContentHandler, HtmlTags.HEADERCELL, "y1");
                    writeTextElement(xHTMLContentHandler, HtmlTags.HEADERCELL, "x2");
                    writeTextElement(xHTMLContentHandler, HtmlTags.HEADERCELL, "y2");
                    xHTMLContentHandler.endElement(HtmlTags.ROW);

                    xHTMLContentHandler.startElement(HtmlTags.ROW);
                    float x = imageData.getImageXCoord(index);
                    float y = imageData.getImageYCoord(index);
                    float width = imageData.getImageWidth(index);
                    float height = imageData.getImageHeight(index);
                    // Reported box: (x1, y1) = (x, y + height) and (x2, y2) = (x + width, y).
                    writeTextElement(xHTMLContentHandler, HtmlTags.CELL, String.valueOf(x));
                    writeTextElement(xHTMLContentHandler, HtmlTags.CELL, String.valueOf(y + height));
                    writeTextElement(xHTMLContentHandler, HtmlTags.CELL, String.valueOf(x + width));
                    writeTextElement(xHTMLContentHandler, HtmlTags.CELL, String.valueOf(y));
                    xHTMLContentHandler.endElement(HtmlTags.ROW);

                    xHTMLContentHandler.endElement("table");
                    xHTMLContentHandler.endElement("div");
                }
                xHTMLContentHandler.endElement("div");
            }
        } catch (PdfException e) {
            throw new TikaException(e.getMessage(), e);
        } finally {
            extractor.closePDFfile();
        }
    }

    /** Builds the attribute pair (CSS class, numeric data attribute) used on page and image divs. */
    private static AttributesImpl divAttributes(String cssClass, String dataAttribute, int value) {
        AttributesImpl attributes = new AttributesImpl();
        attributes.addAttribute("", Markup.HTML_ATTR_CSS_CLASS, Markup.HTML_ATTR_CSS_CLASS, "CDATA", cssClass);
        attributes.addAttribute("", dataAttribute, dataAttribute, "CDATA", String.valueOf(value));
        return attributes;
    }

    /** Writes {@code <tag>text</tag>} with no attributes. */
    private static void writeTextElement(XHTMLContentHandler handler, String tag, String text) throws SAXException {
        handler.startElement(tag);
        handler.characters(text);
        handler.endElement(tag);
    }
}
