Java:PDF图片抽取的两种方法
warning:
这篇文章距离上次修改已过180天,其中的内容可能已经有所变动。
在Java中,可以使用Apache PDFBox和Java ImageIO库来抽取PDF中的图片。以下是两种方法的示例代码:
方法一:使用Apache PDFBox
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.List;
/**
 * Extracts the image XObjects referenced directly by each page of
 * {@code example.pdf} and writes each one to its own PNG file in the
 * working directory.
 *
 * <p>Written against the Apache PDFBox 2.x API ({@code PDDocument.load},
 * {@code PDDocument.getPages}, {@code PDResources.getXObjectNames}).
 * Images nested inside form XObjects are not visited.
 */
public class ExtractImages {

    /**
     * Loads {@code example.pdf}, walks every page, and saves each image
     * XObject as {@code extracted_image_p<page>_<index>.png}.
     *
     * @param args unused
     * @throws IOException if the PDF cannot be read, an image cannot be
     *         decoded, or an output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        File file = new File("example.pdf");
        // try-with-resources closes the document even if extraction fails part-way.
        try (PDDocument document = PDDocument.load(file)) {
            int pageNumber = 0;
            for (PDPage page : document.getPages()) {
                pageNumber++;
                PDResources resources = page.getResources();
                if (resources == null) {
                    // A page without a resource dictionary has no images to extract.
                    continue;
                }
                int imageNumber = 0;
                for (COSName name : resources.getXObjectNames()) {
                    PDXObject xObject = resources.getXObject(name);
                    if (!(xObject instanceof PDImageXObject)) {
                        // Skip form XObjects and other non-image entries.
                        continue;
                    }
                    imageNumber++;
                    BufferedImage bufferedImage = ((PDImageXObject) xObject).getImage();
                    String extension = "png"; // or "jpg" if image is not a PNG
                    // Page and image counters keep each file name unique so that
                    // later images do not overwrite earlier ones.
                    File outputFile = new File(
                            "extracted_image_p" + pageNumber + "_" + imageNumber + "." + extension);
                    if (!ImageIO.write(bufferedImage, extension, outputFile)) {
                        throw new IOException(
                                "No ImageIO writer available for format '" + extension
                                        + "' while writing " + outputFile);
                    }
                }
            }
        }
    }
}
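方法二:使用Java Advanced Imaging (JAI) 与 ImageIO(注意:JDK 自带的 ImageIO 并不包含 PDF 读取器,必须先安装支持 PDF 格式的 ImageIO 插件,否则下面代码中的 readers.next() 会抛出 NoSuchElementException)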
import javax.media.jai.JAI;
import javax.media.jai.RenderedOp;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.awt.image.RenderedImage;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import javax.imageio.ImageReader;
import javax.imageio.stream.ImageInputStream;
public class ExtractImages {
public static void main(String[] args) throws IOException {
File file = new File("example.pdf");
Iterator<ImageReader> readers = ImageIO.getImageReadersByFormatName("PDF");
ImageReader reader = readers.next();
ImageInputStream iis = ImageIO.createImageInputStream(file);
reader.setInput(iis);
for (int i = 0; i < reader.getNumImages(true); i++) {
BufferedImage image = reader.read(i);
String extension = "png"; // or "jpg" if image is not a PNG
File outputFile = new File("extracted_image." + extension);
Ima
评论已关闭