refactor(pdf): update PDFBox usage for improved 3.x compatibility (#2079)

* refactor(pdf): update PDFBox usage for 3.x compatibility and improve file handling

- Use RandomAccessReadBufferedFile for loading PDFs, plus a memory-only stream cache (IOUtils.createMemoryOnlyStreamCache()) when loading in PdfMetadataWriter
- Update methods to use new PDFBox 3.x APIs for reading and writing
- Add pdfbox-io dependency to build.gradle
- Add comments regarding compressed mode in PDFBox 3.x

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>

* test(pdf): remove outdated comments about PDFBox 3.x default compression in save operations

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>

* test(pdf): remove outdated comments about PDFBox 3.x default compression in save operations

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>

---------

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
Balázs Szücs 2026-01-01 08:50:46 +01:00 committed by GitHub
parent 03311b76ee
commit f3299915f5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 18 additions and 5 deletions

View File

@@ -52,6 +52,7 @@ dependencies {
// --- Book & Image Processing ---
implementation 'org.apache.pdfbox:pdfbox:3.0.6'
+implementation 'org.apache.pdfbox:pdfbox-io:3.0.6'
implementation 'org.apache.pdfbox:xmpbox:3.0.6'
implementation 'org.apache.pdfbox:jbig2-imageio:3.0.4'
implementation 'com.github.jai-imageio:jai-imageio-core:1.4.0'

View File

@@ -16,6 +16,7 @@ import com.adityachandel.booklore.util.FileUtils;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
@@ -58,7 +59,9 @@ public class PdfProcessor extends AbstractFileProcessor implements BookFileProce
@Override
public boolean generateCover(BookEntity bookEntity) {
-try (PDDocument pdf = Loader.loadPDF(new File(FileUtils.getBookFullPath(bookEntity)))) {
+File pdfFile = new File(FileUtils.getBookFullPath(bookEntity));
+try (RandomAccessReadBufferedFile randomAccessRead = new RandomAccessReadBufferedFile(pdfFile);
+PDDocument pdf = Loader.loadPDF(randomAccessRead)) {
return generateCoverImageAndSave(bookEntity.getId(), pdf);
} catch (OutOfMemoryError e) {
// Note: Catching OOM is generally discouraged, but for batch processing

View File

@@ -8,6 +8,7 @@ import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
@@ -45,7 +46,8 @@ public class PdfMetadataExtractor implements FileMetadataExtractor {
@Override
public byte[] extractCover(File file) {
BufferedImage coverImage = null;
-try (PDDocument pdf = Loader.loadPDF(file)) {
+try (RandomAccessReadBufferedFile randomAccessRead = new RandomAccessReadBufferedFile(file);
+PDDocument pdf = Loader.loadPDF(randomAccessRead)) {
coverImage = new PDFRenderer(pdf).renderImageWithDPI(0, 300, ImageType.RGB);
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
ImageIO.write(coverImage, "jpg", baos);
@@ -70,7 +72,8 @@ public class PdfMetadataExtractor implements FileMetadataExtractor {
BookMetadata.BookMetadataBuilder metadataBuilder = BookMetadata.builder();
-try (PDDocument pdf = Loader.loadPDF(file)) {
+try (RandomAccessReadBufferedFile randomAccessRead = new RandomAccessReadBufferedFile(file);
+PDDocument pdf = Loader.loadPDF(randomAccessRead)) {
PDDocumentInformation info = pdf.getDocumentInformation();
if (info != null) {

View File

@@ -6,6 +6,8 @@ import com.adityachandel.booklore.model.enums.BookFileType;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.io.IOUtils;
+import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
@@ -63,10 +65,12 @@ public class PdfMetadataWriter implements MetadataWriter {
log.warn("Could not create PDF temp backup for {}: {}", file.getName(), e.getMessage());
}
-try (PDDocument pdf = Loader.loadPDF(file)) {
+try (RandomAccessReadBufferedFile randomAccessRead = new RandomAccessReadBufferedFile(file);
+PDDocument pdf = Loader.loadPDF(randomAccessRead, IOUtils.createMemoryOnlyStreamCache())) {
pdf.setAllSecurityToBeRemoved(true);
applyMetadataToDocument(pdf, metadataEntity, clear);
tempFile = File.createTempFile("pdfmeta-", ".pdf");
+// PDFBox 3.x saves in compressed mode by default
pdf.save(tempFile);
Files.move(tempFile.toPath(), filePath, StandardCopyOption.REPLACE_EXISTING);
log.info("Successfully embedded metadata into PDF: {}", file.getName());

View File

@@ -9,6 +9,7 @@ import com.adityachandel.booklore.util.FileUtils;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
@@ -91,7 +92,8 @@ public class PdfReaderService {
if (!Files.isReadable(pdfPath)) {
throw new FileNotFoundException("PDF file is not readable: " + pdfPath);
}
-try (PDDocument document = Loader.loadPDF(new File(pdfPath.toFile().toURI()))) {
+try (RandomAccessReadBufferedFile randomAccessRead = new RandomAccessReadBufferedFile(pdfPath.toFile());
+PDDocument document = Loader.loadPDF(randomAccessRead)) {
PDFRenderer renderer = new PDFRenderer(document);
for (int i = 0; i < document.getNumberOfPages(); i++) {
BufferedImage image = null;