Skip to content

Performance

This guide covers performance considerations and optimisation strategies for PDF processing.

In Node.js, the native backend provides faster performance:

| Operation            | WASM  | Native | Improvement |
| -------------------- | ----- | ------ | ----------- |
| Document load        | 2.5ms | 1.8ms  | ~1.4x       |
| Page render (1x)     | 15ms  | 12ms   | ~1.25x      |
| Text extraction      | 0.8ms | 0.5ms  | ~1.6x       |
| Character operations | 0.3ms | 0.15ms | ~2x         |
// Use native backend for better performance
const pdfium = await PDFium.init({ useNative: true });
| Scenario                     | Recommendation  |
| ---------------------------- | --------------- |
| Browser app                  | WASM only       |
| Node.js, high throughput     | Native          |
| Node.js, need forms/creation | WASM            |
| Cross-platform scripts       | WASM (portable) |

See Native vs WASM Backends for details.

Higher scale = more pixels = more time and memory:

| Scale | Pixels (US Letter) | Relative Time |
| ----- | ------------------ | ------------- |
| 0.5   | 306 × 396          | 0.25x         |
| 1     | 612 × 792          | 1x            |
| 2     | 1224 × 1584        | 4x            |
| 3     | 1836 × 2376        | 9x            |
// Use appropriate scale for use case
const thumbnail = page.render({ scale: 0.3 }); // Quick preview
const display = page.render({ scale: 1.5 }); // Screen display
const print = page.render({ scale: 3 }); // High quality

When you need exact dimensions, use width/height instead of scale:

// Instead of calculating scale...
const scale = targetWidth / page.width;
const result = page.render({ scale });
// Use direct dimensions
const result = page.render({ width: 800, height: 1000 });

Don’t load all pages at once:

// BAD: All pages in memory
const pages = [];
for (let i = 0; i < document.pageCount; i++) {
pages.push(document.getPage(i));
}
// GOOD: Process one at a time
for (const page of document.pages()) {
using p = page;
await processPage(p);
}
// BAD: New instance per document
for (const file of files) {
using pdfium = await PDFium.init();
using doc = await pdfium.openDocument(file);
// ...
}
// GOOD: Reuse instance
using pdfium = await PDFium.init();
for (const file of files) {
using doc = await pdfium.openDocument(file);
// ...
}
/**
 * Extract text from a batch of PDFs with a bounded number of concurrent
 * workers, reusing a single PDFium instance.
 *
 * @param pdfium - shared PDFium instance (reused across documents)
 * @param files - raw PDF bytes, one entry per document
 * @param concurrency - maximum documents processed in parallel
 * @returns per-document arrays of page texts, in the same order as `files`
 */
async function processBatch(
  pdfium: PDFium,
  files: Uint8Array[],
  concurrency = 4
) {
  const results: string[][] = new Array(files.length);
  let next = 0;
  // Each worker loops instead of recursing, so a large batch cannot
  // grow the call stack one frame per file.
  async function worker(): Promise<void> {
    while (next < files.length) {
      const index = next++;
      using document = await pdfium.openDocument(files[index]);
      const texts: string[] = [];
      for (const page of document.pages()) {
        using p = page;
        texts.push(p.getText());
      }
      // Store by index so output order matches input order even when
      // workers finish out of order.
      results[index] = texts;
    }
  }
  // Start concurrent workers (never more than there are files).
  await Promise.all(
    Array.from({ length: Math.min(concurrency, files.length) }, worker)
  );
  return results;
}
/** Progress snapshot reported after each completed document. */
interface Progress {
  current: number;
  total: number;
  percentage: number;
}

/**
 * Process documents sequentially on one shared PDFium instance,
 * invoking the callback once per finished document.
 */
async function processWithProgress(
  documents: Uint8Array[],
  onProgress: (progress: Progress) => void
) {
  using pdfium = await PDFium.init();
  const total = documents.length;
  let completed = 0;
  for (const data of documents) {
    using document = await pdfium.openDocument(data);
    // Process...
    completed += 1;
    onProgress({
      current: completed,
      total,
      percentage: Math.round((completed / total) * 100),
    });
  }
}
/** Memoizes extracted page text by caller-supplied key. */
class PDFTextCache {
  private readonly cache = new Map<string, string>();

  /** Return cached text for the key, extracting from the page on a miss. */
  getText(page: PDFiumPage, cacheKey: string): string {
    const cached = this.cache.get(cacheKey);
    // Values are always strings, so an undefined check is a safe miss test.
    if (cached !== undefined) {
      return cached;
    }
    const text = page.getText();
    this.cache.set(cacheKey, text);
    return text;
  }

  /** Drop all cached entries. */
  clear() {
    this.cache.clear();
  }
}

Use maxTextCharCount for very large documents:

const pdfium = await PDFium.init({
limits: {
maxTextCharCount: 100_000, // Stop after 100K chars
},
});

When processing many documents in a pipeline, set limits once with configure() instead of passing them to every PDFium.init() call:

import { configure } from '@scaryterry/pdfium';
configure({
limits: {
maxTextCharCount: 100_000,
maxDocumentSize: 50 * 1024 * 1024,
},
});
// All subsequent PDFium.init() calls inherit these limits
using pdfium = await PDFium.init();

See the Security Guide for more details on global configuration.

// Find first occurrence only
// Returns { pageIndex, result } for the first match anywhere in the
// document, or null when no page contains the query.
function findFirst(document: PDFiumDocument, query: string) {
for (const page of document.pages()) {
using p = page;
// Returning from inside the loop stops the search at the first hit;
// `using` still disposes the page on the way out.
// NOTE(review): `result` escapes after `p` is disposed — confirm it
// does not hold live native page state.
for (const result of p.findText(query)) {
return { pageIndex: p.index, result };
}
}
return null;
}
/**
 * Collect up to `maxResults` matches for `query`, scanning pages in order
 * and stopping early once the cap is reached.
 */
function findLimited(
  document: PDFiumDocument,
  query: string,
  maxResults = 100
) {
  const hits: { pageIndex: number; charIndex: number }[] = [];
  for (const page of document.pages()) {
    using current = page;
    for (const match of current.findText(query)) {
      hits.push({
        pageIndex: current.index,
        charIndex: match.charIndex,
      });
      // Early exit: skip the remaining pages once we have enough.
      if (hits.length >= maxResults) {
        return hits;
      }
    }
  }
  return hits;
}

Move PDF processing off the main thread:

await using pdfium = await PDFium.init({
useWorker: true,
workerUrl,
wasmBinary,
});
await using document = await pdfium.openDocument(pdfData);
const result = await document.renderPage(0, { scale: 2 });

Render visible pages first:

/**
 * Render the pages currently on screen first, then warm the cache with
 * the remaining pages.
 *
 * @param document - open document to render from
 * @param visibleIndices - page indices currently visible to the user
 * @param allIndices - every page index that should eventually be rendered
 */
async function renderVisiblePages(
  document: PDFiumDocument,
  visibleIndices: number[],
  allIndices: number[]
) {
  // Render visible pages first so the user sees content immediately.
  for (const i of visibleIndices) {
    using page = document.getPage(i);
    const result = page.render({ scale: 2 });
    displayPage(i, result);
  }
  // Set lookup avoids an O(visible × all) scan on large documents.
  const visible = new Set(visibleIndices);
  // Then render the remaining pages into the cache.
  for (const i of allIndices) {
    if (!visible.has(i)) {
      using page = document.getPage(i);
      const result = page.render({ scale: 2 });
      cachePage(i, result);
    }
  }
}
/** Renders pages on demand and caches the results by page index. */
class LazyPageRenderer {
  // Insertion-ordered cache of completed renders.
  private rendered = new Map<number, RenderResult>();

  constructor(
    private document: PDFiumDocument,
    private scale: number
  ) {}

  /** Render the page on first request; afterwards serve the cached result. */
  async getPage(index: number): Promise<RenderResult> {
    const cached = this.rendered.get(index);
    if (cached !== undefined) {
      return cached;
    }
    using page = this.document.getPage(index);
    const result = page.render({ scale: this.scale });
    this.rendered.set(index, result);
    return result;
  }

  /** Remove a single cached render. */
  evict(index: number) {
    this.rendered.delete(index);
  }

  /**
   * Drop earliest-inserted entries until at most keepCount remain.
   * This is FIFO by insertion order, not LRU: cache hits in getPage
   * do not refresh an entry's position.
   */
  evictOldest(keepCount: number) {
    for (const key of this.rendered.keys()) {
      if (this.rendered.size <= keepCount) {
        break;
      }
      // Deleting the current key is safe during Map iteration.
      this.rendered.delete(key);
    }
  }
}
// Process and discard immediately
// Process and discard immediately
/** Extract all page texts while holding only one page in memory at a time. */
async function lowMemoryProcess(document: PDFiumDocument) {
  const texts: string[] = [];
  for (const page of document.pages()) {
    using current = page;
    texts.push(current.getText());
    // `using` disposes the page at the end of each iteration,
    // so native page memory never accumulates.
  }
  return texts;
}
// Pre-load for faster access
// Pre-load for faster access
/**
 * Load every page up front, extract all texts, then dispose everything.
 * Trades memory (all pages resident at once) for fast random access.
 *
 * @returns one { index, text } entry per page, in page order
 */
async function highSpeedProcess(document: PDFiumDocument) {
  const pages: PDFiumPage[] = [];
  try {
    // Loading happens inside try: if getPage fails partway through,
    // `finally` still disposes the pages already loaded.
    for (let i = 0; i < document.pageCount; i++) {
      pages.push(document.getPage(i));
    }
    // Fast random access
    const results = await Promise.all(
      pages.map(async (page, i) => ({
        index: i,
        text: page.getText(),
      }))
    );
    return results;
  } finally {
    // Cleanup all pages, including after a partial load.
    for (const page of pages) {
      page.dispose();
    }
  }
}
/**
 * Run fn, log its wall-clock duration under the given label,
 * and pass its return value through unchanged.
 */
function measureOperation<T>(name: string, fn: () => T): T {
  const started = performance.now();
  const value = fn();
  const elapsed = performance.now() - started;
  console.log(`${name}: ${elapsed.toFixed(2)}ms`);
  return value;
}
const text = measureOperation('getText', () => page.getText());
const result = measureOperation('render', () => page.render({ scale: 2 }));
/** Log current V8 heap usage in MB under the label (Node.js only; no-op elsewhere). */
function logMemory(label: string) {
  // `process` only exists in Node.js; skip silently in the browser.
  if (typeof process === 'undefined') {
    return;
  }
  const heapMb = process.memoryUsage().heapUsed / (1024 * 1024);
  console.log(`${label}: ${heapMb.toFixed(2)}MB`);
}
logMemory('Before load');
using document = await pdfium.openDocument(data);
logMemory('After load');
| Scenario           | Recommendation                      |
| ------------------ | ----------------------------------- |
| Thumbnails         | Scale 0.3-0.5                       |
| Screen display     | Scale 1-2                           |
| Print quality      | Scale 3-4                           |
| Many documents     | Reuse PDFium instance               |
| Large documents    | Process pages sequentially          |
| Browser UI         | Use Web Workers                     |
| Memory constrained | Lower limits, sequential processing |