Iterate Pages
This guide covers different patterns for iterating through document pages.
Generator Pattern
Section titled “Generator Pattern”The pages() method returns a generator:
for (const page of document.pages()) { using p = page; console.log(`Page ${p.index}: ${p.width} × ${p.height}`);}Index Pattern
Section titled “Index Pattern”Access pages by index:
for (let i = 0; i < document.pageCount; i++) { using page = document.getPage(i); console.log(`Page ${i + 1}: ${page.getText().slice(0, 100)}`);}Process Specific Pages
Section titled “Process Specific Pages”Page Range
Section titled “Page Range”function processPageRange( document: PDFiumDocument, start: number, end: number) { for (let i = start; i <= end && i < document.pageCount; i++) { using page = document.getPage(i); // Process page... }}
// Process pages 5-10 (indices 4-9)processPageRange(document, 4, 9);Selected Pages
Section titled “Selected Pages”function processSelectedPages( document: PDFiumDocument, indices: number[]) { for (const i of indices) { if (i >= 0 && i < document.pageCount) { using page = document.getPage(i); // Process page... } }}
// Process pages 1, 5, 10 (indices 0, 4, 9)processSelectedPages(document, [0, 4, 9]);Every Nth Page
Section titled “Every Nth Page”function processEveryNthPage( document: PDFiumDocument, n: number) { for (let i = 0; i < document.pageCount; i += n) { using page = document.getPage(i); // Process page... }}
// Process every 5th pageprocessEveryNthPage(document, 5);Odd/Even Pages
Section titled “Odd/Even Pages”function processOddPages(document: PDFiumDocument) { for (let i = 0; i < document.pageCount; i += 2) { using page = document.getPage(i); // Process odd page (1, 3, 5, ...) }}
function processEvenPages(document: PDFiumDocument) { for (let i = 1; i < document.pageCount; i += 2) { using page = document.getPage(i); // Process even page (2, 4, 6, ...) }}Collecting Results
Section titled “Collecting Results”Array of Results
Section titled “Array of Results”function extractAllText(document: PDFiumDocument): string[] { const texts: string[] = [];
for (const page of document.pages()) { using p = page; texts.push(p.getText()); }
return texts;}Map of Results
Section titled “Map of Results”interface PageResult { text: string; width: number; height: number;}
function processAllPages( document: PDFiumDocument): Map<number, PageResult> { const results = new Map<number, PageResult>();
for (const page of document.pages()) { using p = page; results.set(p.index, { text: p.getText(), width: p.width, height: p.height, }); }
return results;}Early Exit
Section titled “Early Exit”Find First Match
Section titled “Find First Match”function findPageWithText( document: PDFiumDocument, searchText: string): number | null { for (const page of document.pages()) { using p = page;
for (const _ of p.findText(searchText)) { return p.index; // Found, return immediately } }
return null; // Not found}Process Until Condition
Section titled “Process Until Condition”function processUntilError(document: PDFiumDocument): number { let processed = 0;
for (const page of document.pages()) { using p = page;
try { const text = p.getText(); if (text.includes('STOP')) { break; } processed++; } catch { break; } }
return processed;}Async Processing
Section titled “Async Processing”Sequential with Async Operations
Section titled “Sequential with Async Operations”async function renderAllPages( document: PDFiumDocument, outputDir: string) { for (const page of document.pages()) { using p = page;
const { data, width, height } = p.render({ scale: 2 });
// Async save operation await saveImage(data, width, height, `${outputDir}/page-${p.index}.png`); }}Limited Concurrency
Section titled “Limited Concurrency”async function processWithConcurrency( document: PDFiumDocument, concurrency: number) { const results: string[] = []; const indices = Array.from({ length: document.pageCount }, (_, i) => i);
// Process in batches for (let i = 0; i < indices.length; i += concurrency) { const batch = indices.slice(i, i + concurrency);
const batchResults = await Promise.all( batch.map(async (index) => { using page = document.getPage(index); return page.getText(); }) );
results.push(...batchResults); }
return results;}Progress Tracking
Section titled “Progress Tracking”interface Progress { current: number; total: number; percentage: number;}
async function processWithProgress( document: PDFiumDocument, onProgress: (progress: Progress) => void) { const total = document.pageCount;
for (const page of document.pages()) { using p = page;
// Process page... const text = p.getText();
onProgress({ current: p.index + 1, total, percentage: Math.round(((p.index + 1) / total) * 100), }); }}
// Usageawait processWithProgress(document, (progress) => { console.log(`Processing: ${progress.percentage}%`);});Reverse Order
Section titled “Reverse Order”function processInReverse(document: PDFiumDocument) { for (let i = document.pageCount - 1; i >= 0; i--) { using page = document.getPage(i); // Process page... }}Complete Example
Section titled “Complete Example”import { PDFium } from '@scaryterry/pdfium';import { promises as fs } from 'fs';import sharp from 'sharp';
async function processPDF(inputPath: string, outputDir: string) { const data = await fs.readFile(inputPath); await fs.mkdir(outputDir, { recursive: true });
using pdfium = await PDFium.init(); using document = await pdfium.openDocument(data);
console.log(`Processing ${document.pageCount} pages...`);
const results: Array<{ index: number; chars: number }> = [];
for (const page of document.pages()) { using p = page;
// Extract text const text = p.getText(); results.push({ index: p.index, chars: text.length });
// Render to image const { data: pixels, width, height } = p.render({ scale: 2 });
const png = await sharp(pixels, { raw: { width, height, channels: 4 }, }).png().toBuffer();
await fs.writeFile(`${outputDir}/page-${p.index + 1}.png`, png);
console.log(`Page ${p.index + 1}: ${text.length} chars`); }
// Summary const totalChars = results.reduce((sum, r) => sum + r.chars, 0); console.log(`\nTotal characters: ${totalChars}`);}
processPDF('document.pdf', './output');See Also
Section titled “See Also”- PDFiumDocument — pages() method
- Resource Management — Disposal patterns
- Performance — Optimisation tips