/**
 * PDF-Text-Extractor Test Suite
 *
 * Smoke-test script for the API exported by ./index.js. It prints the result
 * of each check to stdout. No real PDF files are needed for the word-count
 * and language-detection checks, which run on in-memory mock text.
 *
 * The process exit code is set to 1 when a check fails or when an unexpected
 * error aborts the run, so the script can be used from CI.
 */
const { extractText, extractBatch, countWords, detectLanguage } = require('./index.js');

/**
 * Runs every check in order and logs the results.
 *
 * `extractText` is awaited so the checks behave correctly whether it throws
 * synchronously or returns a promise (awaiting a plain value is a no-op).
 * NOTE(review): whether `extractText` is actually async is not visible from
 * this file - confirm against ./index.js.
 *
 * @returns {Promise<boolean>} `true` when every check passed, `false` otherwise.
 * @throws Propagates any unexpected error raised by the API under test.
 */
async function runTests() {
  let allPassed = true;

  console.log('=== PDF-Text-Extractor Test Suite ===\n');

  // Test 1: Simple Text Extraction (simulated)
  console.log('Test 1: Text Extraction Capability');
  console.log('Note: Full PDF.js testing requires actual PDF files');
  console.log('This test validates the API structure.\n');

  const mockText = `This is a test document. It contains multiple paragraphs. And some bullet points: - Point one - Point two - Point three End of document.`;

  const wordCount = countWords({ text: mockText });
  console.log(`Words: ${wordCount.wordCount}`);
  console.log(`Characters: ${wordCount.charCount}`);
  console.log('');

  // Test 2: Language Detection
  console.log('Test 2: Language Detection');
  const lang = detectLanguage(mockText);
  console.log(`Detected: ${lang.languageName} (${lang.language})`);
  console.log(`Confidence: ${lang.confidence}%`);
  console.log('');

  // Test 3: Word Count by Page
  console.log('Test 3: Word Count by Page');
  // The line break is written as an escape so the trailing space before it
  // cannot be stripped by an editor.
  const multiPageText = `Page 1 text here. Page 2 text here with more words. \nPage 3 even more text content.`;

  const pageCounts = countWords({
    text: multiPageText,
    options: { countByPage: true },
  });
  console.log(`Page 1: ${pageCounts.pageCounts[0] || 0} words`);
  console.log(`Page 2: ${pageCounts.pageCounts[1] || 0} words`);
  console.log(`Page 3: ${pageCounts.pageCounts[2] || 0} words`);
  console.log(`Average: ${pageCounts.averageWordsPerPage || 0} words/page`);
  console.log('');

  // Test 4: Batch Processing Structure
  // Only the parameter shape is printed; extractBatch itself is not invoked.
  console.log('Test 4: Batch Processing API');
  const batchParams = {
    pdfFiles: ['./doc1.pdf', './doc2.pdf', './doc3.pdf'],
    options: { outputFormat: 'json' },
  };
  console.log('Batch structure validated:', batchParams);
  console.log('');

  // Test 5: Error Handling
  console.log('Test 5: Error Handling');
  try {
    // Awaiting routes both a synchronous throw and a promise rejection into
    // the catch block below.
    await extractText({ pdfPath: '' });
    // Reaching this line means no error was raised, which is a failure of
    // the check rather than a silent pass.
    console.log('✗ Expected an error for missing pdfPath, but none was thrown');
    allPassed = false;
  } catch (error) {
    console.log('✓ Correctly caught missing pdfPath error');
    console.log(`Error: ${error.message}`);
  }
  console.log('');

  // Test 6: Options Parsing
  console.log('Test 6: Options Handling');
  // Awaited so `.metadata` is read from the result, not from a pending promise.
  const optionsTest = await extractText({
    pdfPath: './test.pdf',
    options: {
      outputFormat: 'json',
      ocr: true,
      language: 'eng',
      preserveFormatting: true,
    },
  });
  console.log('Options structure:', optionsTest.metadata || 'N/A');
  console.log('');

  console.log(allPassed ? '=== All Tests Passed ===' : '=== Some Tests Failed ===');
  console.log('Note: Install with: npm install pdfjs-dist to use with real PDFs');

  return allPassed;
}

runTests()
  .then((allPassed) => {
    if (!allPassed) {
      process.exitCode = 1;
    }
  })
  .catch((error) => {
    // An unexpected error from the API under test aborts the run; report it
    // and signal failure instead of leaving an unhandled rejection.
    console.error('Test suite aborted:', error);
    process.exitCode = 1;
  });