Remove page numbers, headers, and footers from a PDF: code example

Example: remove page numbers, headers, and footers from a PDF

/**
 * Extracts the text of a PDF after deleting its header/footer stamps.
 *
 * <p>The PDF is loaded from {@code bytes}, the stamps with IDs 100 and 101 are
 * deleted (NOTE(review): these IDs are taken to be the header and footer stamps,
 * per the original inline comment — confirm against the documents being
 * processed), the edited PDF is re-read, and the text of all pages is absorbed.
 * The result is then passed through {@code StringUtilities.removeAllUTF8BOM} and
 * {@code PdfTextNormalizer.normalizePdfText} before being returned.
 *
 * <p>Both the {@code PdfContentEditor} and the {@code Document} are closed before
 * the method returns, whether it completes normally or throws.
 *
 * @param bytes raw content of the PDF file
 * @return the cleaned-up text extracted from all pages
 * @throws Exception if the PDF cannot be read, edited, saved or parsed
 */
@Nonnull
public String extract(@Nonnull byte[] bytes) throws Exception {
    PdfContentEditor pce = new PdfContentEditor();
    Document pdfDocument = null;
    try {
        try (InputStream fileInputStream = new ByteArrayInputStream(bytes)) {
            pce.bindPdf(fileInputStream);
            pce.deleteStampByIds(new int[] {100, 101});  // delete headers and footers

            // Round-trip through memory: save the edited PDF and reopen it as a Document.
            try (ByteArrayOutputStream bos = new ByteArrayOutputStream()) {
                pce.save(bos);
                try (ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray())) {
                    pdfDocument = new Document(bis);
                }
            }
        }

        com.aspose.pdf.TextAbsorber textAbsorber = new com.aspose.pdf.TextAbsorber();

        // Accept the absorber for all the pages
        pdfDocument.getPages().accept(textAbsorber);

        // Get the extracted text
        String originalText = textAbsorber.getText();

        // Clean up BOM symbols
        StringUtilities strUtils = new StringUtilities();
        originalText = strUtils.removeAllUTF8BOM(originalText);

        return new PdfTextNormalizer().normalizePdfText(originalText);
    } finally {
        // Release library resources even when extraction fails part-way through.
        try {
            if (pdfDocument != null) {
                pdfDocument.close();
            }
        } finally {
            pce.close();
        }
    }
}

Tags:

Misc Example