Read text from PDF, PDFBox

It's a simple topic, but I had to tinker with it, so I'm posting the code that works for me.

Tagged:

Answers

  • /*
    This code successfully displays the text content of a PDF file.
    References:
    https://forum.processing.org/one/topic/view-a-pdf.html
    https://pdfbox.apache.org/
    Works with PDFBox 2.0.8.
    */
    import java.io.File;
    import java.io.IOException;
    import org.apache.pdfbox.pdmodel.PDDocument;
    import org.apache.pdfbox.text.PDFTextStripper;
    // Base name of the PDF to read; setup() prepends "c:/" and appends ".pdf".
    String file_Name = "example_pdf";
    // Holds the extracted text; keeps this placeholder if extraction never succeeds.
    String text = "haven't read anything.";

    /*
     Sketch entry point: loads the PDF named by file_Name from drive C,
     extracts its text into the global 'text', and prints it to the console.
     Any exception raised while loading or extracting is reported as a stack
     trace; the document is closed afterwards whenever it was opened.
     */
    void setup() {
      size(100, 100);
      background(#F0F8FF);

      PDDocument pdfDoc = null;
      try {
        // Open the existing PDF from its fixed location.
        pdfDoc = PDDocument.load(new File("c:/" + file_Name + ".pdf"));

        // Extract the document's text and keep it in the sketch-level variable.
        PDFTextStripper stripper = new PDFTextStripper();
        text = stripper.getText(pdfDoc);

        println(text);
        println("Conversion done");
      } catch (Exception failure) {
        failure.printStackTrace();
      } finally {
        // Only close when loading actually produced a document.
        if (pdfDoc != null) {
          try {
            pdfDoc.close();
          } catch (IOException closeError) {
            println("Problem when closing doc: " + closeError.getMessage());
          }
        }
      }
    }

Sign In or Register to comment.