In This Topic
Programming / OCR / Creating a searchable PDF document from an existing scanned PDF document

Creating a searchable PDF document from an existing scanned PDF document

In This Topic

Starting with GdPicture V10, you no longer need to render the pages of your PDF document to images, run the OCR functions on them, and then create a new PDF document containing the text data. You can now simply run OCR directly on any PDF document, and the recognized text is added to each page as invisible text.

This example shows you how it works.

Copy Code
'We assume that GdPicture has been correctly installed and unlocked.
'The Using block guarantees that the GdPicturePDF object is disposed
'even if an exception is thrown while the document is being processed.
Using oGdPicturePDF As New GdPicturePDF()
    'Loading an input document.
    Dim status As GdPictureStatus = oGdPicturePDF.LoadFromFile("input.pdf", False)
    'Checking, if loading has been successful.
    If status = GdPictureStatus.OK Then
        Dim pageCount As Integer = oGdPicturePDF.GetPageCount()
        'Loop through pages (page numbers start at 1).
        For i As Integer = 1 To pageCount
            'Selecting a page. OCR runs only when the selection succeeded,
            'otherwise the previously selected page would be processed again.
            Dim pageStatus As GdPictureStatus = oGdPicturePDF.SelectPage(i)
            If pageStatus = GdPictureStatus.OK Then
                pageStatus = oGdPicturePDF.OcrPage("eng", "C:\GdPicture.NET 14\Redist\OCR", "", 200)
            End If
            'A failing page is reported, but the remaining pages are still processed.
            If pageStatus <> GdPictureStatus.OK Then
                MessageBox.Show("Error occurred on the page " & i.ToString() & ". Error: " & pageStatus.ToString(), "OCR Pages Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
            End If
        Next
        'Saving to a different file.
        status = oGdPicturePDF.SaveToFile("output.pdf", True)
        If status = GdPictureStatus.OK Then
            MessageBox.Show("Done!", "OCR Pages Example", MessageBoxButtons.OK, MessageBoxIcon.Information)
        Else
            MessageBox.Show("The document can't be saved. Status: " & status.ToString(), "OCR Pages Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
        End If
        'Closing the document; the object itself is released at End Using.
        oGdPicturePDF.CloseDocument()
    Else
        MessageBox.Show("The document can't be opened. Status: " & status.ToString(), "OCR Pages Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
    End If
End Using
Copy Code
//We assume that GdPicture has been correctly installed and unlocked.
//The using statement guarantees that the GdPicturePDF object is disposed
//even if an exception is thrown while the document is being processed.
using (GdPicturePDF oGdPicturePDF = new GdPicturePDF())
{
    //Loading an input document.
    GdPictureStatus status = oGdPicturePDF.LoadFromFile("input.pdf", false);
    //Checking if loading has been successful.
    if (status == GdPictureStatus.OK)
    {
        int pageCount = oGdPicturePDF.GetPageCount();
        //Loop through pages (page numbers start at 1).
        for (int i = 1; i <= pageCount; i++)
        {
            //Selecting a page. OCR runs only when the selection succeeded,
            //otherwise the previously selected page would be processed again.
            GdPictureStatus pageStatus = oGdPicturePDF.SelectPage(i);
            if (pageStatus == GdPictureStatus.OK)
            {
                pageStatus = oGdPicturePDF.OcrPage("eng", "C:\\GdPicture.NET 14\\Redist\\OCR", "", 200);
            }
            //A failing page is reported, but the remaining pages are still processed.
            if (pageStatus != GdPictureStatus.OK)
            {
                MessageBox.Show("Error occurred on the page " + i.ToString() + ". Error: " + pageStatus.ToString(), "OCR Pages Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
            }
        }
        //Saving to a different file.
        status = oGdPicturePDF.SaveToFile("output.pdf", true);
        if (status == GdPictureStatus.OK)
        {
            MessageBox.Show("Done!", "OCR Pages Example", MessageBoxButtons.OK, MessageBoxIcon.Information);
        }
        else
        {
            MessageBox.Show("The document can't be saved. Status: " + status.ToString(), "OCR Pages Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
        //Closing the document; the object itself is released when the using block ends.
        oGdPicturePDF.CloseDocument();
    }
    else
    {
        MessageBox.Show("The document can't be opened. Status: " + status.ToString(), "OCR Pages Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}