Reference Guide
Programming / OCR / Creating a searchable PDF document from an existing scanned PDF document
In This Topic
    Creating a searchable PDF document from an existing scanned PDF document
    In This Topic

    Starting with GdPicture V10, you no longer need to render the pages of your PDF documents to images, run the OCR functions on them, and then create a new PDF document containing the text data. Now you can simply run OCR on any PDF document, and the recognized text will be added as invisible text on the page.

    This example shows you how it works.

    This example makes use of the optional GdPicture.NET Managed PDF Plugin.
    Copy Code
    'We assume that GdPicture has been correctly installed and unlocked.
    'This example loads "input.pdf", runs OCR on every page, and saves the
    'result to "output.pdf". Every failure is reported through a message box.
    Dim oGdPicturePDF As New GdPicturePDF()
    Try
        'Loading an input document.
        Dim status As GdPictureStatus = oGdPicturePDF.LoadFromFile("input.pdf", False)
        'Checking, if loading has been successful.
        If status = GdPictureStatus.OK Then
            Dim pageCount As Integer = oGdPicturePDF.GetPageCount()
            'Loop through pages. The loop counts from 1 up to pageCount.
            For i As Integer = 1 To pageCount
                'Selecting a page.
                oGdPicturePDF.SelectPage(i)
                'Running OCR on the selected page.
                'NOTE(review): the arguments are presumably the language code, the OCR
                'resource folder, a character whitelist and the DPI - confirm against
                'the OcrPage reference and adjust the folder to your installation.
                If oGdPicturePDF.OcrPage("eng", "C:\GdPicture.NET 14\Redist\OCR", "", 200) <> GdPictureStatus.OK Then
                    'A failure on one page is reported and processing continues with the next page.
                    MessageBox.Show("Error occurred on the page " + i.ToString() + ". Error: " + oGdPicturePDF.GetStat().ToString(), "OCR Pages Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
                End If
            Next
            'Saving to a different file.
            status = oGdPicturePDF.SaveToFile("output.pdf", True)
            If status = GdPictureStatus.OK Then
                MessageBox.Show("Done!", "OCR Pages Example", MessageBoxButtons.OK, MessageBoxIcon.Information)
            Else
                MessageBox.Show("The document can't be saved. Status: " + status.ToString(), "OCR Pages Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
            End If
            'Closing the document.
            oGdPicturePDF.CloseDocument()
        Else
            MessageBox.Show("The document can't be opened. Status: " + status.ToString(), "OCR Pages Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
        End If
    Finally
        'Releasing resources even if one of the calls above has thrown an exception.
        oGdPicturePDF.Dispose()
    End Try
    Copy Code
    //We assume that GdPicture has been correctly installed and unlocked.
    //This example loads "input.pdf", runs OCR on every page, and saves the
    //result to "output.pdf". Every failure is reported through a message box.
    GdPicturePDF oGdPicturePDF = new GdPicturePDF();
    try
    {
        //Loading an input document.
        GdPictureStatus status = oGdPicturePDF.LoadFromFile("input.pdf", false);
        //Checking if loading has been successful.
        if (status == GdPictureStatus.OK)
        {
            int pageCount = oGdPicturePDF.GetPageCount();
            //Loop through pages. The loop counts from 1 up to pageCount.
            for (int i = 1; i <= pageCount; i++)
            {
                //Selecting a page.
                oGdPicturePDF.SelectPage(i);
                //Running OCR on the selected page.
                //NOTE(review): the arguments are presumably the language code, the OCR
                //resource folder, a character whitelist and the DPI - confirm against
                //the OcrPage reference and adjust the folder to your installation.
                if (oGdPicturePDF.OcrPage("eng", "C:\\GdPicture.NET 14\\Redist\\OCR", "", 200) != GdPictureStatus.OK)
                {
                    //A failure on one page is reported and processing continues with the next page.
                    MessageBox.Show("Error occurred on the page " + i.ToString() + ". Error: " + oGdPicturePDF.GetStat().ToString(), "OCR Pages Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
                }
            }
            //Saving to a different file.
            status = oGdPicturePDF.SaveToFile("output.pdf", true);
            if (status == GdPictureStatus.OK)
            {
                MessageBox.Show("Done!", "OCR Pages Example", MessageBoxButtons.OK, MessageBoxIcon.Information);
            }
            else
            {
                MessageBox.Show("The document can't be saved. Status: " + status.ToString(), "OCR Pages Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
            }
            //Closing the document.
            oGdPicturePDF.CloseDocument();
        }
        else
        {
            MessageBox.Show("The document can't be opened. Status: " + status.ToString(), "OCR Pages Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
    }
    finally
    {
        //Releasing resources even if one of the calls above has thrown an exception.
        oGdPicturePDF.Dispose();
    }