In This Topic
Programming / OCR / How to OCR a single page or a multipage TIFF file

How to OCR a single page or a multipage TIFF file

In This Topic

OCRing a multipage TIFF image is as easy as looping through its pages and running the OCR on each one; a single-page file is simply processed as one page. The recognized text of every page is then written to a text file.
Here is how to do it.

Copy Code
'We assume that GdPicture has been correctly installed and unlocked.
'This sample OCRs every page of a single-page or multipage TIFF file and
'writes the recognized text (or the error status) of each page to "output.txt".
'The Using blocks and the Try/Finally guarantee that the output file, the OCR
'engine and the loaded image are released even if an exception is thrown midway.
Using oGdPictureImaging As GdPictureImaging = New GdPictureImaging()
    'NOTE(review): an empty path presumably lets GdPicture prompt for the input file - confirm against the API reference.
    Dim imageID As Integer = oGdPictureImaging.CreateGdPictureImageFromFile("")
    If oGdPictureImaging.GetStat() = GdPictureStatus.OK Then
        Try
            'A single-page file is processed as exactly one page.
            Dim pageCount As Integer = 1
            'Setting up a correct number of pages for multi-page input file.
            If oGdPictureImaging.TiffIsMultiPage(imageID) Then
                pageCount = oGdPictureImaging.TiffGetPageCount(imageID)
            End If
            'Setting up the OCR engine.
            Using oGdPictureOCR As GdPictureOCR = New GdPictureOCR()
                oGdPictureOCR.ResourceFolder = "C:\GdPicture.NET 14\Redist\OCR"
                oGdPictureOCR.CharacterSet = ""
                oGdPictureOCR.AddLanguage(OCRLanguage.English)
                'One result identifier is reused for all pages; it is released at the end of each iteration.
                Dim resID As String = "page"
                'Creating a text file to store the resulting text.
                'Leaving the Using block flushes and closes the file before "Done!" is reported.
                Using stream As System.IO.StreamWriter = New System.IO.StreamWriter("output.txt")
                    'Looping through multi-page image (pages are numbered from 1).
                    For i As Integer = 1 To pageCount
                        'Selecting a page.
                        oGdPictureImaging.TiffSelectPage(imageID, i)
                        'Setting up the image.
                        If oGdPictureOCR.SetImage(imageID) = GdPictureStatus.OK Then
                            'Running the OCR on the current page.
                            oGdPictureOCR.RunOCR(resID)
                            If oGdPictureOCR.GetStat() = GdPictureStatus.OK Then
                                'Saving the page content as a text.
                                Dim content As String = oGdPictureOCR.GetOCRResultText(resID)
                                If oGdPictureOCR.GetStat() = GdPictureStatus.OK Then
                                    stream.WriteLine("Text on the page nr." + i.ToString() + ":" + vbCrLf + "---------------------------------" + vbCrLf + content)
                                Else
                                    stream.WriteLine("Error occurred on the page nr." + i.ToString() + ": " + oGdPictureOCR.GetStat().ToString())
                                End If
                            Else
                                stream.WriteLine("Error occurred on the page nr." + i.ToString() + ": " + oGdPictureOCR.GetStat().ToString())
                            End If
                        Else
                            MessageBox.Show("The image can't be set. Error: " + oGdPictureOCR.GetStat().ToString(), "OCR TIFF Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
                        End If
                        'Releasing the previous result in order to reuse the result identifier.
                        oGdPictureOCR.ReleaseOCRResult(resID)
                    Next
                End Using
            End Using
        Finally
            'Releasing the loaded image no matter how the processing ended.
            oGdPictureImaging.ReleaseGdPictureImage(imageID)
        End Try
        MessageBox.Show("Done!", "OCR TIFF Example", MessageBoxButtons.OK, MessageBoxIcon.Information)
    Else
        MessageBox.Show("The file can't be opened. Error: " + oGdPictureImaging.GetStat().ToString(), "OCR TIFF Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
    End If
End Using
Copy Code
//We assume that GdPicture has been correctly installed and unlocked.
//This sample OCRs every page of a single-page or multipage TIFF file and
//writes the recognized text (or the error status) of each page to "output.txt".
//The using statements and the try/finally guarantee that the output file, the OCR
//engine and the loaded image are released even if an exception is thrown midway.
using (GdPictureImaging oGdPictureImaging = new GdPictureImaging())
{
    //NOTE(review): an empty path presumably lets GdPicture prompt for the input file - confirm against the API reference.
    int imageID = oGdPictureImaging.CreateGdPictureImageFromFile("");
    if (oGdPictureImaging.GetStat() == GdPictureStatus.OK)
    {
        try
        {
            //A single-page file is processed as exactly one page.
            int pageCount = 1;
            //Setting up a correct number of pages for multi-page input file.
            if (oGdPictureImaging.TiffIsMultiPage(imageID))
            {
                pageCount = oGdPictureImaging.TiffGetPageCount(imageID);
            }
            //Setting up the OCR engine.
            using (GdPictureOCR oGdPictureOCR = new GdPictureOCR())
            {
                oGdPictureOCR.ResourceFolder = "C:\\GdPicture.NET 14\\Redist\\OCR";
                oGdPictureOCR.CharacterSet = "";
                oGdPictureOCR.AddLanguage(OCRLanguage.English);
                //One result identifier is reused for all pages; it is released at the end of each iteration.
                string resID = "page";
                //Creating a text file to store the resulting text.
                //Leaving the using block flushes and closes the file before "Done!" is reported.
                using (System.IO.StreamWriter stream = new System.IO.StreamWriter("output.txt"))
                {
                    //Looping through multi-page image (pages are numbered from 1).
                    for (int i = 1; i <= pageCount; i++)
                    {
                        //Selecting a page.
                        oGdPictureImaging.TiffSelectPage(imageID, i);
                        //Setting up the image.
                        if (oGdPictureOCR.SetImage(imageID) == GdPictureStatus.OK)
                        {
                            //Running the OCR on the current page.
                            oGdPictureOCR.RunOCR(resID);
                            if (oGdPictureOCR.GetStat() == GdPictureStatus.OK)
                            {
                                //Saving the page content as a text.
                                string content = oGdPictureOCR.GetOCRResultText(resID);
                                if (oGdPictureOCR.GetStat() == GdPictureStatus.OK)
                                {
                                    //Environment.NewLine keeps the header's line endings consistent with the ones WriteLine appends.
                                    stream.WriteLine("Text on the page nr." + i.ToString() + ":" + System.Environment.NewLine + "---------------------------------" + System.Environment.NewLine + content);
                                }
                                else
                                {
                                    stream.WriteLine("Error occurred on the page nr." + i.ToString() + ": " + oGdPictureOCR.GetStat().ToString());
                                }
                            }
                            else
                            {
                                stream.WriteLine("Error occurred on the page nr." + i.ToString() + ": " + oGdPictureOCR.GetStat().ToString());
                            }
                        }
                        else
                        {
                            MessageBox.Show("The image can't be set. Error: " + oGdPictureOCR.GetStat().ToString(), "OCR TIFF Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
                        }
                        //Releasing the previous result in order to reuse the result identifier.
                        oGdPictureOCR.ReleaseOCRResult(resID);
                    }
                }
            }
        }
        finally
        {
            //Releasing the loaded image no matter how the processing ended.
            oGdPictureImaging.ReleaseGdPictureImage(imageID);
        }
        MessageBox.Show("Done!", "OCR TIFF Example", MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
    else
    {
        MessageBox.Show("The file can't be opened. Error: " + oGdPictureImaging.GetStat().ToString(), "OCR TIFF Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}