Reference Guide
Programming / OCR / How to OCR a specific zone of a PDF document
In This Topic
    How to OCR a specific zone of a PDF document
    In This Topic

    If you know the position of the text within your PDF document, or you only want to OCR a certain part of the page and ignore the rest, all you have to do is render the page to an image, set the ROI (region of interest) to the desired area, and run the OCR engine.

    This example makes use of the optional GdPicture.NET Managed PDF Plugin.
    Copy Code
    'We assume GdPicture has been correctly installed and unlocked.
    'This example renders the first page of a PDF to an image, runs the OCR on two
    'separate zones (a phone number and an address), and saves both results to a text file.
    Dim oGdPictureOCR As GdPictureOCR = New GdPictureOCR()
    Dim oGdPicturePDF As GdPicturePDF = New GdPicturePDF()
    'Loading the PDF document.
    If oGdPicturePDF.LoadFromFile("input.pdf", False) = GdPictureStatus.OK Then
        'Selecting the first page.
        oGdPicturePDF.SelectPage(1)
        'Rendering the page to a 200 DPI image.
        Dim rasterPageID As Integer = oGdPicturePDF.RenderPageToGdPictureImage(200, True)
        'Remembering whether the rendering succeeded, so that the image can be released
        'on every path, including the one where the OCR engine refuses the image.
        Dim pageRendered As Boolean = (oGdPicturePDF.GetStat() = GdPictureStatus.OK)
        'Setting up the image.
        If pageRendered AndAlso
           (oGdPictureOCR.SetImage(rasterPageID) = GdPictureStatus.OK) Then
            'Identifiers of the OCR results that were produced successfully.
            Dim results As List(Of String) = New List(Of String)()
            'Setting up the OCR parameters.
            oGdPictureOCR.ResourceFolder = "C:\GdPicture.NET 14\Redist\OCR"
            oGdPictureOCR.AddLanguage(OCRLanguage.English)
            'Setting up the OCR context and the character list (digits only for the phone number).
            oGdPictureOCR.Context = OCRContext.OCRContextSingleLine
            oGdPictureOCR.CharacterSet = "0123456789"
            'Setting the area to be processed by the OCR.
            oGdPictureOCR.SetROI(100, 100, 200, 50)
            'Running the OCR process to recognize the phone number.
            oGdPictureOCR.RunOCR("PhoneNumber")
            If oGdPictureOCR.GetStat() = GdPictureStatus.OK Then
                results.Add("PhoneNumber")
            Else
                MessageBox.Show("Error occurred when performing the first OCR. Status: " + oGdPictureOCR.GetStat().ToString(), "OCR zone Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
            End If
            'Setting up the OCR context and the character list (an empty list removes the digits-only restriction).
            oGdPictureOCR.Context = OCRContext.OCRContextSingleBlock
            oGdPictureOCR.CharacterSet = ""
            'Setting the area to be processed by the OCR.
            'The coordinates are kept identical to the C# version of this example.
            oGdPictureOCR.SetROI(300, 100, 200, 200)
            'Running the OCR process to recognize the address.
            oGdPictureOCR.RunOCR("Address")
            If oGdPictureOCR.GetStat() = GdPictureStatus.OK Then
                results.Add("Address")
            Else
                MessageBox.Show("Error occurred when performing the second OCR. Status: " + oGdPictureOCR.GetStat().ToString(), "OCR zone Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
            End If
            If results.Count > 0 Then
                'Saving the successful results to a text file.
                Dim saveStatus As GdPictureStatus = oGdPictureOCR.SaveAsText(results, "OCR.txt", OCROutputTextFormat.Utf16, True)
                If saveStatus = GdPictureStatus.OK Then
                    MessageBox.Show("Done!", "OCR zone Example", MessageBoxButtons.OK, MessageBoxIcon.Information)
                Else
                    'Reporting the failure instead of silently ignoring it.
                    MessageBox.Show("Error occurred when saving the OCR results. Status: " + saveStatus.ToString(), "OCR zone Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
                End If
            End If
        Else
            MessageBox.Show("Error occurred when setting up the page/image. Status: " + oGdPicturePDF.GetStat().ToString() + " or " + oGdPictureOCR.GetStat().ToString(), "OCR zone Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
        End If
        'Releasing the image whenever it was rendered, even if setting it up for the OCR failed.
        If pageRendered Then
            GdPictureDocumentUtilities.DisposeImage(rasterPageID)
        End If
        'Closing the document.
        oGdPicturePDF.CloseDocument()
    Else
        MessageBox.Show("The file can't be loaded. Status: " + oGdPicturePDF.GetStat().ToString(), "OCR zone Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
    End If
    'Clearing resources.
    oGdPictureOCR.Dispose()
    oGdPicturePDF.Dispose()
    Copy Code
    //We assume GdPicture has been correctly installed and unlocked.
    //This example renders the first page of a PDF to an image, runs the OCR on two
    //separate zones (a phone number and an address), and saves both results to a text file.
    GdPictureOCR oGdPictureOCR = new GdPictureOCR();
    GdPicturePDF oGdPicturePDF = new GdPicturePDF();
    //Loading the PDF document.
    if (oGdPicturePDF.LoadFromFile("input.pdf", false) == GdPictureStatus.OK)
    {
        //Selecting the first page.
        oGdPicturePDF.SelectPage(1);
        //Rendering the page to a 200 DPI image.
        int rasterPageID = oGdPicturePDF.RenderPageToGdPictureImage(200, true);
        //Remembering whether the rendering succeeded, so that the image can be released
        //on every path, including the one where the OCR engine refuses the image.
        bool pageRendered = oGdPicturePDF.GetStat() == GdPictureStatus.OK;
        //Setting up the image.
        if (pageRendered && (oGdPictureOCR.SetImage(rasterPageID) == GdPictureStatus.OK))
        {
            //Identifiers of the OCR results that were produced successfully.
            List<string> results = new List<string>();
            //Setting up the OCR parameters.
            oGdPictureOCR.ResourceFolder = "C:\\GdPicture.NET 14\\Redist\\OCR";
            oGdPictureOCR.AddLanguage(OCRLanguage.English);
            //Setting up the OCR context and the character list (digits only for the phone number).
            oGdPictureOCR.Context = OCRContext.OCRContextSingleLine;
            oGdPictureOCR.CharacterSet = "0123456789";
            //Setting the area to be processed by the OCR.
            oGdPictureOCR.SetROI(100, 100, 200, 50);
            //Running the OCR process to recognize the phone number.
            oGdPictureOCR.RunOCR("PhoneNumber");
            if (oGdPictureOCR.GetStat() == GdPictureStatus.OK)
            {
                results.Add("PhoneNumber");
            }
            else
            {
                MessageBox.Show("Error occurred when performing the first OCR. Status: " + oGdPictureOCR.GetStat().ToString(), "OCR zone Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
            }
            //Setting up the OCR context and the character list (an empty list removes the digits-only restriction).
            oGdPictureOCR.Context = OCRContext.OCRContextSingleBlock;
            oGdPictureOCR.CharacterSet = "";
            //Setting the area to be processed by the OCR.
            oGdPictureOCR.SetROI(300, 100, 200, 200);
            //Running the OCR process to recognize the address.
            oGdPictureOCR.RunOCR("Address");
            if (oGdPictureOCR.GetStat() == GdPictureStatus.OK)
            {
                results.Add("Address");
            }
            else
            {
                MessageBox.Show("Error occurred when performing the second OCR. Status: " + oGdPictureOCR.GetStat().ToString(), "OCR zone Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
            }
            if (results.Count > 0)
            {
                //Saving the successful results to a text file.
                GdPictureStatus saveStatus = oGdPictureOCR.SaveAsText(results, "OCR.txt", OCROutputTextFormat.Utf16, true);
                if (saveStatus == GdPictureStatus.OK)
                {
                    MessageBox.Show("Done!", "OCR zone Example", MessageBoxButtons.OK, MessageBoxIcon.Information);
                }
                else
                {
                    //Reporting the failure instead of silently ignoring it.
                    MessageBox.Show("Error occurred when saving the OCR results. Status: " + saveStatus.ToString(), "OCR zone Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
                }
            }
        }
        else
        {
            MessageBox.Show("Error occurred when setting up the page/image. Status: " + oGdPicturePDF.GetStat().ToString() + " or " + oGdPictureOCR.GetStat().ToString(), "OCR zone Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
        //Releasing the image whenever it was rendered, even if setting it up for the OCR failed.
        if (pageRendered)
        {
            GdPictureDocumentUtilities.DisposeImage(rasterPageID);
        }
        //Closing the document.
        oGdPicturePDF.CloseDocument();
    }
    else
    {
        MessageBox.Show("The file can't be loaded. Status: " + oGdPicturePDF.GetStat().ToString(), "OCR zone Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
    //Clearing resources.
    oGdPictureOCR.Dispose();
    oGdPicturePDF.Dispose();