In This Topic
Programming / OCR / Creating a searchable PDF (PDF/A) document from an image file (both single and multi-page TIFF image)

Creating a searchable PDF (PDF/A) document from an image file (both single and multi-page TIFF image)

In This Topic

This topic shows how to convert a TIFF image file to a searchable PDF (PDF/A) document. It covers two scenarios: one for a multipage TIFF file and one for a single-page TIFF file.

 If your input file is a multipage TIFF file
Copy Code
'We assume GdPicture has been correctly installed and unlocked.
'This sample runs OCR on every page of a multipage TIFF file and writes the recognized text
'of each page onto a new A4 page of a PDF/A-1b document saved as "OCR.pdf".
'Each GdPicture object is created in a Using block so that it is disposed even if an exception is thrown.
Using oGdPictureImaging As GdPictureImaging = New GdPictureImaging()
    'Selecting an image to process.
    Dim imageID As Integer = oGdPictureImaging.TiffCreateMultiPageFromFile("")
    If oGdPictureImaging.GetStat() = GdPictureStatus.OK Then
        Try
            'Retrieving the number of pages. It stays 0 when the image is not reported as multipage,
            'in which case no page is processed.
            Dim pageCount As Integer = 0
            If oGdPictureImaging.TiffIsMultiPage(imageID) Then
                pageCount = oGdPictureImaging.TiffGetPageCount(imageID)
            End If
            'Setting up the OCR engine.
            Using oGdPictureOCR As GdPictureOCR = New GdPictureOCR()
                oGdPictureOCR.ResourceFolder = "C:\GdPicture.NET 14\Redist\OCR"
                oGdPictureOCR.CharacterSet = ""
                oGdPictureOCR.AddLanguage(OCRLanguage.English)
                'The same result identifier is reused for every page.
                Dim resID As String = "page"
                'Creating a resulting PDF document.
                Using oGdPicturePDF As GdPicturePDF = New GdPicturePDF()
                    If oGdPicturePDF.NewPDF(PdfConformance.PDF_A_1b) = GdPictureStatus.OK Then
                        oGdPicturePDF.SetOrigin(PdfOrigin.PdfOriginTopLeft)
                        Dim fontResName As String = oGdPicturePDF.AddStandardFont(PdfStandardFont.PdfStandardFontCourier)
                        If oGdPicturePDF.GetStat() = GdPictureStatus.OK Then
                            'Looping through pages of the image file.
                            For i As Integer = 1 To pageCount
                                'Selecting the current page and setting up the image for OCR.
                                If (oGdPictureImaging.TiffSelectPage(imageID, i) = GdPictureStatus.OK) AndAlso
                                   (oGdPictureOCR.SetImage(imageID) = GdPictureStatus.OK) Then
                                    'Running the OCR process on the current page.
                                    oGdPictureOCR.RunOCR(resID)
                                    If oGdPictureOCR.GetStat() = GdPictureStatus.OK Then
                                        'Getting the result and drawing it onto a new PDF page.
                                        Dim content As String = oGdPictureOCR.GetOCRResultText(resID)
                                        If (oGdPictureOCR.GetStat() = GdPictureStatus.OK) AndAlso
                                           (oGdPicturePDF.NewPage(PdfPageSizes.PdfPageSizeA4) = GdPictureStatus.OK) AndAlso
                                           (oGdPicturePDF.DrawText(fontResName, 0, 0, content) = GdPictureStatus.OK) Then
                                            MessageBox.Show("The page nr. " + i.ToString() + " has been successfully processed.")
                                        End If
                                        'Releasing the previous OCR result to improve the memory management and to allow reusing of the result identifier.
                                        oGdPictureOCR.ReleaseOCRResult(resID)
                                    End If
                                End If
                            Next
                        End If
                        'Saving the resulting PDF document.
                        If oGdPicturePDF.SaveToFile("OCR.pdf", True, True) = GdPictureStatus.OK Then
                            MessageBox.Show("The created PDF document has been successfully saved.")
                        End If
                        oGdPicturePDF.CloseDocument()
                    End If
                End Using
            End Using
        Finally
            'Releasing the image here, where it is known to have been loaded successfully.
            oGdPictureImaging.ReleaseGdPictureImage(imageID)
        End Try
    End If
End Using
Copy Code
//We assume GdPicture has been correctly installed and unlocked.
//This sample runs OCR on every page of a multipage TIFF file and writes the recognized text
//of each page onto a new A4 page of a PDF/A-1b document saved as "OCR.pdf".
//Each GdPicture object is created in a using statement so that it is disposed even if an exception is thrown.
using (GdPictureImaging oGdPictureImaging = new GdPictureImaging())
{
    //Selecting an image to process.
    int imageID = oGdPictureImaging.TiffCreateMultiPageFromFile("");
    if (oGdPictureImaging.GetStat() == GdPictureStatus.OK)
    {
        try
        {
            //Retrieving the number of pages. It stays 0 when the image is not reported as multipage,
            //in which case no page is processed.
            int pageCount = 0;
            if (oGdPictureImaging.TiffIsMultiPage(imageID))
            {
                pageCount = oGdPictureImaging.TiffGetPageCount(imageID);
            }
            //Setting up the OCR engine.
            using (GdPictureOCR oGdPictureOCR = new GdPictureOCR())
            {
                oGdPictureOCR.ResourceFolder = "C:\\GdPicture.NET 14\\Redist\\OCR";
                oGdPictureOCR.CharacterSet = "";
                oGdPictureOCR.AddLanguage(OCRLanguage.English);
                //The same result identifier is reused for every page.
                string resID = "page";
                //Creating a resulting PDF document.
                using (GdPicturePDF oGdPicturePDF = new GdPicturePDF())
                {
                    if (oGdPicturePDF.NewPDF(PdfConformance.PDF_A_1b) == GdPictureStatus.OK)
                    {
                        oGdPicturePDF.SetOrigin(PdfOrigin.PdfOriginTopLeft);
                        string fontResName = oGdPicturePDF.AddStandardFont(PdfStandardFont.PdfStandardFontCourier);
                        if (oGdPicturePDF.GetStat() == GdPictureStatus.OK)
                        {
                            //Looping through pages of the image file.
                            for (int i = 1; i <= pageCount; i++)
                            {
                                //Selecting the current page and setting up the image for OCR.
                                if ((oGdPictureImaging.TiffSelectPage(imageID, i) == GdPictureStatus.OK) &&
                                    (oGdPictureOCR.SetImage(imageID) == GdPictureStatus.OK))
                                {
                                    //Running the OCR process on the current page.
                                    oGdPictureOCR.RunOCR(resID);
                                    if (oGdPictureOCR.GetStat() == GdPictureStatus.OK)
                                    {
                                        //Getting the result and drawing it onto a new PDF page.
                                        string content = oGdPictureOCR.GetOCRResultText(resID);
                                        if ((oGdPictureOCR.GetStat() == GdPictureStatus.OK) &&
                                            (oGdPicturePDF.NewPage(PdfPageSizes.PdfPageSizeA4) == GdPictureStatus.OK) &&
                                            (oGdPicturePDF.DrawText(fontResName, 0, 0, content) == GdPictureStatus.OK))
                                        {
                                            MessageBox.Show("The page nr. " + i.ToString() + " has been successfully processed.");
                                        }
                                        //Releasing the previous OCR result to improve the memory management and to allow reusing of the result identifier.
                                        oGdPictureOCR.ReleaseOCRResult(resID);
                                    }
                                }
                            }
                        }
                        //Saving the resulting PDF document.
                        if (oGdPicturePDF.SaveToFile("OCR.pdf", true, true) == GdPictureStatus.OK)
                        {
                            MessageBox.Show("The created PDF document has been successfully saved.");
                        }
                        oGdPicturePDF.CloseDocument();
                    }
                }
            }
        }
        finally
        {
            //Releasing the image here, where it is known to have been loaded successfully.
            oGdPictureImaging.ReleaseGdPictureImage(imageID);
        }
    }
}
 If your input file is a single page TIFF file
Copy Code
'We assume GdPicture has been correctly installed and unlocked.
'This sample runs OCR on a single-page image and creates a PDF/A-1b document named "OCR.pdf"
'from the recognized text.
'Each GdPicture object is created in a Using block so that it is disposed even if an exception is thrown.
Using oGdPictureImaging As GdPictureImaging = New GdPictureImaging()
    'Selecting an image to process.
    Dim imageID As Integer = oGdPictureImaging.CreateGdPictureImageFromFile("")
    If oGdPictureImaging.GetStat() = GdPictureStatus.OK Then
        Try
            Using oGdPictureOCR As GdPictureOCR = New GdPictureOCR()
                'Setting the OCR parameters.
                oGdPictureOCR.ResourceFolder = "C:\Program Files\GdPicture.NET 14\Redist\OCR"
                oGdPictureOCR.CharacterSet = ""
                'Setting up the language and the image.
                If (oGdPictureOCR.AddLanguage(OCRLanguage.English) = GdPictureStatus.OK) AndAlso
                   (oGdPictureOCR.SetImage(imageID) = GdPictureStatus.OK) Then
                    'Running the OCR process.
                    Dim resID As String = oGdPictureOCR.RunOCR()
                    If oGdPictureOCR.GetStat() = GdPictureStatus.OK Then
                        'Getting the result as a text.
                        Dim content As String = oGdPictureOCR.GetOCRResultText(resID)
                        If oGdPictureOCR.GetStat() = GdPictureStatus.OK Then
                            'Creating a searchable PDF document.
                            Using oGdPicturePDF As GdPicturePDF = New GdPicturePDF()
                                'Setting up your preferred page size and font parameters.
                                If (oGdPicturePDF.CreateFromText(PdfConformance.PDF_A_1b, 595, 842, 10, 10, 10, 10,
                                                                    TextAlignment.TextAlignmentNear, content, 12, "Arial",
                                                                    False, False, True, False) = GdPictureStatus.OK) AndAlso
                                    (oGdPicturePDF.SaveToFile("OCR.pdf", True, True) = GdPictureStatus.OK) Then
                                    MessageBox.Show("Done!", "OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Information)
                                Else
                                    MessageBox.Show("Error when saving the document: " + oGdPicturePDF.GetStat().ToString(), "OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
                                End If
                            End Using
                        End If
                        'Releasing the OCR result once its text is no longer needed, as the multipage sample does.
                        oGdPictureOCR.ReleaseOCRResult(resID)
                    Else
                        MessageBox.Show("Error when processing the OCR: " + oGdPictureOCR.GetStat().ToString(), "OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
                    End If
                End If
            End Using
        Finally
            'Releasing the image after the OCR engine that was using it has been disposed.
            oGdPictureImaging.ReleaseGdPictureImage(imageID)
        End Try
    End If
End Using
Copy Code
//We assume GdPicture has been correctly installed and unlocked.
//This sample runs OCR on a single-page image and creates a PDF/A-1b document named "OCR.pdf"
//from the recognized text.
//Each GdPicture object is created in a using statement so that it is disposed even if an exception is thrown.
using (GdPictureImaging oGdPictureImaging = new GdPictureImaging())
{
    //Selecting an image to process.
    int imageID = oGdPictureImaging.CreateGdPictureImageFromFile("");
    if (oGdPictureImaging.GetStat() == GdPictureStatus.OK)
    {
        try
        {
            using (GdPictureOCR oGdPictureOCR = new GdPictureOCR())
            {
                //Setting the OCR parameters.
                oGdPictureOCR.ResourceFolder = "C:\\Program Files\\GdPicture.NET 14\\Redist\\OCR";
                oGdPictureOCR.CharacterSet = "";
                //Setting up the language and the image.
                if ((oGdPictureOCR.AddLanguage(OCRLanguage.English) == GdPictureStatus.OK) &&
                    (oGdPictureOCR.SetImage(imageID) == GdPictureStatus.OK))
                {
                    //Running the OCR process.
                    string resID = oGdPictureOCR.RunOCR();
                    if (oGdPictureOCR.GetStat() == GdPictureStatus.OK)
                    {
                        //Getting the result as a text.
                        string content = oGdPictureOCR.GetOCRResultText(resID);
                        if (oGdPictureOCR.GetStat() == GdPictureStatus.OK)
                        {
                            //Creating a searchable PDF document.
                            using (GdPicturePDF oGdPicturePDF = new GdPicturePDF())
                            {
                                //Setting up your preferred page size and font parameters.
                                if ((oGdPicturePDF.CreateFromText(PdfConformance.PDF_A_1b, 595, 842, 10, 10, 10, 10,
                                                                    TextAlignment.TextAlignmentNear, content, 12, "Arial",
                                                                    false, false, true, false) == GdPictureStatus.OK) &&
                                    (oGdPicturePDF.SaveToFile("OCR.pdf", true, true) == GdPictureStatus.OK))
                                {
                                    MessageBox.Show("Done!", "OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Information);
                                }
                                else
                                {
                                    MessageBox.Show("Error when saving the document: " + oGdPicturePDF.GetStat().ToString(), "OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
                                }
                            }
                        }
                        //Releasing the OCR result once its text is no longer needed, as the multipage sample does.
                        oGdPictureOCR.ReleaseOCRResult(resID);
                    }
                    else
                    {
                        MessageBox.Show("Error when processing the OCR: " + oGdPictureOCR.GetStat().ToString(), "OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
                    }
                }
            }
        }
        finally
        {
            //Releasing the image after the OCR engine that was using it has been disposed.
            oGdPictureImaging.ReleaseGdPictureImage(imageID);
        }
    }
}