In This Topic
Programming / Document Conversion / Converting a TIFF image to a searchable PDF document using multithreading

Converting a TIFF image to a searchable PDF document using multithreading

In This Topic

Converting scanned images to searchable PDF documents is a quick and easy process if you take advantage of the GdPicturePDF class and its OCR methods in a multithreaded environment. You can simply run OCR on any converted document, and the recognized data will be added to the page as invisible text.

  • The first example shows a quick and easy two-step process: the TIFF file is first converted to PDF using the GdPictureDocumentConverter class, and OCR is then run on the resulting document.
    Copy Code
    'We assume that GdPicture has been correctly installed and unlocked.
    
    Dim oGdPicturePDF As New GdPicturePDF()
    'Adding the OcrPagesDone event.
    'AddHandler is the only subscription needed here: a Handles clause works only with a
    'WithEvents variable, and combining both would register the same handler twice.
    AddHandler oGdPicturePDF.OcrPagesDone, AddressOf OcrPagesDone

    'Event handler for OcrPagesDone: saves the OCR-ed document to "output.pdf" and reports the outcome.
    'The status argument supplied by the event is not inspected here.
    Sub OcrPagesDone(status As GdPictureStatus)
        'Saving the resulting document when the OCR process is finished.
        If oGdPicturePDF.SaveToFile("output.pdf") = GdPictureStatus.OK Then
            MessageBox.Show("PDF: The OCR-ed file has been saved successfully.", "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Information)
        Else
            'GetStat() is queried right after the failed save to report its status.
            MessageBox.Show("PDF: The OCR-ed file has failed to save. Status: " + oGdPicturePDF.GetStat().ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
        End If
    End Sub
    
    'Status of the most recent GdPicture call; the OCR step that follows reads it as well.
    Dim status As GdPictureStatus = GdPictureStatus.OK
    'Step 1: load "input.tif" with the document converter and write it out as "output.pdf".
    Using oConverter As New GdPictureDocumentConverter()
        status = oConverter.LoadFromFile("input.tif", GdPicture14.DocumentFormat.DocumentFormatTIFF)
        If status <> GdPictureStatus.OK Then
            'The source image could not be loaded; nothing is saved.
            MessageBox.Show("Converter: The TIF file has failed to load. Status: " & status.ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
        Else
            status = oConverter.SaveAsPDF("output.pdf", PdfConformance.PDF)
            If status <> GdPictureStatus.OK Then
                MessageBox.Show("Converter: The PDF file has failed to save. Status: " & status.ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
            Else
                MessageBox.Show("Converter: The PDF file has been saved successfully.", "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Information)
            End If
        End If
    End Using
    'Second step - Processing OCR on the created PDF document.
    'Runs only when the preceding conversion left status at OK.
    If status = GdPictureStatus.OK Then
        'The document is loaded into memory (True) because the OcrPagesDone handler saves the result
        'back to the same "output.pdf"; per the GdPicture SaveToFile documentation, the currently
        'opened file can be overwritten only when it was loaded into memory.
        status = oGdPicturePDF.LoadFromFile("output.pdf", True)
        If status = GdPictureStatus.OK Then
            'Runs OCR on all pages ("*") using the "eng" language resources from the given folder.
            status = oGdPicturePDF.OcrPages("*", 0, "eng", "C:\GdPicture.NET 14\Redist\OCR", "", 300, 2, True)
            If status = GdPictureStatus.OK Then
                MessageBox.Show("PDF: The OCR process has been finished successfully.", "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Information)
            Else
                MessageBox.Show("PDF: The OCR process has failed. Status: " + status.ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
            End If
        Else
            MessageBox.Show("PDF: The PDF file has failed to load. Status: " + status.ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
        End If
    End If
    
    'Releasing resources only if all processes are finished.
    oGdPicturePDF.Dispose()
    Copy Code
    //We assume that GdPicture has been correctly installed and unlocked.
    
    var oGdPicturePDF = new GdPicturePDF();
    //Subscribe the local handler below to the OcrPagesDone event.
    oGdPicturePDF.OcrPagesDone += OcrPagesDone;

    //Handler for OcrPagesDone: writes the OCR-ed document to "output.pdf" and reports the outcome.
    //The status argument supplied by the event is not inspected.
    void OcrPagesDone(GdPictureStatus status)
    {
        bool saved = oGdPicturePDF.SaveToFile("output.pdf") == GdPictureStatus.OK;
        if (!saved)
        {
            //GetStat() is queried right after the failed save to report its status.
            MessageBox.Show("PDF: The OCR-ed file has failed to save. Status: " + oGdPicturePDF.GetStat().ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
            return;
        }

        MessageBox.Show("PDF: The OCR-ed file has been saved successfully.", "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
    
    //Status of the most recent GdPicture call; the OCR step that follows reads it as well.
    GdPictureStatus status = GdPictureStatus.OK;
    //Step 1: load "input.tif" with the document converter and write it out as "output.pdf".
    using (var oConverter = new GdPictureDocumentConverter())
    {
        status = oConverter.LoadFromFile("input.tif", GdPicture14.DocumentFormat.DocumentFormatTIFF);
        if (status != GdPictureStatus.OK)
        {
            //The source image could not be loaded; nothing is saved.
            MessageBox.Show("Converter: The TIF file has failed to load. Status: " + status.ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
        else
        {
            status = oConverter.SaveAsPDF("output.pdf", PdfConformance.PDF);
            if (status != GdPictureStatus.OK)
            {
                MessageBox.Show("Converter: The PDF file has failed to save. Status: " + status.ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
            }
            else
            {
                MessageBox.Show("Converter: The PDF file has been saved successfully.", "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Information);
            }
        }
    }
    //Second step - Processing OCR on the created PDF document.
    //Runs only when the preceding conversion left status at OK.
    if (status == GdPictureStatus.OK)
    {
        //The document is loaded through oGdPicturePDF, the object whose OcrPagesDone event is handled.
        //It is loaded into memory (true) because the OcrPagesDone handler saves the result back to the
        //same "output.pdf"; per the GdPicture SaveToFile documentation, the currently opened file can
        //be overwritten only when it was loaded into memory.
        status = oGdPicturePDF.LoadFromFile("output.pdf", true);
        if (status == GdPictureStatus.OK)
        {
            //Runs OCR on all pages ("*") using the "eng" language resources from the given folder.
            status = oGdPicturePDF.OcrPages("*", 0, "eng", "C:\\GdPicture.NET 14\\Redist\\OCR", "", 300, 2, true);
            if (status == GdPictureStatus.OK)
            {
                MessageBox.Show("PDF: The OCR process has been finished successfully.", "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Information);
            }
            else
            {
                MessageBox.Show("PDF: The OCR process has failed. Status: " + status.ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
            }
        }
        else
        {
            MessageBox.Show("PDF: The PDF file has failed to load. Status: " + status.ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
    }
    
    //Releasing resources only if all processes are finished.
    oGdPicturePDF.Dispose();
  •  The second example demonstrates another approach: the TIFF image is loaded through the GdPictureImaging class and added to a new PDF document page by page before OCR is run.
    Copy Code
    'We assume that GdPicture has been correctly installed and unlocked.
    
    Dim oGdPicturePDF As New GdPicturePDF()
    'Adding the OcrPagesDone event.
    'AddHandler is the only subscription needed here: a Handles clause works only with a
    'WithEvents variable, and combining both would register the same handler twice.
    AddHandler oGdPicturePDF.OcrPagesDone, AddressOf OcrPagesDone
    
    'Event handler for OcrPagesDone: saves the OCR-ed document to "output.pdf" and reports the outcome.
    'The status argument supplied by the event is not inspected here.
    Sub OcrPagesDone(status As GdPictureStatus)
        'Saving the resulting document when the OCR process is finished.
        If oGdPicturePDF.SaveToFile("output.pdf") = GdPictureStatus.OK Then
            MessageBox.Show("The resulting document is saved.", "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Information)
        Else
            'GetStat() is queried right after the failed save to report its status.
            MessageBox.Show("The resulting document can't be saved. Status: " + oGdPicturePDF.GetStat().ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
        End If
    End Sub
    
    'Loads "image.tif", adds every page of it to a new PDF document and runs OCR on the result.
    Using oGdPictureImaging As New GdPictureImaging()
        'Loading an image from a file.
        Dim imageId As Integer = oGdPictureImaging.CreateGdPictureImageFromFile("image.tif")
        If oGdPictureImaging.GetStat() = GdPictureStatus.OK Then
            'Result of the most recent imaging or PDF call. Tracking it here, instead of asking only
            'oGdPicturePDF.GetStat(), keeps a failed TiffSelectPage from being mistaken for success
            '(the PDF object's own status stays OK when a page merely could not be selected).
            Dim buildStatus As GdPictureStatus = oGdPicturePDF.NewPDF()
            If buildStatus = GdPictureStatus.OK Then
                If oGdPictureImaging.TiffIsMultiPage(imageId) = False Then
                    'One-page tiff image.
                    'Adding an image as a resource and drawing it onto a new page.
                    oGdPicturePDF.AddImageFromGdPictureImage(imageId, False, False)
                    buildStatus = oGdPicturePDF.GetStat()
                Else
                    'Multi-page tiff image.
                    Dim NumberOfPages As Integer = oGdPictureImaging.TiffGetPageCount(imageId)
                    'Loop through pages; stop at the first page that cannot be selected or added.
                    For i As Integer = 1 To NumberOfPages
                        'Selecting each page in the tiff file.
                        buildStatus = oGdPictureImaging.TiffSelectPage(imageId, i)
                        If buildStatus <> GdPictureStatus.OK Then
                            Exit For
                        End If
                        'Adding the selected tiff page as a resource to a PDF document and drawing it on a new page.
                        oGdPicturePDF.AddImageFromGdPictureImage(imageId, False, False)
                        buildStatus = oGdPicturePDF.GetStat()
                        If buildStatus <> GdPictureStatus.OK Then
                            Exit For
                        End If
                    Next
                End If
                'OCR is run only when every page was selected and added without an error.
                If buildStatus = GdPictureStatus.OK Then
                    buildStatus = oGdPicturePDF.OcrPages("*", 0, "eng", "C:\GdPicture.NET 14\Redist\OCR", "", 300, 2, True)
                    If buildStatus = GdPictureStatus.OK Then
                        MessageBox.Show("OcrPages done! Status: " + buildStatus.ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Information)
                    End If
                End If
            End If
            'Reports the first failure encountered, or OK when the whole chain succeeded.
            MessageBox.Show("Finished! Status: " + buildStatus.ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Information)
            'Clearing resource.
            oGdPictureImaging.ReleaseGdPictureImage(imageId)
        Else
            MessageBox.Show("The image file can't be loaded. Status: " + oGdPictureImaging.GetStat().ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
        End If
    End Using
    
    'Releasing resources only if all processes are finished.
    oGdPicturePDF.Dispose()
    Copy Code
    //We assume that GdPicture has been correctly installed and unlocked.
    
    var oGdPicturePDF = new GdPicturePDF();
    //Subscribe the local handler below to the OcrPagesDone event.
    oGdPicturePDF.OcrPagesDone += OcrPagesDone;

    //Handler for OcrPagesDone: writes the OCR-ed document to "output.pdf" and reports the outcome.
    //The status argument supplied by the event is not inspected.
    void OcrPagesDone(GdPictureStatus status)
    {
        bool saved = oGdPicturePDF.SaveToFile("output.pdf") == GdPictureStatus.OK;
        if (!saved)
        {
            //GetStat() is queried right after the failed save to report its status.
            MessageBox.Show("The resulting document can't be saved. Status: " + oGdPicturePDF.GetStat().ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
            return;
        }

        MessageBox.Show("The resulting document is saved.", "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
    
    //Loads "image.tif", adds every page of it to a new PDF document and runs OCR on the result.
    using (GdPictureImaging oGdPictureImaging = new GdPictureImaging())
    {
        //Loading an image from a file.
        int imageId = oGdPictureImaging.CreateGdPictureImageFromFile("image.tif");
        if (oGdPictureImaging.GetStat() == GdPictureStatus.OK)
        {
            //Result of the most recent imaging or PDF call. Tracking it here, instead of asking only
            //oGdPicturePDF.GetStat(), keeps a failed TiffSelectPage from being mistaken for success
            //(the PDF object's own status stays OK when a page merely could not be selected).
            GdPictureStatus buildStatus = oGdPicturePDF.NewPDF();
            if (buildStatus == GdPictureStatus.OK)
            {
                if (oGdPictureImaging.TiffIsMultiPage(imageId) == false) //One-page tiff image.
                {
                    //Adding an image as a resource and drawing it onto a new page.
                    oGdPicturePDF.AddImageFromGdPictureImage(imageId, false, false);
                    buildStatus = oGdPicturePDF.GetStat();
                }
                else //Multi-page tiff image.
                {
                    int NumberOfPages = oGdPictureImaging.TiffGetPageCount(imageId);
                    //Loop through pages; stop at the first page that cannot be selected or added.
                    for (int i = 1; i <= NumberOfPages && buildStatus == GdPictureStatus.OK; i++)
                    {
                        //Selecting each page in a tiff file.
                        buildStatus = oGdPictureImaging.TiffSelectPage(imageId, i);
                        if (buildStatus == GdPictureStatus.OK)
                        {
                            //Adding the selected tiff page as a resource to a PDF document and drawing it on a new page.
                            oGdPicturePDF.AddImageFromGdPictureImage(imageId, false, false);
                            buildStatus = oGdPicturePDF.GetStat();
                        }
                    }
                }
                //OCR is run only when every page was selected and added without an error.
                if (buildStatus == GdPictureStatus.OK)
                {
                    buildStatus = oGdPicturePDF.OcrPages("*", 0, "eng", @"C:\GdPicture.NET 14\Redist\OCR", "", 300, 2, true);
                    if (buildStatus == GdPictureStatus.OK)
                    {
                        MessageBox.Show("OcrPages done! Status: " + buildStatus.ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Information);
                    }
                }
            }
            //Reports the first failure encountered, or OK when the whole chain succeeded.
            MessageBox.Show("Finished! Status: " + buildStatus.ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Information);
            //Clearing resource.
            oGdPictureImaging.ReleaseGdPictureImage(imageId);
        }
        else
        {
            MessageBox.Show("The image file can't be loaded. Status: " + oGdPictureImaging.GetStat().ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
    }
    
    //Releasing resources only if all processes are finished.
    oGdPicturePDF.Dispose();