Reference Guide
Programming / OCR / Creating a searchable PDF (PDF/A) document from the content of the document feeder of a scanner
In This Topic
    Creating a searchable PDF (PDF/A) document from the content of the document feeder of a scanner
    In This Topic

    This example shows how to convert a physical document, fed through the scanner's document feeder, into a searchable PDF/A-1b document (PDF/A-1b is based on PDF 1.4).

    First, declare the required objects and set up the scanner (the TWAIN protocol is used here). Then create the PDF document that will receive the pages. Next, acquire the image-based pages from the scanner one by one, add each of them to the PDF document, and OCR each page right after it has been added. When all pages have been processed, save the document, close the TWAIN source and release the resources.

    This example makes use of the optional GdPicture.NET Managed PDF Plugin.
    Copy Code
    'We assume that GdPicture has been correctly installed and unlocked.
    '
    'Acquires pages from a TWAIN source, appends each acquired image as a page of a new
    'PDF/A-1b document, runs OCR on every added page and finally saves the result to "pdfocr.pdf".
    'A per-page status line is collected in "message" and displayed when the job is finished.
    Dim ImageID As Integer = 0
    Dim bContinue As Boolean = False
    Dim message As String = "Done !" + vbCrLf
    Dim oGdPictureImaging As New GdPictureImaging()
    Dim oGdPicturePDF As New GdPicturePDF()
    'The user selects a source first; the source is then opened. Both steps must succeed.
    If (oGdPictureImaging.TwainSelectSource(Me.Handle) AndAlso
        oGdPictureImaging.TwainOpenDefaultSource(Me.Handle)) Then
        oGdPictureImaging.TwainSetAutoFeed(True) 'Enabling AutoFeed option.
        oGdPictureImaging.TwainSetAutoScan(True) 'Achieving the maximum scanning rate.
        oGdPictureImaging.TwainSetResolution(200)
        oGdPictureImaging.TwainSetPixelType(TwainPixelType.TWPT_BW) 'Setting the image to be Black & White.
        oGdPictureImaging.TwainSetBitDepth(1) '1 bpp
        'Creating the destination PDF document.
        oGdPicturePDF.NewPDF(PdfConformance.PDF_A_1b)
        Do
            ImageID = oGdPictureImaging.TwainAcquireToGdPictureImage(Me.Handle)
            'Only a successfully acquired image is added to the document.
            If oGdPictureImaging.GetStat() = GdPictureStatus.OK Then
                'Creating an image-based page in the destination document.
                If oGdPicturePDF.AddImageFromGdPictureImage(ImageID, False, False) = GdPictureStatus.OK Then
                    'OCR-ing the currently created page, if the creation has been successful.
                    'NOTE: the OCR resource folder below is installation-specific - adjust it to your setup.
                    oGdPicturePDF.OcrPage("eng", "C:\GdPicture.NET 14\Redist\OCR", "", 300)
                End If
                'GetStat() reports the status of the last PDF operation (adding the page or OCR-ing it).
                message = message + "Page nr." + oGdPicturePDF.GetCurrentPage().ToString() + " - status: " + oGdPicturePDF.GetStat().ToString() + vbCrLf
                'Releasing the image.
                oGdPictureImaging.ReleaseGdPictureImage(ImageID)
            End If
            'Once the TWAIN state is at or below TWAIN_SOURCE_ENABLED the user decides whether to scan more;
            'otherwise the loop keeps acquiring (presumably further transfers are still pending - confirm in the TWAIN docs).
            If oGdPictureImaging.TwainGetState() <= TwainStatus.TWAIN_SOURCE_ENABLED Then
                If MessageBox.Show("Do you want to acquire other pages?", "", MessageBoxButtons.YesNo, MessageBoxIcon.Question) = DialogResult.Yes Then
                    bContinue = True
                Else
                    bContinue = False
                End If
            Else
                bContinue = True
            End If
        Loop While bContinue
        oGdPicturePDF.SaveToFile("pdfocr.pdf", True)
        message = message + "Saving - status: " + oGdPicturePDF.GetStat().ToString()
        oGdPictureImaging.TwainCloseSource()
        MessageBox.Show(message, "TWAIN + OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Information)
    Else
        'VB string literals have no "\n" escape, so vbCrLf is used for the line breaks, and the
        'TWAIN codes are converted explicitly and joined with "&" to avoid String + non-String addition.
        MessageBox.Show("Can't open the default source." & vbCrLf & "result code: " & oGdPictureImaging.TwainGetLastResultCode().ToString() &
                                   vbCrLf & "condition code: " & oGdPictureImaging.TwainGetLastConditionCode().ToString(), "TWAIN + OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
    End If
    'Releasing the resources held by both GdPicture objects.
    oGdPictureImaging.Dispose()
    oGdPicturePDF.Dispose()
    Copy Code
    //We assume that GdPicture has been correctly installed and unlocked.
    //
    //Acquires pages from a TWAIN source, appends each acquired image as a page of a new
    //PDF/A-1b document, runs OCR on every added page and finally saves the result to "pdfocr.pdf".
    //A per-page status line is collected in "message" and displayed when the job is finished.
    int ImageID = 0;
    bool bContinue = false;
    string message = "Done !\n";
    GdPictureImaging oGdPictureImaging = new GdPictureImaging();
    GdPicturePDF oGdPicturePDF = new GdPicturePDF();
    //The user selects a source first; the source is then opened. Both steps must succeed.
    //The source is opened exactly once here, so it is not opened again inside the block.
    if (oGdPictureImaging.TwainSelectSource(this.Handle) &&
        oGdPictureImaging.TwainOpenDefaultSource(this.Handle))
    {
        oGdPictureImaging.TwainSetAutoFeed(true); //Enabling AutoFeed option.
        oGdPictureImaging.TwainSetAutoScan(true); //Achieving the maximum scanning rate.
        oGdPictureImaging.TwainSetResolution(200);
        oGdPictureImaging.TwainSetPixelType(TwainPixelType.TWPT_BW); //Setting the image to be Black & White.
        oGdPictureImaging.TwainSetBitDepth(1); //1 bpp
        oGdPicturePDF.NewPDF(PdfConformance.PDF_A_1b); //Creating the destination PDF document.
        do
        {
            ImageID = oGdPictureImaging.TwainAcquireToGdPictureImage(this.Handle);
            //Only a successfully acquired image is added to the document.
            if (oGdPictureImaging.GetStat() == GdPictureStatus.OK)
            {
                //Creating an image-based page in the destination document.
                if (oGdPicturePDF.AddImageFromGdPictureImage(ImageID, false, false) == GdPictureStatus.OK)
                {
                    //OCR-ing the currently created page, if the creation has been successful.
                    //NOTE: the OCR resource folder below is installation-specific - adjust it to your setup.
                    oGdPicturePDF.OcrPage("eng", "C:\\GdPicture.NET 14\\Redist\\OCR", "", 300);
                }
                //GetStat() reports the status of the last PDF operation (adding the page or OCR-ing it).
                message = message + "Page nr." + oGdPicturePDF.GetCurrentPage().ToString() + " - status: " + oGdPicturePDF.GetStat().ToString() + "\n";
                //Releasing the image.
                oGdPictureImaging.ReleaseGdPictureImage(ImageID);
            }
            //Once the TWAIN state is at or below TWAIN_SOURCE_ENABLED the user decides whether to scan more;
            //otherwise the loop keeps acquiring (presumably further transfers are still pending - confirm in the TWAIN docs).
            if (oGdPictureImaging.TwainGetState() <= TwainStatus.TWAIN_SOURCE_ENABLED)
            {
                if (MessageBox.Show("Do you want to acquire other pages?", "", MessageBoxButtons.YesNo, MessageBoxIcon.Question) == DialogResult.Yes)
                {
                    bContinue = true;
                }
                else
                {
                    bContinue = false;
                }
            }
            else
            {
                bContinue = true;
            }
        } while (bContinue);
        oGdPicturePDF.SaveToFile("pdfocr.pdf", true);
        message = message + "Saving - status: " + oGdPicturePDF.GetStat().ToString();
        oGdPictureImaging.TwainCloseSource();
        MessageBox.Show(message, "TWAIN + OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
    else
    {
        MessageBox.Show("Can't open the default source.\nresult code: " + oGdPictureImaging.TwainGetLastResultCode() +
                                   "\ncondition code: " + oGdPictureImaging.TwainGetLastConditionCode(), "TWAIN + OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
    //Releasing the resources held by both GdPicture objects.
    oGdPictureImaging.Dispose();
    oGdPicturePDF.Dispose();