In This Topic
Programming / OCR / Creating a searchable PDF (PDF/A) document from the content of the document feeder of a scanner

Creating a searchable PDF (PDF/A) document from the content of the document feeder of a scanner

In This Topic

This example shows how to convert a physical document, scanned through the document feeder of a scanner, into a searchable PDF/A-1b document (PDF/A-1 is based on PDF 1.4).

First, declare the required objects and set up the scanner (the TWAIN protocol is used here). Then create the PDF document that will receive the pages. Read the image-based pages from the scanner one by one, add each of them to the PDF document and run OCR on it right away. When all pages have been processed, save the PDF document, close the TWAIN source and release the resources.

Copy Code (VB.NET)
'We assume that GdPicture has been correctly installed and unlocked.
'
'Scans every sheet delivered by the document feeder of a TWAIN scanner, adds each
'acquired image as a new page of a PDF/A-1b document, OCRs that page so that the
'resulting document is searchable, and finally saves the document as "pdfocr.pdf".
'A summary of the per-page statuses is shown in a message box at the end.
Dim ImageID As Integer = 0
Dim bContinue As Boolean = False
Dim message As String = "Done !" & vbCrLf
'The Using block disposes both GdPicture objects even when an exception is thrown.
Using oGdPictureImaging As New GdPictureImaging(), oGdPicturePDF As New GdPicturePDF()
    If (oGdPictureImaging.TwainSelectSource(Me.Handle) AndAlso
        oGdPictureImaging.TwainOpenDefaultSource(Me.Handle)) Then
        oGdPictureImaging.TwainSetAutoFeed(True) 'Enabling AutoFeed option.
        oGdPictureImaging.TwainSetAutoScan(True) 'Achieving the maximum scanning rate.
        oGdPictureImaging.TwainSetResolution(200)
        oGdPictureImaging.TwainSetPixelType(TwainPixelType.TWPT_BW) 'Setting the image to be Black & White.
        oGdPictureImaging.TwainSetBitDepth(1) '1 bpp
        'Creating the destination PDF document.
        oGdPicturePDF.NewPDF(PdfConformance.PDF_A_1b)
        Do
            ImageID = oGdPictureImaging.TwainAcquireToGdPictureImage(Me.Handle)
            If oGdPictureImaging.GetStat() = GdPictureStatus.OK Then
                'Creating an image-based page in the destination document.
                'drawImage = True draws the image onto a newly created page; the method
                'returns the image resource name, so success is checked through GetStat().
                oGdPicturePDF.AddImageFromGdPictureImage(ImageID, False, True)
                If oGdPicturePDF.GetStat() = GdPictureStatus.OK Then
                    'OCR-ing the currently created page, if the creation has been successful.
                    oGdPicturePDF.OcrPage("eng", "C:\GdPicture.NET 14\Redist\OCR", "", 300)
                End If
                'GetStat() reports the last PDF operation: the OCR if the page was created, the page creation otherwise.
                message = message & "Page nr." & oGdPicturePDF.GetCurrentPage().ToString() & " - status: " & oGdPicturePDF.GetStat().ToString() & vbCrLf
                'Releasing the image.
                oGdPictureImaging.ReleaseGdPictureImage(ImageID)
            End If
            If oGdPictureImaging.TwainGetState() <= TwainStatus.TWAIN_SOURCE_ENABLED Then
                'No transfer is pending any more: let the user decide whether to scan another batch.
                bContinue = (MessageBox.Show("Do you want to acquire other pages?", "", MessageBoxButtons.YesNo, MessageBoxIcon.Question) = DialogResult.Yes)
            Else
                'The source still has pages to transfer: keep acquiring without asking.
                bContinue = True
            End If
        Loop While bContinue
        oGdPicturePDF.SaveToFile("pdfocr.pdf", True)
        message = message & "Saving - status: " & oGdPicturePDF.GetStat().ToString()
        oGdPictureImaging.TwainCloseSource()
        MessageBox.Show(message, "TWAIN + OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Information)
    Else
        'vbCrLf is used for line breaks: "\n" is not an escape sequence in VB string literals.
        MessageBox.Show("Can't open the default source." & vbCrLf &
                        "result code: " & oGdPictureImaging.TwainGetLastResultCode().ToString() & vbCrLf &
                        "condition code: " & oGdPictureImaging.TwainGetLastConditionCode().ToString(),
                        "TWAIN + OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
    End If
End Using
Copy Code (C#)
//We assume that GdPicture has been correctly installed and unlocked.
//
//Scans every sheet delivered by the document feeder of a TWAIN scanner, adds each
//acquired image as a new page of a PDF/A-1b document, OCRs that page so that the
//resulting document is searchable, and finally saves the document as "pdfocr.pdf".
//A summary of the per-page statuses is shown in a message box at the end.
int ImageID = 0;
bool bContinue = false;
string message = "Done !\n";
//The using statements dispose both GdPicture objects even when an exception is thrown.
using (GdPictureImaging oGdPictureImaging = new GdPictureImaging())
using (GdPicturePDF oGdPicturePDF = new GdPicturePDF())
{
    //The source is opened once, as part of this condition; it must not be opened again below.
    if (oGdPictureImaging.TwainSelectSource(this.Handle) &&
        oGdPictureImaging.TwainOpenDefaultSource(this.Handle))
    {
        oGdPictureImaging.TwainSetAutoFeed(true); //Enabling AutoFeed option.
        oGdPictureImaging.TwainSetAutoScan(true); //Achieving the maximum scanning rate.
        oGdPictureImaging.TwainSetResolution(200);
        oGdPictureImaging.TwainSetPixelType(TwainPixelType.TWPT_BW); //Setting the image to be Black & White.
        oGdPictureImaging.TwainSetBitDepth(1); //1 bpp
        oGdPicturePDF.NewPDF(PdfConformance.PDF_A_1b); //Creating the destination PDF document.
        do
        {
            ImageID = oGdPictureImaging.TwainAcquireToGdPictureImage(this.Handle);
            if (oGdPictureImaging.GetStat() == GdPictureStatus.OK)
            {
                //Creating an image-based page in the destination document.
                //drawImage = true draws the image onto a newly created page; the method
                //returns the image resource name, so success is checked through GetStat().
                oGdPicturePDF.AddImageFromGdPictureImage(ImageID, false, true);
                if (oGdPicturePDF.GetStat() == GdPictureStatus.OK)
                {
                    //OCR-ing the currently created page, if the creation has been successful.
                    oGdPicturePDF.OcrPage("eng", "C:\\GdPicture.NET 14\\Redist\\OCR", "", 300);
                }
                //GetStat() reports the last PDF operation: the OCR if the page was created, the page creation otherwise.
                message = message + "Page nr." + oGdPicturePDF.GetCurrentPage().ToString() + " - status: " + oGdPicturePDF.GetStat().ToString() + "\n";
                //Releasing the image.
                oGdPictureImaging.ReleaseGdPictureImage(ImageID);
            }
            if (oGdPictureImaging.TwainGetState() <= TwainStatus.TWAIN_SOURCE_ENABLED)
            {
                //No transfer is pending any more: let the user decide whether to scan another batch.
                bContinue = MessageBox.Show("Do you want to acquire other pages?", "", MessageBoxButtons.YesNo, MessageBoxIcon.Question) == DialogResult.Yes;
            }
            else
            {
                //The source still has pages to transfer: keep acquiring without asking.
                bContinue = true;
            }
        } while (bContinue);
        oGdPicturePDF.SaveToFile("pdfocr.pdf", true);
        message = message + "Saving - status: " + oGdPicturePDF.GetStat().ToString();
        oGdPictureImaging.TwainCloseSource();
        MessageBox.Show(message, "TWAIN + OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
    else
    {
        MessageBox.Show("Can't open the default source.\nresult code: " + oGdPictureImaging.TwainGetLastResultCode() +
                        "\ncondition code: " + oGdPictureImaging.TwainGetLastConditionCode(), "TWAIN + OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}