I have modified your sample for creating searchable PDF from PDF image. My application requires that I essentially crop the input document sheets into four quadrants because each sheet has four miniaturized pages (I tried to include the input file but get a message stating that PDF files cannot be attached). I programmatically extract the four quadrants and build a new file with four times the number of sheets as the input document. After several hours of frustration wherein everything was working fine except that at the beginning of each pass through the loop the first of the four quadrants for the sheet being processed was the upper left quadrant (Q1) from the first sheet instead of the upper left quadrant (Q1) from the sheet being processed. To workaround this problem I inserted the code highlighted in the following submission. Have I overlooked something?
Thanks, and I really like this product!
Dave
Code: Select all
Button1.Enabled = False
If TextBox1.Text <> "" Then
'
Dim LeftSideLeft As Integer
Dim RightSideLeft As Integer
Dim TopSideTop As Integer
Dim BottomSideTop As Integer
'
Dim DeltaWidth As Integer
Dim DeltaHeight As Integer
'
Dim InputFilePath As String = TextBox1.Text
Dim OutputFilePath As String = Mid(InputFilePath, 1, Len(InputFilePath) - 4) + "_ocr.pdf"
Dim ImageID As Integer
Dim Dict As GdPicture.TesseractDictionary
Dim InputPDFID As Integer
Dim PdfID As Integer
Dim Resolution As Integer = Val(TextBox2.Text)
oGdPictureImaging.SetLicenseNumberUpgrade("XXX,XXX")
Dict = GdPicture.TesseractDictionary.TesseractDictionaryEnglish
PdfID = oGdPictureImaging.PdfOCRStart(OutputFilePath, True, "", "", "", "", "")
InputPDFID = oGdPictureImaging.PdfReaderLoadFromFile(TextBox1.Text)
ImageID = oGdPictureImaging.PdfReaderRenderPageToGdPictureImage(InputPDFID, Resolution, True)
If ImageID <> 0 Then
LeftSideLeft = 120
TopSideTop = 220
DeltaHeight = 30
DeltaWidth = 30
'
' Get standard width and height
'
If CutWidth = 0 Then
CutWidth = (oGdPictureImaging.GetWidth(ImageID) / 2) - LeftSideLeft + DeltaWidth
End If
If CutHeight = 0 Then
CutHeight = CInt(oGdPictureImaging.GetHeight(ImageID) / 2) - TopSideTop + DeltaHeight
End If
'
RightSideLeft = LeftSideLeft + CutWidth - 80
BottomSideTop = TopSideTop + CutHeight - 120
End If
oGdPictureImaging.ReleaseGdPictureImage(ImageID)
'
If InputPDFID <> 0 Then
ProgressBar1.Maximum = oGdPictureImaging.PdfReaderGetPageCount(InputPDFID)
For i As Integer = 1 To oGdPictureImaging.PdfReaderGetPageCount(InputPDFID)
'
'Q1
'
' Start of workaround''''''''''''''''''''''''''''''''''''
'
' Had to use the following to avoid inserting the first sheet Q1 at the beginning of each
' pass through. Posted to GdPicture on 01/08/11.
'
ImageID = oGdPictureImaging.PdfReaderRenderPageToGdPictureImage(InputPDFID, Resolution, True)
oGdPictureImaging.PdfReaderSelectPage(InputPDFID, i)
oGdPictureImaging.ReleaseGdPictureImage(ImageID)
'
' End of workaround''''''''''''''''''''''''''''''''''''''
'
ImageID = oGdPictureImaging.PdfReaderRenderPageToGdPictureImage(InputPDFID, Resolution, True)
oGdPictureImaging.PdfReaderSelectPage(InputPDFID, i)
oGdPictureImaging.ConvertTo1Bpp(ImageID)
oGdPictureImaging.Crop(ImageID, LeftSideLeft, TopSideTop, CutWidth, CutHeight)
oGdPictureImaging.PdfAddGdPictureImageToPdfOCR(PdfID, ImageID, Dict, TextBox3.Text, "")
oGdPictureImaging.ReleaseGdPictureImage(ImageID)
'
' Q4
'
ImageID = oGdPictureImaging.PdfReaderRenderPageToGdPictureImage(InputPDFID, Resolution, True)
oGdPictureImaging.PdfReaderSelectPage(InputPDFID, i)
oGdPictureImaging.ConvertTo1Bpp(ImageID)
oGdPictureImaging.Crop(ImageID, LeftSideLeft, BottomSideTop, CutWidth, CutHeight)
oGdPictureImaging.PdfAddGdPictureImageToPdfOCR(PdfID, ImageID, Dict, TextBox3.Text, "")
oGdPictureImaging.ReleaseGdPictureImage(ImageID)
'
' Q2
'
ImageID = oGdPictureImaging.PdfReaderRenderPageToGdPictureImage(InputPDFID, Resolution, True)
oGdPictureImaging.PdfReaderSelectPage(InputPDFID, i)
oGdPictureImaging.ConvertTo1Bpp(ImageID)
oGdPictureImaging.Crop(ImageID, RightSideLeft, TopSideTop, CutWidth, CutHeight)
oGdPictureImaging.PdfAddGdPictureImageToPdfOCR(PdfID, ImageID, Dict, TextBox3.Text, "")
oGdPictureImaging.ReleaseGdPictureImage(ImageID)
'
' Q3
'
ImageID = oGdPictureImaging.PdfReaderRenderPageToGdPictureImage(InputPDFID, Resolution, True)
oGdPictureImaging.PdfReaderSelectPage(InputPDFID, i)
oGdPictureImaging.ConvertTo1Bpp(ImageID)
oGdPictureImaging.Crop(ImageID, RightSideLeft, BottomSideTop, CutWidth, CutHeight)
oGdPictureImaging.PdfAddGdPictureImageToPdfOCR(PdfID, ImageID, Dict, TextBox3.Text, "")
oGdPictureImaging.ReleaseGdPictureImage(ImageID)
'
If oGdPictureImaging.GetStat <> GdPicture.GdPictureStatus.OK Then
MsgBox("OCR problem on page " + Str(i) + ". Error: " + oGdPictureImaging.GetStat.ToString)
End If
ProgressBar1.Value = i
Application.DoEvents()
Next i
MsgBox("Done ! See file " & OutputFilePath)
Else
MsgBox("Can't open file: " & InputFilePath)
End If
oGdPictureImaging.PdfOCRStop(PdfID)
Else
MsgBox("Please, select a file to convert.")
End If
Button1.Enabled = True