I am using ADR to split large PDFs created on a copier into individual documents; however, I get a lot of false positives where the scanned image looks nothing like the template image. (This happens on about 10-20% of non-matching pages.)
Code: Select all
' Splits gdIncomingPDF into separate PDF files. Every page whose ADR confidence
' exceeds TemplateConfidence is treated as the first page of a new document; the
' pages accumulated so far (StartPage..EndPage) are written out, tagged with the
' template ID and confidence of the page that opened that run.
' NOTE(review): ADRGetCloserTemplateForGdPictureImage appears to return the nearest
' template even for unrelated pages, so TemplateConfidence is the only guard against
' false positives here - confirm the threshold against real non-matching scans.
For pgTurner = 1 To gdIncomingPDF.GetPageCount
    gdIncomingPDF.SelectPage(pgTurner)
    Dim ImgNo As Integer = gdIncomingPDF.ExtractPageImage(1)

    Dim nCloserTemplate As Integer = 0
    Dim isMatch As Boolean = False
    ' A handle of 0 is assumed to mean the extraction failed (confirm against the
    ' GdPicture docs); such a page is treated as a non-matching continuation page.
    If ImgNo <> 0 Then
        Try
            ' Run the identification exactly once per page and reuse the result below.
            nCloserTemplate = gdImage.ADRGetCloserTemplateForGdPictureImage(ImgNo)
            isMatch = gdImage.ADRGetLastConfidence > TemplateConfidence
        Finally
            ' The extracted image is only needed for identification; release it so
            ' that large PDFs do not leak one image per page.
            gdImage.ReleaseGdPictureImage(ImgNo)
        End Try
    End If

    ' A matching page closes the document collected so far (if there is one).
    If isMatch AndAlso EndPage > 0 Then
        gdNewPDF.NewPDF()
        gdNewPDF.SetKeyWords(String.Format("TemplateID={0};Confidence={1}", PrevTemplateID, PrevTemplateConfidence))
        For pg = StartPage To EndPage
            gdNewPDF.ClonePage(gdIncomingPDF, pg)
        Next
        StartPage = pgTurner
        ' The file name is passed as an argument, not as part of the format string,
        ' so braces in the path cannot break String.Format.
        gdNewPDF.SaveToFile(String.Format("{0}_{1}.pdf", filename, DocNo))
        gdNewPDF.CloseDocument()
        DocNo += 1
    End If

    ' Remember which template opened the document now being collected.
    If isMatch Then
        ' First(...) still throws when the identified template is not in
        ' CurrentTemplates, exactly as the original lookup did.
        Dim tmpTemplate As TemplateItems = CurrentTemplates.First(Function(tmp As TemplateItems) tmp.templateID = nCloserTemplate)
        PrevTemplateConfidence = gdImage.ADRGetLastConfidence
        PrevTemplateID = tmpTemplate.templateID
    End If

    EndPage = pgTurner
Next

If PrevTemplateID = -1 Then
    ' No page matched any template: nothing is split off, the item is only flagged.
    tmpItem.Processed = True
Else
    ' Write out the final run of pages, which the loop above never closes.
    gdNewPDF.NewPDF()
    gdNewPDF.SetKeyWords(String.Format("TemplateID={0};Confidence={1}", PrevTemplateID, PrevTemplateConfidence))
    For pg = StartPage To EndPage
        gdNewPDF.ClonePage(gdIncomingPDF, pg)
    Next
    gdNewPDF.SaveToFile(String.Format("{0}_{1}.pdf", filename, DocNo))
    gdNewPDF.CloseDocument()
End If
The attached files are heavily redacted, but included here so you can get a feel for the different layouts of the documents.
Thanks
Shane