How do I extract text from an OCRed PDF into a string using C#?
I tried to use this code, but got empty results.
Code: Select all
// Ask the PDF object for the page's text first; OCR runs only when that
// comes back null or empty.
ocrText = oGdPicturePDF.GetPageText();
if (string.IsNullOrEmpty(ocrText))
{
    // NOTE(review): presumably the folder holding the OCR language resources,
    // and 200f presumably the OCR resolution — confirm against the GdPicture docs.
    string ocrResourceFolder = Environment.CurrentDirectory + @"\OCR\";
    GdPictureStatus ocrStatus = oGdPicturePDF.OcrPage("eng", ocrResourceFolder, "", 200f);

    if (ocrStatus != GdPictureStatus.OK)
    {
        // Report the failure but keep going; the text is re-read below regardless.
        string failureDetail = oGdPicturePDF.GetStat().ToString();
        Console.WriteLine("OCR problem on page " + i.ToString() + ". Error: " + failureDetail);
    }

    // Second read, performed whether or not the OCR call reported success.
    ocrText = oGdPicturePDF.GetPageText();
}

// Append this page's text, followed by a separator line, to the output file.
using (var outputWriter = File.AppendText("D:\\output.txt"))
{
    outputWriter.WriteLine(ocrText);
    outputWriter.WriteLine("=====================================================");
}