In This Topic
Programming / OCR / Using an external OCR engine during PDF/OCR generation

Using an external OCR engine during PDF/OCR generation

In This Topic
This topic assumes you are already familiar with using the GdPicturePDF class to build PDF/OCR documents.

Overview

It is possible to easily use any external OCR engine during PDF/OCR generation using the GdPicturePDF class.

The concept is quite straightforward: you provide the OCR result to a GdPicturePDF instance through a specific event, passing a string that is the serialization of a specific model. Several serialization models are supported; please read the "Supported models for serialization" section of this topic for the list.

After installing the GdPicture.NET Toolkit, please have a look at our C# "PDF to PDF-OCR" demo included in the demo folder. There you will find complete implementations of external OCR engines:

- GdPicture.NET built-in OCR using the GdPictureOCR class.

- OmniPage.

- Other engines shall be exposed soon...

 

Step by step instructions

  • 1: tell the instance to use an external OCR engine.

// Tell the instance to use an external OCR engine.
gdpicturePDF.SetOverrideOcrEngine(true);

  • 2: intercept the ExternalOcrPageRequest event (the handler in this example is named ExternalOcrRequest).

// Register the ExternalOcrRequest method as a handler of the ExternalOcrPageRequest event.
gdpicturePDF.ExternalOcrPageRequest += this.ExternalOcrRequest;

  • 3: implement the logic to provide the OCR result through the ExternalOcrRequest event handler.
Copy Code
       // This version uses the "gdpictureocr-json" model (the recommended one).
        /// <summary>
        /// Answers an external OCR request by running a GdPictureOCR engine on the requested
        /// image and handing back its result serialized with the "gdpictureocr-json" model.
        /// </summary>
        /// <param name="ImageID">Identifier of the image to recognize; passed to SetImage.</param>
        /// <param name="PdfOcrOptions">Supplies the resource path, dictionary, OCR mode and the orientation/skew detection flags applied to the engine.</param>
        /// <param name="Status">Receives the engine status read after RunOCR, then re-read after GetSerializedResult when the OCR run succeeded.</param>
        /// <param name="ResultEncoding">Receives "gdpictureocr-json" when the OCR run succeeded; otherwise null.</param>
        /// <param name="OcrResult">Receives the serialized OCR result when the OCR run succeeded; otherwise null.</param>
        private void ExternalOcrRequest(int ImageID, PdfOcrOptions PdfOcrOptions, out GdPictureStatus Status, out string ResultEncoding, out string OcrResult)
        {
            using (GdPictureOCR ocrEngine = new GdPictureOCR())
            {
                // Apply the options received with the request to the engine.
                ocrEngine.ResourceFolder = PdfOcrOptions.ResourcePath;
                ocrEngine.AddCustomDictionary(PdfOcrOptions.Dictionary);
                ocrEngine.OCRMode = PdfOcrOptions.OCRMode;
                ocrEngine.EnableOrientationDetection = PdfOcrOptions.DetectOrientation;
                ocrEngine.EnableSkewDetection = PdfOcrOptions.DetectSkew;
                ocrEngine.SetImage(ImageID);

                string ocrResultId = ocrEngine.RunOCR();
                Status = ocrEngine.GetStat();
                if (Status != GdPictureStatus.OK)
                {
                    // The OCR run did not succeed: only the status is reported.
                    ResultEncoding = null;
                    OcrResult = null;
                    return;
                }

                ResultEncoding = "gdpictureocr-json";
                OcrResult = ocrEngine.GetSerializedResult(ocrResultId);
                // Re-read the status so that it reflects the serialization call.
                Status = ocrEngine.GetStat();
            }
        }

       // This version uses the "json" model.
       /// <summary>
       /// Answers an external OCR request by running a GdPictureOCR engine on the requested
       /// image, copying its paragraphs, lines, words and characters into a GdPictureOcrResult
       /// and handing back that object serialized with the "json" model.
       /// </summary>
       /// <param name="ImageID">Identifier of the image to recognize; passed to SetImage.</param>
       /// <param name="PdfOcrOptions">Supplies the resource path, dictionary, OCR mode and the orientation/skew detection flags applied to the engine.</param>
       /// <param name="Status">Receives the engine status read right after RunOCR.</param>
       /// <param name="ResultEncoding">Receives "json" when the OCR run succeeded; otherwise null.</param>
       /// <param name="OcrResult">Receives the JSON text produced by JsonConvert.SerializeObject when the OCR run succeeded; otherwise null.</param>
       private void ExternalOcrRequest(int ImageID, PdfOcrOptions PdfOcrOptions, out GdPictureStatus Status, out string ResultEncoding, out string OcrResult)
        {
            using (GdPictureOCR ocrEngine = new GdPictureOCR())
            {
                // Apply the options received with the request to the engine.
                ocrEngine.ResourceFolder = PdfOcrOptions.ResourcePath;
                ocrEngine.AddCustomDictionary(PdfOcrOptions.Dictionary);
                ocrEngine.OCRMode = PdfOcrOptions.OCRMode;
                ocrEngine.EnableOrientationDetection = PdfOcrOptions.DetectOrientation;
                ocrEngine.EnableSkewDetection = PdfOcrOptions.DetectSkew;
                ocrEngine.SetImage(ImageID);

                string ocrResultId = ocrEngine.RunOCR();
                Status = ocrEngine.GetStat();
                if (Status != GdPictureStatus.OK)
                {
                    // The OCR run did not succeed: only the status is reported.
                    ResultEncoding = null;
                    OcrResult = null;
                    return;
                }

                // Typed local lists are filled directly; the model exposes them as IEnumerable<T>.
                List<GdPictureOcrParagraph> paragraphs = new List<GdPictureOcrParagraph>();
                GdPictureOcrResult pageResult = new GdPictureOcrResult();
                pageResult.Paragraphs = paragraphs;
                pageResult.PageRotation = ocrEngine.GetOrientation();

                for (int p = 0; p < ocrEngine.GetParagraphCount(ocrResultId); p++)
                {
                    OCRBlockType blockType = ocrEngine.GetBlockType(ocrResultId, ocrEngine.GetParagraphBlockIndex(ocrResultId, p));

                    // Only paragraphs belonging to one of these block types are exported.
                    bool isAcceptedBlock =
                        blockType == OCRBlockType.CaptionText ||
                        blockType == OCRBlockType.FlowingText ||
                        blockType == OCRBlockType.HeadingText ||
                        blockType == OCRBlockType.PulloutText ||
                        blockType == OCRBlockType.VerticalText ||
                        blockType == OCRBlockType.Table;
                    if (!isAcceptedBlock)
                    {
                        continue;
                    }

                    List<GdPictureOcrLine> lines = new List<GdPictureOcrLine>();
                    GdPictureOcrParagraph paragraph = new GdPictureOcrParagraph();
                    paragraph.Lines = lines;
                    paragraphs.Add(paragraph);

                    // Text lines of a paragraph are addressed by a first index and a count.
                    int lineStart = ocrEngine.GetParagraphFirstTextLineIndex(ocrResultId, p);
                    int lineEnd = lineStart + ocrEngine.GetParagraphTextLineCount(ocrResultId, p);
                    for (int l = lineStart; l < lineEnd; l++)
                    {
                        List<GdPictureOcrWord> words = new List<GdPictureOcrWord>();
                        GdPictureOcrLine line = new GdPictureOcrLine();
                        line.Words = words;
                        lines.Add(line);

                        // Same first-index/count addressing for the words of a line.
                        int wordStart = ocrEngine.GetTextLineFirstWordIndex(ocrResultId, l);
                        int wordEnd = wordStart + ocrEngine.GetTextLineWordCount(ocrResultId, l);
                        for (int w = wordStart; w < wordEnd; w++)
                        {
                            List<GdPictureOcrCharacter> characters = new List<GdPictureOcrCharacter>();
                            GdPictureOcrWord word = new GdPictureOcrWord();
                            word.Characters = characters;
                            words.Add(word);

                            // ...and for the characters of a word.
                            int charStart = ocrEngine.GetWordFirstCharacterIndex(ocrResultId, w);
                            int charEnd = charStart + ocrEngine.GetWordCharacterCount(ocrResultId, w);
                            for (int c = charStart; c < charEnd; c++)
                            {
                                GdPictureOcrRect box = new GdPictureOcrRect(
                                    ocrEngine.GetCharacterLeft(ocrResultId, c),
                                    ocrEngine.GetCharacterTop(ocrResultId, c),
                                    ocrEngine.GetCharacterRight(ocrResultId, c),
                                    ocrEngine.GetCharacterBottom(ocrResultId, c));

                                GdPictureOcrCharacter character = new GdPictureOcrCharacter();
                                character.BBox = box;
                                character.Value = ocrEngine.GetCharacterValue(ocrResultId, c);
                                characters.Add(character);
                            }
                        }
                    }
                }

                ResultEncoding = "json";
                OcrResult = JsonConvert.SerializeObject(pageResult);
            }
        }

 

 Supported models for serialization

Model name: "gdpictureocr-json".

Model information: the model is not public. To obtain serialized data in this model, use the GetSerializedResult method of the GdPictureOCR class.

 


 

Model name: "json".

Model information: the provided data must be an enumeration of paragraphs; each paragraph contains lines, each line contains words, and each word contains characters.

Model definition (CSharp):

Copy Code
    /// <summary>
    /// Root object of the "json" model: the OCR result of a page, made of the page
    /// rotation, the page skew angle and the paragraphs of the page.
    /// </summary>
    [Serializable]
    public sealed class GdPictureOcrResult
    {
        /// <summary>
        /// The standard rotation applied to the page before starting the OCR process.
        /// Accepted values are 0, 90, 180 and 270.
        /// </summary>
        public int PageRotation;

        /// <summary>
        /// The detected page skew angle, in degrees, clockwise.
        /// </summary>
        public float PageSkewAngle;

        /// <summary>
        /// The paragraphs of the page.
        /// </summary>
        public IEnumerable<GdPictureOcrParagraph> Paragraphs;
    }

    /// <summary>
    /// A paragraph of the OCR result: its rotation, its text writing direction and its lines.
    /// </summary>
    [Serializable]
    public sealed class GdPictureOcrParagraph
    {
        /// <summary>
        /// The standard rotation of the paragraph.
        /// Accepted values are 0, 90, 180 and 270.
        /// </summary>
        public int ParagraphRotation;

        /// <summary>
        /// The text writing direction.
        /// Supported values are: 0 for left to right, 1 for right to left, 2 for top to bottom.
        /// </summary>
        public int TextWritingDirection;

        /// <summary>
        /// The lines of the paragraph.
        /// </summary>
        public IEnumerable<GdPictureOcrLine> Lines;
    }

    /// <summary>
    /// A text line of a paragraph, described by the words it contains.
    /// </summary>
    [Serializable]
    public sealed class GdPictureOcrLine
    {
        /// <summary>
        /// The words of the line.
        /// </summary>
        public IEnumerable<GdPictureOcrWord> Words;
    }

    /// <summary>
    /// A word of a line: an optional bounding box and the characters of the word.
    /// </summary>
    [Serializable]
    public sealed class GdPictureOcrWord
    {
        /// <summary>
        /// The bounding box.
        /// It is not mandatory to provide it since it can be computed from character boxes.
        /// </summary>
        public GdPictureOcrRect BBox;

        /// <summary>
        /// The characters of the word.
        /// </summary>
        public IEnumerable<GdPictureOcrCharacter> Characters;
    }

    /// <summary>
    /// A single character of a word: its bounding box and its value.
    /// </summary>
    [Serializable]
    public sealed class GdPictureOcrCharacter
    {
        /// <summary>
        /// The bounding box.
        /// </summary>
        public GdPictureOcrRect BBox;

        /// <summary>
        /// The character value.
        /// </summary>
        public char Value;
    }

    /// <summary>
    /// A rectangle described by its four edges, used as a bounding box by words and characters.
    /// NOTE(review): the unit and the coordinate origin are not stated in this definition - confirm.
    /// </summary>
    [Serializable]
    public sealed class GdPictureOcrRect
    {
        /// <summary>The left edge of the rectangle.</summary>
        public int Left;
        /// <summary>The top edge of the rectangle.</summary>
        public int Top;
        /// <summary>The right edge of the rectangle.</summary>
        public int Right;
        /// <summary>The bottom edge of the rectangle.</summary>
        public int Bottom;

        /// <summary>
        /// Creates a rectangle from its four edges; each argument is stored unchanged
        /// in the field of the same name.
        /// </summary>
        /// <param name="Left">The left edge.</param>
        /// <param name="Top">The top edge.</param>
        /// <param name="Right">The right edge.</param>
        /// <param name="Bottom">The bottom edge.</param>
        public GdPictureOcrRect(int Left, int Top, int Right, int Bottom)
        {
            this.Left = Left;
            this.Top = Top;
            this.Right = Right;
            this.Bottom = Bottom;
        }
    }