A bitwise combination of values of the TextExtractionOptions enumeration. Specifies required options for both text search and text extraction.
Example





In This Topic
GdPicture14 Namespace / GdPicturePDF Class / SetTextExtractionOptions Method

SetTextExtractionOptions Method (GdPicturePDF)

In This Topic
Sets the various settings of the text search/extraction engine for further use when searching or extracting text from the currently loaded PDF document.

Please note that you need to create or load the PDF document to allow these settings to work properly. At the same time, the settings are reset to an undefined value (TextExtractionOptions.Default) when creating or loading a new PDF document.

Syntax
'Declaration

 

Public Sub SetTextExtractionOptions( _

   ByVal Options As TextExtractionOptions _

) 
public void SetTextExtractionOptions( 

   TextExtractionOptions Options

)
public procedure SetTextExtractionOptions( 

    Options: TextExtractionOptions

); 
public function SetTextExtractionOptions( 

   Options : TextExtractionOptions

);
public: void SetTextExtractionOptions( 

   TextExtractionOptions Options

) 
public:

void SetTextExtractionOptions( 

   TextExtractionOptions Options

) 

Parameters

Options
A bitwise combination of values of the TextExtractionOptions enumeration. Specifies required options for both text search and text extraction.
Remarks
It is recommended to use the GetStat method to identify the specific reason for the method's failure, if any. Please ensure that you have successfully created or loaded a PDF document, otherwise the method does nothing.

Likewise, the defined settings are only valid when processing (searching or extracting text from) the currently loaded PDF document. They are reset to an undefined value when creating or loading a new PDF document.

Example
How to extract the text from all pages in the PDF document to a text file using custom text extraction engine options.
' Extracts the text of every page of "test.pdf" into "text_from_pages.txt",
' using custom text extraction engine options.
Dim caption As String = "Example: SetTextExtractionOptions"
Dim gdpicturePDF As New GdPicturePDF()
Try
    Dim status As GdPictureStatus = gdpicturePDF.LoadFromFile("test.pdf", False)
    If status = GdPictureStatus.OK Then
        ' The Using block closes the output file even if an exception is thrown.
        Using text_file As New System.IO.StreamWriter("text_from_pages.txt")
            Dim pageCount As Integer = gdpicturePDF.GetPageCount()
            status = gdpicturePDF.GetStat()
            If status = GdPictureStatus.OK Then
                ' The options are set after the document has been loaded, because
                ' loading a document resets them.
                gdpicturePDF.SetTextExtractionOptions(TextExtractionOptions.ExactWordLineMatching)
                ' SetTextExtractionOptions() returns nothing, so its outcome is read via GetStat().
                status = gdpicturePDF.GetStat()
                If status = GdPictureStatus.OK Then
                    ' Page numbers are 1-based.
                    For i As Integer = 1 To pageCount
                        status = gdpicturePDF.SelectPage(i)
                        If status = GdPictureStatus.OK Then
                            Dim page_text As String = gdpicturePDF.GetPageText()
                            status = gdpicturePDF.GetStat()
                            If status = GdPictureStatus.OK Then
                                Dim message As String = "Page: " & i.ToString() & " Status: " & status.ToString()
                                MessageBox.Show(message, caption)
                                text_file.WriteLine(message)
                                text_file.WriteLine(page_text)
                            End If
                        Else
                            MessageBox.Show("The SelectPage() method has failed with the status: " & status.ToString(), caption)
                        End If
                    Next
                Else
                    MessageBox.Show("The SetTextExtractionOptions() method has failed with the status: " & status.ToString(), caption)
                End If
            Else
                MessageBox.Show("The GetPageCount() method has failed with the status: " & status.ToString(), caption)
            End If
        End Using
    Else
        MessageBox.Show("The file can't be loaded.", caption)
    End If
Finally
    ' Release the PDF object on every path, including when an exception is thrown.
    gdpicturePDF.Dispose()
End Try
// Extracts the text of every page of "test.pdf" into "text_from_pages.txt",
// using custom text extraction engine options.
string caption = "Example: SetTextExtractionOptions";
GdPicturePDF gdpicturePDF = new GdPicturePDF();
try
{
    GdPictureStatus status = gdpicturePDF.LoadFromFile("test.pdf", false);
    if (status == GdPictureStatus.OK)
    {
        // The using statement closes the output file even if an exception is thrown.
        using (System.IO.StreamWriter text_file = new System.IO.StreamWriter("text_from_pages.txt"))
        {
            int pageCount = gdpicturePDF.GetPageCount();
            status = gdpicturePDF.GetStat();
            if (status == GdPictureStatus.OK)
            {
                // The options are set after the document has been loaded, because
                // loading a document resets them.
                gdpicturePDF.SetTextExtractionOptions(TextExtractionOptions.ExactWordLineMatching);
                // SetTextExtractionOptions() returns void, so its outcome is read via GetStat().
                status = gdpicturePDF.GetStat();
                if (status == GdPictureStatus.OK)
                {
                    // Page numbers are 1-based.
                    for (int i = 1; i <= pageCount; i++)
                    {
                        status = gdpicturePDF.SelectPage(i);
                        if (status == GdPictureStatus.OK)
                        {
                            string page_text = gdpicturePDF.GetPageText();
                            status = gdpicturePDF.GetStat();
                            if (status == GdPictureStatus.OK)
                            {
                                string message = "Page: " + i.ToString() + " Status: " + status.ToString();
                                MessageBox.Show(message, caption);
                                text_file.WriteLine(message);
                                text_file.WriteLine(page_text);
                            }
                        }
                        else
                        {
                            MessageBox.Show("The SelectPage() method has failed with the status: " + status.ToString(), caption);
                        }
                    }
                }
                else
                {
                    MessageBox.Show("The SetTextExtractionOptions() method has failed with the status: " + status.ToString(), caption);
                }
            }
            else
            {
                MessageBox.Show("The GetPageCount() method has failed with the status: " + status.ToString(), caption);
            }
        }
    }
    else
    {
        MessageBox.Show("The file can't be loaded.", caption);
    }
}
finally
{
    // Release the PDF object on every path, including when an exception is thrown.
    gdpicturePDF.Dispose();
}
See Also