GetPageText Method (GdPicturePDF)
In This Topic
Returns all the text, whether visible or hidden, of the current page of the loaded PDF document.
Note that you can use the GuessPageTextRotation method to determine whether the text on the current page is rotated.
Syntax
'Declaration
Public Function GetPageText() As String
public string GetPageText()
public function GetPageText(): String;
public function GetPageText() : String;
public: string* GetPageText();
public:
String^ GetPageText();
Return Value
The whole text of the currently selected page as a string. The
GetStat method can be subsequently used to determine if this method has been successful.
Example
How to extract the text of all pages of a PDF document into a text file.
' Extracts the text of every page of "test.pdf" and writes it, page by page,
' to "text_from_pages.txt". The status of each step is shown in a message box.
Dim caption As String = "Example: GetPageText"
' The Using blocks release the PDF object and the output file even when an
' exception is thrown part-way through the extraction.
Using gdpicturePDF As New GdPicturePDF()
    Dim status As GdPictureStatus = gdpicturePDF.LoadFromFile("test.pdf", False)
    If status = GdPictureStatus.OK Then
        Using text_file As New System.IO.StreamWriter("text_from_pages.txt")
            ' GetPageCount() returns the count itself, so its outcome is read via GetStat().
            Dim pageCount As Integer = gdpicturePDF.GetPageCount()
            status = gdpicturePDF.GetStat()
            If status = GdPictureStatus.OK Then
                For i As Integer = 1 To pageCount
                    status = gdpicturePDF.SelectPage(i)
                    If status = GdPictureStatus.OK Then
                        ' GetPageText() returns the text itself, so its outcome is read via GetStat().
                        Dim page_text As String = gdpicturePDF.GetPageText()
                        status = gdpicturePDF.GetStat()
                        If status = GdPictureStatus.OK Then
                            Dim message As String = "Page: " + i.ToString() + " Status: " + status.ToString()
                            MessageBox.Show(message, caption)
                            text_file.WriteLine(message)
                            text_file.WriteLine(page_text)
                        Else
                            ' Report the failure instead of silently skipping the page.
                            MessageBox.Show("The GetPageText() method has failed on the page " + i.ToString() + " with the status: " + status.ToString(), caption)
                        End If
                    Else
                        MessageBox.Show("The SelectPage() method has failed with the status: " + status.ToString(), caption)
                    End If
                Next
            Else
                MessageBox.Show("The GetPageCount() method has failed with the status: " + status.ToString(), caption)
            End If
        End Using
    Else
        MessageBox.Show("The file can't be loaded.", caption)
    End If
End Using
// Extracts the text of every page of "test.pdf" and writes it, page by page,
// to "text_from_pages.txt". The status of each step is shown in a message box.
string caption = "Example: GetPageText";
// The using blocks release the PDF object and the output file even when an
// exception is thrown part-way through the extraction.
using (GdPicturePDF gdpicturePDF = new GdPicturePDF())
{
    GdPictureStatus status = gdpicturePDF.LoadFromFile("test.pdf", false);
    if (status == GdPictureStatus.OK)
    {
        using (System.IO.StreamWriter text_file = new System.IO.StreamWriter("text_from_pages.txt"))
        {
            // GetPageCount() returns the count itself, so its outcome is read via GetStat().
            int pageCount = gdpicturePDF.GetPageCount();
            status = gdpicturePDF.GetStat();
            if (status == GdPictureStatus.OK)
            {
                for (int i = 1; i <= pageCount; i++)
                {
                    status = gdpicturePDF.SelectPage(i);
                    if (status != GdPictureStatus.OK)
                    {
                        MessageBox.Show("The SelectPage() method has failed with the status: " + status.ToString(), caption);
                        continue;
                    }

                    // GetPageText() returns the text itself, so its outcome is read via GetStat().
                    string page_text = gdpicturePDF.GetPageText();
                    status = gdpicturePDF.GetStat();
                    if (status == GdPictureStatus.OK)
                    {
                        string message = "Page: " + i.ToString() + " Status: " + status.ToString();
                        MessageBox.Show(message, caption);
                        text_file.WriteLine(message);
                        text_file.WriteLine(page_text);
                    }
                    else
                    {
                        // Report the failure instead of silently skipping the page.
                        MessageBox.Show("The GetPageText() method has failed on the page " + i.ToString() + " with the status: " + status.ToString(), caption);
                    }
                }
            }
            else
            {
                MessageBox.Show("The GetPageCount() method has failed with the status: " + status.ToString(), caption);
            }
        }
    }
    else
    {
        MessageBox.Show("The file can't be loaded.", caption);
    }
}
Example
How to extract the text of all pages of a PDF document into a text file.
' Extracts the text of every page of "test.pdf" and writes it, page by page,
' to "text_from_pages.txt". The status of each step is shown in a message box.
Dim caption As String = "Example: GetPageText"
' The Using blocks release the PDF object and the output file even when an
' exception is thrown part-way through the extraction.
Using gdpicturePDF As New GdPicturePDF()
    Dim status As GdPictureStatus = gdpicturePDF.LoadFromFile("test.pdf", False)
    If status = GdPictureStatus.OK Then
        Using text_file As New System.IO.StreamWriter("text_from_pages.txt")
            ' GetPageCount() returns the count itself, so its outcome is read via GetStat().
            Dim pageCount As Integer = gdpicturePDF.GetPageCount()
            status = gdpicturePDF.GetStat()
            If status = GdPictureStatus.OK Then
                For i As Integer = 1 To pageCount
                    status = gdpicturePDF.SelectPage(i)
                    If status = GdPictureStatus.OK Then
                        ' GetPageText() returns the text itself, so its outcome is read via GetStat().
                        Dim page_text As String = gdpicturePDF.GetPageText()
                        status = gdpicturePDF.GetStat()
                        If status = GdPictureStatus.OK Then
                            Dim message As String = "Page: " + i.ToString() + " Status: " + status.ToString()
                            MessageBox.Show(message, caption)
                            text_file.WriteLine(message)
                            text_file.WriteLine(page_text)
                        Else
                            ' Report the failure instead of silently skipping the page.
                            MessageBox.Show("The GetPageText() method has failed on the page " + i.ToString() + " with the status: " + status.ToString(), caption)
                        End If
                    Else
                        MessageBox.Show("The SelectPage() method has failed with the status: " + status.ToString(), caption)
                    End If
                Next
            Else
                MessageBox.Show("The GetPageCount() method has failed with the status: " + status.ToString(), caption)
            End If
        End Using
    Else
        MessageBox.Show("The file can't be loaded.", caption)
    End If
End Using
// Extracts the text of every page of "test.pdf" and writes it, page by page,
// to "text_from_pages.txt". The status of each step is shown in a message box.
string caption = "Example: GetPageText";
// The using blocks release the PDF object and the output file even when an
// exception is thrown part-way through the extraction.
using (GdPicturePDF gdpicturePDF = new GdPicturePDF())
{
    GdPictureStatus status = gdpicturePDF.LoadFromFile("test.pdf", false);
    if (status == GdPictureStatus.OK)
    {
        using (System.IO.StreamWriter text_file = new System.IO.StreamWriter("text_from_pages.txt"))
        {
            // GetPageCount() returns the count itself, so its outcome is read via GetStat().
            int pageCount = gdpicturePDF.GetPageCount();
            status = gdpicturePDF.GetStat();
            if (status == GdPictureStatus.OK)
            {
                for (int i = 1; i <= pageCount; i++)
                {
                    status = gdpicturePDF.SelectPage(i);
                    if (status != GdPictureStatus.OK)
                    {
                        MessageBox.Show("The SelectPage() method has failed with the status: " + status.ToString(), caption);
                        continue;
                    }

                    // GetPageText() returns the text itself, so its outcome is read via GetStat().
                    string page_text = gdpicturePDF.GetPageText();
                    status = gdpicturePDF.GetStat();
                    if (status == GdPictureStatus.OK)
                    {
                        string message = "Page: " + i.ToString() + " Status: " + status.ToString();
                        MessageBox.Show(message, caption);
                        text_file.WriteLine(message);
                        text_file.WriteLine(page_text);
                    }
                    else
                    {
                        // Report the failure instead of silently skipping the page.
                        MessageBox.Show("The GetPageText() method has failed on the page " + i.ToString() + " with the status: " + status.ToString(), caption);
                    }
                }
            }
            else
            {
                MessageBox.Show("The GetPageCount() method has failed with the status: " + status.ToString(), caption);
            }
        }
    }
    else
    {
        MessageBox.Show("The file can't be loaded.", caption);
    }
}
See Also