Read Table in Document

This code example demonstrates how to use the IronTesseract OCR engine to extract text and table data from a PDF document.

  1. An instance of the IronTesseract OCR engine is created.
  2. An OcrInput object is initialized, and a PDF file ("table.pdf") is loaded using the LoadPdf method.
  3. The OCR engine processes the document using the ReadDocumentAdvanced method, which returns a detailed result object that exposes, among other things, the tables detected in the document (its Tables collection).
  4. After confirming that at least one table was detected, the first table is accessed using result.Tables.First(), and the cell information for that table is read from its CellInfos property.
  5. The list of cell data (cellList) now contains the table's cells, including the text content and other details (e.g., cell position, size); the example prints these values for each cell.
  6. This method is useful for extracting structured data like tables from PDFs, allowing the text within each table cell to be programmatically accessed and processed.
using IronOcr; // Include the IronOcr namespace for OCR functionality
using System.Linq; // Include System.Linq for using LINQ methods

/// <summary>
/// Console sample that runs OCR on a PDF and prints the cells of the first
/// table detected in it.
/// </summary>
class Program
{
    /// <summary>
    /// Loads "table.pdf", processes it with <c>ReadDocumentAdvanced</c> and, when at
    /// least one table is detected, writes the text, bounds and size of every cell
    /// of the first table to the console. Nothing is printed when no table is found.
    /// </summary>
    static void Main()
    {
        // Create an instance of the IronTesseract OCR engine
        var ocr = new IronTesseract();

        // Load the PDF; the using declaration disposes the input when Main returns
        using var pdfInput = new OcrInput();
        pdfInput.LoadPdf("table.pdf");

        // Process the document to obtain the detailed result object
        var result = ocr.ReadDocumentAdvanced(pdfInput);

        // First() throws on an empty sequence, so only proceed when a table was detected
        if (result.Tables.Any())
        {
            var firstTable = result.Tables.First();

            // Extract the cell information from the first table
            var cellList = firstTable.CellInfos;

            // NOTE(review): Text / Bounds / Size are taken as written in this sample;
            // confirm the member names against the cell type of the installed IronOcr version.
            foreach (var cell in cellList)
            {
                // Console is fully qualified because this snippet has no `using System;`
                System.Console.WriteLine($"Cell Text: {cell.Text}");
                System.Console.WriteLine($"Position: {cell.Bounds}");
                System.Console.WriteLine($"Size: {cell.Size}");
            }
        }
    }
}
using IronOcr; // Include the IronOcr namespace for OCR functionality
using System.Linq; // Include System.Linq for using LINQ methods

/// <summary>
/// Console sample that runs OCR on a PDF and prints the cells of the first
/// table detected in it.
/// </summary>
class Program
{
    /// <summary>
    /// Loads "table.pdf", processes it with <c>ReadDocumentAdvanced</c> and, when at
    /// least one table is detected, writes the text, bounds and size of every cell
    /// of the first table to the console. Nothing is printed when no table is found.
    /// </summary>
    static void Main()
    {
        // Create an instance of the IronTesseract OCR engine
        var ocr = new IronTesseract();

        // Load the PDF; the using declaration disposes the input when Main returns
        using var pdfInput = new OcrInput();
        pdfInput.LoadPdf("table.pdf");

        // Process the document to obtain the detailed result object
        var result = ocr.ReadDocumentAdvanced(pdfInput);

        // First() throws on an empty sequence, so only proceed when a table was detected
        if (result.Tables.Any())
        {
            var firstTable = result.Tables.First();

            // Extract the cell information from the first table
            var cellList = firstTable.CellInfos;

            // NOTE(review): Text / Bounds / Size are taken as written in this sample;
            // confirm the member names against the cell type of the installed IronOcr version.
            foreach (var cell in cellList)
            {
                // Console is fully qualified because this snippet has no `using System;`
                System.Console.WriteLine($"Cell Text: {cell.Text}");
                System.Console.WriteLine($"Position: {cell.Bounds}");
                System.Console.WriteLine($"Size: {cell.Size}");
            }
        }
    }
}
Imports IronOcr ' Include the IronOcr namespace for OCR functionality
Imports System.Linq ' Include System.Linq for using LINQ methods

''' <summary>
''' Console sample that runs OCR on a PDF and prints the cells of the first
''' table detected in it.
''' </summary>
Friend Class Program
	''' <summary>
	''' Loads "table.pdf", processes it with ReadDocumentAdvanced and, when at least
	''' one table is detected, writes the text, bounds and size of every cell of the
	''' first table to the console. Nothing is printed when no table is found.
	''' </summary>
	Shared Sub Main()
		' Create an instance of the IronTesseract OCR engine
		Dim ocr = New IronTesseract()

		' Load the PDF inside a Using block so the input is disposed once processing
		' is finished, including when an exception is thrown
		Using pdfInput As New OcrInput()
			pdfInput.LoadPdf("table.pdf")

			' Process the document to obtain the detailed result object
			Dim result = ocr.ReadDocumentAdvanced(pdfInput)

			' First() throws on an empty sequence, so only proceed when a table was detected
			If result.Tables.Any() Then
				Dim firstTable = result.Tables.First()

				' Extract the cell information from the first table
				Dim cellList = firstTable.CellInfos

				' NOTE(review): Text / Bounds / Size are taken as written in this sample;
				' confirm the member names against the cell type of the installed IronOcr version.
				For Each cell In cellList
					' Console is fully qualified because this snippet has no Imports System
					System.Console.WriteLine($"Cell Text: {cell.Text}")
					System.Console.WriteLine($"Position: {cell.Bounds}")
					System.Console.WriteLine($"Size: {cell.Size}")
				Next
			End If
		End Using
	End Sub
End Class
$vbLabelText   $csharpLabel