using IronOcr;
using System.Drawing; //for image export
// We can delve deep into OCR results as an object model of
// Pages, Barcodes, Paragraphs, Lines, Words and Characters.
// This allows us to explore, export and draw OCR content using other APIs.
// Walks the full OcrResult object model (Pages -> Paragraphs -> Lines -> Words -> Characters)
// for "example.tiff", reading every exposed property into a local so each one is demonstrated.
//
// Engine setup: classic Tesseract combined with LSTM, with barcode reading switched on.
var Ocr = new IronTesseract();
Ocr.Configuration.EngineMode = TesseractEngineMode.TesseractAndLstm;
Ocr.Configuration.ReadBarCodes = true;

using (var Input = new OcrInput(@"example.tiff"))
{
    OcrResult Result = Ocr.Read(Input);

    foreach (var Page in Result.Pages)
    {
        // Page object
        int PageNumber = Page.PageNumber;
        string PageText = Page.Text;
        int PageWordCount = Page.WordCount;

        // null if we don't set Ocr.Configuration.ReadBarCodes = true
        OcrResult.Barcode[] Barcodes = Page.Barcodes;

        // Every ToBitmap(...) result is a disposable image. Binding it with a `using`
        // declaration releases it at the end of the current loop iteration instead of
        // leaving one undisposed image behind per page/paragraph/line/word/character.
        using System.Drawing.Bitmap PageImage = Page.ToBitmap(Input);
        int PageWidth = Page.Width;
        int PageHeight = Page.Height;
        int PageRotation = Page.Rotation; // angular correction in degrees from OcrInput.Deskew()

        foreach (var Paragraph in Page.Paragraphs)
        {
            // Pages -> Paragraphs
            int ParagraphNumber = Paragraph.ParagraphNumber;
            String ParagraphText = Paragraph.Text;
            using System.Drawing.Bitmap ParagraphImage = Paragraph.ToBitmap(Input);
            int ParagraphX_location = Paragraph.X;
            int ParagraphY_location = Paragraph.Y;
            int ParagraphWidth = Paragraph.Width;
            int ParagraphHeight = Paragraph.Height;
            double ParagraphOcrAccuracy = Paragraph.Confidence;
            OcrResult.TextFlow ParagraphText_direction = Paragraph.TextDirection;

            foreach (var Line in Paragraph.Lines)
            {
                // Pages -> Paragraphs -> Lines
                int LineNumber = Line.LineNumber;
                String LineText = Line.Text;
                using System.Drawing.Bitmap LineImage = Line.ToBitmap(Input);
                int LineX_location = Line.X;
                int LineY_location = Line.Y;
                int LineWidth = Line.Width;
                int LineHeight = Line.Height;
                double LineOcrAccuracy = Line.Confidence;
                double LineSkew = Line.BaselineAngle;
                double LineOffset = Line.BaselineOffset;

                foreach (var Word in Line.Words)
                {
                    // Pages -> Paragraphs -> Lines -> Words
                    int WordNumber = Word.WordNumber;
                    String WordText = Word.Text;
                    using System.Drawing.Image WordImage = Word.ToBitmap(Input);
                    int WordX_location = Word.X;
                    int WordY_location = Word.Y;
                    int WordWidth = Word.Width;
                    int WordHeight = Word.Height;
                    double WordOcrAccuracy = Word.Confidence;

                    if (Word.Font != null)
                    {
                        // Word.Font is only set when using Tesseract Engine Modes rather than LSTM
                        String FontName = Word.Font.FontName;
                        double FontSize = Word.Font.FontSize;
                        bool IsBold = Word.Font.IsBold;
                        bool IsFixedWidth = Word.Font.IsFixedWidth;
                        bool IsItalic = Word.Font.IsItalic;
                        bool IsSerif = Word.Font.IsSerif;
                        bool IsUnderLined = Word.Font.IsUnderlined;
                        bool IsFancy = Word.Font.IsCaligraphic;
                    }

                    foreach (var Character in Word.Characters)
                    {
                        // Pages -> Paragraphs -> Lines -> Words -> Characters
                        int CharacterNumber = Character.CharacterNumber;
                        String CharacterText = Character.Text;
                        using System.Drawing.Bitmap CharacterImage = Character.ToBitmap(Input);
                        int CharacterX_location = Character.X;
                        int CharacterY_location = Character.Y;
                        int CharacterWidth = Character.Width;
                        int CharacterHeight = Character.Height;
                        double CharacterOcrAccuracy = Character.Confidence;

                        // Output alternative symbol choices and their probability.
                        // Very useful for spellchecking.
                        OcrResult.Choice[] Choices = Character.Choices;
                    }
                }
            }
        }
    }
}
Imports IronOcr
Imports System.Drawing 'for image export
' We can delve deep into OCR results as an object model of
' Pages, Barcodes, Paragraphs, Lines, Words and Characters.
' This allows us to explore, export and draw OCR content using other APIs.
' Walks the full OcrResult object model (Pages -> Paragraphs -> Lines -> Words -> Characters)
' for "example.tiff", reading every exposed property into a local so each one is demonstrated.
'
' Engine setup: classic Tesseract combined with LSTM, with barcode reading switched on.
' Declared with Dim (a local) rather than Private: Private is a member-level modifier and
' cannot head a run of executable statements.
Dim Ocr As New IronTesseract()
Ocr.Configuration.EngineMode = TesseractEngineMode.TesseractAndLstm
Ocr.Configuration.ReadBarCodes = True

Using Input = New OcrInput("example.tiff")
    Dim Result As OcrResult = Ocr.Read(Input)

    For Each Page In Result.Pages
        ' Page object
        Dim PageNumber As Integer = Page.PageNumber
        Dim PageText As String = Page.Text
        Dim PageWordCount As Integer = Page.WordCount

        ' Nothing if we don't set Ocr.Configuration.ReadBarCodes = True
        Dim Barcodes() As OcrResult.Barcode = Page.Barcodes

        ' Every ToBitmap(...) result is a disposable image. Each one is held in a Using
        ' block so it is released once the current item has been processed, instead of
        ' leaving one undisposed image behind per page/paragraph/line/word/character.
        Using PageImage As System.Drawing.Bitmap = Page.ToBitmap(Input)
            Dim PageWidth As Integer = Page.Width
            Dim PageHeight As Integer = Page.Height
            Dim PageRotation As Integer = Page.Rotation ' angular correction in degrees from OcrInput.Deskew()

            For Each Paragraph In Page.Paragraphs
                ' Pages -> Paragraphs
                Dim ParagraphNumber As Integer = Paragraph.ParagraphNumber
                Dim ParagraphText As String = Paragraph.Text

                Using ParagraphImage As System.Drawing.Bitmap = Paragraph.ToBitmap(Input)
                    Dim ParagraphX_location As Integer = Paragraph.X
                    Dim ParagraphY_location As Integer = Paragraph.Y
                    Dim ParagraphWidth As Integer = Paragraph.Width
                    Dim ParagraphHeight As Integer = Paragraph.Height
                    Dim ParagraphOcrAccuracy As Double = Paragraph.Confidence
                    Dim ParagraphText_direction As OcrResult.TextFlow = Paragraph.TextDirection

                    For Each Line In Paragraph.Lines
                        ' Pages -> Paragraphs -> Lines
                        Dim LineNumber As Integer = Line.LineNumber
                        Dim LineText As String = Line.Text

                        Using LineImage As System.Drawing.Bitmap = Line.ToBitmap(Input)
                            Dim LineX_location As Integer = Line.X
                            Dim LineY_location As Integer = Line.Y
                            Dim LineWidth As Integer = Line.Width
                            Dim LineHeight As Integer = Line.Height
                            Dim LineOcrAccuracy As Double = Line.Confidence
                            Dim LineSkew As Double = Line.BaselineAngle
                            Dim LineOffset As Double = Line.BaselineOffset

                            For Each Word In Line.Words
                                ' Pages -> Paragraphs -> Lines -> Words
                                Dim WordNumber As Integer = Word.WordNumber
                                Dim WordText As String = Word.Text

                                Using WordImage As System.Drawing.Image = Word.ToBitmap(Input)
                                    Dim WordX_location As Integer = Word.X
                                    Dim WordY_location As Integer = Word.Y
                                    Dim WordWidth As Integer = Word.Width
                                    Dim WordHeight As Integer = Word.Height
                                    Dim WordOcrAccuracy As Double = Word.Confidence

                                    If Word.Font IsNot Nothing Then
                                        ' Word.Font is only set when using Tesseract Engine Modes rather than LSTM
                                        Dim FontName As String = Word.Font.FontName
                                        Dim FontSize As Double = Word.Font.FontSize
                                        Dim IsBold As Boolean = Word.Font.IsBold
                                        Dim IsFixedWidth As Boolean = Word.Font.IsFixedWidth
                                        Dim IsItalic As Boolean = Word.Font.IsItalic
                                        Dim IsSerif As Boolean = Word.Font.IsSerif
                                        Dim IsUnderLined As Boolean = Word.Font.IsUnderlined
                                        Dim IsFancy As Boolean = Word.Font.IsCaligraphic
                                    End If

                                    For Each Character In Word.Characters
                                        ' Pages -> Paragraphs -> Lines -> Words -> Characters
                                        Dim CharacterNumber As Integer = Character.CharacterNumber
                                        Dim CharacterText As String = Character.Text

                                        Using CharacterImage As System.Drawing.Bitmap = Character.ToBitmap(Input)
                                            Dim CharacterX_location As Integer = Character.X
                                            Dim CharacterY_location As Integer = Character.Y
                                            Dim CharacterWidth As Integer = Character.Width
                                            Dim CharacterHeight As Integer = Character.Height
                                            Dim CharacterOcrAccuracy As Double = Character.Confidence

                                            ' Output alternative symbol choices and their probability.
                                            ' Very useful for spellchecking.
                                            Dim Choices() As OcrResult.Choice = Character.Choices
                                        End Using
                                    Next Character
                                End Using
                            Next Word
                        End Using
                    Next Line
                End Using
            Next Paragraph
        End Using
    Next Page
End Using