Free for development. Licenses from $399
// NuGet PM> Install-Package IronOcr
using IronOcr;
// Read one image with a default-constructed engine and keep only the recognized text.
var Engine = new IronTesseract();
var Result = Engine.Read(@"images\image.png").Text;
' NuGet PM> Install-Package IronOcr
Imports IronOcr
' Read one image with a default-constructed engine and keep only the recognized text.
' Declared with Dim (a local, like the C# "var") so the type is inferred from the
' expression; a Private field without an As clause would be typed Object.
Dim Result = (New IronTesseract()).Read("images\image.png").Text
using IronOcr;
// Add a PDF to an OcrInput, read it, and print the recognized text.
var Ocr = new IronTesseract();
using (var Input = new OcrInput())
{
    // OCR entire document
    Input.AddPdf("example.pdf", "password");

    // Alternatively OCR selected page numbers.
    // Use this INSTEAD of AddPdf above: running both calls adds content from the
    // same PDF to the input twice.
    // Input.AddPdfPages("example.pdf", new[] { 1, 2, 3 }, "password");

    var Result = Ocr.Read(Input);
    Console.WriteLine(Result.Text);
}
Imports IronOcr
' Add a PDF to an OcrInput, read it, and print the recognized text.
' Ocr is a local (Dim): a Private field declaration cannot be mixed with the
' method-level Using statement that follows.
Dim Ocr = New IronTesseract()
Using Input = New OcrInput()
    ' OCR entire document
    Input.AddPdf("example.pdf", "password")

    ' Alternatively OCR selected page numbers.
    ' Use this INSTEAD of AddPdf above: running both calls adds content from the
    ' same PDF to the input twice.
    ' Input.AddPdfPages("example.pdf", { 1, 2, 3 }, "password")

    Dim Result = Ocr.Read(Input)
    Console.WriteLine(Result.Text)
End Using
// Turn on barcode reading, OCR one image, and print the value of every barcode in the result.
IronTesseract Ocr = new IronTesseract();
Ocr.Configuration.ReadBarCodes = true;

using (OcrInput Input = new OcrInput(@"images\imageWithBarcode.png"))
{
    OcrResult Result = Ocr.Read(Input);

    foreach (var Found in Result.Barcodes)
    {
        Console.WriteLine(Found.Value);
    }
}
' Turn on barcode reading, OCR one image, and print the value of every barcode in the result.
Dim Ocr As New IronTesseract()
Ocr.Configuration.ReadBarCodes = True

Using Input As New OcrInput("images\imageWithBarcode.png")
    Dim Result = Ocr.Read(Input)

    For Each Found In Result.Barcodes
        Console.WriteLine(Found.Value)
    Next
End Using
// PM> Install-Package IronOcr.Languages.Arabic
using IronOcr;
// Basic example: select the Arabic language, read one image, print the text.
var Ocr = new IronTesseract();
Ocr.Language = OcrLanguage.Arabic;
using (var Input = new OcrInput(@"images\arabic.gif"))
{
    var Result = Ocr.Read(Input);
    Console.WriteLine(Result.Text);
}

// A more advanced example: load a custom Tesseract .traineddata file and add
// EnglishBest as a secondary language before reading.
// (Plain "//" comment: a "///" XML-doc comment is only valid on a declaration.)
var Ocr2 = new IronTesseract();
Ocr2.UseCustomTesseractLanguageFile("custom_tesseract_files/custom.traineddata");
Ocr2.AddSecondaryLanguage(OcrLanguage.EnglishBest);
using (var Input = new OcrInput(@"images\mixed-lang.pdf"))
{
    var Result = Ocr2.Read(Input);
    Console.WriteLine(Result.Text);
}
' PM> Install-Package IronOcr.Languages.Arabic
Imports IronOcr
' Basic example: select the Arabic language, read one image, print the text.
' Ocr is a local (Dim): a Private field declaration cannot be mixed with the
' method-level statements that follow.
Dim Ocr = New IronTesseract()
Ocr.Language = OcrLanguage.Arabic
Using Input = New OcrInput("images\arabic.gif")
    Dim Result = Ocr.Read(Input)
    Console.WriteLine(Result.Text)
End Using

' A more advanced example: load a custom Tesseract .traineddata file and add
' EnglishBest as a secondary language before reading.
' (Plain comment: a triple-quote XML-doc comment is only valid on a declaration.)
Dim Ocr2 = New IronTesseract()
Ocr2.UseCustomTesseractLanguageFile("custom_tesseract_files/custom.traineddata")
Ocr2.AddSecondaryLanguage(OcrLanguage.EnglishBest)
Using Input = New OcrInput("images\mixed-lang.pdf")
    Dim Result = Ocr2.Read(Input)
    Console.WriteLine(Result.Text)
End Using
using IronOcr;
// Apply the Deskew filter to one image before reading it, then print the text.
IronTesseract Ocr = new IronTesseract();

using (OcrInput Input = new OcrInput(@"images\image.png"))
{
    Input.Deskew();
    // Input.DeNoise(); // only use if accuracy <97%

    OcrResult Result = Ocr.Read(Input);
    Console.WriteLine(Result.Text);
}
Imports IronOcr
' Apply the Deskew filter to one image before reading it, then print the text.
' Ocr is a local (Dim): a Private field declaration cannot be mixed with the
' method-level Using statement that follows.
Dim Ocr = New IronTesseract()
Using Input = New OcrInput("images\image.png")
    Input.Deskew()
    ' Input.DeNoise() ' only use if accuracy < 97%
    Dim Result = Ocr.Read(Input)
    Console.WriteLine(Result.Text)
End Using
// Example engine configuration, followed by a read of a single image.
IronTesseract Ocr = new IronTesseract();

// Language: the fast English dictionary
Ocr.Language = OcrLanguage.EnglishFast;

// Engine: latest Tesseract version, LSTM only (AI OCR without font analysis)
Ocr.Configuration.TesseractVersion = TesseractVersion.Tesseract5;
Ocr.Configuration.EngineMode = TesseractEngineMode.LstmOnly;

// Options this example does not need are switched off
Ocr.Configuration.ReadBarCodes = false;
Ocr.Configuration.RenderSearchablePdfsAndHocr = false;

// Page segmentation: assume text is laid out neatly in an orthogonal document
Ocr.Configuration.PageSegmentationMode = TesseractPageSegmentationMode.Auto;

using (OcrInput Input = new OcrInput(@"images\image.png"))
{
    OcrResult Result = Ocr.Read(Input);
    Console.WriteLine(Result.Text);
}
' Example engine configuration, followed by a read of a single image.
Dim Ocr As New IronTesseract()

' Language: the fast English dictionary
Ocr.Language = OcrLanguage.EnglishFast

' Engine: latest Tesseract version, LSTM only (AI OCR without font analysis)
Ocr.Configuration.TesseractVersion = TesseractVersion.Tesseract5
Ocr.Configuration.EngineMode = TesseractEngineMode.LstmOnly

' Options this example does not need are switched off
Ocr.Configuration.ReadBarCodes = False
Ocr.Configuration.RenderSearchablePdfsAndHocr = False

' Page segmentation: assume text is laid out neatly in an orthogonal document
Ocr.Configuration.PageSegmentationMode = TesseractPageSegmentationMode.Auto

Using Input As New OcrInput("images\image.png")
    Dim Result = Ocr.Read(Input)
    Console.WriteLine(Result.Text)
End Using
using IronOcr;
// Showcase of the image filters available on OcrInput.
// You don't need all of them: most users only need Deskew() and occasionally DeNoise().
IronTesseract Ocr = new IronTesseract();

using (OcrInput Input = new OcrInput(@"images\image.png"))
{
    Input.WithTitle("My Document");

    // The filters are called in exactly this order.
    Input.Binarize();
    Input.Contrast();
    Input.Deskew();
    Input.DeNoise();
    Input.Dilate();
    Input.EnhanceResolution(300);
    Input.Invert();
    Input.Rotate(90);
    Input.Scale(150); // or Input.Scale(3000, 2000);
    Input.Sharpen();
    Input.ToGrayScale();

    OcrResult Result = Ocr.Read(Input);
    Console.WriteLine(Result.Text);
}
Imports IronOcr
' Showcase of the image filters available on OcrInput.
' Ocr is a local (Dim): a Private field declaration cannot be mixed with the
' method-level Using statement that follows.
Dim Ocr = New IronTesseract()
Using Input = New OcrInput("images\image.png")
    Input.WithTitle("My Document")
    Input.Binarize()
    Input.Contrast()
    Input.Deskew()
    Input.DeNoise()
    Input.Dilate()
    Input.EnhanceResolution(300)
    Input.Invert()
    Input.Rotate(90)
    Input.Scale(150) ' or Input.Scale(3000, 2000)
    Input.Sharpen()
    Input.ToGrayScale()
    ' you don't need all of them
    ' most users only need Deskew() and occasionally DeNoise()
    Dim Result = Ocr.Read(Input)
    Console.WriteLine(Result.Text)
End Using
using IronOcr;
using System.Drawing; //for image export
// We can delve deep into OCR results as an object model of
// Pages, Barcodes, Paragraphs, Lines, Words and Characters.
// This allows us to explore, export and draw OCR content using other APIs.
//
// Every ToBitmap() call below yields a System.Drawing image, which is IDisposable.
// Each image is disposed at the end of the loop iteration that created it, so the
// nested loops do not accumulate undisposed bitmaps. In code that can throw between
// creation and disposal, prefer a using block instead of the plain Dispose() call.
var Ocr = new IronTesseract();
Ocr.Configuration.EngineMode = TesseractEngineMode.TesseractAndLstm;
Ocr.Configuration.ReadBarCodes = true;
using (var Input = new OcrInput(@"example.tiff"))
{
    OcrResult Result = Ocr.Read(Input);
    foreach (var Page in Result.Pages)
    {
        // Page object
        int PageNumber = Page.PageNumber;
        string PageText = Page.Text;
        int PageWordCount = Page.WordCount;
        // null if we don't set Ocr.Configuration.ReadBarCodes = true;
        OcrResult.Barcode[] Barcodes = Page.Barcodes;
        System.Drawing.Bitmap PageImage = Page.ToBitmap(Input);
        int PageWidth = Page.Width;
        int PageHeight = Page.Height;
        foreach (var Paragraph in Page.Paragraphs)
        {
            // Pages -> Paragraphs
            int ParagraphNumber = Paragraph.ParagraphNumber;
            String ParagraphText = Paragraph.Text;
            System.Drawing.Bitmap ParagraphImage = Paragraph.ToBitmap(Input);
            int ParagraphX_location = Paragraph.X;
            int ParagraphY_location = Paragraph.Y;
            int ParagraphWidth = Paragraph.Width;
            int ParagraphHeight = Paragraph.Height;
            double ParagraphOcrAccuracy = Paragraph.Confidence;
            OcrResult.TextFlow ParagraphText_direction = Paragraph.TextDirection;
            foreach (var Line in Paragraph.Lines)
            {
                // Pages -> Paragraphs -> Lines
                int LineNumber = Line.LineNumber;
                String LineText = Line.Text;
                System.Drawing.Bitmap LineImage = Line.ToBitmap(Input);
                int LineX_location = Line.X;
                int LineY_location = Line.Y;
                int LineWidth = Line.Width;
                int LineHeight = Line.Height;
                double LineOcrAccuracy = Line.Confidence;
                double LineSkew = Line.BaselineAngle;
                double LineOffset = Line.BaselineOffset;
                foreach (var Word in Line.Words)
                {
                    // Pages -> Paragraphs -> Lines -> Words
                    int WordNumber = Word.WordNumber;
                    String WordText = Word.Text;
                    System.Drawing.Image WordImage = Word.ToBitmap(Input);
                    int WordX_location = Word.X;
                    int WordY_location = Word.Y;
                    int WordWidth = Word.Width;
                    int WordHeight = Word.Height;
                    double WordOcrAccuracy = Word.Confidence;
                    if (Word.Font != null)
                    {
                        // Word.Font is only set when using Tesseract Engine Modes rather than LSTM
                        String FontName = Word.Font.FontName;
                        double FontSize = Word.Font.FontSize;
                        bool IsBold = Word.Font.IsBold;
                        bool IsFixedWidth = Word.Font.IsFixedWidth;
                        bool IsItalic = Word.Font.IsItalic;
                        bool IsSerif = Word.Font.IsSerif;
                        bool IsUnderLined = Word.Font.IsUnderlined;
                        bool IsFancy = Word.Font.IsCaligraphic;
                    }
                    foreach (var Character in Word.Characters)
                    {
                        // Pages -> Paragraphs -> Lines -> Words -> Characters
                        int CharacterNumber = Character.CharacterNumber;
                        String CharacterText = Character.Text;
                        System.Drawing.Bitmap CharacterImage = Character.ToBitmap(Input);
                        int CharacterX_location = Character.X;
                        int CharacterY_location = Character.Y;
                        int CharacterWidth = Character.Width;
                        int CharacterHeight = Character.Height;
                        double CharacterOcrAccuracy = Character.Confidence;
                        // Output alternative symbols choices and their probability.
                        // Very useful for spellchecking
                        OcrResult.Choice[] Choices = Character.Choices;

                        // Release the per-character image before the next iteration.
                        CharacterImage.Dispose();
                    }

                    // Release the per-word image.
                    WordImage.Dispose();
                }

                // Release the per-line image.
                LineImage.Dispose();
            }

            // Release the per-paragraph image.
            ParagraphImage.Dispose();
        }

        // Release the per-page image.
        PageImage.Dispose();
    }
}
Imports IronOcr
Imports System.Drawing 'for image export
' We can delve deep into OCR results as an object model of
' Pages, Barcodes, Paragraphs, Lines, Words and Characters.
' This allows us to explore, export and draw OCR content using other APIs.
'
' Every ToBitmap() call below yields a System.Drawing image, which is IDisposable.
' Each image is disposed at the end of the loop iteration that created it, so the
' nested loops do not accumulate undisposed bitmaps. In code that can throw between
' creation and disposal, prefer a Using block instead of the plain Dispose() call.
'
' Ocr is a local (Dim): a Private field declaration cannot be mixed with the
' method-level statements that follow.
Dim Ocr = New IronTesseract()
Ocr.Configuration.EngineMode = TesseractEngineMode.TesseractAndLstm
Ocr.Configuration.ReadBarCodes = True
Using Input = New OcrInput("example.tiff")
    Dim Result As OcrResult = Ocr.Read(Input)
    For Each Page In Result.Pages
        ' Page object
        Dim PageNumber As Integer = Page.PageNumber
        Dim PageText As String = Page.Text
        Dim PageWordCount As Integer = Page.WordCount
        ' Nothing if we don't set Ocr.Configuration.ReadBarCodes = True
        Dim Barcodes() As OcrResult.Barcode = Page.Barcodes
        Dim PageImage As System.Drawing.Bitmap = Page.ToBitmap(Input)
        Dim PageWidth As Integer = Page.Width
        Dim PageHeight As Integer = Page.Height
        For Each Paragraph In Page.Paragraphs
            ' Pages -> Paragraphs
            Dim ParagraphNumber As Integer = Paragraph.ParagraphNumber
            Dim ParagraphText As String = Paragraph.Text
            Dim ParagraphImage As System.Drawing.Bitmap = Paragraph.ToBitmap(Input)
            Dim ParagraphX_location As Integer = Paragraph.X
            Dim ParagraphY_location As Integer = Paragraph.Y
            Dim ParagraphWidth As Integer = Paragraph.Width
            Dim ParagraphHeight As Integer = Paragraph.Height
            Dim ParagraphOcrAccuracy As Double = Paragraph.Confidence
            Dim ParagraphText_direction As OcrResult.TextFlow = Paragraph.TextDirection
            For Each Line In Paragraph.Lines
                ' Pages -> Paragraphs -> Lines
                Dim LineNumber As Integer = Line.LineNumber
                Dim LineText As String = Line.Text
                Dim LineImage As System.Drawing.Bitmap = Line.ToBitmap(Input)
                Dim LineX_location As Integer = Line.X
                Dim LineY_location As Integer = Line.Y
                Dim LineWidth As Integer = Line.Width
                Dim LineHeight As Integer = Line.Height
                Dim LineOcrAccuracy As Double = Line.Confidence
                Dim LineSkew As Double = Line.BaselineAngle
                Dim LineOffset As Double = Line.BaselineOffset
                For Each Word In Line.Words
                    ' Pages -> Paragraphs -> Lines -> Words
                    Dim WordNumber As Integer = Word.WordNumber
                    Dim WordText As String = Word.Text
                    Dim WordImage As System.Drawing.Image = Word.ToBitmap(Input)
                    Dim WordX_location As Integer = Word.X
                    Dim WordY_location As Integer = Word.Y
                    Dim WordWidth As Integer = Word.Width
                    Dim WordHeight As Integer = Word.Height
                    Dim WordOcrAccuracy As Double = Word.Confidence
                    If Word.Font IsNot Nothing Then
                        ' Word.Font is only set when using Tesseract Engine Modes rather than LSTM
                        Dim FontName As String = Word.Font.FontName
                        Dim FontSize As Double = Word.Font.FontSize
                        Dim IsBold As Boolean = Word.Font.IsBold
                        Dim IsFixedWidth As Boolean = Word.Font.IsFixedWidth
                        Dim IsItalic As Boolean = Word.Font.IsItalic
                        Dim IsSerif As Boolean = Word.Font.IsSerif
                        Dim IsUnderLined As Boolean = Word.Font.IsUnderlined
                        Dim IsFancy As Boolean = Word.Font.IsCaligraphic
                    End If
                    For Each Character In Word.Characters
                        ' Pages -> Paragraphs -> Lines -> Words -> Characters
                        Dim CharacterNumber As Integer = Character.CharacterNumber
                        Dim CharacterText As String = Character.Text
                        Dim CharacterImage As System.Drawing.Bitmap = Character.ToBitmap(Input)
                        Dim CharacterX_location As Integer = Character.X
                        Dim CharacterY_location As Integer = Character.Y
                        Dim CharacterWidth As Integer = Character.Width
                        Dim CharacterHeight As Integer = Character.Height
                        Dim CharacterOcrAccuracy As Double = Character.Confidence
                        ' Output alternative symbols choices and their probability.
                        ' Very useful for spellchecking
                        Dim Choices() As OcrResult.Choice = Character.Choices

                        ' Release the per-character image before the next iteration.
                        CharacterImage.Dispose()
                    Next Character

                    ' Release the per-word image.
                    WordImage.Dispose()
                Next Word

                ' Release the per-line image.
                LineImage.Dispose()
            Next Line

            ' Release the per-paragraph image.
            ParagraphImage.Dispose()
        Next Paragraph

        ' Release the per-page image.
        PageImage.Dispose()
    Next Page
End Using
using IronOcr;
// Combine several page images into one input, read them, and save the result
// as a searchable PDF.
var Ocr = new IronTesseract();
using (var Input = new OcrInput())
{
    // One Add call per page image (each statement needs its terminating semicolon).
    Input.Add(@"images\page1.png");
    Input.Add(@"images\page2.bmp");
    Input.Add(@"images\page3.tiff");
    Input.Deskew();
    var Result = Ocr.Read(Input);
    Result.SaveAsSearchablePdf("searchable.pdf");
}
Imports IronOcr
' Combine several page images into one input, read them, and save the result
' as a searchable PDF.
' Ocr is a local (Dim): a Private field declaration cannot be mixed with the
' method-level Using statement that follows.
Dim Ocr = New IronTesseract()
Using Input = New OcrInput()
    ' One statement per line: VB does not allow several statements on a line
    ' without a ":" separator.
    Input.Add("images\page1.png")
    Input.Add("images\page2.bmp")
    Input.Add("images\page3.tiff")
    Input.Deskew()
    Dim Result = Ocr.Read(Input)
    Result.SaveAsSearchablePdf("searchable.pdf")
End Using
using IronOcr;
//..
// Select the Tesseract 5 engine explicitly, then read one image and print its text.
IronTesseract Ocr = new IronTesseract();

// This is default
Ocr.Configuration.TesseractVersion = TesseractVersion.Tesseract5;

using (OcrInput Input = new OcrInput(@"images\image.png"))
{
    OcrResult Result = Ocr.Read(Input);
    Console.WriteLine(Result.Text);
}
Imports IronOcr
'..
' Select the Tesseract 5 engine explicitly, then read one image and print its text.
' Ocr is a local (Dim): a Private field declaration cannot be mixed with the
' method-level statements that follow.
Dim Ocr = New IronTesseract()
' This is default
Ocr.Configuration.TesseractVersion = TesseractVersion.Tesseract5
Using Input = New OcrInput("images\image.png")
    Dim Result = Ocr.Read(Input)
    Console.WriteLine(Result.Text)
End Using
Install-Package IronOcr
Have a question? Get in touch with our development team.