Fixing Low Quality Scans & Images
The IronTesseract
OCR class gives C# and .NET developers granular control when adding OCR (image and PDF to text) functionality to their applications, and lets them fine-tune performance for their specific use cases.
By adjusting these settings and testing against real-world examples, you can find the right balance between speed and accuracy. Settings include: CleanBackgroundNoise, EnhanceContrast, EnhanceResolution, Language, Strategy, RotateAndStraighten, ColorSpace, DetectWhiteTextOnDarkBackgrounds, and InputImageType.
This makes it possible to read low-quality scans that the free Tesseract engine would not be able to handle.
// Sample C# code demonstrating how to use IronTesseract
using IronOcr;
/// <summary>
/// Minimal console sample: configures an <c>IronTesseract</c> engine with
/// image-cleanup options intended for low-quality input, then prints the
/// text recognized in a single image file.
/// </summary>
public class OcrExample
{
    /// <summary>
    /// Configures the engine, reads <c>example-image.png</c>, and writes the
    /// recognized text to standard output.
    /// </summary>
    public static void Main()
    {
        // Initialize the Tesseract OCR engine.
        var ocr = new IronTesseract();

        // NOTE(review): the Configuration property names below are kept exactly
        // as in the original sample; confirm they exist in the installed IronOcr
        // version before relying on this code.

        // Clean background noise that may interfere with text recognition.
        ocr.Configuration.CleanBackgroundNoise = true;

        // Increase the contrast of the image for better text clarity.
        ocr.Configuration.EnhanceContrast = true;

        // Enhance the resolution of the input image (useful for low-res images).
        ocr.Configuration.EnhanceResolution = true;

        // Language of the text to be recognized.
        ocr.Configuration.Language = OcrLanguage.English;

        // Page segmentation strategy. AutoOSD presumably means automatic
        // segmentation with orientation and script detection - TODO confirm.
        ocr.Configuration.Strategy = IronOcr.PageSegmentationStrategy.AutoOSD;

        // Automatically rotate and straighten skewed documents.
        ocr.Configuration.RotateAndStraighten = true;

        // Color space setting (RGB here); presumably the color space in which
        // the image is processed - TODO confirm.
        ocr.Configuration.ColorSpace = IronOcr.ColorSpace.Rgb;

        // Detect white text on a dark background.
        ocr.Configuration.DetectWhiteTextOnDarkBackgrounds = true;

        // Load the image; the using block disposes the input after reading.
        using (var input = new OcrInput(@"example-image.png"))
        {
            var result = ocr.Read(input);

            // Fully qualified so the sample compiles with only `using IronOcr;`
            // in scope (no `using System;` / implicit usings required).
            System.Console.WriteLine(result.Text);
        }
    }
}
// Sample C# code demonstrating how to use IronTesseract
using IronOcr;
/// <summary>
/// Minimal console sample: configures an <c>IronTesseract</c> engine with
/// image-cleanup options intended for low-quality input, then prints the
/// text recognized in a single image file.
/// </summary>
public class OcrExample
{
    /// <summary>
    /// Configures the engine, reads <c>example-image.png</c>, and writes the
    /// recognized text to standard output.
    /// </summary>
    public static void Main()
    {
        // Initialize the Tesseract OCR engine.
        var ocr = new IronTesseract();

        // NOTE(review): the Configuration property names below are kept exactly
        // as in the original sample; confirm they exist in the installed IronOcr
        // version before relying on this code.

        // Clean background noise that may interfere with text recognition.
        ocr.Configuration.CleanBackgroundNoise = true;

        // Increase the contrast of the image for better text clarity.
        ocr.Configuration.EnhanceContrast = true;

        // Enhance the resolution of the input image (useful for low-res images).
        ocr.Configuration.EnhanceResolution = true;

        // Language of the text to be recognized.
        ocr.Configuration.Language = OcrLanguage.English;

        // Page segmentation strategy. AutoOSD presumably means automatic
        // segmentation with orientation and script detection - TODO confirm.
        ocr.Configuration.Strategy = IronOcr.PageSegmentationStrategy.AutoOSD;

        // Automatically rotate and straighten skewed documents.
        ocr.Configuration.RotateAndStraighten = true;

        // Color space setting (RGB here); presumably the color space in which
        // the image is processed - TODO confirm.
        ocr.Configuration.ColorSpace = IronOcr.ColorSpace.Rgb;

        // Detect white text on a dark background.
        ocr.Configuration.DetectWhiteTextOnDarkBackgrounds = true;

        // Load the image; the using block disposes the input after reading.
        using (var input = new OcrInput(@"example-image.png"))
        {
            var result = ocr.Read(input);

            // Fully qualified so the sample compiles with only `using IronOcr;`
            // in scope (no `using System;` / implicit usings required).
            System.Console.WriteLine(result.Text);
        }
    }
}
' Sample VB.NET code demonstrating how to use IronTesseract
Imports IronOcr
''' <summary>
''' Minimal console sample: configures an IronTesseract engine with
''' image-cleanup options, then prints the text recognized in one image file.
''' </summary>
Public Class OcrExample
    ''' <summary>
    ''' Configures the engine, reads "example-image.png", and writes the
    ''' recognized text to the console.
    ''' </summary>
    Public Shared Sub Main()
        ' Create the Tesseract OCR engine
        Dim engine = New IronTesseract()

        ' Apply all recognition settings in one place
        With engine.Configuration
            ' Remove background noise that may interfere with recognition
            .CleanBackgroundNoise = True
            ' Boost image contrast for clearer text
            .EnhanceContrast = True
            ' Upscale the input image, useful for low-res images
            .EnhanceResolution = True
            ' Language of the text to be recognized
            .Language = OcrLanguage.English
            ' Page segmentation strategy (AutoOSD)
            .Strategy = IronOcr.PageSegmentationStrategy.AutoOSD
            ' Automatically rotate and straighten skewed documents
            .RotateAndStraighten = True
            ' Color space setting (RGB)
            .ColorSpace = IronOcr.ColorSpace.Rgb
            ' Detect white text on a dark background
            .DetectWhiteTextOnDarkBackgrounds = True
        End With

        ' Load the image, run OCR, and print the recognized text;
        ' the Using block disposes the input afterwards
        Using imageInput = New OcrInput("example-image.png")
            Console.WriteLine(engine.Read(imageInput).Text)
        End Using
    End Sub
End Class