Fixing Low Quality Scans & Images

The IronTesseract OCR Class provides granular control to C# and .NET developers to add OCR (image and PDF to text) functionality to their applications, and also to fine-tune performance to their specific use cases.

By adjusting these settings and testing against real-world examples, you can find the right balance between speed and accuracy for your documents. Settings include: CleanBackgroundNoise, EnhanceContrast, EnhanceResolution, Language, Strategy, RotateAndStraighten, ColorSpace, DetectWhiteTextOnDarkBackgrounds, and InputImageType.

This makes it possible to read low-quality scans that the free, open-source Tesseract engine would not be able to handle on its own.

// Sample C# code demonstrating how to use IronTesseract
using IronOcr;

/// <summary>
/// Console sample: tunes an <c>IronTesseract</c> engine for low-quality input
/// and prints the text it reads from a single image file.
/// </summary>
public class OcrExample
{
    /// <summary>
    /// Creates the engine, applies the sample tuning settings, reads
    /// <c>example-image.png</c>, and writes the recognized text to the console.
    /// </summary>
    public static void Main()
    {
        // Nested initializer: every entry below is assigned onto the engine's
        // existing Configuration object, in the order written.
        var engine = new IronTesseract
        {
            Configuration =
            {
                // Remove background noise that can get in the way of recognition
                CleanBackgroundNoise = true,

                // Boost contrast so the text stands out more clearly
                EnhanceContrast = true,

                // Enhance the resolution of the input; helpful for low-res images
                EnhanceResolution = true,

                // Language of the text being recognized
                Language = OcrLanguage.English,

                // Page segmentation strategy: AutoOSD (automatic segmentation
                // with orientation and script detection)
                Strategy = IronOcr.PageSegmentationStrategy.AutoOSD,

                // Rotate and straighten skewed documents automatically
                RotateAndStraighten = true,

                // Color space used for the input: RGB
                ColorSpace = IronOcr.ColorSpace.Rgb,

                // Also look for white text on dark backgrounds
                DetectWhiteTextOnDarkBackgrounds = true
            }
        };

        // The input is disposed as soon as the read has been printed
        using (OcrInput imageInput = new OcrInput(@"example-image.png"))
        {
            // Run OCR and print the recognized text
            Console.WriteLine(engine.Read(imageInput).Text);
        }
    }
}
// Sample C# code demonstrating how to use IronTesseract
using IronOcr;

/// <summary>
/// Console sample: tunes an <c>IronTesseract</c> engine for low-quality input
/// and prints the text it reads from a single image file.
/// </summary>
public class OcrExample
{
    /// <summary>
    /// Creates the engine, applies the sample tuning settings, reads
    /// <c>example-image.png</c>, and writes the recognized text to the console.
    /// </summary>
    public static void Main()
    {
        // Nested initializer: every entry below is assigned onto the engine's
        // existing Configuration object, in the order written.
        var engine = new IronTesseract
        {
            Configuration =
            {
                // Remove background noise that can get in the way of recognition
                CleanBackgroundNoise = true,

                // Boost contrast so the text stands out more clearly
                EnhanceContrast = true,

                // Enhance the resolution of the input; helpful for low-res images
                EnhanceResolution = true,

                // Language of the text being recognized
                Language = OcrLanguage.English,

                // Page segmentation strategy: AutoOSD (automatic segmentation
                // with orientation and script detection)
                Strategy = IronOcr.PageSegmentationStrategy.AutoOSD,

                // Rotate and straighten skewed documents automatically
                RotateAndStraighten = true,

                // Color space used for the input: RGB
                ColorSpace = IronOcr.ColorSpace.Rgb,

                // Also look for white text on dark backgrounds
                DetectWhiteTextOnDarkBackgrounds = true
            }
        };

        // The input is disposed as soon as the read has been printed
        using (OcrInput imageInput = new OcrInput(@"example-image.png"))
        {
            // Run OCR and print the recognized text
            Console.WriteLine(engine.Read(imageInput).Text);
        }
    }
}
' Sample VB.NET code demonstrating how to use IronTesseract
Imports IronOcr

''' <summary>
''' Console sample: tunes an IronTesseract engine for low-quality input
''' and prints the text it reads from a single image file.
''' </summary>
Public Class OcrExample
	''' <summary>
	''' Creates the engine, applies the sample tuning settings, reads
	''' "example-image.png", and writes the recognized text to the console.
	''' </summary>
	Public Shared Sub Main()
		' Create the Tesseract OCR engine
		Dim engine As New IronTesseract()

		' Apply the sample tuning settings to the engine's configuration
		With engine.Configuration
			' Remove background noise that can get in the way of recognition
			.CleanBackgroundNoise = True

			' Boost contrast so the text stands out more clearly
			.EnhanceContrast = True

			' Enhance the resolution of the input; helpful for low-res images
			.EnhanceResolution = True

			' Language of the text being recognized
			.Language = OcrLanguage.English

			' Page segmentation strategy: AutoOSD (automatic segmentation
			' with orientation and script detection)
			.Strategy = IronOcr.PageSegmentationStrategy.AutoOSD

			' Rotate and straighten skewed documents automatically
			.RotateAndStraighten = True

			' Color space used for the input: RGB
			.ColorSpace = IronOcr.ColorSpace.Rgb

			' Also look for white text on dark backgrounds
			.DetectWhiteTextOnDarkBackgrounds = True
		End With

		' The input is disposed as soon as the read has been printed
		Using imageInput As New OcrInput("example-image.png")
			' Run OCR and print the recognized text
			Console.WriteLine(engine.Read(imageInput).Text)
		End Using
	End Sub
End Class
$vbLabelText   $csharpLabel