开始使用 Azure 的 OCR

国际语言

using IronOcr;
using System;

// Create an OCR engine and select Arabic as its recognition language.
var ocrTesseract = new IronTesseract();

ocrTesseract.Language = OcrLanguage.Arabic;

// Relative paths use forward slashes so that they resolve on Windows as well as
// on Linux, macOS and Docker, where a backslash is not a directory separator.
using (var ocrInput = new OcrInput())
{
    ocrInput.LoadImage("images/arabic.gif");
    var ocrResult = ocrTesseract.Read(ocrInput);
    Console.WriteLine(ocrResult.Text);
}

// Example using a custom trained language file, with
// OcrLanguage.EnglishBest added as a secondary language:

var ocrTesseractCustomerLang = new IronTesseract();
ocrTesseractCustomerLang.UseCustomTesseractLanguageFile("custom_tesseract_files/custom.traineddata");
ocrTesseractCustomerLang.AddSecondaryLanguage(OcrLanguage.EnglishBest);

// Each OcrInput is wrapped in a using block so it is disposed as soon as its text has been printed.
using (var ocrInput = new OcrInput())
{
    ocrInput.LoadPdf("images/mixed-lang.pdf");
    var ocrResult = ocrTesseractCustomerLang.Read(ocrInput);
    Console.WriteLine(ocrResult.Text);
}

Imports IronOcr
Imports System

' Create an OCR engine and select Arabic as its recognition language.
' Declared with Dim: this is a procedure-level statement, where the
' member-level modifier Private is not permitted.
Dim ocrTesseract = New IronTesseract()

ocrTesseract.Language = OcrLanguage.Arabic

' Relative paths use forward slashes so that they resolve on Windows as well as
' on Linux, macOS and Docker, where a backslash is not a directory separator.
Using ocrInput As New OcrInput()
	ocrInput.LoadImage("images/arabic.gif")
	Dim ocrResult = ocrTesseract.Read(ocrInput)
	Console.WriteLine(ocrResult.Text)
End Using

' Example using a custom trained language file, with
' OcrLanguage.EnglishBest added as a secondary language:

Dim ocrTesseractCustomerLang = New IronTesseract()
ocrTesseractCustomerLang.UseCustomTesseractLanguageFile("custom_tesseract_files/custom.traineddata")
ocrTesseractCustomerLang.AddSecondaryLanguage(OcrLanguage.EnglishBest)

' Each OcrInput is wrapped in a Using block so it is disposed as soon as its text has been printed.
Using ocrInput As New OcrInput()
	ocrInput.LoadPdf("images/mixed-lang.pdf")
	Dim ocrResult = ocrTesseractCustomerLang.Read(ocrInput)
	Console.WriteLine(ocrResult.Text)
End Using

Install-Package IronOcr

IronOCR 语言支持

IronOCR 支持 125 种国际语言。除了默认安装的英语之外，还可以通过 NuGet 将其他语言包添加到您的 .NET 项目中，或者从我们的语言页面下载其他语言包。

大多数语言都提供 Best 质量版本。Best 质量选项可能提供更准确的结果，但处理速度也会更慢。

使用 IronOCR 探索多种语言的 OCR 技术。

结果对象

using IronOcr;
using IronSoftware.Drawing;

// The OCR result is exposed as an object model:
// Pages (with Barcodes) -> Paragraphs -> Lines -> Words -> Characters.
// Walking it lets us explore, export and draw OCR content using other APIs.
var ocr = new IronTesseract();

ocr.Configuration.ReadBarCodes = true;

using var input = new OcrInput();
int[] frameNumbers = { 1, 2 };
input.LoadImageFrames("example.tiff", frameNumbers);

OcrResult result = ocr.Read(input);
foreach (var page in result.Pages)
{
    // Page level
    int pageNumber = page.PageNumber;
    string pageText = page.Text;
    int pageWordCount = page.WordCount;
    // Barcodes is null unless Configuration.ReadBarCodes is set to true.
    OcrResult.Barcode[] pageBarcodes = page.Barcodes;
    AnyBitmap pageImage = page.ToBitmap(input);
    double pageWidth = page.Width;
    double pageHeight = page.Height;
    // Angular correction in degrees from OcrInput.Deskew()
    double pageRotation = page.Rotation;

    foreach (var paragraph in page.Paragraphs)
    {
        // Page -> Paragraph level
        int paragraphNumber = paragraph.ParagraphNumber;
        string paragraphText = paragraph.Text;
        AnyBitmap paragraphImage = paragraph.ToBitmap(input);
        int paragraphX = paragraph.X;
        int paragraphY = paragraph.Y;
        int paragraphWidth = paragraph.Width;
        int paragraphHeight = paragraph.Height;
        double paragraphConfidence = paragraph.Confidence;
        OcrResult.TextFlow paragraphTextDirection = paragraph.TextDirection;

        foreach (var line in paragraph.Lines)
        {
            // Page -> Paragraph -> Line level
            int lineNumber = line.LineNumber;
            string lineText = line.Text;
            AnyBitmap lineImage = line.ToBitmap(input);
            int lineX = line.X;
            int lineY = line.Y;
            int lineWidth = line.Width;
            int lineHeight = line.Height;
            double lineConfidence = line.Confidence;
            double lineBaselineAngle = line.BaselineAngle;
            double lineBaselineOffset = line.BaselineOffset;

            foreach (var word in line.Words)
            {
                // Page -> Paragraph -> Line -> Word level
                int wordNumber = word.WordNumber;
                string wordText = word.Text;
                AnyBitmap wordImage = word.ToBitmap(input);
                int wordX = word.X;
                int wordY = word.Y;
                int wordWidth = word.Width;
                int wordHeight = word.Height;
                double wordConfidence = word.Confidence;

                foreach (var character in word.Characters)
                {
                    // Page -> Paragraph -> Line -> Word -> Character level
                    int characterNumber = character.CharacterNumber;
                    string characterText = character.Text;
                    AnyBitmap characterImage = character.ToBitmap(input);
                    int characterX = character.X;
                    int characterY = character.Y;
                    int characterWidth = character.Width;
                    int characterHeight = character.Height;
                    double characterConfidence = character.Confidence;
                    // Alternative symbol choices and their probability;
                    // very useful for spellchecking.
                    OcrResult.Choice[] alternativeChoices = character.Choices;
                }
            }
        }
    }
}

Imports IronOcr
Imports IronSoftware.Drawing

' We can delve deep into OCR results as an object model of
' Pages, Barcodes, Paragraphs, Lines, Words and Characters.
' This allows us to explore, export and draw OCR content using other APIs.
' Declared with Dim: this is a procedure-level statement, where the
' member-level modifier Private is not permitted.
Dim ocrTesseract = New IronTesseract()

ocrTesseract.Configuration.ReadBarCodes = True

' The Using block disposes the OcrInput once the results have been walked;
' every ToBitmap call below needs the input, so the loops stay inside it.
Using ocrInput As New OcrInput()
	Dim pages = New Integer() { 1, 2 }
	ocrInput.LoadImageFrames("example.tiff", pages)

	Dim ocrResult As OcrResult = ocrTesseract.Read(ocrInput)
	For Each page In ocrResult.Pages
		' Page object
		Dim PageNumber As Integer = page.PageNumber
		Dim PageText As String = page.Text
		Dim PageWordCount As Integer = page.WordCount
		' Nothing unless Configuration.ReadBarCodes is set to True
		Dim Barcodes() As OcrResult.Barcode = page.Barcodes
		Dim PageImage As AnyBitmap = page.ToBitmap(ocrInput)
		Dim PageWidth As Double = page.Width
		Dim PageHeight As Double = page.Height
		Dim PageRotation As Double = page.Rotation ' angular correction in degrees from OcrInput.Deskew()

		For Each paragraph In page.Paragraphs
			' Pages -> Paragraphs
			Dim ParagraphNumber As Integer = paragraph.ParagraphNumber
			Dim ParagraphText As String = paragraph.Text
			Dim ParagraphImage As AnyBitmap = paragraph.ToBitmap(ocrInput)
			Dim ParagraphX_location As Integer = paragraph.X
			Dim ParagraphY_location As Integer = paragraph.Y
			Dim ParagraphWidth As Integer = paragraph.Width
			Dim ParagraphHeight As Integer = paragraph.Height
			Dim ParagraphOcrAccuracy As Double = paragraph.Confidence
			Dim paragrapthText_direction As OcrResult.TextFlow = paragraph.TextDirection
			For Each line In paragraph.Lines
				' Pages -> Paragraphs -> Lines
				Dim LineNumber As Integer = line.LineNumber
				Dim LineText As String = line.Text
				Dim LineImage As AnyBitmap = line.ToBitmap(ocrInput)
				Dim LineX_location As Integer = line.X
				Dim LineY_location As Integer = line.Y
				Dim LineWidth As Integer = line.Width
				Dim LineHeight As Integer = line.Height
				Dim LineOcrAccuracy As Double = line.Confidence
				Dim LineSkew As Double = line.BaselineAngle
				Dim LineOffset As Double = line.BaselineOffset
				For Each word In line.Words
					' Pages -> Paragraphs -> Lines -> Words
					Dim WordNumber As Integer = word.WordNumber
					Dim WordText As String = word.Text
					Dim WordImage As AnyBitmap = word.ToBitmap(ocrInput)
					Dim WordX_location As Integer = word.X
					Dim WordY_location As Integer = word.Y
					Dim WordWidth As Integer = word.Width
					Dim WordHeight As Integer = word.Height
					Dim WordOcrAccuracy As Double = word.Confidence
					For Each character In word.Characters
						' Pages -> Paragraphs -> Lines -> Words -> Characters
						Dim CharacterNumber As Integer = character.CharacterNumber
						Dim CharacterText As String = character.Text
						Dim CharacterImage As AnyBitmap = character.ToBitmap(ocrInput)
						Dim CharacterX_location As Integer = character.X
						Dim CharacterY_location As Integer = character.Y
						Dim CharacterWidth As Integer = character.Width
						Dim CharacterHeight As Integer = character.Height
						Dim CharacterOcrAccuracy As Double = character.Confidence
						' Output alternative symbol choices and their probability.
						' Very useful for spellchecking
						Dim Choices() As OcrResult.Choice = character.Choices
					Next character
				Next word
			Next line
		Next paragraph
	Next page
End Using

Install-Package IronOcr

IronOCR 使用 Tesseract 5 为其扫描的每一页返回高级结果对象。其中包含位置数据、图像、文本、统计置信度、备选符号、字体名称、字体大小、装饰、字体粗细和位置：

Page
Paragraph
Line
Word
Character
Barcode

探索如何使用 IronOCR 读取 OCR 结果

直接来自我们开发团队的人工支持

无论是产品、集成还是授权问题，Iron 产品开发团队随时准备回答您所有问题。立即联系并与 Iron 开始对话，以便在您的项目中充分利用我们的库。

提问

.NET 中适用于 Azure 的 OCR 阅读引擎

处理不完美图像的最强力 Microsoft Azure OCR 解决方案

无论是护照页、发票、银行对账单、邮件、名片或收据；光学字符识别（OCR）是一个基于模式识别、计算机视觉和机器学习的研究领域。公司跨部门使用 OCR 提取会计和财务系统、业务数字化、企业内容管理和数据报告系统中的文本。

IronOCR 是一款原生 C# OCR 库，它在借鉴其他成功方案的基础上，为 Google Tesseract 和 Microsoft 2021 Azure 认知服务增添了价值。

如果您希望以 99% 的准确率转换真实世界的图像，请继续阅读，了解 IronOCR 如何帮助您构建高效、准确、可扩展且近乎媲美人类的光学字符识别应用。

IronOCR 是市场竞争与市场领先的光学字符识别之间的区别

由于各种 API 都宣称具有极高的识别置信度，光学字符识别（OCR）常被认为是一个已经解决的问题。然而，许多产品往往僵化且不够准确，无法胜任真实世界的应用。同样，Tesseract OCR 处理的是机器打印的高分辨率完美文本。

听起来不错？

但现实世界中并不总是有完美打印的和手写的高分辨率文本。相反，旋转、倾斜、低 DPI、背景噪声和所有数字瑕疵都由 IronOCR 处理，包括从图像文件中提取手写文本。我们确保 99.8 - 100% 的准确性，可搜索的文档支持跨平台，包括 Windows、Linux、macOS、Microsoft Azure、AWS 和 Docker - 这就是为何 C# 开发者选择 IronOCR 而不是（基础）Tesseract OCR 的原因 - 已经增加了价值。

为自己配备最好的工具！

除上述之外，IronOCR 使您能够及时处理图像文档。这还不止，IronOCR API 功能还包括以下内容：

通过 OCR 从几乎任何文件、图像或 PDF 提取打印文本，具有出色的准确性和闪电般的速度
将文本从 PDF 中提取为可搜索的文档，具备完美的视觉和空间表示
无需 exes 或 C++ 代码
完整的 PDF OCR 支持
MVC、WebApp、桌面、控制台和服务器应用兼容
完整的 .NET Core、Standard 和 Framework 支持
使用 C# 和 VB.NET 读取
将 OCR 导出为 XHTML
支持多线程
支持 125 种国际语言 - 即用型语言包和自定义构建
提取图像、坐标、统计数据、字体等
在商业和专有应用中重新分发 Tesseract OCR
本地运行，无需 SaaS 支持
是 Microsoft 认知服务 OCR 服务的绝佳替代品

几乎无限的功能 - IronOCR 是数字工作空间的“光学字符识别 OCR 工具”

从本地 .dll 或 exe 安装方式过渡到单一可信来源 —— 使用简单的 .NET 组件库进行开发，只需一个简单的 C# API，支持：

.NET Framework 4.5 及以上
.NET Standard 2.0 及以上（包括 3.x 和 .NET 5 Beta）
.NET Core 2.0 及以上（包括 3.x 和 .NET 5 Beta）
.NET 5
Xamarin for macOS

IronOCR API 的艺术并未就此结束；您可以继续探索我们的技术优势功能。我们一步一个脚印地减少商业复杂性，开发可靠的解决方案来简化文档处理应用，通过提供行业领先的功能来最大化业务收入，这些功能包括：

纯 .NET OCR API 能力
本地 OCR 操作，不需要云意味着更多的安全性
创建优化的低质量、噪声和失真的扫描资源
读取 PDF、多页 TIFFs
可以将任何 OCR 扫描样本保存为用户可以搜索的 PDF 文档或 XHTML
纯文本、条形码数据和包含段落、行、词和字符的 OCR 结果类

IronOCR API 优势：实现计算机视觉？

我们的光学字符识别过程从自动图像预处理开始，以增强图像文件，从而提高提取响应率。IronOCR 为您的工作增添价值，因为它能使用户将示例基础图像文件提取为自身的最佳版本。IronOCR 全面覆盖基础：

分辨率增强

由于 IronOCR 在 300 DPI（每英寸点数）的图像文件上效果最佳，任何明显偏离 200-300 DPI 范围的图像都会被重新采样，以落入目标范围。

这意味着从 600 DPI 图像降低采样至 300 DPI 或将 100 DPI 图像提升采样至 200 DPI，具有 99% 的信心。

二值化

由于 IronOCR 认知服务被设计为在单色图像上工作，任何彩色或灰度图像都被转换为单色，使用自适应二值化算法。

该算法比较区域内的像素密度以确定转换像素为单色所使用的阈值。

自动旋转和去倾

IronOCR 通过查找文本行和字符模式，自动校正倾斜，并将输入图像旋转到所需方向。

自适应噪声去除

通过 IronOCR，图像文件会自动分析噪声的存在和数量。噪声基本上是扫描图像中发现的‘小斑点’。我们的自适应算法根据噪声颗粒的大小来去除噪声。

一旦示例图像文件被预处理，IronOCR 然后会将输入图像文件划分为不同的处理区。

分区

另一项预准备阶段包括将参考图像划分为不同的逻辑区。IronOCR 首先利用空白和模式在图像中定位文本和图片；文本区域与图像分离。

然后划分为区 - 段落、列和文本块。在文本识别过程中不包括的图像和剩余的非文本像素被识别为智能输出。IronOCR 然后通过网格线和文本块将文本区标记为表格。

文本识别能力

执行多个、互相关联的步骤，将像素块转换为用户可以搜索的单行文本线程。这包括字符分割、自适应分类、字典引用和其他相关过程，均有助于最佳提取文本。

经过反复测试的多个参数

通过 IronOCR API 服务，我们已经使用多种语言的多个数据文件示例测试了我们的工具，测试内容包括单词级准确性、符号准确性以及 Microsoft Office 格式的布局保留。其中一些参数为自动测试，另一些则需要人工目视检查。

与 IronOCR 连接 - 理想的 OCR 认知服务解决方案

IronOCR 让您可以为可搜索的纯文本字符串添加跨平台的 OCR 能力，支持多种输入格式。为了通过使用 IronOCR 掌握生产力，请查看我们的免费教程文档，向您展示如何使用 IronOCR。今天就下载我们的 NuGet 包安装程序，并使用免费的试用密钥进行探索或联系 24/7 的个人支持。无论团队规模如何，我们的终身许可可以满足您的需要。

适用于 .NET, VB.NET, C#

查看许可证