Name: IronOCR
Brand: Iron Software
Availability: InStock
Rating: 4.86 (101 reviews)

開始使用 Azure OCR

國際語言

using IronOcr;
using System;

// Read an Arabic image: the language is selected on the engine before reading.
var arabicEngine = new IronTesseract { Language = OcrLanguage.Arabic };

using (var arabicInput = new OcrInput())
{
    arabicInput.LoadImage(@"images\arabic.gif");
    Console.WriteLine(arabicEngine.Read(arabicInput).Text);
}

// Second example: load a custom-trained Tesseract language file,
// then add English (Best) as a secondary language.
var customEngine = new IronTesseract();
customEngine.UseCustomTesseractLanguageFile("custom_tesseract_files/custom.traineddata");
customEngine.AddSecondaryLanguage(OcrLanguage.EnglishBest);

using (var mixedInput = new OcrInput())
{
    mixedInput.LoadPdf(@"images\mixed-lang.pdf");
    Console.WriteLine(customEngine.Read(mixedInput).Text);
}

Imports IronOcr
Imports System

' Create the OCR engine and select Arabic as its language.
' Declared with Dim: this is a local variable used by the statements below,
' and the Private modifier is only valid on type members.
Dim ocrTesseract = New IronTesseract()

ocrTesseract.Language = OcrLanguage.Arabic

' The Using block disposes the input once the text has been printed.
Using ocrInput As New OcrInput()
	ocrInput.LoadImage("images\arabic.gif")
	Dim ocrResult = ocrTesseract.Read(ocrInput)
	Console.WriteLine(ocrResult.Text)
End Using

' Example with a custom-trained language file being used,
' with English (Best) added as a secondary language:

Dim ocrTesseractCustomerLang = New IronTesseract()
ocrTesseractCustomerLang.UseCustomTesseractLanguageFile("custom_tesseract_files/custom.traineddata")
ocrTesseractCustomerLang.AddSecondaryLanguage(OcrLanguage.EnglishBest)

Using ocrInput As New OcrInput()
	ocrInput.LoadPdf("images\mixed-lang.pdf")
	Dim ocrResult = ocrTesseractCustomerLang.Read(ocrInput)
	Console.WriteLine(ocrResult.Text)
End Using

Install-Package IronOcr

IronOCR 語言支援

IronOCR 支援 125 種國際語言。除預設安裝的英語外，其他語言包可透過 NuGet 新增至您的 .NET 專案，或在我們的語言頁面下載。

大多數語言提供 Standard（建議）與 Best 兩種品質。Best 品質選項可能提供更準確的結果，但處理時間會較長。

使用 IronOCR 探索多語言 OCR。

結果物件

using IronOcr;
using IronSoftware.Drawing;

// The OCR result is an object model that can be walked from the top down:
// Pages -> Paragraphs -> Lines -> Words -> Characters, plus Barcodes per page.
// Each level is read into locals below to show what is available when
// exploring, exporting or drawing OCR content with other APIs.
var engine = new IronTesseract();

engine.Configuration.ReadBarCodes = true;

using var input = new OcrInput();
int[] frames = { 1, 2 };
input.LoadImageFrames("example.tiff", frames);

OcrResult result = engine.Read(input);
foreach (var pg in result.Pages)
{
    // Page level
    int pageNumber = pg.PageNumber;
    string pageText = pg.Text;
    int pageWordCount = pg.WordCount;
    // Barcodes is null unless Configuration.ReadBarCodes was set to true
    OcrResult.Barcode[] pageBarcodes = pg.Barcodes;
    AnyBitmap pageImage = pg.ToBitmap(input);
    double pageWidth = pg.Width;
    double pageHeight = pg.Height;
    // Angular correction in degrees from OcrInput.Deskew()
    double pageRotation = pg.Rotation;

    foreach (var para in pg.Paragraphs)
    {
        // Page -> Paragraph level
        int paragraphNumber = para.ParagraphNumber;
        string paragraphText = para.Text;
        AnyBitmap paragraphImage = para.ToBitmap(input);
        int paragraphX = para.X;
        int paragraphY = para.Y;
        int paragraphWidth = para.Width;
        int paragraphHeight = para.Height;
        double paragraphConfidence = para.Confidence;
        OcrResult.TextFlow paragraphTextDirection = para.TextDirection;

        foreach (var ln in para.Lines)
        {
            // Page -> Paragraph -> Line level
            int lineNumber = ln.LineNumber;
            string lineText = ln.Text;
            AnyBitmap lineImage = ln.ToBitmap(input);
            int lineX = ln.X;
            int lineY = ln.Y;
            int lineWidth = ln.Width;
            int lineHeight = ln.Height;
            double lineConfidence = ln.Confidence;
            double lineBaselineAngle = ln.BaselineAngle;
            double lineBaselineOffset = ln.BaselineOffset;

            foreach (var wd in ln.Words)
            {
                // Page -> Paragraph -> Line -> Word level
                int wordNumber = wd.WordNumber;
                string wordText = wd.Text;
                AnyBitmap wordImage = wd.ToBitmap(input);
                int wordX = wd.X;
                int wordY = wd.Y;
                int wordWidth = wd.Width;
                int wordHeight = wd.Height;
                double wordConfidence = wd.Confidence;

                foreach (var ch in wd.Characters)
                {
                    // Page -> Paragraph -> Line -> Word -> Character level
                    int characterNumber = ch.CharacterNumber;
                    string characterText = ch.Text;
                    AnyBitmap characterImage = ch.ToBitmap(input);
                    int characterX = ch.X;
                    int characterY = ch.Y;
                    int characterWidth = ch.Width;
                    int characterHeight = ch.Height;
                    double characterConfidence = ch.Confidence;
                    // Alternative symbol choices and their probabilities;
                    // very useful for spellchecking.
                    OcrResult.Choice[] alternatives = ch.Choices;
                }
            }
        }
    }
}

Imports IronOcr
Imports IronSoftware.Drawing

' We can delve deep into OCR results as an object model of
' Pages, Barcodes, Paragraphs, Lines, Words and Characters.
' This allows us to explore, export and draw OCR content using other APIs.
' Declared with Dim: this is a local variable used by the statements below,
' and the Private modifier is only valid on type members.
Dim ocrTesseract = New IronTesseract()

ocrTesseract.Configuration.ReadBarCodes = True

' The Using block disposes the input after the results have been walked;
' the input must stay alive until then because every ToBitmap call takes it.
Using ocrInput As New OcrInput()
	Dim pages = New Integer() { 1, 2 }
	ocrInput.LoadImageFrames("example.tiff", pages)

	Dim ocrResult As OcrResult = ocrTesseract.Read(ocrInput)
	For Each page In ocrResult.Pages
		' Page object
		Dim PageNumber As Integer = page.PageNumber
		Dim PageText As String = page.Text
		Dim PageWordCount As Integer = page.WordCount
		' Nothing if we don't set Configuration.ReadBarCodes = True
		Dim Barcodes() As OcrResult.Barcode = page.Barcodes
		Dim PageImage As AnyBitmap = page.ToBitmap(ocrInput)
		Dim PageWidth As Double = page.Width
		Dim PageHeight As Double = page.Height
		Dim PageRotation As Double = page.Rotation ' angular correction in degrees from OcrInput.Deskew()

		For Each paragraph In page.Paragraphs
			' Pages -> Paragraphs
			Dim ParagraphNumber As Integer = paragraph.ParagraphNumber
			Dim ParagraphText As String = paragraph.Text
			Dim ParagraphImage As AnyBitmap = paragraph.ToBitmap(ocrInput)
			Dim ParagraphX_location As Integer = paragraph.X
			Dim ParagraphY_location As Integer = paragraph.Y
			Dim ParagraphWidth As Integer = paragraph.Width
			Dim ParagraphHeight As Integer = paragraph.Height
			Dim ParagraphOcrAccuracy As Double = paragraph.Confidence
			Dim paragrapthText_direction As OcrResult.TextFlow = paragraph.TextDirection
			For Each line In paragraph.Lines
				' Pages -> Paragraphs -> Lines
				Dim LineNumber As Integer = line.LineNumber
				Dim LineText As String = line.Text
				Dim LineImage As AnyBitmap = line.ToBitmap(ocrInput)
				Dim LineX_location As Integer = line.X
				Dim LineY_location As Integer = line.Y
				Dim LineWidth As Integer = line.Width
				Dim LineHeight As Integer = line.Height
				Dim LineOcrAccuracy As Double = line.Confidence
				Dim LineSkew As Double = line.BaselineAngle
				Dim LineOffset As Double = line.BaselineOffset
				For Each word In line.Words
					' Pages -> Paragraphs -> Lines -> Words
					Dim WordNumber As Integer = word.WordNumber
					Dim WordText As String = word.Text
					Dim WordImage As AnyBitmap = word.ToBitmap(ocrInput)
					Dim WordX_location As Integer = word.X
					Dim WordY_location As Integer = word.Y
					Dim WordWidth As Integer = word.Width
					Dim WordHeight As Integer = word.Height
					Dim WordOcrAccuracy As Double = word.Confidence
					For Each character In word.Characters
						' Pages -> Paragraphs -> Lines -> Words -> Characters
						Dim CharacterNumber As Integer = character.CharacterNumber
						Dim CharacterText As String = character.Text
						Dim CharacterImage As AnyBitmap = character.ToBitmap(ocrInput)
						Dim CharacterX_location As Integer = character.X
						Dim CharacterY_location As Integer = character.Y
						Dim CharacterWidth As Integer = character.Width
						Dim CharacterHeight As Integer = character.Height
						Dim CharacterOcrAccuracy As Double = character.Confidence
						' Output alternative symbol choices and their probability.
						' Very useful for spellchecking
						Dim Choices() As OcrResult.Choice = character.Choices
					Next character
				Next word
			Next line
		Next paragraph
	Next page
End Using

Install-Package IronOcr

IronOCR 為每個使用 Tesseract 5 掃描的頁面返回一個高級結果物件。這包含位置資料、圖像、文字、統計信心、替代符號選擇、字體名稱、字體大小、裝飾、字體粗細和位置，每個項目都包括：

Page
Paragraph
文字行
Word
個別字元
Barcode

探索如何使用 IronOCR 閱讀 OCR 結果

由我們的開發團隊直接提供人工支援

無論是產品、整合或授權方面的疑問，Iron 產品開發團隊都隨時準備為您提供支援。請與我們聯繫並開啟對話，讓您在專案中充分發揮我們函式庫的優勢。

提問

適用於 Azure 的 .NET OCR 讀取引擎

處理瑕疵影像的首選 Microsoft Azure OCR 解決方案

無論是護照頁面、發票、銀行對帳單、郵件、名片或收據；光學字元辨識（OCR）是一門基於模式辨識、電腦視覺與機器學習的研究領域。企業跨部門運用 OCR 技術，在會計與財務系統、業務數位化、企業內容管理及資料報表系統中擷取文字。

除了打造其他成功案例外，IronOCR 更透過其原生 C# OCR 程式庫，為 Google Tesseract 及 Microsoft 2021 Azure Cognitive Services 增添價值。

若您希望以 99% 的準確度轉換現實世界的圖片，請繼續閱讀，了解 IronOCR 如何助您打造高效、精準、可擴展且近乎人類水準的光學字元辨識應用程式。

IronOCR 是市場競爭者與市場領導者之間的關鍵差異

由於各類 API 對其防護能力都極具信心，光學字元辨識（OCR）常被視為已解決的技術難題。然而，這些產品往往僵化且不精準，在實際應用中屢屢失敗。同樣地，Tesseract OCR 僅適用於機器印刷、高解析度且完美的文字。

聽起來很棒？

但現實世界中的文字，往往並非總是高解析度的完美印刷或手寫文字。相反地，旋轉、傾斜、低 DPI、背景雜訊，以及所有數位缺陷的痛點，IronOCR 都能妥善處理，包括從圖像檔案中擷取手寫文字。我們確保文件具備 99.8% 至 100% 的準確度與可搜尋性，並支援跨平台運作，涵蓋 Windows、Linux、macOS、Microsoft Azure、AWS 及 Docker —— C# 開發者選擇 IronOCR 而非（基礎版）Tesseract OCR 自有其道理，關鍵在於創造附加價值。

為自己配備最佳解決方案！

除了上述功能外，IronOCR 還能讓您迅速處理圖像文件。不僅如此，IronOCR API 還具備以下特色：

透過 OCR 從幾乎任何檔案、圖片或 PDF 中擷取印刷文字，具備卓越的準確度與閃電般的速度
將 PDF 和圖片中的文字擷取為可搜尋文件，並完美呈現視覺與空間資訊
無需執行檔或 C++ 程式碼
完整支援 PDF OCR
相容於 MVC、WebApp、桌面、主控台及伺服器應用程式
完整支援 .NET Core、.NET Standard 及 .NET Framework
使用 C# 及 VB .NET 讀取
將 OCR 結果匯出為 XHTML
支援多執行緒
支援 125 種國際語言 — 即用型語言套件與自訂建置
可擷取圖片、座標、統計資料、字型等豐富內容
可在商業及專有應用程式中重新分發 Tesseract OCR
在地運行，無需 SaaS
Microsoft Cognitive Services OCR 服務的絕佳替代方案

功能幾乎無限制 — IronOCR 是數位工作空間中「首選」的光學字元辨識 (OCR) 工具

從原生 .dll 或 .exe 安裝轉向單一可信來源——透過支援以下功能的簡易 C# API，使用單一原生 .NET 元件庫進行開發：

.NET Framework 4.5 及以上版本
.NET Standard 2.0 及以上版本（包含 3.x 及 .NET 5 Beta）
.NET Core 2.0 及以上版本（包含 3.x 及 .NET 5 Beta）
.NET 5
適用於 macOS 的 Xamarin

IronOCR API 的精髓不僅於此；您可進一步探索我們的前沿技術功能。我們透過開發可靠的解決方案來逐步簡化業務複雜性，優化文件處理應用程式，並透過內建業界領先的功能來最大化業務收益：

純 .NET OCR API 功能
本地 OCR 運作，無需雲端，安全性更高
可處理低畫質、雜訊及變形的掃描檔案
支援讀取 PDF 及多頁 TIFF 檔案
可將任何 OCR 掃描樣本儲存為 PDF 文件或 XHTML，供使用者進行搜尋
純文字、BarCode 資料，以及包含段落、行、單字和字元的 OCR 結果類別

IronOCR API Edge：實現電腦視覺？

我們的光學字元辨識流程始於自動化影像預處理，藉此強化影像檔案以提升擷取成功率。IronOCR 為您的工作增添價值，讓使用者能將原始範例影像檔案擷取為其最佳版本。IronOCR 涵蓋所有基礎功能：

解析度增強

由於 IronOCR 服務在 300 DPI（每英寸點數）的影像檔案上運作最佳，任何顯著超出 200-300 DPI 範圍的影像，都會被重新取樣以符合目標範圍。

這意味著系統能以 99% 的精準度，將 600 DPI 的圖像降採樣至 300 DPI，或將 100 DPI 的圖像升採樣至 200 DPI。

二值化

由於 IronOCR 認知服務是針對單色影像設計的，因此任何彩色或灰階影像都會利用自適應二值化演算法轉換為單色。

該演算法會比較區域內的像素密度，以確定用於將像素轉換為單色圖的閾值。

自動旋轉與校正

IronOCR 會尋找文字行和字元模式，以自動校正輸入影像資源的傾斜度並旋轉至所需的方向。

自適應雜訊去除

IronOCR 會自動分析影像檔案中是否存在雜訊及其數量。雜訊基本上就是掃描影像上出現的「斑點」。我們的自適應演算法會根據雜訊粒子的大小來移除雜訊。

樣本圖像檔案完成預處理後，IronOCR 會將輸入圖像檔案劃分為不同的處理區域。

分區

另一個預處理階段涉及將參考圖像劃分為不同的邏輯區域。IronOCR 首先借助空白區域和圖案來定位圖像中的文字與圖片；文字區域會與圖片分離。

接著將其劃分為段落、欄位及文字區塊等區域。系統會識別出圖像與其餘非文字像素，並在文字辨識過程中將其排除，同時納入智慧輸出結果。IronOCR 隨後會利用網格線與文字區塊，將文字區域標記為表格。

文字辨識能力

執行多項相互關聯的步驟，將像素塊轉換為使用者可搜尋的單行文字串。這包含字元分割、自適應分類、字典參照，以及其他有助於獲得最佳提取文字的相關流程。

經過實證的多重參數

透過 IronOCR API 服務，我們已運用多種語言的資料檔案範例對工具進行測試，涵蓋 Microsoft Office 格式中的 WORD 層級、符號準確度及版面保留能力。雖然部分參數會自動測試，但其他參數則包含視覺檢查。

採用 IronOCR —— 理想的 OCR 認知服務解決方案

IronOCR 讓您能將支援多種輸入格式的跨平台 OCR 功能，整合至可供搜尋的純文字字串中。若要透過 IronOCR 提升工作效率，請從我們的免費教學文件開始，該文件將引導您逐步使用 IronOCR。立即下載我們的 NuGet 套件安裝程式，並透過免費試用授權進行探索，或聯繫 24/7 專人支援。無論您的團隊規模大小，皆可透過我們的終身授權方案，依需求彈性擴展服務。

適用於 .NET, VB.NET, C#

檢視授權