開始使用 Azure 的 OCR

C# + VB.NET: 國際語言 國際語言
using IronOcr;
using System;

// Read Arabic text: configure the engine's language, then OCR a single image
// and print the recognized text.
var arabicReader = new IronTesseract
{
    Language = OcrLanguage.Arabic
};

using (OcrInput arabicInput = new OcrInput())
{
    arabicInput.LoadImage(@"images\arabic.gif");
    Console.WriteLine(arabicReader.Read(arabicInput).Text);
}

// Example using a custom-trained font: a separate engine loads a custom
// .traineddata file and adds EnglishBest as a secondary language before
// reading a PDF.
var customReader = new IronTesseract();
customReader.UseCustomTesseractLanguageFile("custom_tesseract_files/custom.traineddata");
customReader.AddSecondaryLanguage(OcrLanguage.EnglishBest);

using (OcrInput mixedInput = new OcrInput())
{
    mixedInput.LoadPdf(@"images\mixed-lang.pdf");
    Console.WriteLine(customReader.Read(mixedInput).Text);
}
Imports IronOcr
Imports System

' Read Arabic text: configure the engine's language, then OCR a single image
' and print the recognized text.
' (Declared with Dim: Private is not valid for a local variable used among
' executable statements, and the second engine below already uses Dim.)
Dim ocrTesseract = New IronTesseract()

ocrTesseract.Language = OcrLanguage.Arabic

Using ocrInput As New OcrInput()
	ocrInput.LoadImage("images\arabic.gif")
	Dim ocrResult = ocrTesseract.Read(ocrInput)
	Console.WriteLine(ocrResult.Text)
End Using

' Example using a custom-trained font: a separate engine loads a custom
' .traineddata file and adds EnglishBest as a secondary language before
' reading a PDF.

Dim ocrTesseractCustomerLang = New IronTesseract()
ocrTesseractCustomerLang.UseCustomTesseractLanguageFile("custom_tesseract_files/custom.traineddata")
ocrTesseractCustomerLang.AddSecondaryLanguage(OcrLanguage.EnglishBest)

Using ocrInput As New OcrInput()
	ocrInput.LoadPdf("images\mixed-lang.pdf")
	Dim ocrResult = ocrTesseractCustomerLang.Read(ocrInput)
	Console.WriteLine(ocrResult.Text)
End Using

IronOCR 支援 125 種國際語言。

除了預設安裝的英語之外,您還可以透過 NuGet 或從我們網站的語言頁面下載語言包,並將其添加到您的 .NET 項目中。

大多數語言都提供快速版、標準版(推薦)及最佳品質版。較準確的版本可能會較慢。

C# + VB.NET: 結果對象 結果對象
using IronOcr;
using IronSoftware.Drawing;

// The OCR result is exposed as an object model:
// Pages, Barcodes, Paragraphs, Lines, Words and Characters.
// Walking it lets us explore, export and draw OCR content using other APIs.
var engine = new IronTesseract();

engine.Configuration.ReadBarCodes = true;

using OcrInput input = new OcrInput();
int[] frameNumbers = { 1, 2 };
input.LoadImageFrames("example.tiff", frameNumbers);

OcrResult result = engine.Read(input);
foreach (var page in result.Pages)
{
    // Page level
    int pageNumber = page.PageNumber;
    string pageText = page.Text;
    int pageWordCount = page.WordCount;
    // null if we don't set Ocr.Configuration.ReadBarCodes = true
    OcrResult.Barcode[] barcodes = page.Barcodes;
    AnyBitmap pageImage = page.ToBitmap(input);
    double pageWidth = page.Width;
    double pageHeight = page.Height;
    // angular correction in degrees from OcrInput.Deskew()
    double pageRotation = page.Rotation;

    foreach (var paragraph in page.Paragraphs)
    {
        // Page -> Paragraph level
        int paragraphNumber = paragraph.ParagraphNumber;
        string paragraphText = paragraph.Text;
        AnyBitmap paragraphImage = paragraph.ToBitmap(input);
        int paragraphX = paragraph.X;
        int paragraphY = paragraph.Y;
        int paragraphWidth = paragraph.Width;
        int paragraphHeight = paragraph.Height;
        double paragraphConfidence = paragraph.Confidence;
        OcrResult.TextFlow paragraphTextDirection = paragraph.TextDirection;

        foreach (var line in paragraph.Lines)
        {
            // Page -> Paragraph -> Line level
            int lineNumber = line.LineNumber;
            string lineText = line.Text;
            AnyBitmap lineImage = line.ToBitmap(input);
            int lineX = line.X;
            int lineY = line.Y;
            int lineWidth = line.Width;
            int lineHeight = line.Height;
            double lineConfidence = line.Confidence;
            double lineSkew = line.BaselineAngle;
            double lineOffset = line.BaselineOffset;

            foreach (var word in line.Words)
            {
                // Page -> Paragraph -> Line -> Word level
                int wordNumber = word.WordNumber;
                string wordText = word.Text;
                AnyBitmap wordImage = word.ToBitmap(input);
                int wordX = word.X;
                int wordY = word.Y;
                int wordWidth = word.Width;
                int wordHeight = word.Height;
                double wordConfidence = word.Confidence;

                foreach (var character in word.Characters)
                {
                    // Page -> Paragraph -> Line -> Word -> Character level
                    int characterNumber = character.CharacterNumber;
                    string characterText = character.Text;
                    AnyBitmap characterImage = character.ToBitmap(input);
                    int characterX = character.X;
                    int characterY = character.Y;
                    int characterWidth = character.Width;
                    int characterHeight = character.Height;
                    double characterConfidence = character.Confidence;
                    // Alternative symbol choices and their probability.
                    // Very useful for spellchecking.
                    OcrResult.Choice[] choices = character.Choices;
                }
            }
        }
    }
}
Imports IronOcr
Imports IronSoftware.Drawing

' The OCR result is exposed as an object model:
' Pages, Barcodes, Paragraphs, Lines, Words and Characters.
' Walking it lets us explore, export and draw OCR content using other APIs.
' (Declared with Dim: Private is not valid for a local variable used among
' executable statements.)
Dim ocrTesseract = New IronTesseract()

ocrTesseract.Configuration.ReadBarCodes = True

' The input is wrapped in a Using block so it is disposed once the results
' have been walked, matching the C# sample's "using var" declaration.
Using ocrInput As New OcrInput()
	Dim pages = New Integer() { 1, 2 }
	ocrInput.LoadImageFrames("example.tiff", pages)

	Dim ocrResult As OcrResult = ocrTesseract.Read(ocrInput)
	For Each page In ocrResult.Pages
		' Page object
		Dim PageNumber As Integer = page.PageNumber
		Dim PageText As String = page.Text
		Dim PageWordCount As Integer = page.WordCount
		' Nothing if we don't set Ocr.Configuration.ReadBarCodes = True
		Dim Barcodes() As OcrResult.Barcode = page.Barcodes
		Dim PageImage As AnyBitmap = page.ToBitmap(ocrInput)
		Dim PageWidth As Double = page.Width
		Dim PageHeight As Double = page.Height
		Dim PageRotation As Double = page.Rotation ' angular correction in degrees from OcrInput.Deskew()

		For Each paragraph In page.Paragraphs
			' Pages -> Paragraphs
			Dim ParagraphNumber As Integer = paragraph.ParagraphNumber
			Dim ParagraphText As String = paragraph.Text
			Dim ParagraphImage As AnyBitmap = paragraph.ToBitmap(ocrInput)
			Dim ParagraphX_location As Integer = paragraph.X
			Dim ParagraphY_location As Integer = paragraph.Y
			Dim ParagraphWidth As Integer = paragraph.Width
			Dim ParagraphHeight As Integer = paragraph.Height
			Dim ParagraphOcrAccuracy As Double = paragraph.Confidence
			Dim ParagraphText_direction As OcrResult.TextFlow = paragraph.TextDirection
			For Each line In paragraph.Lines
				' Pages -> Paragraphs -> Lines
				Dim LineNumber As Integer = line.LineNumber
				Dim LineText As String = line.Text
				Dim LineImage As AnyBitmap = line.ToBitmap(ocrInput)
				Dim LineX_location As Integer = line.X
				Dim LineY_location As Integer = line.Y
				Dim LineWidth As Integer = line.Width
				Dim LineHeight As Integer = line.Height
				Dim LineOcrAccuracy As Double = line.Confidence
				Dim LineSkew As Double = line.BaselineAngle
				Dim LineOffset As Double = line.BaselineOffset
				For Each word In line.Words
					' Pages -> Paragraphs -> Lines -> Words
					Dim WordNumber As Integer = word.WordNumber
					Dim WordText As String = word.Text
					Dim WordImage As AnyBitmap = word.ToBitmap(ocrInput)
					Dim WordX_location As Integer = word.X
					Dim WordY_location As Integer = word.Y
					Dim WordWidth As Integer = word.Width
					Dim WordHeight As Integer = word.Height
					Dim WordOcrAccuracy As Double = word.Confidence
					For Each character In word.Characters
						' Pages -> Paragraphs -> Lines -> Words -> Characters
						Dim CharacterNumber As Integer = character.CharacterNumber
						Dim CharacterText As String = character.Text
						Dim CharacterImage As AnyBitmap = character.ToBitmap(ocrInput)
						Dim CharacterX_location As Integer = character.X
						Dim CharacterY_location As Integer = character.Y
						Dim CharacterWidth As Integer = character.Width
						Dim CharacterHeight As Integer = character.Height
						Dim CharacterOcrAccuracy As Double = character.Confidence
						' Alternative symbol choices and their probability.
						' Very useful for spellchecking.
						Dim Choices() As OcrResult.Choice = character.Choices
					Next character
				Next word
			Next line
		Next paragraph
	Next page
End Using

IronOCR 使用 Tesseract 5 掃描每個頁面時,會返回一個進階的結果物件。此物件包含下列每個項目的位置數據、圖像、文本、統計信心值、替代符號選擇、字體名稱、字體大小、裝飾、字體粗細和位置:

  • 頁面
  • 段落
  • 行文字
  • 單字
  • 個別字符
  • 和條碼

Human Support related to Azure OCR API

由我們開發團隊提供的人力支援

無論是產品、整合或授權問題,Iron 產品開發團隊都隨時為您解答所有問題。請聯絡我們並開始與 Iron 對話,在您的項目中充分利用我們的庫。

提出問題
Image To Text related to Azure OCR API

在 .NET 中的 Azure 的 OCR 讀取引擎

Your Go-To Microsoft Azure OCR Solution to Process Imperfect Images

Whether it is passport pages, invoices, bank statements, mail, business cards, or receipts; Optical Character Recognition (OCR) is a research field based upon pattern recognition, computer vision, and machine learning. Firms utilize OCR cross-departmentally to extract text in accounting and finance systems, business digitization, enterprise content management, and data reporting systems.

In addition to building other success stories, IronOCR - a native C# OCR library - adds value to Google Tesseract and Microsoft 2021 Azure Cognitive Services.

If you are looking to convert real-world pictures with 99 percent accuracy - then read on, to see how IronOCR lets you build an efficient, accurate, scalable, and almost-human Optical Character Recognition application.

IronOCR is the Difference Between Market-Competitive and Market Leading Optical Character Recognition

Optical Character Recognition (OCR) is considered a solved phenomenon due to the immense confidence different APIs claim towards protection. However, the various products are often rigid and inaccurate, and they fail in real-world applications. Similarly, Tesseract OCR works with machine-printed, high-resolution, perfect text.

Sounds good?

Only, the real world does not always offer perfectly printed or handwritten text at high resolution. Instead, rotation, skew, low DPI, background noise, and all the other banes of digital imperfection are taken care of by IronOCR, including extracting handwritten text from image files. We ensure a 99.8 - 100 percent accurate, searchable document with cross-platform support that includes Windows, Linux, macOS, Microsoft Azure, AWS, and Docker - there is a reason C# developers choose IronOCR over (basic) Tesseract OCR - it is all about adding value.

Equip yourself with the best!

In addition to the above, IronOCR equips you to process image documents promptly. If that's not all, the IronOCR API features also include the following:

  • Extract printed text through OCR on almost any file, image, or PDF with exceptional accuracy and lightning speed
  • Extracts text from PDFs and pictures into searchable documents with perfect visual and spatial representation
  • Does not require exes or C++ code
  • Complete PDF OCR support
  • MVC, WebApp, Desktop, Console, and Server Application compatible
  • Complete .NET Core, Standard, and Framework support
  • Read using C# & VB .NET
  • Export OCR to XHTML
  • Supports multithreading
  • Supports 125 international languages - ready-to-use language packs and custom-builds
  • Extracts images, coordinates, statistics, fonts, and much more
  • Redistributes Tesseract OCR inside commercial and proprietary applications
  • Runs locally, with no SaaS required
  • Excellent Alternative to OCR service from Microsoft Cognitive Services

Virtually Unlimited Features - IronOCR is 'the' Optical Character Recognition OCR Tool for the Digital Workspace

Transition from native .dlls or exes installation to a single source of truth - develop using a single, native .NET component library with simple C# APIs that support:

  • .NET Framework 4.5 and above
  • .NET Standard 2.0 and above (including 3.x & .NET 5 Beta)
  • .NET Core 2.0 and above (including 3.x & .NET 5 Beta)
  • .NET 5
  • Xamarin for macOS

The art of the IronOCR API does not end there; you can continue to explore our technical edge features further. We reduce business complexities, one step at a time, by developing reliable solutions that streamline document processing applications and maximize business revenues by offering industry-leading embedded features:

  • Pure .NET OCR API capabilities
  • Local OCR operation, no cloud means more security
  • Create optimized low quality, noisy and distorted scan resources
  • Reads PDFs, multi-page TIFFs
  • Can save any OCR Scan sample to a PDF document or XHTML that users can search
  • Plain Text, Barcode Data, and an OCR Result class containing paragraphs, lines, words, and characters

IronOCR API Edge: Fulfil the Computer Vision?

Our optical character recognition process begins with automated image pre-processing, to enhance the image file that improves the extraction response rate. IronOCR adds value to your work as it enables the users to extract the example base image file into the optimum version of itself. IronOCR covers all bases:

Resolution Enhancement

As IronOCR service works optimally on 300DPI (Dots Per Inch) image files, any image that is significantly outside of 200-300 DPI is resampled to fit inside the targeted range.

This translates to down-sampling 600 DPI images to 300 DPI, or up-sampling 100 DPI images to 200 DPI, with 99 percent confidence.

Binarization

As IronOCR cognitive services are designed to function on monochromatic images, any colored or greyscale images are converted to monochromatic, utilizing an adaptive binarization algorithm.

The algorithm compares the pixel densities within an area to determine the threshold used to convert pixels to monochrome.

Auto-Rotation and Deskewing

IronOCR looks for lines of text and character patterns to automatically deskew and rotate input image resources to the desired orientation.

Adaptive Noise Removal

With IronOCR, image files are automatically analyzed for the presence and amount of noise. The noise is basically the ‘specks’ found on the scanned images. Our adaptive algorithm then removes the noise based upon the size of noise particles.

As soon as the sample image file is pre-processed, IronOCR then breaks the input image file into different processing zones.

Zoning

Another pre-preparation stage involves breaking the reference image into different logical zones. IronOCR first locates text and pictures within the image with the help of whitespace, and patterns; the text region is separated from images.

It is then partitioned into zones – paragraphs, columns, and text blocks. The images and remaining non-text pixels are identified to be omitted during text recognition and included in the smart output. IronOCR then flags the text zones as tables with the help of gridlines and text blocks.

Text Recognition Capabilities

Perform multiple, inter-connected steps that convert pixel blobs into single-line text threads that users can search. This includes character segmentation, adaptive classification, dictionary references, and other related processes that contribute towards the optimum extracted text.

Tried-and-Tested Multiple Parameters

With the IronOCR API service, we have tested our tool against multiple example data files in multiple languages, covering word-level accuracy, symbol accuracy, and layout retention in Microsoft Office formats. Some parameters are tested automatically, while others involve visual checks.

Connect with IronOCR - the Ideal OCR Cognitive Services Solution

IronOCR lets you add OCR cross-platform capabilities with multiple input formats to a plain text string that you can search. To empower your productivity with IronOCR, get started with our free tutorial documentation that guides you through using IronOCR. Download our NuGet package installer today, and explore with a free trial key or connect with 24/7 personal support. Scale your needs with our lifetime licensing, regardless of your team size.

與 .NET、VB.NET、C# 相容

查看許可證
支持:
  • .NET Framework 4.0及以上版本支持C#、VB、F#
  • Microsoft Visual Studio. .NET 開發 IDE 圖標
  • NuGet 安裝程式支援 Visual Studio
  • JetBrains ReSharper C# 語言助理相容
  • Microsoft Azure C#.NET 託管平台相容

授權與定價

免費社區開發許可證。商業許可證起價$749。

項目C# + VB.NET庫授權

專案

開發人員C# + VB.NET 庫許可證

開發人員

組織 C# + VB.NET 庫授權

組織

代理機構 C# + VB.NET 庫許可

代理商

SaaS C# + VB.NET 程式庫授權

SaaS

OEM C# + VB.NET 庫許可證

OEM

查看完整授權選項  

來自我們 .NET 社群的 OCR 教程

.NET Tesseract 替代方案 | IronOCR

C# Tesseract 光學字符識別

吉姆·貝克是 Iron 的一名開發工程師,負責開發 OCR 產品。

IronOCR 與 Tesseract 在 .NET 中的比較

Jim 一直是 IronOCR 開發方面的領導人物。Jim 設計並構建了影像處理算法及 OCR 閱讀方法。

查看比較
在 .NET 中將圖片轉換為文字 | 教學

C# 光學字符識別 ASP.NET

傑瑪·貝克福德 - 微軟解決方案工程師

圖片文字轉換.NET

了解Gemma的團隊如何使用IronOCR從圖像中讀取文字,以用於他們的歸檔軟件。Gemma分享了她自己的程式碼範例。

影像轉文字 .NET 教程
數以千計的開發者使用 IronOCR 為了...

會計和金融系統

  • # 收據
  • # 報告
  • # 發票列印
為 ASP.NET 會計和財務系統添加 PDF 支持

企業數位化

  • # 文件資料
  • # 訂購與標籤
  • # 紙張替代
C# 業務數位化用例

企業內容管理

  • # 內容製作
  • # 文件管理
  • # 內容分發
.NET CMS PDF 支援

數據和報告應用程式

  • # 效能追蹤
  • # 趨勢映射
  • # 報告
C# PDF 報告
Iron Software 企業 .NET 組件開發者

成千上萬的企業、政府、中小企業和開發人員都信賴 Iron software 產品。

Iron 團隊在 .NET 軟體元件市場有超過 10 年的經驗。

Vireq
醫碼
福利
Nexudus
通用電氣
馬瓦爾
Equinor
澳新銀行