OcrResult 类
IronOCR使用Tesseract 5为它扫描的每个页面返回一个高级结果对象。 此包括每个项目的位置数据、图像、文本、统计置信度、备选符号选择、字体名称、字体大小、字体装饰、字体粗细和位置。
- 页次
- 段落
- 文本行
- 字词
- 单个字符
- 和条形码
using IronOcr; using IronSoftware.Drawing; // We can delve deep into OCR results as an object model of // Pages, Barcodes, Paragraphs, Lines, Words and Characters // This allows us to explore, export and draw OCR content using other APIs/ var ocrTesseract = new IronTesseract(); ocrTesseract.Configuration.ReadBarCodes = true; using var ocrInput = new OcrInput(); var pages = new int[] { 1, 2 }; ocrInput.LoadImageFrames("example.tiff", pages); OcrResult ocrResult = ocrTesseract.Read(ocrInput); foreach (var page in ocrResult.Pages) { // Page object int PageNumber = page.PageNumber; string PageText = page.Text; int PageWordCount = page.WordCount; // null if we dont set Ocr.Configuration.ReadBarCodes = true; OcrResult.Barcode[] Barcodes = page.Barcodes; AnyBitmap PageImage = page.ToBitmap(ocrInput); double PageWidth = page.Width; double PageHeight = page.Height; double PageRotation = page.Rotation; // angular correction in degrees from OcrInput.Deskew() foreach (var paragraph in page.Paragraphs) { // Pages -> Paragraphs int ParagraphNumber = paragraph.ParagraphNumber; string ParagraphText = paragraph.Text; AnyBitmap ParagraphImage = paragraph.ToBitmap(ocrInput); int ParagraphX_location = paragraph.X; int ParagraphY_location = paragraph.Y; int ParagraphWidth = paragraph.Width; int ParagraphHeight = paragraph.Height; double ParagraphOcrAccuracy = paragraph.Confidence; OcrResult.TextFlow paragrapthText_direction = paragraph.TextDirection; foreach (var line in paragraph.Lines) { // Pages -> Paragraphs -> Lines int LineNumber = line.LineNumber; string LineText = line.Text; AnyBitmap LineImage = line.ToBitmap(ocrInput); int LineX_location = line.X; int LineY_location = line.Y; int LineWidth = line.Width; int LineHeight = line.Height; double LineOcrAccuracy = line.Confidence; double LineSkew = line.BaselineAngle; double LineOffset = line.BaselineOffset; foreach (var word in line.Words) { // Pages -> Paragraphs -> Lines -> Words int WordNumber = word.WordNumber; string WordText = word.Text; AnyBitmap WordImage = word.ToBitmap(ocrInput); int WordX_location = word.X; int WordY_location = word.Y; int WordWidth = word.Width; int WordHeight = word.Height; double WordOcrAccuracy = word.Confidence; foreach (var character in word.Characters) { // Pages -> Paragraphs -> Lines -> Words -> Characters int CharacterNumber = character.CharacterNumber; string CharacterText = character.Text; AnyBitmap CharacterImage = character.ToBitmap(ocrInput); int CharacterX_location = character.X; int CharacterY_location = character.Y; int CharacterWidth = character.Width; int CharacterHeight = character.Height; double CharacterOcrAccuracy = character.Confidence; // Output alternative symbols choices and their probability. // Very useful for spellchecking OcrResult.Choice[] Choices = character.Choices; } } } } }
Imports IronOcr Imports IronSoftware.Drawing ' We can delve deep into OCR results as an object model of ' Pages, Barcodes, Paragraphs, Lines, Words and Characters ' This allows us to explore, export and draw OCR content using other APIs/ Private ocrTesseract = New IronTesseract() ocrTesseract.Configuration.ReadBarCodes = True Dim ocrInput As New OcrInput() Dim pages = New Integer() { 1, 2 } ocrInput.LoadImageFrames("example.tiff", pages) Dim ocrResult As OcrResult = ocrTesseract.Read(ocrInput) For Each page In ocrResult.Pages ' Page object Dim PageNumber As Integer = page.PageNumber Dim PageText As String = page.Text Dim PageWordCount As Integer = page.WordCount ' null if we dont set Ocr.Configuration.ReadBarCodes = true; Dim Barcodes() As OcrResult.Barcode = page.Barcodes Dim PageImage As AnyBitmap = page.ToBitmap(ocrInput) Dim PageWidth As Double = page.Width Dim PageHeight As Double = page.Height Dim PageRotation As Double = page.Rotation ' angular correction in degrees from OcrInput.Deskew() For Each paragraph In page.Paragraphs ' Pages -> Paragraphs Dim ParagraphNumber As Integer = paragraph.ParagraphNumber Dim ParagraphText As String = paragraph.Text Dim ParagraphImage As AnyBitmap = paragraph.ToBitmap(ocrInput) Dim ParagraphX_location As Integer = paragraph.X Dim ParagraphY_location As Integer = paragraph.Y Dim ParagraphWidth As Integer = paragraph.Width Dim ParagraphHeight As Integer = paragraph.Height Dim ParagraphOcrAccuracy As Double = paragraph.Confidence Dim paragrapthText_direction As OcrResult.TextFlow = paragraph.TextDirection For Each line In paragraph.Lines ' Pages -> Paragraphs -> Lines Dim LineNumber As Integer = line.LineNumber Dim LineText As String = line.Text Dim LineImage As AnyBitmap = line.ToBitmap(ocrInput) Dim LineX_location As Integer = line.X Dim LineY_location As Integer = line.Y Dim LineWidth As Integer = line.Width Dim LineHeight As Integer = line.Height Dim LineOcrAccuracy As Double = line.Confidence Dim LineSkew As Double = line.BaselineAngle Dim LineOffset As Double = line.BaselineOffset For Each word In line.Words ' Pages -> Paragraphs -> Lines -> Words Dim WordNumber As Integer = word.WordNumber Dim WordText As String = word.Text Dim WordImage As AnyBitmap = word.ToBitmap(ocrInput) Dim WordX_location As Integer = word.X Dim WordY_location As Integer = word.Y Dim WordWidth As Integer = word.Width Dim WordHeight As Integer = word.Height Dim WordOcrAccuracy As Double = word.Confidence For Each character In word.Characters ' Pages -> Paragraphs -> Lines -> Words -> Characters Dim CharacterNumber As Integer = character.CharacterNumber Dim CharacterText As String = character.Text Dim CharacterImage As AnyBitmap = character.ToBitmap(ocrInput) Dim CharacterX_location As Integer = character.X Dim CharacterY_location As Integer = character.Y Dim CharacterWidth As Integer = character.Width Dim CharacterHeight As Integer = character.Height Dim CharacterOcrAccuracy As Double = character.Confidence ' Output alternative symbols choices and their probability. ' Very useful for spellchecking Dim Choices() As OcrResult.Choice = character.Choices Next character Next word Next line Next paragraph Next page
Install-Package IronOcr
IronOCR使用Tesseract 5为它扫描的每个页面返回一个高级结果对象。 此包括每个项目的位置数据、图像、文本、统计置信度、备选符号选择、字体名称、字体大小、字体装饰、字体粗细和位置。