Test in production without watermarks.
Works wherever you need it to.
Get 30 days of the fully functional product.
Have it up and running in minutes.
Full access to our support engineering team during your product trial.
In this tutorial, you’ll learn how to extract text from multilingual PDF documents using IronOCR in C#. The video walks through setting up IronOCR and installing additional language packs—specifically English and Japanese. You'll see how to configure the OCR engine to support multiple languages and apply it to a sample PDF that includes both English and Japanese text. The tutorial demonstrates how to initialize the OCR engine, define the input file, and extract text using the Read method. The extracted content is then saved to a .txt file, with error handling in place for failed operations. This is a great example of how IronOCR supports global document processing by recognizing multiple languages in a single scan. Whether you're processing multilingual forms, international documents, or PDFs from global sources, this guide shows how easy it is to get accurate, language-aware OCR results in C#.
using IronOcr;
using System;
using System.IO;
/// <summary>
/// Console sample that runs OCR over a PDF containing English and Japanese
/// text and writes the recognized text to a plain-text file.
/// </summary>
class Program
{
    /// <summary>
    /// Configures the OCR engine for English plus Japanese, reads the input
    /// PDF, and saves the extracted text. Any failure is reported on the
    /// console instead of crashing the process.
    /// </summary>
    static void Main()
    {
        // Initialize the OCR engine with English as the primary language.
        var ocr = new IronTesseract();
        ocr.Language = OcrLanguage.English;

        // OcrLanguage values are not combinable bit flags, so OR-ing two of
        // them does not select both languages. Extra languages are registered
        // explicitly instead (the Japanese language pack must be installed).
        ocr.AddSecondaryLanguage(OcrLanguage.Japanese);

        // Define the path to the PDF file to be processed
        var inputPath = @"path/to/your/sample.pdf";

        // Define output path for extracted text
        var outputPath = @"path/to/output.txt";

        try
        {
            // OcrInput is disposed as soon as reading and saving are done.
            using (var input = new OcrInput(inputPath))
            {
                // Extract text from the PDF
                var result = ocr.Read(input);

                // Save the extracted text to a .txt file
                File.WriteAllText(outputPath, result.Text);
                Console.WriteLine("Text extraction was successful. Check the output file for results.");
            }
        }
        catch (Exception e)
        {
            // Top-level handler for the sample: report the failure message
            // (OCR, file access, etc.) rather than letting the app terminate.
            Console.WriteLine($"An error occurred: {e.Message}");
        }
    }
}
using IronOcr;
using System;
using System.IO;
/// <summary>
/// Console sample that runs OCR over a PDF containing English and Japanese
/// text and writes the recognized text to a plain-text file.
/// </summary>
class Program
{
    /// <summary>
    /// Configures the OCR engine for English plus Japanese, reads the input
    /// PDF, and saves the extracted text. Any failure is reported on the
    /// console instead of crashing the process.
    /// </summary>
    static void Main()
    {
        // Initialize the OCR engine with English as the primary language.
        var ocr = new IronTesseract();
        ocr.Language = OcrLanguage.English;

        // OcrLanguage values are not combinable bit flags, so OR-ing two of
        // them does not select both languages. Extra languages are registered
        // explicitly instead (the Japanese language pack must be installed).
        ocr.AddSecondaryLanguage(OcrLanguage.Japanese);

        // Define the path to the PDF file to be processed
        var inputPath = @"path/to/your/sample.pdf";

        // Define output path for extracted text
        var outputPath = @"path/to/output.txt";

        try
        {
            // OcrInput is disposed as soon as reading and saving are done.
            using (var input = new OcrInput(inputPath))
            {
                // Extract text from the PDF
                var result = ocr.Read(input);

                // Save the extracted text to a .txt file
                File.WriteAllText(outputPath, result.Text);
                Console.WriteLine("Text extraction was successful. Check the output file for results.");
            }
        }
        catch (Exception e)
        {
            // Top-level handler for the sample: report the failure message
            // (OCR, file access, etc.) rather than letting the app terminate.
            Console.WriteLine($"An error occurred: {e.Message}");
        }
    }
}
Imports IronOcr
Imports System
Imports System.IO
''' <summary>
''' Console sample that runs OCR over a PDF containing English and Japanese
''' text and writes the recognized text to a plain-text file.
''' </summary>
Friend Class Program
    ''' <summary>
    ''' Configures the OCR engine for English plus Japanese, reads the input
    ''' PDF, and saves the extracted text. Any failure is reported on the
    ''' console instead of crashing the process.
    ''' </summary>
    Shared Sub Main()
        ' Initialize the OCR engine with English as the primary language.
        Dim ocr = New IronTesseract()
        ocr.Language = OcrLanguage.English

        ' OcrLanguage values are not combinable bit flags, so Or-ing two of
        ' them does not select both languages. Extra languages are registered
        ' explicitly instead (the Japanese language pack must be installed).
        ocr.AddSecondaryLanguage(OcrLanguage.Japanese)

        ' Define the path to the PDF file to be processed
        Dim inputPath = "path/to/your/sample.pdf"

        ' Define output path for extracted text
        Dim outputPath = "path/to/output.txt"

        Try
            ' OcrInput is disposed as soon as reading and saving are done.
            Using input = New OcrInput(inputPath)
                ' Extract text from the PDF
                Dim result = ocr.Read(input)

                ' Save the extracted text to a .txt file
                File.WriteAllText(outputPath, result.Text)
                Console.WriteLine("Text extraction was successful. Check the output file for results.")
            End Using
        Catch e As Exception
            ' Top-level handler for the sample: report the failure message
            ' (OCR, file access, etc.) rather than letting the app terminate.
            Console.WriteLine("An error occurred: " & e.Message)
        End Try
    End Sub
End Class
Further Reading: Additional OCR Language Packs