Fix Image Orientation
Rotate
This function rotates images by a specified number of degrees in a clockwise direction. If you wish to rotate the image in an anti-clockwise direction, you can use negative numbers. Below is an example in Python:
from PIL import Image
def rotate_image(image_path, degrees):
    """
    Rotates an image by the specified number of degrees.

    Parameters:
    image_path (str): The path to the image file.
    degrees (float): The number of degrees to rotate the image. Positive
        values rotate clockwise, negative values rotate anti-clockwise.

    Returns:
    Image object: The rotated image. The canvas is enlarged as needed so the
        whole rotated picture fits (expand=True).
    """
    # Open the image file; the context manager closes it when we are done.
    with Image.open(image_path) as img:
        # PIL's Image.rotate() interprets positive angles as counter-clockwise,
        # so the angle is negated to honour the documented clockwise contract.
        rotated_image = img.rotate(-degrees, expand=True)
    # rotate() returns a new image, so it stays usable after the source closes.
    return rotated_image
# Example usage: rotate 'example.jpg' by 45 degrees, then display the result.
rotated_img = rotate_image('example.jpg', 45)
rotated_img.show()
from PIL import Image
def rotate_image(image_path, degrees):
    """
    Rotates an image by the specified number of degrees.

    Parameters:
    image_path (str): The path to the image file.
    degrees (float): The number of degrees to rotate the image. Positive
        values rotate clockwise, negative values rotate anti-clockwise.

    Returns:
    Image object: The rotated image. The canvas is enlarged as needed so the
        whole rotated picture fits (expand=True).
    """
    # Open the image file; the context manager closes it when we are done.
    with Image.open(image_path) as img:
        # PIL's Image.rotate() interprets positive angles as counter-clockwise,
        # so the angle is negated to honour the documented clockwise contract.
        rotated_image = img.rotate(-degrees, expand=True)
    # rotate() returns a new image, so it stays usable after the source closes.
    return rotated_image
# Example usage: rotate 'example.jpg' by 45 degrees, then display the result.
rotated_img = rotate_image('example.jpg', 45)
rotated_img.show()
Deskew
The `deskew` function is used to correct the orientation of an image, ensuring it is perfectly aligned and orthogonal. This is particularly useful when working with Optical Character Recognition (OCR), as the accuracy of tools like Tesseract can be compromised by skewed images. Here's an example implementation:
import cv2
import numpy as np
def deskew_image(image_path):
    """
    Automatically deskews an image to make it orthogonal.

    The skew is estimated from the minimum-area rectangle that encloses all
    foreground (dark) pixels, and the image is rotated about its center by
    the smallest angle that makes that rectangle axis-aligned.

    Parameters:
    image_path (str): The path to the image file.

    Returns:
    numpy.ndarray: The deskewed grayscale image, same size as the input.

    Raises:
    OSError: If the image cannot be read from image_path.
    """
    # Read the image as grayscale. cv2.imread reports failure by returning
    # None rather than raising, so check explicitly.
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise OSError(f"Could not read image: {image_path!r}")

    # Binarize with inversion so that dark content becomes non-zero foreground.
    _, binary_img = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Coordinates of all foreground pixels.
    coords = np.column_stack(np.where(binary_img > 0))
    if coords.shape[0] == 0:
        # No foreground to measure a skew from; leave the image untouched.
        return img

    # Angle of the minimum-area bounding rectangle.
    rect_angle = cv2.minAreaRect(coords)[-1]

    # The reported angle range differs between OpenCV releases ([-90, 0) in
    # older versions, (0, 90] in newer ones). Angles that differ by a multiple
    # of 90 describe the same rectangle, so reduce modulo 90 first and then
    # pick the smaller of the two possible corrections. For the older range
    # this yields exactly the classic "-(90 + angle) / -angle" result.
    remainder = rect_angle % 90
    angle = -remainder if remainder < 45 else 90 - remainder

    # Rotate about the image center, keeping the original canvas size.
    (h, w) = img.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    deskewed_img = cv2.warpAffine(img, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
    return deskewed_img
# Example usage: deskew 'example.jpg' and show the result in a window.
deskewed_img = deskew_image('example.jpg')
cv2.imshow('Deskewed Image', deskewed_img)
# Wait for a key press, then close all OpenCV windows.
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
import numpy as np
def deskew_image(image_path):
    """
    Automatically deskews an image to make it orthogonal.

    The skew is estimated from the minimum-area rectangle that encloses all
    foreground (dark) pixels, and the image is rotated about its center by
    the smallest angle that makes that rectangle axis-aligned.

    Parameters:
    image_path (str): The path to the image file.

    Returns:
    numpy.ndarray: The deskewed grayscale image, same size as the input.

    Raises:
    OSError: If the image cannot be read from image_path.
    """
    # Read the image as grayscale. cv2.imread reports failure by returning
    # None rather than raising, so check explicitly.
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise OSError(f"Could not read image: {image_path!r}")

    # Binarize with inversion so that dark content becomes non-zero foreground.
    _, binary_img = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Coordinates of all foreground pixels.
    coords = np.column_stack(np.where(binary_img > 0))
    if coords.shape[0] == 0:
        # No foreground to measure a skew from; leave the image untouched.
        return img

    # Angle of the minimum-area bounding rectangle.
    rect_angle = cv2.minAreaRect(coords)[-1]

    # The reported angle range differs between OpenCV releases ([-90, 0) in
    # older versions, (0, 90] in newer ones). Angles that differ by a multiple
    # of 90 describe the same rectangle, so reduce modulo 90 first and then
    # pick the smaller of the two possible corrections. For the older range
    # this yields exactly the classic "-(90 + angle) / -angle" result.
    remainder = rect_angle % 90
    angle = -remainder if remainder < 45 else 90 - remainder

    # Rotate about the image center, keeping the original canvas size.
    (h, w) = img.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    deskewed_img = cv2.warpAffine(img, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
    return deskewed_img
# Example usage: deskew 'example.jpg' and show the result in a window.
deskewed_img = deskew_image('example.jpg')
cv2.imshow('Deskewed Image', deskewed_img)
# Wait for a key press, then close all OpenCV windows.
cv2.waitKey(0)
cv2.destroyAllWindows()
Scale
The `Scale` function is used to resize or scale OCR input pages while maintaining their aspect ratio. This operation is crucial when dealing with images of various sizes, ensuring they are processed uniformly. Here is an example with explanations:
from PIL import Image
def scale_image(image_path, max_size):
    """
    Scales an image proportionally to a specified maximum size for either width or height.

    The longer side of the result equals max_size; the shorter side is scaled
    by the same factor, so the aspect ratio is preserved. Images smaller than
    max_size are enlarged.

    Parameters:
    image_path (str): The path to the image file.
    max_size (int): The maximum size for the width or height of the scaled image.

    Returns:
    Image object: The scaled image.

    Raises:
    ValueError: If max_size is not positive.
    """
    if max_size <= 0:
        raise ValueError("max_size must be a positive number")

    # Open the image file; the context manager closes it when we are done.
    with Image.open(image_path) as img:
        width, height = img.size
        # The smaller ratio guarantees that neither side exceeds max_size.
        scaling_factor = min(max_size / width, max_size / height)
        # Round rather than truncate so floating-point error cannot shave a
        # pixel off the target size, and never let a side collapse to zero
        # for extreme aspect ratios.
        new_size = (
            max(1, round(width * scaling_factor)),
            max(1, round(height * scaling_factor)),
        )
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
        # under its current name.
        scaled_img = img.resize(new_size, Image.LANCZOS)
    # resize() returns a new image, so it stays usable after the source closes.
    return scaled_img
# Example usage: scale 'example.jpg' with a maximum side of 800, then display it.
scaled_img = scale_image('example.jpg', 800)
scaled_img.show()
from PIL import Image
def scale_image(image_path, max_size):
    """
    Scales an image proportionally to a specified maximum size for either width or height.

    The longer side of the result equals max_size; the shorter side is scaled
    by the same factor, so the aspect ratio is preserved. Images smaller than
    max_size are enlarged.

    Parameters:
    image_path (str): The path to the image file.
    max_size (int): The maximum size for the width or height of the scaled image.

    Returns:
    Image object: The scaled image.

    Raises:
    ValueError: If max_size is not positive.
    """
    if max_size <= 0:
        raise ValueError("max_size must be a positive number")

    # Open the image file; the context manager closes it when we are done.
    with Image.open(image_path) as img:
        width, height = img.size
        # The smaller ratio guarantees that neither side exceeds max_size.
        scaling_factor = min(max_size / width, max_size / height)
        # Round rather than truncate so floating-point error cannot shave a
        # pixel off the target size, and never let a side collapse to zero
        # for extreme aspect ratios.
        new_size = (
            max(1, round(width * scaling_factor)),
            max(1, round(height * scaling_factor)),
        )
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
        # under its current name.
        scaled_img = img.resize(new_size, Image.LANCZOS)
    # resize() returns a new image, so it stays usable after the source closes.
    return scaled_img
# Example usage: scale 'example.jpg' with a maximum side of 800, then display it.
scaled_img = scale_image('example.jpg', 800)
scaled_img.show()