Python 调用 Tesseract 将图片转换为文字 - 代码吧

level 10

tmtony 楼主

from PIL import Image
import pytesseract
# 设置 Tesseract 路径（如果环境变量未配置）
# Windows 示例：
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
# Mac/Linux 默认已加入环境变量，通常无需设置
def image_to_text(image_path, lang: str = 'eng') -> str:
    """Run Tesseract OCR on an image file and return the recognized text.

    :param image_path: Path of the image to read (handed to ``Image.open``).
    :param lang: Tesseract language code. Defaults to ``'eng'`` (English);
        use ``'chi_sim'`` for Simplified Chinese.
    :return: The recognized text with leading and trailing whitespace removed.
    """
    # Open the image inside a context manager so the underlying file handle
    # is released as soon as OCR finishes, even if recognition raises.
    with Image.open(image_path) as img:
        # Let Tesseract perform the OCR pass for the requested language.
        text = pytesseract.image_to_string(img, lang=lang)
    return text.strip()
# Example usage
if __name__ == "__main__":
    # Recognize text in an English image.
    english_text = image_to_text("english_image.png")
    print("英文识别结果:\n", english_text)
    # Recognize text in a Chinese image (the Chinese language pack must be
    # downloaded first).
    chinese_text = image_to_text("chinese_image.jpg", lang='chi_sim')
    print("\n中文识别结果:\n", chinese_text)

2025年06月07日 01点06分 1