将 tiff 图像文件转换为文本

将 tiff 图像文件转换为文本

将 tiff 图像转换为文本时出现错误。这是我的代码

import Image
import subprocess
import util
import errors
tesseract_exe_name = 'tesseract' # Name of executable to be called at command line
scratch_image_name = "temp.bmp" # This file must be .bmp or other Tesseract-compatible format
scratch_text_name_root = "temp" # Leave out the .txt extension
cleanup_scratch_flag = True  # Temporary files cleaned up after OCR operation
def call_tesseract(input_filename, output_filename):
    """Calls external tesseract.exe on input file (restrictions on types),
    outputting output_filename+'txt'"""
    args = [tesseract_exe_name, input_filename, output_filename]
    proc = subprocess.Popen(args)
    retcode = proc.wait()
    if retcode!=0:
        errors.check_for_errors()
def image_to_string(im, cleanup = cleanup_scratch_flag):
    """Converts im to file, applies tesseract, and fetches resulting text.
    If cleanup=True, delete scratch files after operation."""
    try:
        util.image_to_scratch(im, scratch_image_name)
        call_tesseract(scratch_image_name, scratch_text_name_root)
        text = util.retrieve_text(scratch_text_name_root)
    finally:
        if cleanup:
            util.perform_cleanup(scratch_image_name, scratch_text_name_root)
    return text

def image_file_to_string(filename, cleanup = cleanup_scratch_flag, graceful_errors=True):
    """Applies tesseract to filename; or, if image is incompatible and graceful_errors=True,
    converts to compatible format and then applies tesseract.  Fetches resulting text.
    If cleanup=True, delete scratch files after operation."""
    try:
        try:
            call_tesseract(filename, scratch_text_name_root)
            text = util.retrieve_text(scratch_text_name_root)
        except errors.Tesser_General_Exception:
            if graceful_errors:
                im = Image.open(filename)
                text = image_to_string(im, cleanup)
            else:
                raise
    finally:
        if cleanup:
            util.perform_cleanup(scratch_image_name, scratch_text_name_root)
    return text
if __name__=='__main__':
    im = Image.open("/home/oomsys/phototest.tif")
    text = image_to_string(im)
    print text
    try:
        text = image_file_to_string('fnord.tif', graceful_errors=False)
    except errors.Tesser_General_Exception, value:
        print "fnord.tif is incompatible filetype.  Try graceful_errors=True"
        print value
    text = image_file_to_string('fnord.tif', graceful_errors=True)
    print "fnord.tif contents:", text
    text = image_file_to_string('fonts_test.png', graceful_errors=True)
    print text

这是我的错误


Traceback (most recent call last):
  File "pytesser.py", line 60, in <module>
    text = image_to_string(im)
  File "pytesser.py", line 31, in image_to_string
    call_tesseract(scratch_image_name, scratch_text_name_root)
  File "pytesser.py", line 21, in call_tesseract
    proc = subprocess.Popen(args)
  File "/usr/lib/python2.7/subprocess.py", line 679, in __init__
    errread, errwrite)
  File "/usr/lib/python2.7/subprocess.py", line 1239, in _execute_child
    raise child_exception
OSError: [Errno 2] No such file or directory

答案1

您需要tesseract安装。请按照此处提供的步骤操作https://help.ubuntu.com/community/Tesseract3

相关内容