Jan-04-2019, 09:58 PM
Hi, I am having an issue getting text from a scanned image using pytesseract. Please help me.
Here is the code
Here is the code
from wand.image import Image as Img
from PIL import Image
import pytesseract
import cv2

with Img(filename="JRF-DEO.pdf", resolution=300) as img:
    img.compression_quality = 99
    img.save(filename="sample_scan.jpg")

text = pytesseract.image_to_string(Image.open('sample_scan.jpg'))

I got the error below, but I have already installed Tesseract on the system and configured the environment variable to point to the Tesseract path; pytesseract and Tesseract are both in the same path.
--------------------------------------------------------------------------- FileNotFoundError Traceback (most recent call last) e:\programs\python\python36\lib\site-packages\pytesseract\pytesseract.py in run_tesseract(input_filename, output_filename_base, extension, lang, config, nice) 183 try: --> 184 proc = subprocess.Popen(cmd_args, **subprocess_args()) 185 except OSError: e:\programs\python\python36\lib\subprocess.py in __init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, encoding, errors) 708 errread, errwrite, --> 709 restore_signals, start_new_session) 710 except: e:\programs\python\python36\lib\subprocess.py in _execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, unused_restore_signals, unused_start_new_session) 996 os.fspath(cwd) if cwd is not None else None, --> 997 startupinfo) 998 finally: FileNotFoundError: [WinError 2] The system cannot find the file specified During handling of the above exception, another exception occurred: TesseractNotFoundError Traceback (most recent call last) <ipython-input-4-2c509bfb5784> in <module>() ----> 1 text = pytesseract.image_to_string(Image.open('sample_scan.jpg')) e:\programs\python\python36\lib\site-packages\pytesseract\pytesseract.py in image_to_string(image, lang, config, nice, output_type) 307 Output.DICT: lambda: {'text': run_and_get_output(*args)}, 308 Output.STRING: lambda: run_and_get_output(*args), --> 309 }[output_type]() 310 311 e:\programs\python\python36\lib\site-packages\pytesseract\pytesseract.py in <lambda>() 306 Output.BYTES: lambda: run_and_get_output(*(args + [True])), 307 Output.DICT: lambda: {'text': run_and_get_output(*args)}, --> 308 Output.STRING: lambda: run_and_get_output(*args), 309 }[output_type]() 310 
e:\programs\python\python36\lib\site-packages\pytesseract\pytesseract.py in run_and_get_output(image, extension, lang, config, nice, return_bytes) 216 } 217 --> 218 run_tesseract(**kwargs) 219 filename = kwargs['output_filename_base'] + os.extsep + extension 220 with open(filename, 'rb') as output_file: e:\programs\python\python36\lib\site-packages\pytesseract\pytesseract.py in run_tesseract(input_filename, output_filename_base, extension, lang, config, nice) 184 proc = subprocess.Popen(cmd_args, **subprocess_args()) 185 except OSError: --> 186 raise TesseractNotFoundError() 187 188 status_code, error_string = proc.wait(), proc.stderr.read() TesseractNotFoundError: tesseract is not installed or it's not in your path