Aug-19-2020, 03:16 PM
Hello,
I came across below code on the net. I would like to use this and develop a simple OCR software:
I would like to learn how to do tesseract process with only insşde ROI.
I came across below code on the net. I would like to use this and develop a simple OCR software:
import sys from os import path import cv2 import numpy as np from PyQt5 import QtCore from PyQt5 import QtWidgets from PyQt5 import QtGui import pytesseract from PIL import Image from pytesseract import image_to_string from gtts import gTTS import os pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe" tessdata_dir_config = r'--tessdata-dir "C:\Program Files\Tesseract-OCR\tessdata"' class RecordVideo(QtCore.QObject): image_data = QtCore.pyqtSignal(np.ndarray) def __init__(self, parent=None): super().__init__(parent) self.camera = cv2.VideoCapture(0) self.timer = QtCore.QBasicTimer() def start_recording(self): self.timer.start(0, self) def timerEvent(self, event): if (event.timerId() != self.timer.timerId()): return read, data = self.camera.read() if read: self.image_data.emit(data) def framesave(self): read, data = self.camera.read() if read: cv2.imwrite('a.png',data) img=Image.fromarray(data) img.load() text=pytesseract.image_to_string(img, lang='spa', config=tessdata_dir_config) class FaceDetectionWidget(QtWidgets.QWidget): def __init__(self, parent=None): super().__init__(parent) self.image = QtGui.QImage() self._red = (0, 0, 255) self._width = 2 self._min_size = (30, 30) def image_data_slot(self, image_data): self.image = self.get_qimage(image_data) if self.image.size() != self.size(): self.setFixedSize(self.image.size()) self.update() def get_qimage(self, image: np.ndarray): height, width, colors = image.shape bytesPerLine = 3 * width QImage = QtGui.QImage image = QImage(image.data, width, height, bytesPerLine, QImage.Format_RGB888) image = image.rgbSwapped() return image def static_ROI(self, cropped:np.ndarray): # height, width = image.shape[:2] # # top_left_x = int(width / 3) # top_left_y = int((height / 2) + (height / 4)) # bottom_right_x = int((width / 3) * 2) # bottom_right_y = int((height / 2) - (height / 4)) # # cv2.rectangle(image, (top_left_x, top_left_y), (bottom_right_x, bottom_right_y), 255, 3) # # image = image[bottom_right_y:top_left_y, top_left_x:bottom_right_x] def paintEvent(self, event): painter = QtGui.QPainter(self) painter.drawImage(0, 0, self.image) self.image = QtGui.QImage() class MainWidget(QtWidgets.QWidget): def __init__(self, parent=None): super().__init__(parent) self.face_detection_widget = FaceDetectionWidget() # TODO: set video port self.record_video = RecordVideo() image_data_slot = self.face_detection_widget.image_data_slot self.record_video.image_data.connect(image_data_slot) layout = QtWidgets.QVBoxLayout() layout.addWidget(self.face_detection_widget) self.run_button = QtWidgets.QPushButton('Start') layout.addWidget(self.run_button) self.run_button.clicked.connect(self.record_video.start_recording) self.screenshot = QtWidgets.QPushButton('Snap Shot') layout.addWidget(self.screenshot) self.screenshot.clicked.connect(self.record_video.framesave) self.setLayout(layout) def main(): app = QtWidgets.QApplication(sys.argv) main_window = QtWidgets.QMainWindow() main_widget = MainWidget() main_window.setCentralWidget(main_widget) main_window.show() sys.exit(app.exec_()) if __name__ == '__main__': main()my problem is with the function:
def framesave(self): read, data = self.camera.read() if read: cv2.imwrite('a.png',data) img=Image.fromarray(data) img.load() text=pytesseract.image_to_string(img, lang='spa', config=tessdata_dir_config)at this part it takes the whole frame but I want to take only the part of it ROI.
Quote:def frame(self):
read, data = self.camera.read()
if read:
cv2.imwrite('char.png', data)
img = Image.fromarray(data)
# _, y = img.height
# _, x = img.width
# width = int(img.shape[1] )
# height = int(img.shape[0])
# img2 = img([width / 2 , height / 3 , width / 3 , height/4])
# rect_img = image_frame[width / 3[1]: height/3[1], upper_left[0]: bottom_right[0]]
img.load()
I would like to learn how to do tesseract process with only insşde ROI.