OCR Live with ROI

murkoc · (This post was last modified: Aug-19-2020, 03:55 PM by Larz60+.)

Hello,

I came across below code on the net. I would like to use this and develop a simple OCR software:

import sys
from os import path

import cv2
import numpy as np

from PyQt5 import QtCore
from PyQt5 import QtWidgets
from PyQt5 import QtGui

import pytesseract
from PIL import Image
from pytesseract import image_to_string
from gtts import gTTS
import os


pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"


tessdata_dir_config = r'--tessdata-dir "C:\Program Files\Tesseract-OCR\tessdata"'



class RecordVideo(QtCore.QObject):
    image_data = QtCore.pyqtSignal(np.ndarray)

    def __init__(self, parent=None):
        super().__init__(parent)
        self.camera = cv2.VideoCapture(0)

        self.timer = QtCore.QBasicTimer()

    def start_recording(self):
        self.timer.start(0, self)

    
    def timerEvent(self, event):
        if (event.timerId() != self.timer.timerId()):
            return

        read, data = self.camera.read()
        if read:
            self.image_data.emit(data)
    def framesave(self):
        
        read, data = self.camera.read()
        if read:
            cv2.imwrite('a.png',data)
            img=Image.fromarray(data)
            img.load()
            
            text=pytesseract.image_to_string(img, lang='spa', config=tessdata_dir_config)
        


class FaceDetectionWidget(QtWidgets.QWidget):
    def __init__(self, parent=None):
        super().__init__(parent)
        self.image = QtGui.QImage()
        self._red = (0, 0, 255)
        self._width = 2
        self._min_size = (30, 30)


    def image_data_slot(self, image_data):


        
        self.image = self.get_qimage(image_data)
        if self.image.size() != self.size():
            self.setFixedSize(self.image.size())

        self.update()
    
        
        
    def get_qimage(self, image: np.ndarray):
        height, width, colors = image.shape
        bytesPerLine = 3 * width
        QImage = QtGui.QImage

        image = QImage(image.data,
                       width,
                       height,
                       bytesPerLine,
                       QImage.Format_RGB888)

        image = image.rgbSwapped()
        return image

def static_ROI(self, cropped:np.ndarray):
    # height, width = image.shape[:2]
    #
    # top_left_x = int(width / 3)
    # top_left_y = int((height / 2) + (height / 4))
    # bottom_right_x = int((width / 3) * 2)
    # bottom_right_y = int((height / 2) - (height / 4))
    #
    # cv2.rectangle(image, (top_left_x, top_left_y), (bottom_right_x, bottom_right_y), 255, 3)
    #
    # image = image[bottom_right_y:top_left_y, top_left_x:bottom_right_x]

    def paintEvent(self, event):
        painter = QtGui.QPainter(self)
        painter.drawImage(0, 0, self.image)
        self.image = QtGui.QImage()


class MainWidget(QtWidgets.QWidget):
    def __init__(self, parent=None):
        super().__init__(parent)
        
        self.face_detection_widget = FaceDetectionWidget()

        # TODO: set video port
        self.record_video = RecordVideo()

        image_data_slot = self.face_detection_widget.image_data_slot
        self.record_video.image_data.connect(image_data_slot)

        layout = QtWidgets.QVBoxLayout()

        layout.addWidget(self.face_detection_widget)
        self.run_button = QtWidgets.QPushButton('Start')
        layout.addWidget(self.run_button)

        self.run_button.clicked.connect(self.record_video.start_recording)

        self.screenshot = QtWidgets.QPushButton('Snap Shot')
        layout.addWidget(self.screenshot)

        self.screenshot.clicked.connect(self.record_video.framesave)
        self.setLayout(layout)


    
def main():
    app = QtWidgets.QApplication(sys.argv)

    main_window = QtWidgets.QMainWindow()
    main_widget = MainWidget()
    main_window.setCentralWidget(main_widget)
    main_window.show()

    sys.exit(app.exec_())


if __name__ == '__main__':

    main()

my problem is with the function:

 def framesave(self):
        
        read, data = self.camera.read()
        if read:
            cv2.imwrite('a.png',data)
            img=Image.fromarray(data)
            img.load()
            
            text=pytesseract.image_to_string(img, lang='spa', config=tessdata_dir_config)

at this part it takes the whole frame but I want to take only the part of it ROI.

Quote:def frame(self):

read, data = self.camera.read()
if read:
cv2.imwrite('char.png', data)
img = Image.fromarray(data)
# _, y = img.height
# _, x = img.width
# width = int(img.shape[1] )
# height = int(img.shape[0])

# img2 = img([width / 2 , height / 3 , width / 3 , height/4])
# rect_img = image_frame[width / 3[1]: height/3[1], upper_left[0]: bottom_right[0]]
img.load()

I would like to learn how to do tesseract process with only insşde ROI.

OCR Live with ROI

User Panel Messages

Announcements