May-07-2020, 03:28 PM
from PIL import Image #pip install Pillow import pytesseract #pip install pytesseract import argparse #pip install argparse import cv2 #pip install opencv-python import os #Standard python library import d3dshot #pip install D3DShot import tkinter as tk #Standard python library from pynput.mouse import Button, Controller #pip install pynput from pynput.mouse import Listener #pip install pynput import logging #Typically standard library but pip install logging #I am using this with the Kite AI autocomplete engine #This program takes a specific region "region=(150, 70, 1700, 1010)" #then creates a png and prints converted text to terminal # With TK the fg is the text color # This will take the mouse position from 2 locations and then take a screen shot and then convert to text #need to add button for next and ?back? button, #need an option for How many pages or locate the total pages and run ocr to track progress #errors if the pynput mouse listener is left going too long the terminal will become unresponsive #solutions?-set timer or try/finally exception or both to notify user the time out and restart #the memory buffer space could be overflowing and causing the unresponsivness root = tk.Tk() canvas1 = tk.Canvas(root, width = 300, height = 300) #dimentions of program window canvas1.pack() mouse = Controller() #global variables window_height = 0 window_width = 0 left = 0 top = 0 right = 0 bottom = 0 #log file for mouse values logging.basicConfig(filename="mouse_log.txt", level=logging.DEBUG, format='%(asctime)s: %(message)s') #functions called with mouse input to computer #def on_move(x, y): i do not need this right now #try: #print('Pointer moved to {0}'.format( #(x, y))) #except : #print("exception on mouse move") def on_click(x, y, button, pressed): try: print('{0} at {1}'.format( 'Pressed' if pressed else 'Released', (x, y))) global window_width window_width = x global window_height window_height = y print('value x: {0} and value y: {1}'.format(window_width,window_height)) if not pressed: # Stop listener return False except: print("exception on the on mouse click") def on_scroll(x, y, dx, dy): try: print('Scrolled {0}'.format( (x, y))) except: print("exception on mouse scroll") #Capture/Record Screen->create raw image file->process raw image->run OCR for text output to terminal->output processed image file def capture_screen (): d = d3dshot.create() screen = d.screenshot_to_disk(region=(left, top, right, bottom)) print(screen) image = cv2.imread(screen) cv2.imshow("image", image) gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1] filename = "{}.png".format(os.getpid()) print(filename) cv2.imwrite(filename, gray) text = pytesseract.image_to_string(Image.open(filename)) os.remove(filename) print(text) cv2.imshow("Output", gray) #when button 1 pressed listen to mouse input, when mouse input stop output to label 1 def grab_mouse1 (): try: with Listener(on_click=on_click,on_scroll=on_scroll) as listener: listener.join() except : print("exception on the listener") global left left = window_width global top top = window_height print('value1 x: {0} and value1 y: {1}'.format(window_width,window_height)) lbl_topleft = tk.Label(root, text = 'Mouse clicked at (x:{0} and y: {1})'.format(left, top), fg = 'green', font =('helvetica', 12, 'bold')) canvas1.create_window(150, 200, window = lbl_topleft) #when button 2 pressed listen to mouse input, when mouse input stop output to label 2 def grab_mouse2 (): try: with Listener(on_click=on_click,on_scroll=on_scroll) as listener: listener.join() except : print("exception on the listener") global right right = window_width global bottom bottom = window_height print('value2 x: {0} and value2 y: {1}'.format(window_width,window_height)) lbl_bottomright = tk.Label(root, text = 'Mouse clicked at (x:{0} and y: {1})'.format(right, bottom), fg = 'green', font =('helvetica', 12, 'bold')) canvas1.create_window(150, 220, window = lbl_bottomright) #needed to locate the next button to turn the page for main record loop def locate_next(): print("hello") #needed to locate the page numbers to order the images and know the end of record loop def locate_page_number(): print("page numbers") #create buttons and call functions and loop program btn_topleft = tk.Button(text ='Top Left of Window', command = grab_mouse1, bg = 'brown', fg = 'white') btn_bottomright = tk.Button(text ='Bottom Right of Window', command = grab_mouse2, bg = 'brown', fg = 'white') btn_locatenext = tk.Button(text = 'Locate Next Button', command = locate_next, bg = 'brown', fg = 'white') btn_locatepagenum = tk.Button(text = 'Locate Page Numbers', command = locate_page_number, bg = 'brown', fg = 'white') btn_recordscreen = tk.Button(text ='Capture screen', command = capture_screen, bg = 'brown', fg = 'white') canvas1.create_window(5, 20, anchor='w', window = btn_topleft) canvas1.create_window(5, 50, anchor='w', window = btn_bottomright) canvas1.create_window(5, 80, anchor='w', window = btn_locatenext) canvas1.create_window(5, 110, anchor='w', window = btn_locatepagenum) canvas1.create_window(5, 140, anchor='w', window = btn_recordscreen) root.mainloop() cv2.waitKey(0)this is the current version of my code at the time of this writing, here is a link to my onedrive for the file "textDoc.py" that i am troubleshooting
How the program works:
1.select the "Top left of window" button, then mouse click the top left corner of where you want to record the screen
2.select the "Bottom Right of window" button, then mouse click the bottom right corner of where you want to record the screen
3.select the "locate next button", then mouse click where the next page button is on the record screen //this function is not programmed and not available
4.select the "locate page numbers", then mouse click where the page number is shown //this function is not programmed and not available
5.select the "capture screen", this will calculate the screen region from step 1&2 and create a image and produce a file, then convert the file to gray scale "black&white", then preform OCR on the image and print to terminal, then produce gray scale file
The problem i am having:
I follow step 1 or 2 and click at necessary corner, after I click if I wait too long the program becomes unresponsive, if i click on my terminal/cmd prompt(im using windows)-> the program finish printing and becomes responsive
I have tried to use try: & exception: to determine the hang up, this is problem i can live with but if i ever decide to share this program the end user will not understand the issue.
I have considered a timer and jump out of the mouse click and require restart of step, not sure how or where to do this.
Please let me know your thoughts or program suggestions, try running the code for yourself, follow this thread or save the link as i will continue to develop this program.
FYI the other .py programs in the folder i have used to test example code and do not link to the "textDoc.py" program