Python Forum

Full Version: Model ID error
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
Hi Expert,

I am trying to use a model ID in my Form Recognizer Python script. Currently I am using the endpoint URL and API key, and the data is exported correctly, but when I add a model ID the script does not pick it up. How can I use it? Here is the code I tried:



from django.shortcuts import render
import os
from django.http import HttpResponse
import csv
import re
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import FormRecognizerClient
from azure.storage.blob import BlobClient


# Create your views here.

def download_blob(blob_name, output_path, conn_str=None, container_name='demo'):
    """
    Download a blob from Azure Blob Storage into a local directory.

    The local file is named after the last path component of ``blob_name``.

    :param blob_name: name of the blob inside the container.
    :param output_path: directory the file is written to (``''`` yields a
        path relative to the current working directory).
    :param conn_str: storage-account connection string; when ``None`` the
        connection string embedded below is used.
    :param container_name: name of the container that holds the blob.
    :return: path of the downloaded local file.
    """
    if conn_str is None:
        # NOTE(review): this account key is committed in source. Pass
        # ``conn_str`` from settings or the environment and rotate the key.
        conn_str = 'DefaultEndpointsProtocol=https;AccountName=demoretail;AccountKey=jSZtsbMoGpmViFuWtTXDwEJEktIs24oUAIPSz9tSiZ25zCPe0mFRWC6V0gvlZCcGU0HcxCTdV1GsAl5vMwnanA==;EndpointSuffix=core.windows.net'

    _, filename = os.path.split(blob_name)
    destination_file = os.path.join(output_path, filename)

    blob_client = BlobClient.from_connection_string(
        conn_str=conn_str,
        container_name=container_name,
        blob_name=blob_name,
    )
    with open(destination_file, "wb") as my_blob:
        blob_data = blob_client.download_blob()
        blob_data.readinto(my_blob)

    return destination_file


def _collect_header_labels(lines, port_pattern, header_pattern, window=10):
    """Return the ``[col1, area name, month]`` triples found in one page's lines.

    A line starting with ``col1:`` opens a header window. The text of that
    line and the following ones (``window`` lines in total) is joined with
    spaces and, once the line counter reaches ``window``, matched against
    ``header_pattern``.
    """
    labels = []
    header_text = ''
    lines_since_marker = 0
    collecting = False
    for line in lines:
        if port_pattern.match(line.text):
            lines_since_marker = 0
            collecting = True

        if collecting and lines_since_marker < window:
            header_text += line.text + ' '

        if lines_since_marker == window:
            header_match = header_pattern.match(header_text)
            if header_match:
                labels.append(list(header_match.groups()))
            # NOTE(review): the original indentation was lost; the reset is
            # assumed to happen whether or not the header matched.
            header_text = ''
            collecting = False

        lines_since_marker += 1
    return labels


def recognize_form_tables(form_path, model_id=None):
    """
    Run Azure Form Recognizer on a local file and collect its tables.

    The file is read fully into memory and then deleted from disk before the
    service is called.

    :param form_path: path of the local document to analyse; the file is
        removed by this function.
    :param model_id: optional ID of a custom-trained model. When ``None``
        the generic layout analysis (``begin_recognize_content``) is used;
        otherwise the document is analysed with
        ``begin_recognize_custom_forms`` and the pages of every recognized
        form are processed.
    :return: tuple ``(tables, table_label_data)`` where ``tables`` is the
        list of tables from all pages and ``table_label_data`` is a list of
        ``[col1, area name, month]`` lists parsed from the page text.
    """
    # NOTE(review): endpoint and API key are hard-coded; consider loading
    # them from settings or the environment.
    endpoint = "https://Test1.cognitiveservices.azure.com/"
    credential = AzureKeyCredential("c82cdbb3ad62438b9e77e7d0dffdf")
    form_recognizer_client = FormRecognizerClient(endpoint, credential)

    with open(form_path, "rb") as fd:
        form = fd.read()

    os.remove(form_path)

    if model_id is None:
        form_pages = form_recognizer_client.begin_recognize_content(form).result()
    else:
        # Per the SDK documentation, page lines are only returned for custom
        # models when include_field_elements is enabled; the header parsing
        # below depends on them.
        poller = form_recognizer_client.begin_recognize_custom_forms(
            model_id=model_id,
            form=form,
            include_field_elements=True,
        )
        form_pages = []
        for recognized_form in poller.result():
            form_pages.extend(recognized_form.pages or [])

    port_pattern = re.compile(r'^col1:(.*)')
    header_pattern = re.compile(
        r'.*col1:(.*)Area Name:(.*)Month Reporting:\s*([A-Za-z]{3}-[0-9]{2}).*'
    )

    tables = []
    table_label_data = []
    for page in form_pages:
        tables.extend(page.tables or [])
        table_label_data.extend(
            _collect_header_labels(page.lines or [], port_pattern, header_pattern)
        )

    return tables, table_label_data


def create_csv(table, path):
    """
    Append the usable rows of ``table`` to the CSV file at ``path``.

    Rows with fewer than 10 cells, or whose fourth cell is empty, are
    skipped. The file is created if it does not exist and is never
    truncated.

    :param table: iterable of rows, each a sequence of cell values.
    :param path: path of the CSV file to append to.
    """
    # newline='' lets the csv module control line endings; without it,
    # platforms that translate '\n' produce a blank line after every row.
    with open(path, 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerows(row for row in table if len(row) >= 10 and row[3])


def create_csv_data(tables, table_label_data):
    """
    Flatten recognized tables into rows and append them to ``table.csv``.

    Cells are grouped into rows by their ``row_index``. Header cells are kept
    only for the first table, where the header row is prefixed with the
    ``Port`` / ``Area Name`` / ``Month Reporting`` column names. Data rows
    are prefixed with the label triple at the same position in
    ``table_label_data`` when one exists.

    :param tables: tables whose ``cells`` expose ``to_dict()`` with the keys
        ``row_index``, ``text`` and optionally ``is_header``.
    :param table_label_data: list of ``[col1, area name, month]`` lists, one
        per table, matched by position.
    """
    for table_number, table in enumerate(tables, start=1):
        keep_header = table_number == 1
        # Index table_number - 1 is valid exactly when the list has at least
        # table_number entries, so the last table gets its labels too.
        has_labels = len(table_label_data) >= table_number

        rows = []
        # Source row of the output row being filled. Tracking it directly
        # stays correct when header rows are skipped or indices are sparse.
        source_row = None
        for cell in table.cells:
            cell = cell.to_dict()
            is_header = bool(cell.get('is_header'))

            if is_header and not keep_header:
                continue

            if cell['row_index'] != source_row:
                source_row = cell['row_index']
                if is_header:
                    rows.append(['Port', 'Area Name', 'Month Reporting'])
                elif has_labels:
                    rows.append(list(table_label_data[table_number - 1]))
                else:
                    rows.append([])
            rows[-1].append(cell['text'])

        create_csv(rows, 'table.csv')
    print('Created or updated table.csv file.')


def index(request):
    """
    Django view: download the report PDF, extract its tables and export CSVs.

    Downloads ``Test_for_MARCH_2022.pdf``, runs table recognition on it,
    appends the extracted rows to ``table.csv`` and then rewrites
    ``Test.csv`` with every non-empty row of ``table.csv``.

    :param request: the incoming request (not inspected).
    :return: ``HttpResponse`` with the text ``Load Succeeded``.
    """
    local_pdf = download_blob('Test_for_MARCH_2022.pdf', '')
    extracted = recognize_form_tables(local_pdf)
    print('form recognize success')
    create_csv_data(extracted[0], extracted[1])

    # NOTE(review): table.csv is opened in append mode by create_csv, so rows
    # from earlier requests are carried over into Test.csv - confirm intended.
    with open('table.csv', newline='') as in_file, \
            open('Test.csv', 'w', newline='') as out_file:
        non_empty_rows = (row for row in csv.reader(in_file) if row)
        csv.writer(out_file).writerows(non_empty_rows)

    return HttpResponse("Load Succeeded")