May-10-2022, 07:53 PM
Hi Expert,
I am trying to use model id for form recognizer py script. Here is my code . currently I am using endpoint url and api key and data is getting export .but when i use model id it is not taking that from that. how can use it .. here is the code what i tried
from django.shortcuts import render
import os
from django.http import HttpResponse
import csv
import re
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import FormRecognizerClient
from azure.storage.blob import BlobClient
# Create your views here.
def download_blob(blob_name, output_path):
"""
Download
:param blob_name:
:param output_path:
:return:
"""
_, filename = os.path.split(blob_name)
destination_file = os.path.join(output_path, filename)
blob_client = BlobClient.from_connection_string(
conn_str='DefaultEndpointsProtocol=https;AccountName=demoretail;AccountKey=jSZtsbMoGpmViFuWtTXDwEJEktIs24oUAIPSz9tSiZ25zCPe0mFRWC6V0gvlZCcGU0HcxCTdV1GsAl5vMwnanA==;EndpointSuffix=core.windows.net',
container_name='demo',
blob_name=blob_name
)
with open(destination_file, "wb") as my_blob:
blob_data = blob_client.download_blob()
blob_data.readinto(my_blob)
return destination_file
def recognize_form_tables(form_path):
endpoint = https://Test1.cognitiveservices.azure.com/
credential = AzureKeyCredential("c82cdbb3ad62438b9e77e7d0dffdf")
Modelid=("ddkjdhhkjjksjs")
form_recognizer_client = FormRecognizerClient(endpoint, credential)
with open(form_path, "rb") as fd:
form = fd.read()
os.remove(form_path)
response = form_recognizer_client.begin_recognize_content(form)
form_pages = response.result()
tables = []
table_label_data = []
port_regex = '^col1:(.*)'
header_regex = '.*col1:(.*)Area Name:(.*)Month Reporting:\s*([A-Za-z]{3}-[0-9]{2}).*'
table_index = -1
for content in form_pages:
for table in content.tables:
tables.append(table)
table_header = ''
i = 0
flag = False
for line_idx, line in enumerate(content.lines):
port_line = re.findall(port_regex, line.text)
if port_line:
table_index += 1
i = 0
flag = True
if flag and i < 10 :
table_header += line.text + ' '
if i == 10:
header_match = re.match(header_regex, table_header)
if header_match:
gr = header_match.groups()
table_label_data.append([gr[0], gr[1], gr[2]])
table_header = ''
flag = False
i += 1
return tables, table_label_data
def create_csv(table, path):
with open(path, 'a') as f:
writer = csv.writer(f)
for row in table:
if len(row) < 10 or not row[3]:
continue
writer.writerow(row)
def create_csv_data(tables, table_label_data):
count = 0
for t in tables:
count += 1
table_data = []
row_index = -1
for cell in t.cells:
cell = cell.to_dict()
if count > 1 and 'is_header' in cell and cell['is_header']:
continue
elif cell['row_index'] == row_index or (count > 1 and cell['row_index'] == row_index + 1):
table_data[row_index].append(cell['text'])
else:
row_index += 1
if 'is_header' in cell and cell['is_header']:
table_data.append(['Port', 'Area Name', 'Month Reporting'])
else:
table_data.append([])
if len(table_label_data) > count:
table_data[row_index] = table_label_data[count - 1] + table_data[row_index]
table_data[row_index].append(cell['text'])
create_csv(table_data, f'table.csv')
print('Created or updated table.csv file.')
def index(request):
form_path = download_blob('Test_for_MARCH_2022.pdf', '')
tables, table_label_data = recognize_form_tables(form_path)
print('form recognize success')
create_csv_data(tables, table_label_data)
with open('table.csv', newline='') as in_file:
with open('Test.csv', 'w', newline='') as out_file:
writer = csv.writer(out_file)
for row in csv.reader(in_file):
if row:
writer.writerow(row)
return HttpResponse("Load Succeeded")
I am trying to use model id for form recognizer py script. Here is my code . currently I am using endpoint url and api key and data is getting export .but when i use model id it is not taking that from that. how can use it .. here is the code what i tried
from django.shortcuts import render
import os
from django.http import HttpResponse
import csv
import re
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import FormRecognizerClient
from azure.storage.blob import BlobClient
# Create your views here.
def download_blob(blob_name, output_path):
"""
Download
:param blob_name:
:param output_path:
:return:
"""
_, filename = os.path.split(blob_name)
destination_file = os.path.join(output_path, filename)
blob_client = BlobClient.from_connection_string(
conn_str='DefaultEndpointsProtocol=https;AccountName=demoretail;AccountKey=jSZtsbMoGpmViFuWtTXDwEJEktIs24oUAIPSz9tSiZ25zCPe0mFRWC6V0gvlZCcGU0HcxCTdV1GsAl5vMwnanA==;EndpointSuffix=core.windows.net',
container_name='demo',
blob_name=blob_name
)
with open(destination_file, "wb") as my_blob:
blob_data = blob_client.download_blob()
blob_data.readinto(my_blob)
return destination_file
def recognize_form_tables(form_path):
endpoint = https://Test1.cognitiveservices.azure.com/
credential = AzureKeyCredential("c82cdbb3ad62438b9e77e7d0dffdf")
Modelid=("ddkjdhhkjjksjs")
form_recognizer_client = FormRecognizerClient(endpoint, credential)
with open(form_path, "rb") as fd:
form = fd.read()
os.remove(form_path)
response = form_recognizer_client.begin_recognize_content(form)
form_pages = response.result()
tables = []
table_label_data = []
port_regex = '^col1:(.*)'
header_regex = '.*col1:(.*)Area Name:(.*)Month Reporting:\s*([A-Za-z]{3}-[0-9]{2}).*'
table_index = -1
for content in form_pages:
for table in content.tables:
tables.append(table)
table_header = ''
i = 0
flag = False
for line_idx, line in enumerate(content.lines):
port_line = re.findall(port_regex, line.text)
if port_line:
table_index += 1
i = 0
flag = True
if flag and i < 10 :
table_header += line.text + ' '
if i == 10:
header_match = re.match(header_regex, table_header)
if header_match:
gr = header_match.groups()
table_label_data.append([gr[0], gr[1], gr[2]])
table_header = ''
flag = False
i += 1
return tables, table_label_data
def create_csv(table, path):
with open(path, 'a') as f:
writer = csv.writer(f)
for row in table:
if len(row) < 10 or not row[3]:
continue
writer.writerow(row)
def create_csv_data(tables, table_label_data):
count = 0
for t in tables:
count += 1
table_data = []
row_index = -1
for cell in t.cells:
cell = cell.to_dict()
if count > 1 and 'is_header' in cell and cell['is_header']:
continue
elif cell['row_index'] == row_index or (count > 1 and cell['row_index'] == row_index + 1):
table_data[row_index].append(cell['text'])
else:
row_index += 1
if 'is_header' in cell and cell['is_header']:
table_data.append(['Port', 'Area Name', 'Month Reporting'])
else:
table_data.append([])
if len(table_label_data) > count:
table_data[row_index] = table_label_data[count - 1] + table_data[row_index]
table_data[row_index].append(cell['text'])
create_csv(table_data, f'table.csv')
print('Created or updated table.csv file.')
def index(request):
form_path = download_blob('Test_for_MARCH_2022.pdf', '')
tables, table_label_data = recognize_form_tables(form_path)
print('form recognize success')
create_csv_data(tables, table_label_data)
with open('table.csv', newline='') as in_file:
with open('Test.csv', 'w', newline='') as out_file:
writer = csv.writer(out_file)
for row in csv.reader(in_file):
if row:
writer.writerow(row)
return HttpResponse("Load Succeeded")