Feb-23-2023, 06:38 PM
I think I have failed to set up my virtual environment correctly. I am now unable to access any pip-installed packages. I have the following directory: C:\Users\stand\PDFExtractTest\venv. In venv I have a file, extract.py, that I am trying to run in VS Code. It worked a few times, but now it does not, even though I made no changes.
"""Extract text and tables from a PDF with the Adobe PDF Services SDK.

The script sends ``input_pdf`` to the Extract PDF operation, saves the
returned archive as ``output_zip`` and prints the text of every element in
``structuredData.json`` whose path ends with ``/H1``.
"""
# NOTE(review): "import could not be resolved" is an editor diagnostic for the
# interpreter VS Code currently has selected. Presumably pdfservices-sdk was
# installed into the global Python rather than into this venv (or VS Code is
# pointed at a different interpreter) -- confirm by activating the venv and
# running `python -m pip list`, then pick that interpreter in VS Code.
# NOTE(review): the script, PDF and credentials live inside the venv folder;
# project files are normally kept next to the venv, not in it -- confirm.

import json
import logging  # was missing: the except handler below calls logging.exception
import os.path
import zipfile

from adobe.pdfservices.operation.auth.credentials import Credentials
from adobe.pdfservices.operation.exception.exceptions import (
    SdkException,
    ServiceApiException,
    ServiceUsageException,
)
from adobe.pdfservices.operation.execution_context import ExecutionContext
from adobe.pdfservices.operation.io.file_ref import FileRef
from adobe.pdfservices.operation.pdfops.extract_pdf_operation import ExtractPDFOperation
from adobe.pdfservices.operation.pdfops.options.extractpdf.extract_element_type import (
    ExtractElementType,
)
from adobe.pdfservices.operation.pdfops.options.extractpdf.extract_pdf_options import (
    ExtractPDFOptions,
)
from adobe.pdfservices.operation.pdfops.options.extractpdf.extract_renditions_element_type import (
    ExtractRenditionsElementType,
)
from adobe.pdfservices.operation.pdfops.options.extractpdf.table_structure_type import (
    TableStructureType,
)

output_zip = "./ExtractTextInfoFromPDF.zip"

# Remove the archive left by a previous run before a new one is saved.
if os.path.isfile(output_zip):
    os.remove(output_zip)

input_pdf = "C:/Users/stand/PDFExtractTest/venv/Docshospitals.pdf"

try:
    # Initial setup, create credentials instance.
    credentials = (
        Credentials.service_account_credentials_builder()
        .from_file("C:/Users/stand/PDFExtractTest/venv/pdfservices-api-credentials.json")
        .build()
    )

    # Create an ExecutionContext using credentials and create a new operation instance.
    execution_context = ExecutionContext.create(credentials)
    extract_pdf_operation = ExtractPDFOperation.create_new()

    # Set operation input from a source file.
    source = FileRef.create_from_local_file(input_pdf)
    extract_pdf_operation.set_input(source)

    # Build ExtractPDF options and set them into the operation.
    extract_pdf_options: ExtractPDFOptions = (
        ExtractPDFOptions.builder()
        .with_elements_to_extract([ExtractElementType.TEXT, ExtractElementType.TABLES])
        .with_element_to_extract_renditions(ExtractRenditionsElementType.TABLES)
        .with_table_structure_format(TableStructureType.CSV)
        .build()
    )
    extract_pdf_operation.set_options(extract_pdf_options)

    # Alternative kept from the original post (text-only extraction):
    # extract_pdf_options: ExtractPDFOptions = ExtractPDFOptions.builder() \
    #     .with_element_to_extract(ExtractElementType.TEXT) \
    #     .build()
    # extract_pdf_operation.set_options(extract_pdf_options)

    # Execute the operation.
    result: FileRef = extract_pdf_operation.execute(execution_context)

    # Save the result to the specified location.
    result.save_as(output_zip)
except (ServiceApiException, ServiceUsageException, SdkException):
    logging.exception("Exception encountered while executing operation")
else:
    print("Successfully extracted information from PDF. Printing H1 Headers:\n")

    # Context managers close the archive and the member even if parsing
    # fails; an archive left open would keep the zip file locked on Windows
    # and break the os.remove() at the top of the next run.
    with zipfile.ZipFile(output_zip, "r") as archive:
        with archive.open("structuredData.json") as json_entry:
            data = json.load(json_entry)

    for element in data["elements"]:
        if element["Path"].endswith("/H1"):
            print(element["Text"])

# Poster's follow-up text (it trailed the code on the same line in the post):
# Import statements to adobe.pdfservices all indicate that "the import could
# not be resolved". I trotted over to my python installation, using a pip list
# in scripts file and find that I indeed have pdfservices-sdk 2.1.2 there. I
# have python path set in environmental variable. And indeed, just to test I
# tried to import some other packages sitting there and none "pop up" as I
# type them in after "import". Have I done something wrong with the virtual
# environment thing?