Jan-16-2019, 11:17 AM
There are probably many ways to convert a spreadsheet to a python dictionary, here is another one. It uses Libreoffice's soffice command to convert the spreadsheet to xlsx format if necessary (in a temporary directory), then the openpyxl module to read it. Enjoy! This code may work with .ods, .xlsx, .csv, .xls file formats and perhaps others.
EDIT: further development and latest code in this Gist
EDIT: further development and latest code in this Gist
from collections import namedtuple import pathlib import pickle import subprocess as sp import tempfile __version__ = '2019.01.16.1' WorksheetKey = namedtuple('WorksheetKey', "index name") def spreadsheet_to_dict(filename, cachename=None): """Return a dict: {(index, name): <list of tuples>} Arguments filename: a path to a .ods or .xlsx file. It may also work with .csv and .xls and others cachename: if not None, a path to a pickle file where the python dictionary will be stored. If this file exists and its modification time is more recent than that of the spreadsheet file, it is read directly instead of the spreadsheet. Worksheet keys are namedtuples with members .index and .name see also: libreoffice, openpyxl """ filename = pathlib.Path(filename) if cachename: cachename = pathlib.Path(cachename) if cachename.is_file() and ( cachename.stat().st_mtime > filename.stat().st_mtime): return load_cache(cachename) if filename.suffix == '.xlsx': db = {} from openpyxl.reader.excel import load_workbook wb=load_workbook(str(filename)) for i, (ws, name) in enumerate(zip(wb.worksheets, wb.sheetnames)): db[WorksheetKey(i, name)] = list(ws.values) if cachename: dump_cache(db, cachename) return db with tempfile.TemporaryDirectory() as tmpd: tmp = pathlib.Path(tmpd) userdir = tmp/'.user' userdir.mkdir() convert_to_xlsx = [ 'soffice', # avoid possible interaction with running instance of libreoffice '-env:UserInstallation={}'.format(userdir.as_uri()), '--headless', '--convert-to', # 'xlsx:"Calc MS Excel 2007 XML"', # <- doesn't work # 'xlsx', # <- works 'xlsx:Calc MS Excel 2007 XML', # <- also works '--outdir', str(tmp), str(filename)] sp.run(convert_to_xlsx, check=True, stdout=sp.DEVNULL) return spreadsheet_to_dict( (tmp/filename.name).with_suffix('.xlsx'), cachename=cachename) def load_cache(cachename): with cachename.open('rb') as ifh: return pickle.load(ifh) def dump_cache(obj, cachename): try: exc_occurred = False try: with cachename.open('wb') as ofh: pickle.dump(obj, ofh) except Exception: exc_occurred = True raise finally: if exc_occurred: try: cachename.unlink() except OSError: pass if __name__ == '__main__': with tempfile.TemporaryDirectory() as tmp: fn = pathlib.Path(tmp)/'tmp_example.csv' with fn.open('w') as ofh: print("""\ Spam,maps,32 Eggs,sgge,64 Bacon,nocab,128""", file=ofh) d = spreadsheet_to_dict(str(fn)) print(d)