Python Forum
A Neat way to use pathlib
Thread Rating:
  • 1 Vote(s) - 3 Average
  • 1
  • 2
  • 3
  • 4
  • 5
A Neat way to use pathlib
#1
I have a directory structure which is composed of Countries, States or other subdivision, and Cities.
Here's a way to use pathlib objects in a dictionary to containerize the hierarchy

from pathlib import Path
import os
import inspect


class CountryInfo:
    """Country / state directory hierarchy held as nested dicts of Path objects.

    Instantiating the class changes the working directory to the directory
    containing this file, builds ``self.bpaths`` (paths relative to that
    directory), and creates every directory named in it.
    """

    def __init__(self):
        # abspath() guards against dirname() returning '' (which os.chdir
        # rejects) when __file__ has no directory component.
        os.chdir(os.path.abspath(os.path.dirname(__file__)))

        self.home = Path('.')

        # Build each level once and derive the children from it.
        data = self.home / '..' / 'data'
        country = data / 'Country'
        usa = country / 'USA'

        self.bpaths = {
            'data': data,
            'Country': {
                'country': country,
                'USA': {
                    'usa': usa,
                    'Alaska': usa / 'Alaska',
                    'Alabama': usa / 'Alabama',
                    'Arkansas': usa / 'Arkansas',
                    # The directory name deliberately has no space.
                    'American Samoa': usa / 'AmericanSamoa',
                    'Arizona': usa / 'Arizona',
                    # ...
                },
            },
        }
        self.create_all_directories(self.bpaths)

    def get_dir_contents(self, path):
        """Return the entries of directory *path* as a list of Path objects.

        Returns None when *path* is not a Path instance or is not an
        existing directory (including when it names a regular file).
        An existing but empty directory yields an empty list.
        """
        if isinstance(path, Path) and path.is_dir():
            return list(path.iterdir())
        return None

    def create_all_directories(self, path):
        """Recursively create every directory in the nested dict *path*.

        Nested dicts are descended into; Path values that are not existing
        regular files are created as directories. Values of any other type
        (e.g. URL strings) are ignored.
        """
        for value in path.values():
            if isinstance(value, dict):
                self.create_all_directories(value)
            elif isinstance(value, Path) and not value.is_file():
                # parents=True removes the dependence on dict ordering
                # having created the parent directory first.
                value.mkdir(parents=True, exist_ok=True)

def testit():
    """Print the contents of the Arizona/Scottsdale directory.

    Distinguishes a missing directory (get_dir_contents returns None) from
    an existing directory that has no entries (empty list).
    """
    bp = CountryInfo()

    arizona = bp.bpaths['Country']['USA']['Arizona']
    scottsdale = arizona / 'Scottsdale'

    files = bp.get_dir_contents(scottsdale)
    if files is None:
        print('Scottsdale directory does not exist')
    elif not files:
        print('Scottsdale directory is empty')
    else:
        for file in files:
            print(file)


# Run the demonstration only when this file is executed as a script,
# not when it is imported.
if __name__ == '__main__':
    testit()
results of example:
Output:
../data/Country/USA/Arizona/Scottsdale/coslicense.pdf ../data/Country/USA/Arizona/Scottsdale/ct_BusinessLicences.csv
Since the elements of the dictionary are pathlib objects, they inherit all of the methods of pathlib
for example:
print(f'Scottsdale path: {Scottsdale.resolve()}')
will return the absolute path of the Scottsdale directory (I replaced root paths with ...):
Output:
Scottsdale path: .../Data-2TB/BusinessLists/data/Country/USA/Arizona/Scottsdale
Update: Added some error checking and a new method, create_all_directories, which creates empty directories if they don't already exist.

Also changed the get_dir_contents method to return all contents of directory. Individual entries can be tested for type by adding after line 62:
        for file in files:
            # Test is_symlink() first: is_dir() and is_file() follow symbolic
            # links, so a link to an existing target would otherwise be
            # reported as a directory or regular file.
            if file.is_symlink():
                print(f'{file.name} is a symbolic link')
            elif file.is_dir():
                print(f'{file.name} is a directory')
            elif file.is_file():
                print(f'{file.name} is a regular file')
            # ...and so on; see the pathlib docs for all possibilities.
Update Jul25: 07:36 EST
added qualifier:
 and not value.is_file()
to line 51, needed to prevent crash if value is path + file, url, etc.

-----------------------------------------------------------------------
Another method added Nov 29, 2020 (more polished)

I have one module for each project that lays out all of the directories, URLs and common file locations in a relative structure, using pathlib.
Here's a sample for a geocoding project:

the module is named GeoPaths.py and is imported by just about every other module in the project.
A neat feature of using something like this is that you can run it on its own in a copy of the project to
immediately set up your directory structure (it will create missing directories, but will leave already existing
directories alone):

GeoPaths.py
import os
from pathlib import Path
 
 
class GeoPaths:
    """Relative directory layout, URLs and common file paths for the project.

    Instantiating the class changes the working directory to the directory
    containing this file and creates any project directories that are
    missing; existing directories are left alone.

    Args:
        depth: number of directory levels this file sits below the usual
            source directory. The absolute value is used, and one '..' is
            added to ``homepath`` per level.
    """

    # URLs (independent of the instance, so kept as class attributes;
    # they remain readable through ``self`` exactly as before).
    TigerLineGeoDatabase = 'https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-geodatabase-file.html'
    qgis_plugins = 'https://plugins.qgis.org/plugins/?page=1&&'
    osmfilelink = 'https://ftp.osuosl.org/pub/openstreetmap/planet/'
    geofabrikserver = 'https://download.geofabrik.de/north-america.html'

    def __init__(self, depth=0):
        # All paths below are relative, so anchor the working directory
        # at the directory containing this file.
        os.chdir(os.path.abspath(os.path.dirname(__file__)))

        self.homepath = Path('.')
        for _ in range(abs(depth)):
            self.homepath = self.homepath / '..'

        # The project root is one level above homepath.
        rootpath = self.homepath / '..'

        self.docspath = self._ensure_dir(rootpath / 'docs')
        self.testspath = self._ensure_dir(rootpath / 'tests')
        self.datapath = self._ensure_dir(rootpath / 'data')

        self.csvpath = self._ensure_dir(self.datapath / 'csv')
        self.htmlpath = self._ensure_dir(self.datapath / 'html')
        self.jsonpath = self._ensure_dir(self.datapath / 'json')
        self.MasterAddressPath = self._ensure_dir(
            self.datapath / 'MasterAddressDatabase')
        self.prettypath = self._ensure_dir(self.datapath / 'pretty')
        self.tmppath = self._ensure_dir(self.datapath / 'tmp')

        # OSM data is arranged by state and file type.
        # A rather long directory tree, but laid out here for ease of use
        # in software.
        self.osmpath = self._ensure_dir(self.datapath / 'osm')
        self.geofabrik_datapath = self._ensure_dir(
            self.osmpath / 'GeofabrikAndCensus')

        # Common files:
        self.geofabrikjson = self.jsonpath / 'GeofabrikLinks.json'

    @staticmethod
    def _ensure_dir(path):
        """Create directory *path* (and parents) if missing; return *path*."""
        path.mkdir(parents=True, exist_ok=True)
        return path
 
 
# Running this module directly instantiates GeoPaths, whose constructor
# creates any missing project directories.
if __name__ == '__main__':
    GeoPaths()
Before running the script, my directory structure for a new project looks like this:
Output:
├── src │ └── GeoPaths.py └── venv ...
After running GeoPaths.py directory structure looks like this:
$ . ./venv/bin/activate
(venv)$ python src/GeoPaths.py
Output:
. ├── data │ ├── csv │ ├── html │ ├── json │ ├── MasterAddressDatabase │ ├── osm │ │ └── GeofabrikAndCensus │ ├── pretty │ └── tmp ├── docs ├── src │ └── GeoPaths.py ├── tests └── venv ...
Now, assume you have a module named MyModule.py in the src directory, and you want to open a json file named sillyfile.json.
here's the code that would do that:

MyModule.py
from GeoPaths import GeoPaths
import json
 
 
class MySillyClass:
    """Write a small sample dictionary to a JSON file and read it back."""

    def __init__(self):
        paths = GeoPaths()
        self.gpaths = paths
        # The file lives in the json directory exposed by GeoPaths.
        self.jsonfile = paths.jsonpath / 'sillyfile.json'

    def create_dict(self):
        """Serialize the sample dictionary to ``self.jsonfile``."""
        sillydict = {
            'Cowboys': '21',
            'GreenBayPackers': '7'
        }
        serialized = json.dumps(sillydict)
        self.jsonfile.write_text(serialized)

    def read_it_back(self):
        """Load ``self.jsonfile`` and print one ``key: value`` line per item."""
        loaded = json.loads(self.jsonfile.read_text())
        for name in loaded:
            score = loaded[name]
            print(f"{name}: {score}")
 
def main():
    """Create the sample JSON file, then read it back and print it."""
    silly = MySillyClass()
    silly.create_dict()
    silly.read_it_back()
 
 
# Run the write/read demonstration only when executed as a script.
if __name__ == '__main__':
    main()
Results of running this script:
Output:
Cowboys: 21 GreenBayPackers: 7
The depth attribute in GeoPaths.py can be used when code is in a subdirectory of src.
Increment by one for each sublevel, and paths will automatically be adjusted for all source code in that subdirectory.
Reply


Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020