Python Forum
Practical use of pathlib
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Practical use of pathlib
#1
The app below uses pathlib objects to read the header record from each of the national FIPS code
files and prints each one. Eventually it will process each file into an easy-access format, but as
a proof of concept, I am only printing out the first line of each.

module ReadFipsCodeData.py:
from pathlib import Path, PurePath
import json


class ReadFipsCodeData:
    """Print the first non-blank line of every FIPS code file listed in code_descr.json.

    The per-file descriptions (file name, delimiter, field info) are loaded from
    ``data/code_descr.json`` and the class-code meanings from
    ``data/fips_class.json``, both relative to the current working directory.
    """

    # 'delim' values that mean "split on runs of whitespace" rather than on a
    # literal separator.  'spaces' is a marker word, not a real delimiter, and
    # str.split('') raises ValueError, so both need special handling.
    _WHITESPACE_DELIMS = ('', 'spaces')

    def __init__(self):
        """Set up the paths and load both JSON lookup dictionaries.

        Raises:
            FileNotFoundError: if either JSON file is missing.
        """
        self.fips_dirs = []

        self.homepath = Path('.')

        self.datapath = self.homepath / 'data'
        self.code_descr_filepath = self.datapath / 'code_descr.json'
        self.fips_class_filepath = self.datapath / 'fips_class.json'

        # Directory holding the raw code files, one level above the working directory.
        self.code_datapath = self.homepath / '..' / 'data' / 'Codes' / 'ANSI-FIPS-Codes' / 'AllFiles'

        with self.code_descr_filepath.open('r') as jn:
            self.code_categories = json.load(jn)

        with self.fips_class_filepath.open('r') as jn:
            self.fips_class = json.load(jn)

    def build_national_code_db(self):
        """Print the file name and split header line of each described code file.

        Only the first non-blank line of each file is read (it should be the
        heading in each case); data rows are not processed yet.

        Raises:
            FileNotFoundError: if a described file is not in ``self.code_datapath``.
        """
        for value in self.code_categories.values():
            f = value['filename']
            filepath = self.code_datapath / f

            delimiter = value['delim']
            # str.split(None) splits on any run of whitespace.
            separator = None if delimiter in self._WHITESPACE_DELIMS else delimiter

            with filepath.open('r', encoding='ISO-8859-1') as fin:
                # Iterate lazily: there is no need to load the whole file to
                # look at its first non-blank line.
                for line in fin:
                    line = line.strip()
                    if not line:
                        continue
                    print(f'\nfilename: {f}')
                    print(f'Header: {line.split(separator)}')
                    break


def main():
    """Create a ReadFipsCodeData reader and run build_national_code_db on it."""
    ReadFipsCodeData().build_national_code_db()


if __name__ == '__main__':
    main()
This produces the following output:
Output:
filename: national_aia.txt Header: ['AIANNHCE', 'AIANNHNAME'] filename: national_cd113.txt Header: ['STATE   STATEFP CD113FP NAMELSAD'] filename: national_county.txt Header: ['AL', '01', '001', 'Autauga County', 'H1'] filename: national_cousub.txt Header: ['STATE', 'STATEFP', 'COUNTYFP', 'COUNTYNAME', 'COUSUBFP', 'COUSUBNAME', 'FUNCSTAT'] filename: national_places.txt Header: ['STATE', 'STATEFP', 'PLACEFP', 'PLACENAME', 'TYPE', 'FUNCSTAT', 'COUNTY'] filename: national_schdist.txt Header: ['STATE', 'STATEFP', 'LEA', 'SDNAME', 'TYPE'] filename: national_vtd.txt Header: ['STATE', 'STATEFP', 'COUNTYFP', 'COUNTYNAME', 'VTDST', 'VTDNAME']
The two dictionaries that are used to get file information are created with the following application:
CreateFIPScodeDescJson.py:
from pathlib import Path, PurePath
import json


class CreateFIPScodeDescJson:
    """Build and save the two JSON lookup files used when reading the FIPS code files.

    Instantiating the class does all the work: it builds ``self.fips_class``
    (class code -> meaning) and ``self.code_categories`` (data file name ->
    delimiter, file name and per-field description) and writes them to
    ``data/fips_class.json`` and ``data/code_descr.json`` under the current
    working directory.
    """

    @staticmethod
    def _field(length, descr, kind='String'):
        """Return one field description; ``length`` is stored as text and may be ''."""
        return {'length': length, 'type': kind, 'descr': descr}

    @staticmethod
    def _file_format(filename, delim, fields):
        """Return the description of one data file."""
        return {'delim': delim, 'filename': filename, 'fields': fields}

    def __init__(self):
        """Build both dictionaries and write them out as JSON.

        Raises:
            OSError: if the ``data`` directory or the JSON files cannot be written.
        """
        self.homepath = Path('.')
        self.datapath = self.homepath / 'data'
        self.code_descr_filepath = self.datapath / 'code_descr.json'
        self.fips_class_filepath = self.datapath / 'fips_class.json'

        self.fips_class = {
            'H1': 'identifies an active county or statistically equivalent entity that does not qualify'
                  ' under subclass C7 or H6.',
            'H4': 'identifies a legally defined inactive or nonfunctioning county or statistically '
                  'equivalent entity that does not qualify under subclass H6.',
            'H5': 'identifies census areas in Alaska, a statistical county equivalent entity.',
            'H6': 'identifies a county or statistically equivalent entity that is areally coextensive '
                  'or governmentally consolidated with an incorporated place, part of an incorporated '
                  'place, or a consolidated city.',
            'C7': 'identifies an incorporated place that is an independent city; that is, it also '
                  'serves as a county equivalent because it is not part of any county, and a minor '
                  'civil division (MCD) equivalent because it is not part of any MCD.'
        }

        field = self._field
        file_formats = [
            self._file_format('national_aia.txt', ',', {
                'AIANNHCE': field(
                    '4', 'Current American Indian/Alaska Native/Native Hawaiian area census code'),
                'AIANNHNAME': field(
                    '', 'American Indian Area name and legal/statistical area description'),
            }),
            # 'spaces' is a marker meaning whitespace-separated columns, not a
            # literal separator string.
            self._file_format('national_cd113.txt', 'spaces', {
                'STATE': field('2', 'State Postal Code'),
                'STATEFP': field('2', 'State FIPS CODE'),
                'CD113FP': field('4', '113th Congressional District FIPS Code'),
                'NAMELSAD': field(
                    '41', 'Current name and the translated legal/statistical area description'
                          ' for the congressional district'),
            }),
            self._file_format('national_county.txt', ',', {
                'STATE': field('2', 'State Postal Code'),
                'STATEFP': field('2', 'State FIPS Code'),
                'COUNTYFP': field('3', 'County FIPS Code'),
                'COUNTYNAME': field('100', 'County Name and Legal/Statistical Area Description'),
                'CLASSFP': field('2', 'FIPS Class Code'),
            }),
            # Keys must equal the column names in the file's header record
            # (STATE, STATEFP, COUNTYFP, COUNTYNAME, COUSUBFP, COUSUBNAME,
            # FUNCSTAT), otherwise a header lookup by field name fails.
            self._file_format('national_cousub.txt', ',', {
                'STATE': field('2', 'State Postal Code'),
                'STATEFP': field('2', 'Current state FIPS code'),
                'COUNTYFP': field('3', 'Current county FIPS code'),
                'COUNTYNAME': field('100', 'Current county subdivision name'),
                'COUSUBFP': field('5', 'Current county subdivision FIPS code'),
                'COUSUBNAME': field(
                    '100', 'Current name and the translated legal/statistical area description code '
                           'for county subdivision'),
                'FUNCSTAT': field('1', 'Current functional status'),
            }),
            self._file_format('national_places.txt', '|', {
                'STATE': field('2', 'State Postal Code'),
                'STATEFP': field('2', 'Current state FIPS code'),
                'PLACEFP': field('5', 'Current place FIPS code'),
                'PLACENAME': field('100', 'Current place name'),
                'TYPE': field('100', 'Place Name Type'),
                'FUNCSTAT': field('1', 'Current functional status'),
                'COUNTY': field('100', 'Current County Name'),
            }),
            self._file_format('national_schdist.txt', ',', {
                'STATE': field('2', 'State Postal Code'),
                'STATEFP': field('2', 'Current state FIPS code'),
                'LEA': field('5', 'Federal Local Education Agency ID'),
                'SDNAME': field('100', 'Current unified school district name'),
                'TYPE': field('100', 'Current school district type'),
            }),
            self._file_format('national_vtd.txt', '|', {
                'STATE': field('2', 'State Postal Code'),
                'STATEFP': field('2', 'Current state FIPS code'),
                'COUNTYFP': field('3', 'County FIPS Code'),
                'COUNTYNAME': field('100', 'County Name and Legal/Statistical Area Description'),
                'VTDST': field('', 'Voter District', kind=''),
                'VTDNAME': field('', 'Voter District Name', kind=''),
            }),
        ]

        # Key every description by its own file name so the two cannot drift apart.
        self.code_categories = {fmt['filename']: fmt for fmt in file_formats}

        # A fresh checkout may not have the output directory yet.
        self.datapath.mkdir(parents=True, exist_ok=True)

        # save code class dict
        with self.fips_class_filepath.open('w') as jout:
            json.dump(self.fips_class, jout)

        # Save code description dict
        with self.code_descr_filepath.open('w') as jout:
            json.dump(self.code_categories, jout)

# Run as a script: constructing the class builds both dictionaries and writes
# them to the two JSON files.
if __name__ == '__main__':
    CreateFIPScodeDescJson()
This type of file definition allows for file content verification.
For example, here is a new version of ReadFipsCodeData.py with verification:
from pathlib import Path, PurePath
import json


class ReadFipsCodeData:
    """Check the header record of every FIPS code file against its stored description.

    The per-file descriptions (file name, delimiter, field info) are loaded from
    ``data/code_descr.json`` and the class-code meanings from
    ``data/fips_class.json``, both relative to the current working directory.
    """

    # 'delim' values that mean "split on runs of whitespace" rather than on a
    # literal separator.  'spaces' is a marker word, not a real delimiter.
    _WHITESPACE_DELIMS = ('', 'spaces')

    def __init__(self):
        """Set up the paths and load both JSON lookup dictionaries.

        Raises:
            FileNotFoundError: if either JSON file is missing.
        """
        self.fips_dirs = []

        self.homepath = Path('.')

        self.datapath = self.homepath / 'data'
        self.code_descr_filepath = self.datapath / 'code_descr.json'
        self.fips_class_filepath = self.datapath / 'fips_class.json'

        # Directory holding the raw code files, one level above the working directory.
        self.code_datapath = self.homepath / '..' / 'data' / 'Codes' / 'ANSI-FIPS-Codes' / 'AllFiles'

        with self.code_descr_filepath.open('r') as jn:
            self.code_categories = json.load(jn)

        with self.fips_class_filepath.open('r') as jn:
            self.fips_class = json.load(jn)

    def verify_file_content(self, header, filename, file_format):
        """Print the description of every header field, failing on an unknown one.

        Args:
            header: list of column names taken from the file's first record.
            filename: name printed above the field list.
            file_format: description dict with a ``'fields'`` mapping of
                field name -> ``{'length', 'type', 'descr'}``.

        Returns:
            True when every field in ``header`` has a description.

        Raises:
            FileNotFoundError: if a header field has no usable description, i.e.
                the file is not the FIPS file it is expected to be.  The
                original lookup error is chained as ``__cause__``.
        """
        print(f'\n{filename}')
        print(f'Header: {header}')
        for fieldname in header:
            # Only lookup failures are translated; anything else (for example
            # KeyboardInterrupt) propagates unchanged.
            try:
                field = file_format['fields'][fieldname]
                length, kind, descr = field['length'], field['type'], field['descr']
            except (KeyError, TypeError) as err:
                raise FileNotFoundError(
                    f'{filename}: header field {fieldname!r} has no usable '
                    f'description in the file format'
                ) from err
            print(f'\n    fieldname: {fieldname}')
            print(f"    length: {length}, type: {kind}")
            print(f"    {descr}")
        return True

    def build_national_code_db(self):
        """Verify the header (first non-blank line) of each described code file.

        Data rows are not processed yet; reading of a file stops once its
        header has been verified.  A missing file or an unrecognised header is
        reported on stdout instead of being raised.
        """
        try:
            for filename, value in self.code_categories.items():
                filepath = self.code_datapath / value['filename']

                delimiter = value['delim']
                # str.split(None) splits on any run of whitespace.
                separator = None if delimiter in self._WHITESPACE_DELIMS else delimiter

                with filepath.open('r', encoding='ISO-8859-1') as fin:
                    # Iterate lazily: only the header record is needed for now.
                    for line in fin:
                        line = line.strip()
                        if not line:
                            continue
                        if self.verify_file_content(line.split(separator), filename, value):
                            break
        except FileNotFoundError:
            print(f'The directory {self.code_datapath.resolve()} does not contain FIPS data files.')
            print('Please supply proper directory name')


def main():
    """Create a ReadFipsCodeData reader and run build_national_code_db on it."""
    ReadFipsCodeData().build_national_code_db()


if __name__ == '__main__':
    main()
which produces:
Output:
national_aia.txt Header: ['AIANNHCE', 'AIANNHNAME']     fieldname: AIANNHCE     length: 4, type: String     Current American Indian/Alaska Native/Native Hawaiian area census code     fieldname: AIANNHNAME     length: , type: String     American Indian Area name and legal/statistical area description national_cd113.txt Header: ['STATE', 'STATEFP', 'CD113FP', 'NAMELSAD']     fieldname: STATE     length: 2, type: String     State Postal Code     fieldname: STATEFP     length: 2, type: String     State FIPS CODE     fieldname: CD113FP     length: 4, type: String     113th Congressional District FIPS Code     fieldname: NAMELSAD     length: 41, type: String     Current name and the translated legal/statistical area description for the congressional district national_county.txt Header: ['STATE', 'STATEFP', 'COUNTYFP', 'COUNTYNAME', 'CLASSFP']     fieldname: STATE     length: 2, type: String     State Postal Code     fieldname: STATEFP     length: 2, type: String     State FIPS Code     fieldname: COUNTYFP     length: 3, type: String     County FIPS Code     fieldname: COUNTYNAME     length: 100, type: String     County Name and Legal/Statistical Area Description     fieldname: CLASSFP     length: 2, type: String     FIPS Class Code national_cousub.txt Header: ['STATE', 'STATEFP', 'COUNTYFP', 'COUNTYNAME', 'COUSUBFP', 'COUSUBNAME', 'FUNCSTAT']     fieldname: STATE     length: 2, type: String     State Postal Code     fieldname: STATEFP     length: 2, type: String     Current state FIPS code     fieldname: COUNTYFP     length: 3, type: String     Current county FIPS code     fieldname: COUNTYNAME     length: 100, type: String     Current county subdivision name     fieldname: COUSUBFP     length: 5, type: String     Current county subdivision FIPS code     fieldname: COUSUBNAME     length: 100, type: String     Current name and the translated legal/statistical area description code for county subdivision     fieldname: FUNCSTAT     length: 1, type: String     Current 
functional status national_places.txt Header: ['STATE', 'STATEFP', 'PLACEFP', 'PLACENAME', 'TYPE', 'FUNCSTAT', 'COUNTY']     fieldname: STATE     length: 2, type: String     State Postal Code     fieldname: STATEFP     length: 2, type: String     Current state FIPS code     fieldname: PLACEFP     length: 5, type: String     Current place FIPS code     fieldname: PLACENAME     length: 100, type: String     Current place name     fieldname: TYPE     length: 100, type: String     Place Name Type     fieldname: FUNCSTAT     length: 1, type: String     Current functional status     fieldname: COUNTY     length: 100, type: String     Current County Name national_schdist.txt Header: ['STATE', 'STATEFP', 'LEA', 'SDNAME', 'TYPE']     fieldname: STATE     length: 2, type: String     State Postal Code     fieldname: STATEFP     length: 2, type: String     Current state FIPS code     fieldname: LEA     length: 5, type: String     Federal Local Education Agency ID     fieldname: SDNAME     length: 100, type: String     Current unified school district name     fieldname: TYPE     length: 100, type: String     Current school district type national_vtd.txt Header: ['STATE', 'STATEFP', 'COUNTYFP', 'COUNTYNAME', 'VTDST', 'VTDNAME']     fieldname: STATE     length: 2, type: String     State Postal Code     fieldname: STATEFP     length: 2, type: String     Current state FIPS code     fieldname: COUNTYFP     length: 3, type: String     County FIPS Code     fieldname: COUNTYNAME     length: 100, type: String     County Name and Legal/Statistical Area Description     fieldname: VTDST     length: , type:     Voter District     fieldname: VTDNAME     length: , type:     Voter District Name
Reply


Possibly Related Threads…
Thread Author Replies Views Last Post
  How to store pathlib paths in json file Larz60+ 2 15,309 Aug-30-2018, 06:47 PM
Last Post: Larz60+
  How to get directory information with pathlib Larz60+ 0 3,579 Oct-21-2017, 12:18 PM
Last Post: Larz60+

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020