Python Forum
Creating tables based on a dictionary of dictionaries
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Creating tables based on a dictionary of dictionaries
#1
I have a rather large, complicated dictionary structure that I need to duplicate in a Snowflake SQL database. I know that I can iterate through each dictionary and INSERT the data one row at a time, but that will be a lot of work and won't be very efficient.

I would like to know if there is a way that I could use my dictionary structure to create tables and relate them to each other.

Sample data:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
{"provinces": {"province":"Ontario",
               "cities": [
                     {"city":"Toronto",
                      "population": 2930000,
                      "districts": [
                           {"district": "East York"},
                           {"district": "North York"},
                           {"district": "Scarborough"},
                                   ],
                     },
                     {"city":"Ottawa",
                      "population": 994837,
                      "districts": [
                           {"district": "Centretown"},
                           {"district": "Golden Triangle"},
                           {"district": "Sandy Hill"},
                                   ],
                     },
                                               ],
               "lakes": [
                    {"lake": "Lake Erie"},
                    {"lake": "Lake Simcoe"},
                        ],
             },
             {"province": "Alberta",
                  "cities": [
                        {"city":"Edmonton",
                         "population":981280,
                         "districts": [
                              {"district": "Ellerslie"},
                              {"district": "Whitemud"},
                                                           ],
                        },
                  "lakes": [
                       {"lake": "Lake Louise"},
                       {"lake": "Moraine Lake"},
                       {"lake": "Bow Lake"},
                           ],
             },
             {"province": "Saskatchewan",
                  "cities": [
                        {"city": "Saskatoon",
                         "population": 273010,
                         "districts": [
                              {"district": "Holmwood"},
                              {"district": "Lakewood"},
                                      ],
                        },
                            ],
             }
}    
The desired SQL tables that I would want to generate from that data:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
CREATE TABLE provinces (prov_pk INT AUTOINCREMENT,
                        province STRING,
                        note STRING,
                        PRIMARY KEY (prov_pk)
                       )
 
CREATE TABLE cities (city_pk INT AUTOINCREMENT,
                     prov_key INT NOT NULL,
                     city STRING NOT NULL,
                     capital STRING NOT NULL,
                     population INT NOT NULL,
                     PRIMARY KEY (city_pk),
                     FOREIGN KEY (prov_key) REFERENCES provinces(prov_pk)
                    )
 
CREATE TABLE districts (district_key INT AUTOINCREMENT,
                        city_key INT,
                        district STRING,
                        PRIMARY KEY (district_key),
                        FOREIGN KEY (city_key) REFERENCES cities(city_pk)
                       )
                        
CREATE TABLE lakes (lake_key INT AUTOINCREMENT,
                    prov_key INT NOT NULL,
                    lake STRING NOT NULL,
                    PRIMARY KEY (lake_key),
                    FOREIGN KEY (prov_key) REFERENCES provinces(prov_pk)
Is there a module or something else that could handle this?
Reply
#2
Please post syntactically correct code. I had to fix the dictionary!

You could start by flattening the structure, here is code that does that
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
data = {
    "provinces": [
        {
            "province": "Ontario",
            "cities": [
                {
                    "city": "Toronto",
                    "population": 2930000,
                    "districts": [
                        {"district": "East York"},
                        {"district": "North York"},
                        {"district": "Scarborough"},
                    ],
                },
                {
                    "city": "Ottawa",
                    "population": 994837,
                    "districts": [
                        {"district": "Centretown"},
                        {"district": "Golden Triangle"},
                        {"district": "Sandy Hill"},
                    ],
                },
            ],
            "lakes": [
                {"lake": "Lake Erie"},
                {"lake": "Lake Simcoe"},
            ],
        },
        {
            "province": "Alberta",
            "cities": [
                {
                    "city": "Edmonton",
                    "population": 981280,
                    "districts": [
                        {"district": "Ellerslie"},
                        {"district": "Whitemud"},
                    ],
                }
            ],
            "lakes": [
                {"lake": "Lake Louise"},
                {"lake": "Moraine Lake"},
                {"lake": "Bow Lake"},
            ],
        },
        {
            "province": "Saskatchewan",
            "cities": [
                {
                    "city": "Saskatoon",
                    "population": 273010,
                    "districts": [
                        {"district": "Holmwood"},
                        {"district": "Lakewood"},
                    ],
                },
            ],
        },
    ]
}
 
 
class Parser:
    def __init__(self):
        self.rows = []
 
    def handle_dict(self, dic, parent_num):
        num = len(self.rows)
        self.rows.append(obj := {})
        for key, value in dic.items():
            if isinstance(value, list):
                for item in value:
                    self.handle_dict(item, num)
            else:
                obj[key] = value
        obj['parent'] = parent_num
 
 
def main():
    p = Parser()
    p.handle_dict(data, -1)
    for i, x in enumerate(p.rows):
        print(i, x)
 
 
if __name__ == "__main__":
    main()
Output:
0 {'parent': -1} 1 {'province': 'Ontario', 'parent': 0} 2 {'city': 'Toronto', 'population': 2930000, 'parent': 1} 3 {'district': 'East York', 'parent': 2} 4 {'district': 'North York', 'parent': 2} 5 {'district': 'Scarborough', 'parent': 2} 6 {'city': 'Ottawa', 'population': 994837, 'parent': 1} 7 {'district': 'Centretown', 'parent': 6} 8 {'district': 'Golden Triangle', 'parent': 6} 9 {'district': 'Sandy Hill', 'parent': 6} 10 {'lake': 'Lake Erie', 'parent': 1} 11 {'lake': 'Lake Simcoe', 'parent': 1} 12 {'province': 'Alberta', 'parent': 0} 13 {'city': 'Edmonton', 'population': 981280, 'parent': 12} 14 {'district': 'Ellerslie', 'parent': 13} 15 {'district': 'Whitemud', 'parent': 13} 16 {'lake': 'Lake Louise', 'parent': 12} 17 {'lake': 'Moraine Lake', 'parent': 12} 18 {'lake': 'Bow Lake', 'parent': 12} 19 {'province': 'Saskatchewan', 'parent': 0} 20 {'city': 'Saskatoon', 'population': 273010, 'parent': 19} 21 {'district': 'Holmwood', 'parent': 20} 22 {'district': 'Lakewood', 'parent': 20}
Calab likes this post
« We can solve any problem by introducing an extra level of indirection »
Reply
#3
I added more code to plan the building of the tables
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
class Parser:
    def __init__(self):
        self.rows = []
 
    def handle_dict(self, dic, parent_num):
        num = len(self.rows)
        self.rows.append(obj := {})
        for key, value in dic.items():
            if isinstance(value, list):
                for item in value:
                    self.handle_dict(item, num)
            else:
                obj[key] = value
        obj['parent'] = parent_num
 
    def plan_tables(self):
        pos = [None]
        self.tables = {}
        for i, obj in enumerate(self.rows[1:], 1):
            name = next(iter(obj))
            if not name in self.tables:
                self.tables[name] = []
            t = self.tables[name]
            j = len(t)
            pos.append((name, j))
            t.append(obj)
            if obj['parent']:
                obj['parent'] = pos[obj['parent']]
            else:
                del obj['parent']
 
 
def main():
    p = Parser()
    p.handle_dict(data, -1)
    for i, x in enumerate(p.rows):
        print(i, x)
    print('==========================')
    p.plan_tables()
    for name, table in p.tables.items():
        print(f'TABLE {name}:')
        for x in table:
            print(x)
 
 
if __name__ == "__main__":
    main()
Output:
========================== TABLE province: {'province': 'Ontario'} {'province': 'Alberta'} {'province': 'Saskatchewan'} TABLE city: {'city': 'Toronto', 'population': 2930000, 'parent': ('province', 0)} {'city': 'Ottawa', 'population': 994837, 'parent': ('province', 0)} {'city': 'Edmonton', 'population': 981280, 'parent': ('province', 1)} {'city': 'Saskatoon', 'population': 273010, 'parent': ('province', 2)} TABLE district: {'district': 'East York', 'parent': ('city', 0)} {'district': 'North York', 'parent': ('city', 0)} {'district': 'Scarborough', 'parent': ('city', 0)} {'district': 'Centretown', 'parent': ('city', 1)} {'district': 'Golden Triangle', 'parent': ('city', 1)} {'district': 'Sandy Hill', 'parent': ('city', 1)} {'district': 'Ellerslie', 'parent': ('city', 2)} {'district': 'Whitemud', 'parent': ('city', 2)} {'district': 'Holmwood', 'parent': ('city', 3)} {'district': 'Lakewood', 'parent': ('city', 3)} TABLE lake: {'lake': 'Lake Erie', 'parent': ('province', 0)} {'lake': 'Lake Simcoe', 'parent': ('province', 0)} {'lake': 'Lake Louise', 'parent': ('province', 1)} {'lake': 'Moraine Lake', 'parent': ('province', 1)} {'lake': 'Bow Lake', 'parent': ('province', 1)}
Calab and Pedroski55 like this post
« We can solve any problem by introducing an extra level of indirection »
Reply
#4
I like Gribouillis' solution, but I have never made any classes, so I don't really understand them.

The first problem was to correct the bad dictionary you posted. That took me a while!

Thereafter, you just have: dictionary[key]: list . No module or extra stuff is needed to deal with the data.

I use MySQL and phpadmin to access the database. I don't know anything about Snowflake SQL. Must be cold Canadian!

For doing things with or to a database from Python, I use pymysql.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import pymysql
 
# this data is ok, finally!
mydict = {"provinces": [
          {"province": "Ontario",
           "cities": [{"city":"Toronto",
                      "population": 2930000,
                      "districts": [{"district": "East York"}, {"district": "North York"}, {"district": "Scarborough"}]},           
                     {"city":"Ottawa",
                      "population": 994837,
                      "districts": [{"district": "Centretown"}, {"district": "Golden Triangle"}, {"district": "Sandy Hill"}]}],
           "lakes": [{"lake": "Lake Erie"}, {"lake": "Lake Simcoe"}]},         
          {"province": "Alberta",
           "cities": [{"city":"Edmonton",
                       "population":981280,
                       "districts": [{"district": "Ellerslie"}, {"district": "Whitemud"}]}],                      
           "lakes": [{"lake": "Lake Louise"}, {"lake": "Moraine Lake"}, {"lake": "Bow Lake"}]},
 
          {"province": "Saskatchewan",
           "cities": [{"city": "Saskatoon",
                       "population": 273010,
                       "districts": [{"district": "Holmwood"}, {"district": "Lakewood"}]}],
           "lakes": [{"lake": "Lake Skat"}, {"lake": "Lake Chewan"}]}]}
 
# how to get all the data you need from mydict
print(type(mydict['provinces']) # <class 'list'>
 
for p in mydict['provinces']:
    print(f"**********{p['province']}**********")
    for c in p['cities']:
        print(f"++++++{c['city']}++++++")
        print(c['population'])
        for d in c['districts']:
               print(f"----{d['district']}----")
    for l in p['lakes']:
        print(f"____{l['lake']}____")
             
# first you need MySQL installed and running with a privileged user
# connect to MySQL and create a new db
connect_db = pymysql.connect(host='localhost',
                       user='pedro',
                       password='topsecret')
 
# make a database called canada
with connect_db.cursor() as cursor:
    cursor.execute('CREATE DATABASE IF NOT EXISTS canada')
    connect_db.close()
 
# for connecting to the db canada, close after using
connect_canada_db = pymysql.connect(host='localhost',
                             user='pedro',
                             #port=''
                             password='topsecret',
                             db='canada',
                             cursorclass=pymysql.cursors.DictCursor)
 
# now make the tables
# first provinces
# only have 3 provinces, number keys: Ontario = 1, Alberta = 2, Saskatchewan = 3 need to pass these when inserting data
# the other provinces have declared independence
with connect_canada_db.cursor() as cursor:
        make_table = """CREATE TABLE IF NOT EXISTS provinces (prov_pk INT AUTO_INCREMENT PRIMARY KEY NOT NULL,
                        province VARCHAR(50),
                        note VARCHAR(50)) DEFAULT CHARACTER SET UTF8MB4 ENGINE=InnoDB;"""                    
        cursor.execute(make_table)
        auto_inc = 'ALTER TABLE provinces AUTO_INCREMENT=1;'
        cursor.execute(auto_inc)
        make_table = """CREATE TABLE IF NOT EXISTS cities (city_pk INT AUTO_INCREMENT PRIMARY KEY NOT NULL,
                        prov_key INT NOT NULL,
                        city VARCHAR(50) NOT NULL DEFAULT 'Gotham',
                        capital VARCHAR(50) NOT NULL DEFAULT 'Gotham',
                        population INT NOT NULL DEFAULT 0,
                        FOREIGN KEY (prov_key) REFERENCES provinces(prov_pk)) DEFAULT CHARACTER SET UTF8MB4 ENGINE=InnoDB;"""                    
        cursor.execute(make_table)
        auto_inc = 'ALTER TABLE cities AUTO_INCREMENT=1;'
        cursor.execute(auto_inc)
        make_table = """CREATE TABLE districts (district_key INT AUTO_INCREMENT PRIMARY KEY NOT NULL,
                        city_key INT NOT NULL,
                        district VARCHAR(50),
                        FOREIGN KEY (city_key) REFERENCES cities(city_pk)) DEFAULT CHARACTER SET UTF8MB4 ENGINE=InnoDB;"""
        cursor.execute(make_table)
        auto_inc = 'ALTER TABLE districts AUTO_INCREMENT=1;'
        cursor.execute(auto_inc)
        make_table = """CREATE TABLE lakes (lake_key INT AUTO_INCREMENT PRIMARY KEY NOT NULL,
                    prov_key INT NOT NULL,
                    lake VARCHAR(50) NOT NULL,
                    FOREIGN KEY (prov_key) REFERENCES provinces(prov_pk)) DEFAULT CHARACTER SET UTF8MB4 ENGINE=InnoDB;"""
        cursor.execute(make_table)
        auto_inc = 'ALTER TABLE lakes AUTO_INCREMENT=1;'
        cursor.execute(auto_inc)
 
# now populate the tables
# first provinces
with connect_canada_db.cursor() as cursor:
    for p in mydict['provinces']:
        print(p['province']) # shows the 3 remaining provinces of once proud Canada
        if p['province'] == 'Ontario':
            remark = 'backwards'           
        elif p['province'] == 'Alberta':
            remark = 'Let your hair hang down'           
        elif p['province'] == 'Saskatchewan':
            remark = 'Freezing'           
        tup = (p['province'], remark)
        insert_sql = "INSERT INTO provinces (province, note) VALUES (%s, %s)"
        cursor.execute(insert_sql, tup)
        connect_canada_db.commit()
         
# second cities
with connect_canada_db.cursor() as cursor:
    for p in mydict['provinces']:
        print(p['province']) # shows the 3 remaining provinces of once proud Canada
        if p['province'] == 'Ontario':
            fk = 1
        elif p['province'] == 'Alberta':
            fk = 2
        elif p['province'] == 'Saskatchewan':
            fk = 3       
        for c in p['cities']:
            print(c['city'])
            tup = (fk, c['city'], c['population'] )
            insert_sql = "INSERT INTO cities (prov_key, city, population) VALUES (%s, %s, %s)"
            cursor.execute(insert_sql, tup)
            connect_canada_db.commit()
 
# third districts
with connect_canada_db.cursor() as cursor:
    for p in mydict['provinces']:
        print(p['province']) # shows the 3 remaining provinces of once proud Canada
        for c in p['cities']:
            print(c['city'])
            if c['city'] == 'Toronto':
                fk = 1
            elif c['city'] == 'Ottawa':
                fk = 2
            elif c['city'] == 'Edmonton':
                fk = 3
            elif c['city'] == 'Saskatoon':
                fk = 4       
            for d in c['districts']:
                print(d['district'])
                tup = (fk, d['district'])
                insert_sql = "INSERT INTO districts (city_key, district) VALUES (%s, %s)"
                cursor.execute(insert_sql, tup)
                connect_canada_db.commit()
 
# fourth lakes
with connect_canada_db.cursor() as cursor:
    for p in mydict['provinces']:
        print(p['province']) # shows the 3 remaining provinces of once proud Canada
        if p['province'] == 'Ontario':
            fk = 1
        elif p['province'] == 'Alberta':
            fk = 2
        elif p['province'] == 'Saskatchewan':
            fk = 3       
        for l in p['lakes']:
            print(l['lake'])
            tup = (fk, l['lake'])
            insert_sql = "INSERT INTO lakes (prov_key, lake) VALUES (%s, %s)"
            cursor.execute(insert_sql, tup)
            connect_canada_db.commit()
 
# close the connection
connect_canada_db.close()
print("All done, everything looks good in phpmyadmin!")
Personally, I would use the names of the provinces as the FOREIGN KEY, otherwise you need to remember which number belongs to which province.

I find, the easiest way to populate a table is import a csv using phpadmin.
Reply


Possibly Related Threads…
Thread Author Replies Views Last Post
  creating new time series based on giving mean, standard deviation and skewness Staph 1 3,857 Aug-06-2019, 10:41 PM
Last Post: scidam
  Problem with creating DataFrame using Dictionary ift38375 8 5,106 Jul-02-2019, 05:39 PM
Last Post: ThomasL
  Adding Tables and Extracting Values from Tables jamescox11480 5 4,774 Sep-29-2018, 04:49 PM
Last Post: jamescox11480

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020