Oct-31-2023, 08:01 AM
(This post was last modified: Oct-31-2023, 08:01 AM by Gribouillis.)
Here is an alternative way you could parse this data:
import re data = """ map -> map_name: string -> name = John int -> age = 30 string -> city = New York int -> code = 16755251 map -> map_name1: string -> name1 = John int -> age1 = 30 string -> city1 = New York int -> code1 = 16755251 float -> floater1 = 3.33 --- --- """ from collections import namedtuple Element = namedtuple('Element', 'type number data line') map_pattern = re.compile(r'^\s*map\s*->\s*(\w+)\s*[:]\s*$') key_value_pattern = re.compile(r"^\s*(\w+)\s*->\s*(\w+)\s*=(.*)$") end_pattern = re.compile('^\s*---\s*$') empty_pattern = re.compile('^\s*$') def flat_parse(data): for i, line in enumerate(data.splitlines(), 1): if match := key_value_pattern.match(line): yield Element('KEY', i, (match.group(1), match.group(2), match.group(3)), line) elif match := map_pattern.match(line): yield Element('MAP', i, match.group(1), line) elif match := end_pattern.match(line): yield Element('END', i, None, line) elif match := empty_pattern.match(line): yield Element('EMPTY', i, None, line) else: yield Element('ERROR', i, None, line) def convert(tp, value): value = value.strip() match tp: case 'int': return int(value) case 'float': return float(value) case 'string': return str(value) def parse(data): current = {} stack = [] for elt in flat_parse(data): match elt.type: case 'KEY': tp, name, value = elt.data current[name] = convert(tp, value) case 'MAP': stack.append(current) current[elt.data] = current = {} case 'END': if not stack: raise RuntimeError('Too many ends of map', elt) current = stack.pop() case 'EMPTY': pass case 'ERROR': raise RuntimeError('Parsing error', elt) if stack: raise RuntimeError('End of map missing at end of data') return current print(parse(data))
Output:
λ python paillasse/pf/parsemap.py
{'map_name': {'name': 'John', 'age': 30, 'city': 'New York', 'code': 16755251, 'map_name1': {'name1': 'John', 'age1': 30, 'city1': 'New York', 'code1': 16755251, 'floater1': 3.33}}}