You can now parse the lines with the following code:
import re
from collections import namedtuple
# One flat record per input line:
#   type   -- classification tag assigned by parse_line ('MAP', 'LIST',
#             'DICT_ITEM', 'LIST_ITEM', 'END', 'EMPTY' or 'ERROR')
#   number -- line number as supplied by the caller
#   data   -- payload extracted from the line, or None
#   line   -- the original, unmodified line text
Element = namedtuple('Element', 'type number data line')

# "<word> -> <word> :<rest>" or "<word> -> <word> =<rest>"
arrow_id_pattern = re.compile(r'^\s*(\w+)\s*->\s*(\w+)\s*([:=])(.*)$')
# "<word> -> <rest>"
arrow_string_pattern = re.compile(r'^\s*(\w+)\s*->(.*$)')
# Raw strings are required below: '\s' inside a plain literal is an invalid
# escape sequence (DeprecationWarning since 3.6, SyntaxWarning since 3.12).
# A line consisting solely of '---' (optionally surrounded by whitespace).
end_pattern = re.compile(r'^\s*---\s*$')
# A line that is empty or whitespace-only.
empty_pattern = re.compile(r'^\s*$')
def parse_line(number, line):
    """Classify a single line of input and return it as an ``Element``.

    Args:
        number: Line number to record on the resulting element.
        line: Raw text of the line (stored unchanged on the element).

    Returns:
        An ``Element`` whose ``type`` is one of:

        * ``'MAP'`` / ``'LIST'`` -- ``map -> name:`` or ``list -> name:``
          header; ``data`` is the name.
        * ``'DICT_ITEM'`` -- ``tp -> word = rest``; ``data`` is
          ``(tp, word, rest)`` with ``rest`` stripped.
        * ``'LIST_ITEM'`` -- ``tp -> rest``; ``data`` is ``(tp, rest)``
          with ``rest`` stripped.
        * ``'END'`` -- a ``---`` line; ``data`` is ``None``.
        * ``'EMPTY'`` -- a blank line; ``data`` is ``None``.
        * ``'ERROR'`` -- anything else, including a ``:`` header that has
          trailing text or whose type is not ``map``/``list``.
    """
    id_match = arrow_id_pattern.match(line)
    if id_match is not None:
        kind, name, operator, tail = id_match.groups()
        tail = tail.strip()
        if operator != ':':
            # The pattern only admits ':' or '=', so this is an assignment.
            return Element('DICT_ITEM', number, (kind, name, tail), line)
        # A ':' header must be a bare map/list declaration.
        if tail or kind not in ('map', 'list'):
            return Element('ERROR', number, None, line)
        header = 'LIST' if kind == 'list' else 'MAP'
        return Element(header, number, name, line)

    item_match = arrow_string_pattern.match(line)
    if item_match is not None:
        kind, tail = item_match.groups()
        return Element('LIST_ITEM', number, (kind, tail.strip()), line)

    if end_pattern.match(line):
        return Element('END', number, None, line)
    if empty_pattern.match(line):
        return Element('EMPTY', number, None, line)
    return Element('ERROR', number, None, line)
def flat_parse(data):
    """Lazily yield one parsed element per line of *data*.

    Args:
        data: Text to parse; it is split with ``str.splitlines()``.

    Yields:
        The result of ``parse_line(number, text)`` for each line, with
        line numbers starting at 1.
    """
    yield from (
        parse_line(lineno, text)
        for lineno, text in enumerate(data.splitlines(), start=1)
    )
# Sample input: map headers with key/value items, '---' terminators,
# items appearing between terminators, and a list with a single entry.
data = """
map -> map_name:
string -> name = John
int -> age = 30
string -> city = New York
int -> code = 16755251
map -> map_name1:
string -> name1 = John
int -> age1 = 30
string -> city1 = New York
int -> code1 = 16755251
float -> floater1 = 3.33
---
int -> code1 = 16755251
---
int -> code2 = 16755251
list -> list_name:
string -> shit
---
"""

# Dump the flat element stream, one record per line.
for elt in flat_parse(data):
    print(elt)
Output:
Element(type='EMPTY', number=1, data=None, line='')
Element(type='MAP', number=2, data='map_name', line=' map -> map_name:')
Element(type='DICT_ITEM', number=3, data=('string', 'name', 'John'), line=' string -> name = John')
Element(type='DICT_ITEM', number=4, data=('int', 'age', '30'), line=' int -> age = 30')
Element(type='DICT_ITEM', number=5, data=('string', 'city', 'New York'), line=' string -> city = New York')
Element(type='DICT_ITEM', number=6, data=('int', 'code', '16755251'), line=' int -> code = 16755251')
Element(type='MAP', number=7, data='map_name1', line=' map -> map_name1:')
Element(type='DICT_ITEM', number=8, data=('string', 'name1', 'John'), line=' string -> name1 = John')
Element(type='DICT_ITEM', number=9, data=('int', 'age1', '30'), line=' int -> age1 = 30')
Element(type='DICT_ITEM', number=10, data=('string', 'city1', 'New York'), line=' string -> city1 = New York')
Element(type='DICT_ITEM', number=11, data=('int', 'code1', '16755251'), line=' int -> code1 = 16755251')
Element(type='DICT_ITEM', number=12, data=('float', 'floater1', '3.33'), line=' float -> floater1 = 3.33')
Element(type='END', number=13, data=None, line=' ---')
Element(type='DICT_ITEM', number=14, data=('int', 'code1', '16755251'), line=' int -> code1 = 16755251')
Element(type='END', number=15, data=None, line=' ---')
Element(type='DICT_ITEM', number=16, data=('int', 'code2', '16755251'), line=' int -> code2 = 16755251')
Element(type='LIST', number=17, data='list_name', line=' list -> list_name:')
Element(type='LIST_ITEM', number=18, data=('string', 'shit'), line=' string -> shit')
Element(type='END', number=19, data=None, line=' ---')