Posts: 10
Threads: 4
Joined: Oct 2023
def parse_map(self, data, map_name):
    """Collect the ``type -> key = value`` entries of the ``map_name`` block.

    Args:
        data: Multi-line text made of ``type -> name:`` headers,
            ``type -> key = value`` entries and ``---`` terminators.
        map_name: Name of the map to read. It is interpolated into the
            header regex as-is.

    Returns:
        A dict mapping each key to the value returned by
        ``self.parse_key(line, key)``.

    NOTE: only headers naming ``map_name`` open a new map. Any other
    header (for example ``map -> map_name1:``) is not recognised, so the
    entries that follow it are stored in the enclosing map and the result
    is flat rather than nested.
    """
    header_pattern = rf"(\w+) -> ({map_name})[ ]*:"
    key_value_pattern = r"(\w+) -> (\w+) = (\w+(?:[\s.\,]+\w+)*)"

    result = {}
    map_stack = []
    # Initialised up front so an entry line that precedes the first header
    # is skipped instead of raising UnboundLocalError.
    current_map = None

    for raw_line in data.strip().split('\n'):
        line = raw_line.strip()

        if line == '---':
            if not map_stack:
                break  # Stop parsing when '---' is encountered at the top level
            current_map = map_stack.pop()
            if map_stack:
                map_stack[-1][map_name] = current_map
            else:
                result.update(current_map)
            continue

        if re.match(header_pattern, line):
            current_map = {}
            map_stack.append(current_map)
        elif current_map is not None:
            entry = re.match(key_value_pattern, line)
            if entry:
                key = entry.group(2)
                current_map[key] = self.parse_key(line, key)

    return result
This is my parse function.
This is data to parse
data = """
map -> map_name:
string -> name = John
int -> age = 30
string -> city = New York
int -> code = 16755251
map -> map_name1:
string -> name1 = John
int -> age1 = 30
string -> city1 = New York
int -> code1 = 16755251
float -> floater1 = 3.33
---
---
"""
This is the output:
{'name': 'John', 'age': 30, 'city': 'New York', 'code': 16755251, 'name1': 'John', 'age1': 30, 'city1': 'New York', 'code1': 16755251, 'floater1': 3.33}
As you can see, instead of parsing the nested map as a nested map with its own keys, it merges those keys into the main map.
How can I achieve this desired output?
{'name': 'John', 'age': 30, 'city': 'New York', 'code': 16755251, 'map_name1': {'name1': 'John', 'age1': 30, 'city1': 'New York', 'code1': 16755251, 'floater1': 3.33}}
Posts: 10
Threads: 4
Joined: Oct 2023
here is parse_key if needed
def parse_key(self, data, key):
    """Return the typed value of ``key`` found in ``data``.

    Every line of ``data`` shaped like ``type -> name = value`` is
    examined. When ``name`` equals ``key``, the value text is converted
    according to ``type``:

    * ``int`` / ``float`` / ``string`` -> ``int`` / ``float`` / ``str``
    * ``bool`` -> ``True`` for "true", "1", "yes" or "on" (any case),
      otherwise ``False``
    * ``hex`` / ``bin`` -> the text parsed as a base-16 / base-2 integer

    Args:
        data: One or more lines of text to search.
        key: Name of the entry to look up.

    Returns:
        The converted value, or ``None`` when no line defines ``key`` with
        a known type. If several lines define ``key``, the last one wins.

    Raises:
        ValueError: If the value text is not valid for its declared type.
    """
    converters = {
        'int': int,
        'string': str,
        'float': float,
        # bool("False") is True for any non-empty string, so compare the text.
        'bool': lambda text: text.lower() in ('true', '1', 'yes', 'on'),
        # hex()/bin() reject str arguments; parse the digits instead.
        'hex': lambda text: int(text, 16),
        'bin': lambda text: int(text, 2),
    }
    pattern = re.compile(r"(\w+) -> (\w+) = (\w+(?:[\s.\,]+\w+)*)")

    result = None
    for raw_line in data.strip().split('\n'):
        match = pattern.match(raw_line.strip())
        if not match:
            continue
        datatype, name, value = match.groups()
        if name == key and datatype in converters:
            result = converters[datatype](value)
    return result
Posts: 4,787
Threads: 76
Joined: Jan 2018
Oct-31-2023, 06:53 AM
(This post was last modified: Oct-31-2023, 06:53 AM by Gribouillis.)
Not sure what you want to do exactly but the line map -> map_name1: does not match the map pattern at line 18, because this pattern contains the map_name that is passed to the function. It means that the lines in the inner map are read as if they were lines from the outer map.
Posts: 4,787
Threads: 76
Joined: Jan 2018
Oct-31-2023, 08:01 AM
(This post was last modified: Oct-31-2023, 08:01 AM by Gribouillis.)
Here is an alternate way you could parse this data
import re
data = """
map -> map_name:
string -> name = John
int -> age = 30
string -> city = New York
int -> code = 16755251
map -> map_name1:
string -> name1 = John
int -> age1 = 30
string -> city1 = New York
int -> code1 = 16755251
float -> floater1 = 3.33
---
---
"""
from collections import namedtuple
# One classified input line: its kind, 1-based line number, extracted data
# (or None) and the original text.
Element = namedtuple('Element', 'type number data line')

# "map -> name:" opens a map; group 1 is the map name.
map_pattern = re.compile(r'^\s*map\s*->\s*(\w+)\s*:\s*$')
# "type -> key = value"; groups are type, key and the raw value text.
key_value_pattern = re.compile(r'^\s*(\w+)\s*->\s*(\w+)\s*=(.*)$')
# "---" closes the innermost open map.
# Raw strings keep "\s" from being read as an invalid string escape.
end_pattern = re.compile(r'^\s*---\s*$')
empty_pattern = re.compile(r'^\s*$')
def flat_parse(data):
    """Yield one ``Element`` per line of ``data``, without tracking nesting.

    Lines are numbered from 1. Each line is classified as:

    * ``KEY``   - ``type -> key = value``; data is ``(type, key, value_text)``
    * ``MAP``   - ``map -> name:``; data is the map name
    * ``END``   - ``---``; data is ``None``
    * ``EMPTY`` - blank or whitespace only; data is ``None``
    * ``ERROR`` - anything else; data is ``None``
    """
    for number, line in enumerate(data.splitlines(), 1):
        if match := key_value_pattern.match(line):
            yield Element('KEY', number, match.groups(), line)
        elif match := map_pattern.match(line):
            yield Element('MAP', number, match.group(1), line)
        elif end_pattern.match(line):
            yield Element('END', number, None, line)
        elif empty_pattern.match(line):
            yield Element('EMPTY', number, None, line)
        else:
            yield Element('ERROR', number, None, line)
def convert(tp, value):
value = value.strip()
match tp:
case 'int':
return int(value)
case 'float':
return float(value)
case 'string':
return str(value)
def parse(data):
current = {}
stack = []
for elt in flat_parse(data):
match elt.type:
case 'KEY':
tp, name, value = elt.data
current[name] = convert(tp, value)
case 'MAP':
stack.append(current)
current[elt.data] = current = {}
case 'END':
if not stack:
raise RuntimeError('Too many ends of map', elt)
current = stack.pop()
case 'EMPTY':
pass
case 'ERROR':
raise RuntimeError('Parsing error', elt)
if stack:
raise RuntimeError('End of map missing at end of data')
return current
print(parse(data))
Output:
λ python paillasse/pf/parsemap.py
{'map_name': {'name': 'John', 'age': 30, 'city': 'New York', 'code': 16755251, 'map_name1': {'name1': 'John', 'age1': 30, 'city1': 'New York', 'code1': 16755251, 'floater1': 3.33}}}
Posts: 10
Threads: 4
Joined: Oct 2023
(Oct-31-2023, 06:53 AM)Gribouillis Wrote: Not sure what you want to do exactly but the line map -> map_name1: does not match the map pattern at line 18, because this pattern contains the map_name that is passed to the function. It means that the lines in the inner map are read as if they were lines from the outer map.
Thanks for the code provided. But how could I remove the main map name as a key? Only nested maps should be treated as keys.
Posts: 4,787
Threads: 76
Joined: Jan 2018
(Oct-31-2023, 10:02 AM)Unkovic Wrote: But how could i remove the main map name as a key.?. Only nested maps should be treated as keys. If you want only the main map, you can just do
main_map = parse(data)['map_name']
Posts: 10
Threads: 4
Joined: Oct 2023
Oct-31-2023, 03:28 PM
(This post was last modified: Oct-31-2023, 04:00 PM by Unkovic.)
(Oct-31-2023, 10:14 AM)Gribouillis Wrote: (Oct-31-2023, 10:02 AM)Unkovic Wrote: But how could i remove the main map name as a key.?. Only nested maps should be treated as keys. If you want only the main map, you can just do
main_map = parse(data)['map_name']
No, I think we misunderstood. Main map name shouldn't be treated as key, hence this output
EDIT: I just realized your way was smarter, don't need to avoid map name. I'll stick to this.
Posts: 10
Threads: 4
Joined: Oct 2023
The only thing I now need help with is (nested) lists.
How could we parse this?
data = """
map -> map_name:
string -> name = John
int -> age = 30
string -> city = New York
int -> code = 16755251
map -> map_name1:
string -> name1 = John
int -> age1 = 30
string -> city1 = New York
int -> code1 = 16755251
float -> floater1 = 3.33
---
int -> code1 = 16755251
---
int -> code2 = 16755251
list -> list_name:
string -> shit
---
"""
As you can see, I added a list to the data.
list_pattern = re.compile(r'^\s*list\s*->\s*(\w+)\s*[:]\s*$')
I have even written the regex. But since lists contain only values, I'm unsure how to implement this so that it doesn't conflict with the key regex.
key_value_pattern = re.compile(r"^\s*(\w+)\s*->\s*(\w+)\s*=(.*)$")
value_pattern_list = re.compile(r"^\s*(\w+)\s*->(.*)$")
Posts: 4,787
Threads: 76
Joined: Jan 2018
Oct-31-2023, 05:50 PM
(This post was last modified: Oct-31-2023, 05:50 PM by Gribouillis.)
You can parse the lines with this code now
import re
from collections import namedtuple
# One classified input line: its kind, 1-based line number, extracted data
# (or None) and the original text.
Element = namedtuple('Element', 'type number data line')

# "type -> word:" or "type -> word = rest"; groups are type, word, the
# operator (":" or "=") and whatever follows it.
arrow_id_pattern = re.compile(r'^\s*(\w+)\s*->\s*(\w+)\s*([:=])(.*)$')
# "type -> free text", tried only after arrow_id_pattern has failed.
arrow_string_pattern = re.compile(r'^\s*(\w+)\s*->(.*$)')
# "---" closes the innermost open map or list.
# Raw strings keep "\s" from being read as an invalid string escape.
end_pattern = re.compile(r'^\s*---\s*$')
empty_pattern = re.compile(r'^\s*$')
def parse_line(number, line):
    """Classify one line of input and return it as an ``Element``.

    Args:
        number: Line number to record on the element.
        line: The raw line text.

    Returns:
        An ``Element`` whose ``type`` is one of:

        * ``MAP`` / ``LIST`` - ``map -> name:`` / ``list -> name:``;
          data is the name
        * ``DICT_ITEM`` - ``type -> key = value``; data is
          ``(type, key, value)`` with the value stripped
        * ``LIST_ITEM`` - ``type -> text``; data is ``(type, text)`` with
          the text stripped
        * ``END`` - ``---``; data is ``None``
        * ``EMPTY`` - blank line; data is ``None``
        * ``ERROR`` - anything else, including a ``:`` header that has
          trailing text or a type other than ``map`` / ``list``
    """
    if match := arrow_id_pattern.match(line):
        tp, word, op, rest = match.groups()
        rest = rest.strip()
        if op == '=':
            return Element('DICT_ITEM', number, (tp, word, rest), line)
        # op is ':' here: a container header, valid only for map/list and
        # with nothing after the colon.
        if rest or tp not in ('map', 'list'):
            return Element('ERROR', number, None, line)
        kind = 'LIST' if tp == 'list' else 'MAP'
        return Element(kind, number, word, line)

    if match := arrow_string_pattern.match(line):
        tp, rest = match.groups()
        return Element('LIST_ITEM', number, (tp, rest.strip()), line)

    if end_pattern.match(line):
        return Element('END', number, None, line)
    if empty_pattern.match(line):
        return Element('EMPTY', number, None, line)
    return Element('ERROR', number, None, line)
def flat_parse(data):
    """Yield the ``Element`` for each line of ``data``, numbering lines from 1."""
    for number, line in enumerate(data.splitlines(), 1):
        yield parse_line(number, line)
data = """
map -> map_name:
string -> name = John
int -> age = 30
string -> city = New York
int -> code = 16755251
map -> map_name1:
string -> name1 = John
int -> age1 = 30
string -> city1 = New York
int -> code1 = 16755251
float -> floater1 = 3.33
---
int -> code1 = 16755251
---
int -> code2 = 16755251
list -> list_name:
string -> shit
---
"""
for elt in flat_parse(data):
    print(elt)
Output:
Element(type='EMPTY', number=1, data=None, line='')
Element(type='MAP', number=2, data='map_name', line=' map -> map_name:')
Element(type='DICT_ITEM', number=3, data=('string', 'name', 'John'), line=' string -> name = John')
Element(type='DICT_ITEM', number=4, data=('int', 'age', '30'), line=' int -> age = 30')
Element(type='DICT_ITEM', number=5, data=('string', 'city', 'New York'), line=' string -> city = New York')
Element(type='DICT_ITEM', number=6, data=('int', 'code', '16755251'), line=' int -> code = 16755251')
Element(type='MAP', number=7, data='map_name1', line=' map -> map_name1:')
Element(type='DICT_ITEM', number=8, data=('string', 'name1', 'John'), line=' string -> name1 = John')
Element(type='DICT_ITEM', number=9, data=('int', 'age1', '30'), line=' int -> age1 = 30')
Element(type='DICT_ITEM', number=10, data=('string', 'city1', 'New York'), line=' string -> city1 = New York')
Element(type='DICT_ITEM', number=11, data=('int', 'code1', '16755251'), line=' int -> code1 = 16755251')
Element(type='DICT_ITEM', number=12, data=('float', 'floater1', '3.33'), line=' float -> floater1 = 3.33')
Element(type='END', number=13, data=None, line=' ---')
Element(type='DICT_ITEM', number=14, data=('int', 'code1', '16755251'), line=' int -> code1 = 16755251')
Element(type='END', number=15, data=None, line=' ---')
Element(type='DICT_ITEM', number=16, data=('int', 'code2', '16755251'), line=' int -> code2 = 16755251')
Element(type='LIST', number=17, data='list_name', line=' list -> list_name:')
Element(type='LIST_ITEM', number=18, data=('string', 'shit'), line=' string -> shit')
Element(type='END', number=19, data=None, line=' ---')
Posts: 10
Threads: 4
Joined: Oct 2023
(Oct-31-2023, 03:57 PM)Unkovic Wrote: The only thing I now need a help with is (nested) lists.
How could we parse this?
data = """
map -> map_name:
string -> name = John
int -> age = 30
string -> city = New York
int -> code = 16755251
map -> map_name1:
string -> name1 = John
int -> age1 = 30
string -> city1 = New York
int -> code1 = 16755251
float -> floater1 = 3.33
---
int -> code1 = 16755251
---
int -> code2 = 16755251
list -> list_name:
string -> shit
---
""" As you can see I added list to the data
list_pattern = re.compile(r'^\s*list\s*->\s*(\w+)\s*[:]\s*$')
I even have made the regex. But since lists only contains values, I'm unsure how to implement so it doesn't possibly affect with keys regex
key_value_pattern = re.compile(r"^\s*(\w+)\s*->\s*(\w+)\s*=(.*)$")
value_pattern_list = re.compile(r"^\s*(\w+)\s*->(.*)$")
Thanks man. Appreciate it. I made it finally
|