Help with nested maps

Unkovic · Oct-30-2023, 10:29 PM

def parse_map(self, data, map_name):
        lines = data.strip().split('\n')
        result = {}
        map_stack = []

        for line in lines:
            line = line.strip()
            if line == '---':
                if map_stack:
                    current_map = map_stack.pop()
                    if map_stack:
                        map_stack[-1][map_name] = current_map
                    else:
                        result.update(current_map)
                else:
                    break  # Stop parsing when '---' is encountered at the top level

            map_pattern = rf"(\w+) -> ({map_name})[ ]*:"
            map_match = re.match(map_pattern, line)

            if map_match:
                current_map = {}
                map_stack.append(current_map)

            elif current_map is not None:
                key_value_pattern = rf"(\w+) -> (\w+) = (\w+(?:[\s.\,]+\w+)*)"
                key_value_match = re.match(key_value_pattern, line)

                if key_value_match:
                    datatype, key, value = key_value_match.groups()
                    current_map[key] = self.parse_key(line, key)

        return result

This is my parse function.

This is data to parse

data = """
    map -> map_name:
        string -> name = John
		int -> age = 30
		string -> city = New York
		int -> code = 16755251
        map -> map_name1:
            string -> name1 = John
            int -> age1 = 30
            string -> city1 = New York
            int -> code1 = 16755251
            float -> floater1 = 3.33
        ---
    ---

this is the output

{'name': 'John', 'age': 30, 'city': 'New York', 'code': 16755251, 'name1': 'John', 'age1': 30, 'city1': 'New York', 'code1': 16755251, 'floater1': 3.33}

As you can see, instead of having nested map parsed as nested map with its own keys within it, it joins them together in main map.
How can I achieve this desired output?

{'name': 'John', 'age': 30, 'city': 'New York', 'code': 16755251, 'map_name1': {'name1': 'John', 'age1': 30, 'city1': 'New York', 'code1': 16755251, 'floater1': 3.33}}

Unkovic · Oct-30-2023, 10:30 PM

here is parse_key if needed

def parse_key(self, data, key):
        lines = data.strip().split('\n')
        result = None
        for line in lines:
            line = line.strip()
            pattern = rf"(\w+) -> (\w+) = (\w+(?:[\s.\,]+\w+)*)"
            match = re.match(pattern, line)
            if match:
                datatype, name, value = match.groups()
                if name == key:
                    if datatype == 'int':
                        result = int(value)
                    elif datatype == 'string':
                        result = str(value)
                    elif datatype == 'float':
                        result = float(value)
                    elif datatype == 'bool':
                        result = bool(value)
                    elif datatype == 'hex':
                        result = hex(value)
                    elif datatype == 'bin':
                        result = bin(value)
            
        return result

**Gribouillis** · (This post was last modified: Oct-31-2023, 06:53 AM by Gribouillis.)

Not sure what you want to do exactly but the line map -> map_name1: does not match the map pattern at line 18, because this pattern contains the map_name that is passed to the function. It means that the lines in the inner map are read as if they were lines from the outer map.

**Gribouillis** · (This post was last modified: Oct-31-2023, 08:01 AM by Gribouillis.)

Here is an alternate way you could parse this data

import re

data = """
    map -> map_name:
        string -> name = John
        int -> age = 30
        string -> city = New York
        int -> code = 16755251
        map -> map_name1:
            string -> name1 = John
            int -> age1 = 30
            string -> city1 = New York
            int -> code1 = 16755251
            float -> floater1 = 3.33
        ---
    ---
"""

from collections import namedtuple
Element = namedtuple('Element', 'type number data line')
map_pattern = re.compile(r'^\s*map\s*->\s*(\w+)\s*[:]\s*$')
key_value_pattern = re.compile(r"^\s*(\w+)\s*->\s*(\w+)\s*=(.*)$")
end_pattern = re.compile('^\s*---\s*$')
empty_pattern = re.compile('^\s*$')

def flat_parse(data):
    for i, line in enumerate(data.splitlines(), 1):
        if match := key_value_pattern.match(line):
            yield Element('KEY', i, (match.group(1), match.group(2), match.group(3)), line)
        elif match := map_pattern.match(line):
            yield Element('MAP', i, match.group(1), line)
        elif match := end_pattern.match(line):
            yield Element('END', i, None, line)
        elif match := empty_pattern.match(line):
            yield Element('EMPTY', i, None, line)
        else:
            yield Element('ERROR', i, None, line)

def convert(tp, value):
    value = value.strip()
    match tp:
        case 'int':
            return int(value)
        case 'float':
            return float(value)
        case 'string':
            return str(value)

def parse(data):
    current = {}
    stack = []
    for elt in flat_parse(data):
        match elt.type:
            case 'KEY':
                tp, name, value = elt.data
                current[name] = convert(tp, value)
            case 'MAP':
                stack.append(current)
                current[elt.data] = current = {}
            case 'END':
                if not stack:
                    raise RuntimeError('Too many ends of map', elt)
                current = stack.pop()
            case 'EMPTY':
                pass
            case 'ERROR':
                raise RuntimeError('Parsing error', elt)
    if stack:
        raise RuntimeError('End of map missing at end of data')
    return current

print(parse(data))

Output:λ python paillasse/pf/parsemap.py
{'map_name': {'name': 'John', 'age': 30, 'city': 'New York', 'code': 16755251, 'map_name1': {'name1': 'John', 'age1': 30, 'city1': 'New York', 'code1': 16755251, 'floater1': 3.33}}}

Unkovic · Oct-31-2023, 10:02 AM

(Oct-31-2023, 06:53 AM)Gribouillis Wrote: Not sure what you want to do exactly but the line map -> map_name1: does not match the map pattern at line 18, because this pattern contains the map_name that is passed to the function. It means that the lines in the inner map are read as if they were lines from the outer map.

Thanks for the code provided. But how could i remove the main map name as a key.?. Only nested maps should be treated as keys.

**Gribouillis** · Oct-31-2023, 10:14 AM

(Oct-31-2023, 10:02 AM)Unkovic Wrote: But how could i remove the main map name as a key.?. Only nested maps should be treated as keys.

If you want only the main map, you can just do

main_map = parse(data)['map_name']

Unkovic · (This post was last modified: Oct-31-2023, 04:00 PM by Unkovic.)

(Oct-31-2023, 10:14 AM)Gribouillis Wrote:
(Oct-31-2023, 10:02 AM)Unkovic Wrote: But how could i remove the main map name as a key.?. Only nested maps should be treated as keys.
If you want only the main map, you can just do
main_map = parse(data)['map_name']

No, I think we misunderstood. Main map name shouldn't be treated as key, hence this output

EDIT: I just realized your way was smarter, don't need to avoid map name. I'll stick to this.

Unkovic · Oct-31-2023, 03:57 PM

The only thing I now need a help with is (nested) lists.

How could we parse this?

data = """
    map -> map_name:
        string -> name = John
		int -> age = 30
		string -> city = New York
		int -> code = 16755251
        map -> map_name1:
            string -> name1 = John
            int -> age1 = 30
            string -> city1 = New York
            int -> code1 = 16755251
            float -> floater1 = 3.33
        ---
        int -> code1 = 16755251
    ---
    int -> code2 = 16755251
    list -> list_name:
        string -> shit
    ---
"""

As you can see I added list to the data

list_pattern = re.compile(r'^\s*list\s*->\s*(\w+)\s*[:]\s*$')

I even have made the regex. But since lists only contains values, I'm unsure how to implement so it doesn't possibly affect with keys regex

key_value_pattern = re.compile(r"^\s*(\w+)\s*->\s*(\w+)\s*=(.*)$")
value_pattern_list = re.compile(r"^\s*(\w+)\s*->(.*)$")

**Gribouillis** · (This post was last modified: Oct-31-2023, 05:50 PM by Gribouillis.)

You can parse the lines with this code now

import re
from collections import namedtuple
Element = namedtuple('Element', 'type number data line')

arrow_id_pattern = re.compile(r'^\s*(\w+)\s*->\s*(\w+)\s*([:=])(.*)$')
arrow_string_pattern = re.compile(r'^\s*(\w+)\s*->(.*$)')
end_pattern = re.compile('^\s*---\s*$')
empty_pattern = re.compile('^\s*$')

def parse_line(number, line):
    if match := arrow_id_pattern.match(line):
        tp, word, op, rest = match.groups()
        rest = rest.strip()
        if op == ':':
            if rest or (tp not in ('map', 'list')):
                return Element('ERROR', number, None, line)
            elif tp == 'list':
                return Element('LIST', number, word, line)
            else:
                return Element('MAP', number, word, line)
        else:
            return Element('DICT_ITEM', number, (tp, word, rest), line)
    elif match := arrow_string_pattern.match(line):
        tp, rest = match.groups()
        rest = rest.strip()
        return Element('LIST_ITEM', number, (tp, rest), line)
    elif end_pattern.match(line):
        return Element('END', number, None, line)
    elif empty_pattern.match(line):
        return Element('EMPTY', number, None, line)
    else:
        return Element('ERROR', number, None, line)


def flat_parse(data):
    for i, line in enumerate(data.splitlines(), 1):
        yield parse_line(i, line)


data = """
    map -> map_name:
        string -> name = John
        int -> age = 30
        string -> city = New York
        int -> code = 16755251
        map -> map_name1:
            string -> name1 = John
            int -> age1 = 30
            string -> city1 = New York
            int -> code1 = 16755251
            float -> floater1 = 3.33
        ---
        int -> code1 = 16755251
    ---
    int -> code2 = 16755251
    list -> list_name:
        string -> shit
    ---
"""

for elt in flat_parse(data):
    print(elt)

Output:Element(type='EMPTY', number=1, data=None, line='')
Element(type='MAP', number=2, data='map_name', line='    map -> map_name:')
Element(type='DICT_ITEM', number=3, data=('string', 'name', 'John'), line='        string -> name = John')
Element(type='DICT_ITEM', number=4, data=('int', 'age', '30'), line='        int -> age = 30')
Element(type='DICT_ITEM', number=5, data=('string', 'city', 'New York'), line='        string -> city = New York')
Element(type='DICT_ITEM', number=6, data=('int', 'code', '16755251'), line='        int -> code = 16755251')
Element(type='MAP', number=7, data='map_name1', line='        map -> map_name1:')
Element(type='DICT_ITEM', number=8, data=('string', 'name1', 'John'), line='            string -> name1 = John')
Element(type='DICT_ITEM', number=9, data=('int', 'age1', '30'), line='            int -> age1 = 30')
Element(type='DICT_ITEM', number=10, data=('string', 'city1', 'New York'), line='            string -> city1 = New York')
Element(type='DICT_ITEM', number=11, data=('int', 'code1', '16755251'), line='            int -> code1 = 16755251')
Element(type='DICT_ITEM', number=12, data=('float', 'floater1', '3.33'), line='            float -> floater1 = 3.33')
Element(type='END', number=13, data=None, line='        ---')
Element(type='DICT_ITEM', number=14, data=('int', 'code1', '16755251'), line='        int -> code1 = 16755251')
Element(type='END', number=15, data=None, line='    ---')
Element(type='DICT_ITEM', number=16, data=('int', 'code2', '16755251'), line='    int -> code2 = 16755251')
Element(type='LIST', number=17, data='list_name', line='    list -> list_name:')
Element(type='LIST_ITEM', number=18, data=('string', 'shit'), line='        string -> shit')
Element(type='END', number=19, data=None, line='    ---')

Unkovic · Nov-01-2023, 01:07 AM

(Oct-31-2023, 03:57 PM)Unkovic Wrote: The only thing I now need a help with is (nested) lists.

How could we parse this?
data = """
    map -> map_name:
        string -> name = John
		int -> age = 30
		string -> city = New York
		int -> code = 16755251
        map -> map_name1:
            string -> name1 = John
            int -> age1 = 30
            string -> city1 = New York
            int -> code1 = 16755251
            float -> floater1 = 3.33
        ---
        int -> code1 = 16755251
    ---
    int -> code2 = 16755251
    list -> list_name:
        string -> shit
    ---
"""
As you can see I added list to the data

list_pattern = re.compile(r'^\s*list\s*->\s*(\w+)\s*[:]\s*$')

I even have made the regex. But since lists only contains values, I'm unsure how to implement so it doesn't possibly affect with keys regex

key_value_pattern = re.compile(r"^\s*(\w+)\s*->\s*(\w+)\s*=(.*)$")
value_pattern_list = re.compile(r"^\s*(\w+)\s*->(.*)$")

Thanks man. Appreciate it. I made it finally

Possibly Related Threads…
Thread		Author	Replies	Views	Last Post
	Best way to map trajectory data on Google Maps	Gduffley	1	2,679	Feb-05-2020, 12:36 AM Last Post: scidam
	Can't visualize maps using Gmaps	mPlummers	0	3,561	Sep-11-2019, 02:38 PM Last Post: mPlummers
	Search "Places near by me" or "where am I" in google maps	barry76	1	2,692	Feb-07-2019, 04:10 PM Last Post: snippsat
	Non-Geographic Heat Maps	JackValadez	0	2,099	Oct-17-2018, 06:03 PM Last Post: JackValadez
	How get attributes of maps from loop statement	LB_994	3	3,176	Aug-21-2018, 03:24 PM Last Post: LB_994
	How to retrieve locality from google maps API	Prince_Bhatia	0	3,350	Jul-23-2018, 07:57 AM Last Post: Prince_Bhatia
	python charmap codec can't decode byte X in position Y character maps to < undefined>	owais	9	39,168	Apr-28-2018, 10:52 PM Last Post: abadawi

Help with nested maps

User Panel Messages

Announcements