Python Forum
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Help with nested maps
#1
def parse_map(self, data, map_name):
        lines = data.strip().split('\n')
        result = {}
        map_stack = []

        for line in lines:
            line = line.strip()
            if line == '---':
                if map_stack:
                    current_map = map_stack.pop()
                    if map_stack:
                        map_stack[-1][map_name] = current_map
                    else:
                        result.update(current_map)
                else:
                    break  # Stop parsing when '---' is encountered at the top level

            map_pattern = rf"(\w+) -> ({map_name})[ ]*:"
            map_match = re.match(map_pattern, line)

            if map_match:
                current_map = {}
                map_stack.append(current_map)

            elif current_map is not None:
                key_value_pattern = rf"(\w+) -> (\w+) = (\w+(?:[\s.\,]+\w+)*)"
                key_value_match = re.match(key_value_pattern, line)

                if key_value_match:
                    datatype, key, value = key_value_match.groups()
                    current_map[key] = self.parse_key(line, key)

        return result
    
This is my parse function.

This is data to parse

data = """
    map -> map_name:
        string -> name = John
		int -> age = 30
		string -> city = New York
		int -> code = 16755251
        map -> map_name1:
            string -> name1 = John
            int -> age1 = 30
            string -> city1 = New York
            int -> code1 = 16755251
            float -> floater1 = 3.33
        ---
    ---
this is the output

{'name': 'John', 'age': 30, 'city': 'New York', 'code': 16755251, 'name1': 'John', 'age1': 30, 'city1': 'New York', 'code1': 16755251, 'floater1': 3.33}
As you can see, instead of having nested map parsed as nested map with its own keys within it, it joins them together in main map.
How can I achieve this desired output?

{'name': 'John', 'age': 30, 'city': 'New York', 'code': 16755251, 'map_name1': {'name1': 'John', 'age1': 30, 'city1': 'New York', 'code1': 16755251, 'floater1': 3.33}}
Reply
#2
here is parse_key if needed

def parse_key(self, data, key):
        lines = data.strip().split('\n')
        result = None
        for line in lines:
            line = line.strip()
            pattern = rf"(\w+) -> (\w+) = (\w+(?:[\s.\,]+\w+)*)"
            match = re.match(pattern, line)
            if match:
                datatype, name, value = match.groups()
                if name == key:
                    if datatype == 'int':
                        result = int(value)
                    elif datatype == 'string':
                        result = str(value)
                    elif datatype == 'float':
                        result = float(value)
                    elif datatype == 'bool':
                        result = bool(value)
                    elif datatype == 'hex':
                        result = hex(value)
                    elif datatype == 'bin':
                        result = bin(value)
            
        return result
Reply
#3
Not sure what you want to do exactly but the line map -> map_name1: does not match the map pattern at line 18, because this pattern contains the map_name that is passed to the function. It means that the lines in the inner map are read as if they were lines from the outer map.
Reply
#4
Here is an alternate way you could parse this data
import re

data = """
    map -> map_name:
        string -> name = John
        int -> age = 30
        string -> city = New York
        int -> code = 16755251
        map -> map_name1:
            string -> name1 = John
            int -> age1 = 30
            string -> city1 = New York
            int -> code1 = 16755251
            float -> floater1 = 3.33
        ---
    ---
"""

from collections import namedtuple
Element = namedtuple('Element', 'type number data line')
map_pattern = re.compile(r'^\s*map\s*->\s*(\w+)\s*[:]\s*$')
key_value_pattern = re.compile(r"^\s*(\w+)\s*->\s*(\w+)\s*=(.*)$")
end_pattern = re.compile('^\s*---\s*$')
empty_pattern = re.compile('^\s*$')

def flat_parse(data):
    for i, line in enumerate(data.splitlines(), 1):
        if match := key_value_pattern.match(line):
            yield Element('KEY', i, (match.group(1), match.group(2), match.group(3)), line)
        elif match := map_pattern.match(line):
            yield Element('MAP', i, match.group(1), line)
        elif match := end_pattern.match(line):
            yield Element('END', i, None, line)
        elif match := empty_pattern.match(line):
            yield Element('EMPTY', i, None, line)
        else:
            yield Element('ERROR', i, None, line)

def convert(tp, value):
    value = value.strip()
    match tp:
        case 'int':
            return int(value)
        case 'float':
            return float(value)
        case 'string':
            return str(value)

def parse(data):
    current = {}
    stack = []
    for elt in flat_parse(data):
        match elt.type:
            case 'KEY':
                tp, name, value = elt.data
                current[name] = convert(tp, value)
            case 'MAP':
                stack.append(current)
                current[elt.data] = current = {}
            case 'END':
                if not stack:
                    raise RuntimeError('Too many ends of map', elt)
                current = stack.pop()
            case 'EMPTY':
                pass
            case 'ERROR':
                raise RuntimeError('Parsing error', elt)
    if stack:
        raise RuntimeError('End of map missing at end of data')
    return current

print(parse(data))
Output:
λ python paillasse/pf/parsemap.py {'map_name': {'name': 'John', 'age': 30, 'city': 'New York', 'code': 16755251, 'map_name1': {'name1': 'John', 'age1': 30, 'city1': 'New York', 'code1': 16755251, 'floater1': 3.33}}}
Reply
#5
(Oct-31-2023, 06:53 AM)Gribouillis Wrote: Not sure what you want to do exactly but the line map -> map_name1: does not match the map pattern at line 18, because this pattern contains the map_name that is passed to the function. It means that the lines in the inner map are read as if they were lines from the outer map.

Thanks for the code provided. But how could i remove the main map name as a key.?. Only nested maps should be treated as keys.
Reply
#6
(Oct-31-2023, 10:02 AM)Unkovic Wrote: But how could i remove the main map name as a key.?. Only nested maps should be treated as keys.
If you want only the main map, you can just do
main_map = parse(data)['map_name']
Reply
#7
(Oct-31-2023, 10:14 AM)Gribouillis Wrote:
(Oct-31-2023, 10:02 AM)Unkovic Wrote: But how could i remove the main map name as a key.?. Only nested maps should be treated as keys.
If you want only the main map, you can just do
main_map = parse(data)['map_name']

No, I think we misunderstood. Main map name shouldn't be treated as key, hence this output

EDIT: I just realized your way was smarter, don't need to avoid map name. I'll stick to this.
Reply
#8
The only thing I now need a help with is (nested) lists.

How could we parse this?

data = """
    map -> map_name:
        string -> name = John
		int -> age = 30
		string -> city = New York
		int -> code = 16755251
        map -> map_name1:
            string -> name1 = John
            int -> age1 = 30
            string -> city1 = New York
            int -> code1 = 16755251
            float -> floater1 = 3.33
        ---
        int -> code1 = 16755251
    ---
    int -> code2 = 16755251
    list -> list_name:
        string -> shit
    ---
"""
As you can see I added list to the data

list_pattern = re.compile(r'^\s*list\s*->\s*(\w+)\s*[:]\s*$')

I even have made the regex. But since lists only contains values, I'm unsure how to implement so it doesn't possibly affect with keys regex

key_value_pattern = re.compile(r"^\s*(\w+)\s*->\s*(\w+)\s*=(.*)$")
value_pattern_list = re.compile(r"^\s*(\w+)\s*->(.*)$")
Reply
#9
You can parse the lines with this code now
import re
from collections import namedtuple
Element = namedtuple('Element', 'type number data line')

arrow_id_pattern = re.compile(r'^\s*(\w+)\s*->\s*(\w+)\s*([:=])(.*)$')
arrow_string_pattern = re.compile(r'^\s*(\w+)\s*->(.*$)')
end_pattern = re.compile('^\s*---\s*$')
empty_pattern = re.compile('^\s*$')

def parse_line(number, line):
    if match := arrow_id_pattern.match(line):
        tp, word, op, rest = match.groups()
        rest = rest.strip()
        if op == ':':
            if rest or (tp not in ('map', 'list')):
                return Element('ERROR', number, None, line)
            elif tp == 'list':
                return Element('LIST', number, word, line)
            else:
                return Element('MAP', number, word, line)
        else:
            return Element('DICT_ITEM', number, (tp, word, rest), line)
    elif match := arrow_string_pattern.match(line):
        tp, rest = match.groups()
        rest = rest.strip()
        return Element('LIST_ITEM', number, (tp, rest), line)
    elif end_pattern.match(line):
        return Element('END', number, None, line)
    elif empty_pattern.match(line):
        return Element('EMPTY', number, None, line)
    else:
        return Element('ERROR', number, None, line)


def flat_parse(data):
    for i, line in enumerate(data.splitlines(), 1):
        yield parse_line(i, line)


data = """
    map -> map_name:
        string -> name = John
        int -> age = 30
        string -> city = New York
        int -> code = 16755251
        map -> map_name1:
            string -> name1 = John
            int -> age1 = 30
            string -> city1 = New York
            int -> code1 = 16755251
            float -> floater1 = 3.33
        ---
        int -> code1 = 16755251
    ---
    int -> code2 = 16755251
    list -> list_name:
        string -> shit
    ---
"""

for elt in flat_parse(data):
    print(elt)
Output:
Element(type='EMPTY', number=1, data=None, line='') Element(type='MAP', number=2, data='map_name', line=' map -> map_name:') Element(type='DICT_ITEM', number=3, data=('string', 'name', 'John'), line=' string -> name = John') Element(type='DICT_ITEM', number=4, data=('int', 'age', '30'), line=' int -> age = 30') Element(type='DICT_ITEM', number=5, data=('string', 'city', 'New York'), line=' string -> city = New York') Element(type='DICT_ITEM', number=6, data=('int', 'code', '16755251'), line=' int -> code = 16755251') Element(type='MAP', number=7, data='map_name1', line=' map -> map_name1:') Element(type='DICT_ITEM', number=8, data=('string', 'name1', 'John'), line=' string -> name1 = John') Element(type='DICT_ITEM', number=9, data=('int', 'age1', '30'), line=' int -> age1 = 30') Element(type='DICT_ITEM', number=10, data=('string', 'city1', 'New York'), line=' string -> city1 = New York') Element(type='DICT_ITEM', number=11, data=('int', 'code1', '16755251'), line=' int -> code1 = 16755251') Element(type='DICT_ITEM', number=12, data=('float', 'floater1', '3.33'), line=' float -> floater1 = 3.33') Element(type='END', number=13, data=None, line=' ---') Element(type='DICT_ITEM', number=14, data=('int', 'code1', '16755251'), line=' int -> code1 = 16755251') Element(type='END', number=15, data=None, line=' ---') Element(type='DICT_ITEM', number=16, data=('int', 'code2', '16755251'), line=' int -> code2 = 16755251') Element(type='LIST', number=17, data='list_name', line=' list -> list_name:') Element(type='LIST_ITEM', number=18, data=('string', 'shit'), line=' string -> shit') Element(type='END', number=19, data=None, line=' ---')
Reply
#10
(Oct-31-2023, 03:57 PM)Unkovic Wrote: The only thing I now need a help with is (nested) lists.

How could we parse this?

data = """
    map -> map_name:
        string -> name = John
		int -> age = 30
		string -> city = New York
		int -> code = 16755251
        map -> map_name1:
            string -> name1 = John
            int -> age1 = 30
            string -> city1 = New York
            int -> code1 = 16755251
            float -> floater1 = 3.33
        ---
        int -> code1 = 16755251
    ---
    int -> code2 = 16755251
    list -> list_name:
        string -> shit
    ---
"""
As you can see I added list to the data

list_pattern = re.compile(r'^\s*list\s*->\s*(\w+)\s*[:]\s*$')

I even have made the regex. But since lists only contains values, I'm unsure how to implement so it doesn't possibly affect with keys regex

key_value_pattern = re.compile(r"^\s*(\w+)\s*->\s*(\w+)\s*=(.*)$")
value_pattern_list = re.compile(r"^\s*(\w+)\s*->(.*)$")

Thanks man. Appreciate it. I made it finally
Reply


Possibly Related Threads…
Thread Author Replies Views Last Post
  Best way to map trajectory data on Google Maps Gduffley 1 2,679 Feb-05-2020, 12:36 AM
Last Post: scidam
  Can't visualize maps using Gmaps mPlummers 0 3,561 Sep-11-2019, 02:38 PM
Last Post: mPlummers
  Search "Places near by me" or "where am I" in google maps barry76 1 2,692 Feb-07-2019, 04:10 PM
Last Post: snippsat
  Non-Geographic Heat Maps JackValadez 0 2,099 Oct-17-2018, 06:03 PM
Last Post: JackValadez
  How get attributes of maps from loop statement LB_994 3 3,176 Aug-21-2018, 03:24 PM
Last Post: LB_994
  How to retrieve locality from google maps API Prince_Bhatia 0 3,350 Jul-23-2018, 07:57 AM
Last Post: Prince_Bhatia
  python charmap codec can't decode byte X in position Y character maps to < undefined> owais 9 39,168 Apr-28-2018, 10:52 PM
Last Post: abadawi

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020