Well, I would process your categories/items structure with some OOP abstraction. Here is a quick solution:
import json
from typing import Iterable, List, Optional


class Item:
    """A named leaf entry: an input line that has no indented children."""

    def __init__(self, name: str):
        self.name = name

    def get_name(self) -> str:
        """Return the item's name."""
        return self.name


class Category:
    """A node of the category tree holding items and nested subcategories."""

    def __init__(self, name: str):
        self.name = name
        # None until set_parent() is called (the root never gets a parent).
        self.parent_category: Optional["Category"] = None
        self.subcategories: List["Category"] = []
        self.items: List[Item] = []

    def set_parent(self, parent_category: "Category"):
        """Record ``parent_category`` as this category's parent."""
        self.parent_category = parent_category

    def get_parent(self) -> "Category":
        """Return the parent category.

        Raises:
            Exception: if no parent has been set.
        """
        if self.parent_category is None:
            raise Exception("You are trying to get non-existing parent category")
        return self.parent_category

    def add_subcategory(self, subcategory: "Category"):
        """Append ``subcategory`` to this category's subcategories."""
        self.subcategories.append(subcategory)

    def get_subcategories(self) -> List["Category"]:
        """Return the list of direct subcategories."""
        return self.subcategories

    def add_item(self, item: Item):
        """Append ``item`` to this category's items."""
        self.items.append(item)

    def get_items(self) -> List[Item]:
        """Return the list of items held directly by this category."""
        return self.items

    def pop_last_item(self) -> Item:
        """Remove and return the most recently added item.

        Raises:
            Exception: if the category holds no items.
        """
        if not self.items:
            raise Exception("Cannot pop last item - empty item list")
        return self.items.pop()

    def to_dict(self) -> dict:
        """Return a JSON-serializable dict of this category and its subtree.

        Keys are ``category_name``, ``items`` (a list of item names) and
        ``subcategories`` (a list of nested dicts of the same shape).
        """
        return {
            "category_name": self.name,
            "items": [item.get_name() for item in self.items],
            "subcategories": [
                subcategory.to_dict() for subcategory in self.subcategories
            ],
        }


def parse_categories(lines: Iterable[str]) -> Category:
    """Build a category tree from tab-indented lines.

    Each line is first added as an item of the current category. When the next
    line is indented one tab deeper, the previously added item is promoted to
    a category and becomes the new current category.

    Args:
        lines: Text lines, with or without trailing newlines.

    Returns:
        A synthetic ``"root"`` category containing all first-level categories.

    Raises:
        Exception: if a line is indented more than one tab deeper than the
            previous non-blank line, or if the first non-blank line is
            indented (there is no item to promote).
    """
    # The root category is a container of all first-level categories.
    root_category = Category("root")
    current_category = root_category
    current_tabs_count = 0

    for line in lines:
        line_without_leading_tabs = line.lstrip("\t")
        name = line_without_leading_tabs.strip()

        # Skip blank lines. Lines read from a file keep their "\n", so the
        # check has to be made on the fully stripped text.
        if not name:
            continue

        leading_tabs_count = len(line) - len(line_without_leading_tabs)
        jump = leading_tabs_count - current_tabs_count

        if jump > 1:
            raise Exception("Syntax error - two tabs forward jump is not allowed")

        if jump == 1:
            # One level deeper: the last item turns out to be a category.
            new_category = Category(current_category.pop_last_item().get_name())
            new_category.set_parent(current_category)
            current_category.add_subcategory(new_category)
            current_category = new_category
        else:
            # Same level or shallower: climb back to the matching ancestor.
            for _ in range(-jump):
                current_category = current_category.get_parent()

        current_category.add_item(Item(name))
        current_tabs_count = leading_tabs_count

    return root_category


if __name__ == "__main__":
    with open("./input_file.txt") as file:
        root_category = parse_categories(file)

    # Only the root's subcategories are dumped; the root itself is a container.
    result = [category.to_dict() for category in root_category.get_subcategories()]
    print(json.dumps(result))

# And here is the result in JSON:
Output:[
{
"category_name":"Cat 1",
"items":[
"Sub Cat 1",
"Sub Cat 2"
],
"subcategories":[
{
"category_name":"Sub Cat 3",
"items":[
"Testing"
],
"subcategories":[]
}
]
},
{
"category_name":"Cat 2",
"items":[
"Sub Cat 2",
"Sub Cat 3"
],
"subcategories":[
{
"category_name":"Sub Cat 1",
"items":[
"Nested item",
"Yet another nested item"
],
"subcategories":[
{
"category_name":"Another nested item",
"items":[
"Nested-nested item",
"Nested-nested item 2"
],
"subcategories":[]
}
]
}
]
}
]