Parsing a tabbed file?

ODIS · (This post was last modified: Dec-15-2017, 03:53 PM by ODIS.)

Well, I would process your categories/items structure with some OOP abstraction. Here is a quick solution:

import json
from typing import List


class Item:
    """A named leaf entry that can be stored inside a Category."""

    def __init__(self, name: str):
        # The name is kept as a plain public attribute; get_name() reads it back.
        self.name = name

    def get_name(self) -> str:
        """Return the name this item was created with."""
        return self.name


class Category:
    """A node of the category tree.

    Each category has a name, an optional parent, a list of child
    categories and a list of items. Items and subcategories are kept in
    two separate lists, each in insertion order.
    """

    def __init__(self, name: str):
        self.name = name
        # None until set_parent() is called.
        self.parent_category = None
        self.subcategories = []
        self.items = []

    def set_parent(self, parent_category: "Category"):
        """Record *parent_category* as the parent of this category.

        Only the back-reference is stored; the parent's subcategory list
        is not touched here.
        """
        self.parent_category = parent_category

    def get_parent(self) -> "Category":
        """Return the parent category.

        Raises:
            Exception: if no parent has been set.
        """
        parent = self.parent_category
        if parent is not None:
            return parent
        raise Exception("You are trying to get non-existing parent category")

    def add_subcategory(self, subcategory: "Category"):
        """Append *subcategory* to this category's children."""
        self.subcategories.append(subcategory)

    def get_subcategories(self) -> List["Category"]:
        """Return the list of child categories (the live list, not a copy)."""
        return self.subcategories

    def add_item(self, item: Item):
        """Append *item* to this category's items."""
        self.items.append(item)

    def get_items(self) -> List[Item]:
        """Return the list of items (the live list, not a copy)."""
        return self.items

    def pop_last_item(self) -> Item:
        """Remove and return the most recently added item.

        Raises:
            Exception: if the category holds no items.
        """
        if not self.items:
            raise Exception("Cannot pop last item - empty item list")
        last_item = self.items.pop()
        return last_item

    def to_dict(self) -> dict:
        """Return this category and its whole subtree as nested plain dicts.

        Items are represented by their names; subcategories are converted
        recursively.
        """
        item_names = [entry.get_name() for entry in self.items]
        children = [child.to_dict() for child in self.subcategories]
        return {
            "category_name": self.name,
            "items": item_names,
            "subcategories": children,
        }


# Parse the tab-indented input file into a tree of categories and print the
# tree as JSON. Indentation depth (number of leading tab characters) decides
# the nesting: a line indented one level deeper than the previous one turns
# the previous line's item into a category that contains the new line.
with open("./input_file.txt") as file:
    # The root category is only a container for all first-level categories;
    # it is not part of the printed result itself.
    root_category = Category("root")
    current_category = root_category
    current_tabs_count = 0
    # Iterate over the file object directly; there is no need to load every
    # line into memory first with readlines().
    for line in file:
        # Cut the tabulators from the start of the line.
        line_without_leading_tabs = line.lstrip("\t")
        # The text of the line without indentation, newline or padding.
        item_name = line_without_leading_tabs.strip()
        # Skip blank lines. The line still carries its trailing newline at
        # this point, so the blank check has to look at the fully stripped
        # text; comparing the tab-stripped line against "" would never match
        # a real blank line and would add an item with an empty name.
        if item_name == "":
            continue
        # Count the leading tabs, i.e. the nesting depth of this line.
        leading_tabs_count = len(line) - len(line_without_leading_tabs)
        # Jumping forward by two or more tabs at once is a syntax error.
        if leading_tabs_count - current_tabs_count > 1:
            raise Exception("Syntax error - two tabs forward jump is not allowed")
        # Jumping forward by exactly one tab: the last item of the current
        # category is promoted to a new subcategory, which becomes current.
        if leading_tabs_count - current_tabs_count == 1:
            new_category = Category(current_category.pop_last_item().get_name())
            new_category.set_parent(current_category)
            current_category.add_subcategory(new_category)
            current_category = new_category
        # Otherwise we stay on the same level or go back: climb up one parent
        # per tab of difference (zero iterations when the level is unchanged).
        else:
            for _ in range(current_tabs_count - leading_tabs_count):
                current_category = current_category.get_parent()
        # In all cases, add the new item and remember the current depth.
        current_category.add_item(Item(item_name))
        current_tabs_count = leading_tabs_count


# Extract the first-level categories from the root and dump them to JSON.
result = [category.to_dict() for category in root_category.get_subcategories()]
print(json.dumps(result))

And here is the result in JSON:

Output:
[
   {
      "category_name":"Cat 1",
      "items":[
         "Sub Cat 1",
         "Sub Cat 2"
      ],
      "subcategories":[
         {
            "category_name":"Sub Cat 3",
            "items":[
               "Testing"
            ],
            "subcategories":[]
         }
      ]
   },
   {
      "category_name":"Cat 2",
      "items":[
         "Sub Cat 2",
         "Sub Cat 3"
      ],
      "subcategories":[
         {
            "category_name":"Sub Cat 1",
            "items":[
               "Nested item",
               "Yet another nested item"
            ],
            "subcategories":[
               {
                  "category_name":"Another nested item",
                  "items":[
                     "Nested-nested item",
                     "Nested-nested item 2"
                  ],
                  "subcategories":[]
               }
            ]
         }
      ]
   }
]

Parsing a tabbed file?

User Panel Messages

Announcements