For those who might be interested, I've added a few things to the code: it now displays each element's sibling number (e.g. the 1 in "3.1"), the element's text value, and the number of identical siblings.
The code in my last post calculated the sibling IDs incorrectly. This is an update which should fix that.
# Print a structural summary of an XML document: one line per distinct element
# path, showing depth, a sibling counter, the tag name, the attribute names
# seen on that path, the whitespace-normalised text of the last element with
# text on that path, and how many elements share the path.
#
# Expects `xml_file_content` to hold the XML to parse. `getpath` is called on
# the ElementTree, so `etree` is presumably `lxml.etree` -- confirm against
# the file's imports.
xml_root = etree.fromstring(xml_file_content)
raw_tree = etree.ElementTree(xml_root)

# Maps each index-free path to its collected data, in first-seen order.
nice_tree = collections.OrderedDict()
for tag in xml_root.iter():
    # Drop positional predicates such as "[2]" so repeated elements share a
    # single key. Raw string: "\[" is not a valid str escape sequence.
    path = re.sub(r'\[[0-9]+\]', '', raw_tree.getpath(tag))
    if path not in nice_tree:
        nice_tree[path] = {'attribs': [], 'values': '', 'count': 1}
    else:
        nice_tree[path]['count'] += 1
    attribs = nice_tree[path]['attribs']
    for attrib in tag.keys():
        # Test membership against the attribute list itself; testing against
        # the record dict would only compare with its keys and let every
        # repeated element append its attribute names again.
        if attrib not in attribs:
            attribs.append(attrib)
    if tag.text:
        # Collapse runs of whitespace; a later element overwrites the value.
        nice_tree[path]['values'] = ' '.join(tag.text.split())

last_level = -1
sibling = 0
for path, d in nice_tree.items():
    # Depth of the element: the root path "/root" has one slash -> level 0.
    this_level = path.count('/') - 1
    # A single counter is used for all depths: it restarts at 0 whenever the
    # depth differs from the previous line, so numbering is not resumed after
    # returning from a deeper level.
    if this_level == last_level:
        sibling += 1
    else:
        sibling = 0
    last_level = this_level
    print('{0}{1}.{2}: {3} [{4}] [{5}] [{6}]'.format(
        ' ' * this_level,
        this_level,
        sibling,
        path.split('/')[-1],
        ', '.join(d['attribs']),
        d['values'],
        d['count'],
    ))
The code in my last post calculated the sibling IDs incorrectly. This is an update which should fix that.
# Print a structural summary of an XML document: one line per distinct element
# path, showing depth, a per-depth sibling number, the tag name, the attribute
# names seen on that path, the whitespace-normalised text of the last element
# with text on that path, and how many elements share the path.
#
# Expects `file_content` to hold the XML to parse. `getpath` is called on the
# ElementTree, so `etree` is presumably `lxml.etree` -- confirm against the
# file's imports.
xml_root = etree.fromstring(file_content)
raw_tree = etree.ElementTree(xml_root)

# Maps each index-free path to its collected data, in first-seen order.
nice_tree = collections.OrderedDict()
for tag in xml_root.iter():
    # Drop positional predicates such as "[2]" so repeated elements share a
    # single key. Raw string: "\[" is not a valid str escape sequence.
    path = re.sub(r'\[[0-9]+\]', '', raw_tree.getpath(tag))
    if path not in nice_tree:
        nice_tree[path] = {'attribs': [], 'values': '', 'count': 1}
    else:
        nice_tree[path]['count'] += 1
    attribs = nice_tree[path]['attribs']
    for attrib in tag.keys():
        # Test membership against the attribute list itself; testing against
        # the record dict would only compare with its keys and let every
        # repeated element append its attribute names again.
        if attrib not in attribs:
            attribs.append(attrib)
    if tag.text:
        # Collapse runs of whitespace; a later element overwrites the value.
        nice_tree[path]['values'] = ' '.join(tag.text.split())

last_level = -1
# siblings[k] is the sibling number most recently printed at depth k.
siblings = []
for path, d in nice_tree.items():
    # Depth of the element: the root path "/root" has one slash -> level 0.
    this_level = path.count('/') - 1
    if this_level >= len(siblings):
        # First time this depth is reached: start its counter at 0.
        siblings.append(0)
    elif this_level == last_level:
        # Same depth as the previous line: next sibling.
        siblings[this_level] += 1
    elif this_level < last_level:
        # Moved back up: reset the depth just left, continue numbering here.
        siblings[last_level] = 0
        siblings[this_level] += 1
    else:
        # Moved down into a depth seen before: restart its numbering.
        siblings[this_level] = 0
    last_level = this_level
    print('{0}{1}.{2}: {3} [{4}] [{5}] [{6}]'.format(
        ' ' * this_level,
        this_level,
        siblings[this_level],
        path.split('/')[-1],
        ', '.join(d['attribs']),
        d['values'],
        d['count'],
    ))