Python Forum

Full Version: XML minidom "Pretty Print" Lost Data
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
I have code that successfully creates a desired XML file from a provided CSV file. However, I'm trying to format it to be easier to read. I want the list of "triggers" and list of "messages" to be separated by a blank line and each "trigger" and each "message" should have its own line. Trying to use minidom, but the "pretty" file loses the triggers for some reason.

Code:

import csv
import os
import tkinter as tk
from tkinter import filedialog, simpledialog
import xml.etree.ElementTree as ET
from xml.dom import minidom

def process_csv_to_xml():
    # Initialize tkinter
    root = tk.Tk()
    root.withdraw()  # Hide main window

    # Prompt for CSV file
    csv_file_path = filedialog.askopenfilename(title="Select 'Tag Export' CSV file", filetypes=[("CSV files", "*.csv")])
    if not csv_file_path:
        print("No file selected.")
        return

    # Prompt for PLC Shortcut with default value
    plc_shortcut = simpledialog.askstring("Input", "Enter PLC Shortcut (STRING):", initialvalue="SCP", parent=root)
    if not plc_shortcut:
        print("PLC Shortcut not provided.")
        return

    # Prompt for Starting Trigger ID with default value
    starting_trigger_id = simpledialog.askinteger("Input", "Enter Starting Trigger ID (INT):", initialvalue=1, parent=root)
    if starting_trigger_id is None:
        print("Starting Trigger ID not provided.")
        return

    # Prompt for Starting Message ID with default value
    starting_message_id = simpledialog.askinteger("Input", "Enter Starting Message ID (INT):", initialvalue=1, parent=root)
    if starting_message_id is None:
        print("Starting Message ID not provided.")
        return
        
    # Prompt for XML file name with default value
    xml_file_path = filedialog.asksaveasfilename(
        title="Save XML file",
        defaultextension=".xml",
        filetypes=[("XML files", "*.xml")],
        initialfile="Alarms",
        initialdir=os.path.dirname(csv_file_path)
    )
    if not xml_file_path:
        print("XML file not saved.")
        return

    triggers = []
    messages = []
    trigger_number = starting_trigger_id
    message_number = starting_message_id

    with open(csv_file_path, 'r', newline='') as csv_file:
        csv_reader = csv.reader(csv_file)
        for _ in range(6):
            next(csv_reader)  # Skip first 6 lines
        headers = next(csv_reader)  # Line 7 is the headers

        for row in csv_reader:
            tag_type = row[headers.index('TYPE')]
            tag_name = row[headers.index('NAME')]
            data_type = row[headers.index('DATATYPE')]

            # Debug info
            print(f"Processing tag: Name='{tag_name}', DataType='{data_type}'")

            if tag_type == 'TAG':
                if data_type == 'All_DigitalAlm_ActiveResumeDelay':
                    trigger = ET.Element('trigger', id=f"T{trigger_number}", type="value",
                                         ack_all_value="0", use_ack_all="false", ack_tag="",
                                         exp=f"{{[{plc_shortcut}]{tag_name}.ALM_IND}}",
                                         message_tag="", message_handshake_exp="",
                                         message_notification_tag="", remote_ack_exp="",
                                         remote_ack_handshake_tag="", label=f"Label{trigger_number}",
                                         handshake_tag="")
                    triggers.append(trigger)
                    trigger_number += 1

                elif data_type == 'All_ScaleSignal_2SPFailAlm':
                    trigger = ET.Element('trigger', id=f"T{trigger_number}", type="value",
                                         ack_all_value="0", use_ack_all="false", ack_tag="",
                                         exp=f"{{[{plc_shortcut}]{tag_name}.ALM_Display}}",
                                         message_tag="", message_handshake_exp="",
                                         message_notification_tag="", remote_ack_exp="",
                                         remote_ack_handshake_tag="", label=f"Label{trigger_number}",
                                         handshake_tag="")
                    triggers.append(trigger)
                    trigger_number += 1

                elif data_type == 'Wtr_Motor_Alm':
                    trigger1 = ET.Element('trigger', id=f"T{trigger_number}", type="bit",
                                          ack_all_value="0", use_ack_all="false", ack_tag="",
                                          exp=f"{{[{plc_shortcut}]{tag_name}.Thermal_Seal_Status}}",
                                          message_tag="", message_handshake_exp="",
                                          message_notification_tag="", remote_ack_exp="",
                                          remote_ack_handshake_tag="", label=f"Label{trigger_number}",
                                          handshake_tag="")
                    triggers.append(trigger1)
                    trigger_number += 1

                    trigger2 = ET.Element('trigger', id=f"T{trigger_number}", type="value",
                                          ack_all_value="0", use_ack_all="false", ack_tag="",
                                          exp=f"{{[{plc_shortcut}]{tag_name}.Run_Fail_Status}}",
                                          message_tag="", message_handshake_exp="",
                                          message_notification_tag="", remote_ack_exp="",
                                          remote_ack_handshake_tag="", label=f"Label{trigger_number}",
                                          handshake_tag="")
                    triggers.append(trigger2)
                    trigger_number += 1

                elif data_type == 'More_Scale_Signal_For_Valve':
                    trigger = ET.Element('trigger', id=f"T{trigger_number}", type="value",
                                         ack_all_value="0", use_ack_all="false", ack_tag="",
                                         exp=f"{{[{plc_shortcut}]{tag_name}.Open_Close_Status}}",
                                         message_tag="", message_handshake_exp="",
                                         message_notification_tag="", remote_ack_exp="",
                                         remote_ack_handshake_tag="", label=f"Label{trigger_number}",
                                         handshake_tag="")
                    triggers.append(trigger)
                    trigger_number += 1

            # Handle messages
            if data_type == 'All_DigitalAlm_ActiveResumeDelay':
                message = ET.Element('message', id=f"M{message_number}", trigger_value="1",
                                     identifier=str(message_number), trigger=f"#T{trigger_number-1}",
                                     backcolor="#800000", forecolor="#FFFFFF", audio="false",
                                     display="true", print="false", message_to_tag="false",
                                     text=tag_name)
                messages.append(message)
                message_number += 1

            elif data_type == 'All_ScaleSignal_2SPFailAlm':
                for i in range(5):
                    message = ET.Element('message', id=f"M{message_number}", trigger_value=str(i+1),
                                         identifier=str(message_number), trigger=f"#T{trigger_number-1}",
                                         backcolor="#800000", forecolor="#FFFFFF", audio="false",
                                         display="true", print="false", message_to_tag="false",
                                         text=f"{tag_name}{'Low Float' if i == 0 else ('Low' if i == 1 else ('Signal Fail' if i == 2 else ('High' if i == 3 else 'High Float')))}")
                    messages.append(message)
                    message_number += 1

            elif data_type == 'Wtr_Motor_Alm':
                for i in range(5):
                    if i < 3:
                        message = ET.Element('message', id=f"M{message_number}", trigger_value=str(i+1),
                                             identifier=str(message_number), trigger=f"#T{trigger_number-2+i}",
                                             backcolor="#800000", forecolor="#FFFFFF", audio="false",
                                             display="true", print="false", message_to_tag="false",
                                             text=f"{tag_name}{'Thermal Fail' if i == 0 else ('Seal Fail' if i == 1 else ('Fail to Run' if i == 2 else ('RVSS Fault' if i == 3 else 'VFD Fault')))}")
                    else:
                        message = ET.Element('message', id=f"M{message_number}", trigger_value=str(i+1),
                                             identifier=str(message_number), trigger=f"#T{trigger_number-2+i}",
                                             backcolor="#800000", forecolor="#FFFFFF", audio="false",
                                             display="true", print="false", message_to_tag="false",
                                             text=f"{tag_name}{'VFD Fault'}")
                    messages.append(message)
                    message_number += 1

            elif data_type == 'More_Scale_Signal_For_Valve':
                for i in range(3):
                    message = ET.Element('message', id=f"M{message_number}", trigger_value=str(i+3),
                                         identifier=str(message_number), trigger=f"#T{trigger_number-1}",
                                         backcolor="#800000", forecolor="#FFFFFF", audio="false",
                                         display="true", print="false", message_to_tag="false",
                                         text=f"{tag_name}{'Position Fail' if i == 0 else ('Fail to Open' if i == 1 else 'Fail to Close')}")
                    messages.append(message)
                    message_number += 1

    # Debugging: Check contents of triggers and messages
    print(f"Total Triggers: {len(triggers)}")
    for t in triggers:
        print(f"Trigger: {ET.tostring(t, 'unicode')}")
    print(f"Total Messages: {len(messages)}")
    for m in messages:
        print(f"Message: {ET.tostring(m, 'unicode')}")

    # Create XML structure
    root_elem = ET.Element('root')

    # Append triggers
    triggers_elem = ET.SubElement(root_elem, 'triggers')
    for trigger in triggers:
        triggers_elem.append(trigger)

    # Append messages
    messages_elem = ET.SubElement(root_elem, 'messages')
    for message in messages:
        messages_elem.append(message)

    # Convert XML to string with proper formatting
    rough_string = ET.tostring(root_elem, 'utf-8')

    # Write the raw XML to a file
    raw_xml_file_path = xml_file_path.replace(".xml", "_RAW.xml")
    with open(raw_xml_file_path, 'wb') as raw_file:
        raw_file.write(rough_string)
    
    print(f"Raw XML file has been saved successfully at {raw_xml_file_path}")

    # Pretty print the XML
    try:
        reparsed = minidom.parseString(rough_string)
        pretty_xml = reparsed.toprettyxml(indent="  ")

        # Insert a blank line between <triggers> and <messages>
        lines = pretty_xml.split('\n')
        for i, line in enumerate(lines):
            if "<triggers>" in line:
                triggers_end = i
            if "<messages>" in line:
                messages_start = i
                break

        formatted_xml = "\n".join(lines[:triggers_end+1] + [""] + lines[messages_start:])
    except Exception as e:
        print(f"Error during pretty printing: {e}")
        formatted_xml = rough_string.decode('utf-8')

    # Write the pretty-printed XML to a file
    pretty_xml_file_path = xml_file_path.replace(".xml", "_PRETTY.xml")
    with open(pretty_xml_file_path, 'w', encoding='utf-8') as pretty_file:
        pretty_file.write(formatted_xml)
    
    print(f"Pretty XML file has been saved successfully at {pretty_xml_file_path}")

if __name__ == "__main__":
    process_csv_to_xml()
Test file and resultant XML files are attached.
(Jun-14-2024, 08:36 PM)marksy95 Wrote: [ -> ]I have code that successfully creates a desired XML file from a provided CSV file. However, I'm trying to format it to be easier to read. I want the list of "triggers" and list of "messages" to be separated by a blank line and each "trigger" and each "message" should have its own line. Trying to use minidom, but the "pretty" file loses the triggers for some reason.

Code:

import csv
import os
import tkinter as tk
from tkinter import filedialog, simpledialog
import xml.etree.ElementTree as ET
from xml.dom import minidom

def process_csv_to_xml():
    # Initialize tkinter
    root = tk.Tk()
    root.withdraw()  # Hide main window

    # Prompt for CSV file
    csv_file_path = filedialog.askopenfilename(title="Select 'Tag Export' CSV file", filetypes=[("CSV files", "*.csv")])
    if not csv_file_path:
        print("No file selected.")
        return

    # Prompt for PLC Shortcut with default value
    plc_shortcut = simpledialog.askstring("Input", "Enter PLC Shortcut (STRING):", initialvalue="SCP", parent=root)
    if not plc_shortcut:
        print("PLC Shortcut not provided.")
        return

    # Prompt for Starting Trigger ID with default value
    starting_trigger_id = simpledialog.askinteger("Input", "Enter Starting Trigger ID (INT):", initialvalue=1, parent=root)
    if starting_trigger_id is None:
        print("Starting Trigger ID not provided.")
        return

    # Prompt for Starting Message ID with default value
    starting_message_id = simpledialog.askinteger("Input", "Enter Starting Message ID (INT):", initialvalue=1, parent=root)
    if starting_message_id is None:
        print("Starting Message ID not provided.")
        return
        
    # Prompt for XML file name with default value
    xml_file_path = filedialog.asksaveasfilename(
        title="Save XML file",
        defaultextension=".xml",
        filetypes=[("XML files", "*.xml")],
        initialfile="Alarms",
        initialdir=os.path.dirname(csv_file_path)
    )
    if not xml_file_path:
        print("XML file not saved.")
        return

    triggers = []
    messages = []
    trigger_number = starting_trigger_id
    message_number = starting_message_id

    with open(csv_file_path, 'r', newline='') as csv_file:
        csv_reader = csv.reader(csv_file)
        for _ in range(6):
            next(csv_reader)  # Skip first 6 lines
        headers = next(csv_reader)  # Line 7 is the headers

        for row in csv_reader:
            tag_type = row[headers.index('TYPE')]
            tag_name = row[headers.index('NAME')]
            data_type = row[headers.index('DATATYPE')]

            # Debug info
            print(f"Processing tag: Name='{tag_name}', DataType='{data_type}'")

            if tag_type == 'TAG':
                if data_type == 'All_DigitalAlm_ActiveResumeDelay':
                    trigger = ET.Element('trigger', id=f"T{trigger_number}", type="value",
                                         ack_all_value="0", use_ack_all="false", ack_tag="",
                                         exp=f"{{[{plc_shortcut}]{tag_name}.ALM_IND}}",
                                         message_tag="", message_handshake_exp="",
                                         message_notification_tag="", remote_ack_exp="",
                                         remote_ack_handshake_tag="", label=f"Label{trigger_number}",
                                         handshake_tag="")
                    triggers.append(trigger)
                    trigger_number += 1

                elif data_type == 'All_ScaleSignal_2SPFailAlm':
                    trigger = ET.Element('trigger', id=f"T{trigger_number}", type="value",
                                         ack_all_value="0", use_ack_all="false", ack_tag="",
                                         exp=f"{{[{plc_shortcut}]{tag_name}.ALM_Display}}",
                                         message_tag="", message_handshake_exp="",
                                         message_notification_tag="", remote_ack_exp="",
                                         remote_ack_handshake_tag="", label=f"Label{trigger_number}",
                                         handshake_tag="")
                    triggers.append(trigger)
                    trigger_number += 1

                elif data_type == 'Wtr_Motor_Alm':
                    trigger1 = ET.Element('trigger', id=f"T{trigger_number}", type="bit",
                                          ack_all_value="0", use_ack_all="false", ack_tag="",
                                          exp=f"{{[{plc_shortcut}]{tag_name}.Thermal_Seal_Status}}",
                                          message_tag="", message_handshake_exp="",
                                          message_notification_tag="", remote_ack_exp="",
                                          remote_ack_handshake_tag="", label=f"Label{trigger_number}",
                                          handshake_tag="")
                    triggers.append(trigger1)
                    trigger_number += 1

                    trigger2 = ET.Element('trigger', id=f"T{trigger_number}", type="value",
                                          ack_all_value="0", use_ack_all="false", ack_tag="",
                                          exp=f"{{[{plc_shortcut}]{tag_name}.Run_Fail_Status}}",
                                          message_tag="", message_handshake_exp="",
                                          message_notification_tag="", remote_ack_exp="",
                                          remote_ack_handshake_tag="", label=f"Label{trigger_number}",
                                          handshake_tag="")
                    triggers.append(trigger2)
                    trigger_number += 1

                elif data_type == 'More_Scale_Signal_For_Valve':
                    trigger = ET.Element('trigger', id=f"T{trigger_number}", type="value",
                                         ack_all_value="0", use_ack_all="false", ack_tag="",
                                         exp=f"{{[{plc_shortcut}]{tag_name}.Open_Close_Status}}",
                                         message_tag="", message_handshake_exp="",
                                         message_notification_tag="", remote_ack_exp="",
                                         remote_ack_handshake_tag="", label=f"Label{trigger_number}",
                                         handshake_tag="")
                    triggers.append(trigger)
                    trigger_number += 1

            # Handle messages
            if data_type == 'All_DigitalAlm_ActiveResumeDelay':
                message = ET.Element('message', id=f"M{message_number}", trigger_value="1",
                                     identifier=str(message_number), trigger=f"#T{trigger_number-1}",
                                     backcolor="#800000", forecolor="#FFFFFF", audio="false",
                                     display="true", print="false", message_to_tag="false",
                                     text=tag_name)
                messages.append(message)
                message_number += 1

            elif data_type == 'All_ScaleSignal_2SPFailAlm':
                for i in range(5):
                    message = ET.Element('message', id=f"M{message_number}", trigger_value=str(i+1),
                                         identifier=str(message_number), trigger=f"#T{trigger_number-1}",
                                         backcolor="#800000", forecolor="#FFFFFF", audio="false",
                                         display="true", print="false", message_to_tag="false",
                                         text=f"{tag_name}{'Low Float' if i == 0 else ('Low' if i == 1 else ('Signal Fail' if i == 2 else ('High' if i == 3 else 'High Float')))}")
                    messages.append(message)
                    message_number += 1

            elif data_type == 'Wtr_Motor_Alm':
                for i in range(5):
                    if i < 3:
                        message = ET.Element('message', id=f"M{message_number}", trigger_value=str(i+1),
                                             identifier=str(message_number), trigger=f"#T{trigger_number-2+i}",
                                             backcolor="#800000", forecolor="#FFFFFF", audio="false",
                                             display="true", print="false", message_to_tag="false",
                                             text=f"{tag_name}{'Thermal Fail' if i == 0 else ('Seal Fail' if i == 1 else ('Fail to Run' if i == 2 else ('RVSS Fault' if i == 3 else 'VFD Fault')))}")
                    else:
                        message = ET.Element('message', id=f"M{message_number}", trigger_value=str(i+1),
                                             identifier=str(message_number), trigger=f"#T{trigger_number-2+i}",
                                             backcolor="#800000", forecolor="#FFFFFF", audio="false",
                                             display="true", print="false", message_to_tag="false",
                                             text=f"{tag_name}{'VFD Fault'}")
                    messages.append(message)
                    message_number += 1

            elif data_type == 'More_Scale_Signal_For_Valve':
                for i in range(3):
                    message = ET.Element('message', id=f"M{message_number}", trigger_value=str(i+3),
                                         identifier=str(message_number), trigger=f"#T{trigger_number-1}",
                                         backcolor="#800000", forecolor="#FFFFFF", audio="false",
                                         display="true", print="false", message_to_tag="false",
                                         text=f"{tag_name}{'Position Fail' if i == 0 else ('Fail to Open' if i == 1 else 'Fail to Close')}")
                    messages.append(message)
                    message_number += 1

    # Debugging: Check contents of triggers and messages
    print(f"Total Triggers: {len(triggers)}")
    for t in triggers:
        print(f"Trigger: {ET.tostring(t, 'unicode')}")
    print(f"Total Messages: {len(messages)}")
    for m in messages:
        print(f"Message: {ET.tostring(m, 'unicode')}")

    # Create XML structure
    root_elem = ET.Element('root')

    # Append triggers
    triggers_elem = ET.SubElement(root_elem, 'triggers')
    for trigger in triggers:
        triggers_elem.append(trigger)

    # Append messages
    messages_elem = ET.SubElement(root_elem, 'messages')
    for message in messages:
        messages_elem.append(message)

    # Convert XML to string with proper formatting
    rough_string = ET.tostring(root_elem, 'utf-8')

    # Write the raw XML to a file
    raw_xml_file_path = xml_file_path.replace(".xml", "_RAW.xml")
    with open(raw_xml_file_path, 'wb') as raw_file:
        raw_file.write(rough_string)
    
    print(f"Raw XML file has been saved successfully at {raw_xml_file_path}")

    # Pretty print the XML
    try:
        reparsed = minidom.parseString(rough_string)
        pretty_xml = reparsed.toprettyxml(indent="  ")

        # Insert a blank line between <triggers> and <messages>
        lines = pretty_xml.split('\n')
        for i, line in enumerate(lines):
            if "<triggers>" in line:
                triggers_end = i
            if "<messages>" in line:
                messages_start = i
                break

        formatted_xml = "\n".join(lines[:triggers_end+1] + [""] + lines[messages_start:])
    except Exception as e:
        print(f"Error during pretty printing: {e}")
        formatted_xml = rough_string.decode('utf-8')

    # Write the pretty-printed XML to a file
    pretty_xml_file_path = xml_file_path.replace(".xml", "_PRETTY.xml")
    with open(pretty_xml_file_path, 'w', encoding='utf-8') as pretty_file:
        pretty_file.write(formatted_xml)
    
    print(f"Pretty XML file has been saved successfully at {pretty_xml_file_path}")

if __name__ == "__main__":
    process_csv_to_xml()
Test file and resultant XML files are attached.

Seems like the only real issue is trying to join them back together with the newline.
you can use pandas to both read a csv file, and export to xml with just a few lines of code.
  1. To read csv file see:
  2. To output as xml see
As for finding files in a directory, use pathlib.
To get a list of csv files in any directory:
from pathlib import Path

mycsvdir = Path("./data/csv")
csvfilelist = [filename for filename in mydir.iterdir() if filename.is_file and filename.suffix == '.csv']