Python Forum
Convert MultiLayer XML to DataFrame using Pandas - Printable Version

+- Python Forum (https://python-forum.io)
+-- Forum: Python Coding (https://python-forum.io/forum-7.html)
+--- Forum: General Coding Help (https://python-forum.io/forum-8.html)
+--- Thread: Convert MultiLayer XML to DataFrame using Pandas (/thread-33309.html)



Convert MultiLayer XML to DataFrame using Pandas - vsingh17 - Apr-14-2021

Hi Team,

Need your help, i'm noob in Python and trying to learn.
i have a Multilayer XML file and wanna convert into Dataframe using pandas.
example available online are very basic XML to Dataframe.
XML is big but i have got the extract required and is as below.

<?xml version="1.0" encoding="UTF-16" standalone="no" ?>
<QryPlanXML xmlns="http://schemas.teradata.com/queryplan" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schemas.teradata.com/queryplan http://schemas.teradata.com/queryplan/queryplan.xsd">

<Query CacheFlag="False" CollectTimeStamp="2021-04-08T07:40:23" QueryID="307177244358366240">
<ObjectDefs>
<Database DatabaseName="ABC_123" Id="DBABC_123">
<Relation Cardinality="2248788" Confidence="High" DatabaseId="DBABC_123" Id="REL1" Partitioned="false" RelationKind="Permanent" TableName="Transform" Version="13">
<Field CharType="1" DataLength="30" FieldID="1025" FieldName="Process_Name" FieldType="CV" Id="REL1_FLD1025" JoinAccessFrequency="0" RangeAccessFrequency="0" RelationId="REL1" ValueAccessFrequency="0"/>
<Field DataLength="4" FieldID="1026" FieldName="Business_Date" FieldType="DA" Id="REL1_FLD1026" JoinAccessFrequency="0" RangeAccessFrequency="0" RelationId="REL1" ValueAccessFrequency="0"/>
<Field DataLength="4" FieldID="1027" FieldName="Business_Date_Cycle_Num" FieldType="I" Id="REL1_FLD1027" JoinAccessFrequency="0" RangeAccessFrequency="0" RelationId="REL1" ValueAccessFrequency="0"/>
<Field FieldID="1030" FieldName="PROCESS_ID" Id="REL1_FLD1030" JoinAccessFrequency="0" RangeAccessFrequency="0" RelationId="REL1" ValueAccessFrequency="0"/>
<Index Id="REL1_IDX1" IndexNum="1" IndexType="Nonpartitioned Primary" OrderBy="false" QCFIndexFlag="false" RelationId="REL1" UniqueFlag="false">
<FieldRef Ref="REL1_FLD1025"/>
<FieldRef Ref="REL1_FLD1026"/>
<FieldRef Ref="REL1_FLD1027"/>
</Index>
</Relation>
<Relation Cardinality="28558918" Confidence="High" DatabaseId="DBABC_123" Id="REL2" Partitioned="false" RelationKind="Permanent" TableName="Process_Id_Log" Version="10">
<Field CharType="1" DataLength="30" FieldID="1025" FieldName="Process_Name" FieldType="CV" Id="REL2_FLD1025" JoinAccessFrequency="0" RangeAccessFrequency="0" RelationId="REL2" ValueAccessFrequency="0"/>
<Field FieldID="1026" FieldName="PROCESS_ID" Id="REL2_FLD1026" JoinAccessFrequency="0" RangeAccessFrequency="0" RelationId="REL2" ValueAccessFrequency="0"/>
<Field FieldID="1028" FieldName="STREAM_KEY" Id="REL2_FLD1028" JoinAccessFrequency="0" RangeAccessFrequency="0" RelationId="REL2" ValueAccessFrequency="0"/>
<Index Id="REL2_IDX1" IndexNum="1" IndexType="Nonpartitioned Primary" OrderBy="false" QCFIndexFlag="false" RelationId="REL2" UniqueFlag="false">
<FieldRef Ref="REL2_FLD1025"/>
</Index>
</Relation>
<Relation Cardinality="215" Confidence="High" DatabaseId="DBABC_123" Id="REL4" Partitioned="false" RelationKind="Permanent" TableName="Stream" Version="7">
<Field DataLength="2" FieldID="1025" FieldName="Stream_Key" FieldType="I2" Id="REL4_FLD1025" JoinAccessFrequency="0" RangeAccessFrequency="0" RelationId="REL4" ValueAccessFrequency="0"/>
<Index Id="REL4_IDX1" IndexNum="1" IndexType="Nonpartitioned Primary" OrderBy="false" QCFIndexFlag="false" RelationId="REL4" UniqueFlag="true">
<FieldRef Ref="REL4_FLD1025"/>
</Index>
</Relation>
</Database>
<Database DatabaseName="ABC_456" Id="DBABC_123">
<Relation Cardinality="185" Confidence="High" DatabaseId="ABC_456" Id="REL3" Partitioned="false" RelationKind="Permanent" TableName="Domain" Version="1">
<Field DataLength="2" FieldID="1025" FieldName="Key_Set_Id" FieldType="I2" Id="REL3_FLD1025" JoinAccessFrequency="0" RangeAccessFrequency="0" RelationId="REL3" ValueAccessFrequency="0"/>
<Field DataLength="2" FieldID="1026" FieldName="Domain_Id" FieldType="I2" Id="REL3_FLD1026" JoinAccessFrequency="0" RangeAccessFrequency="0" RelationId="REL3" ValueAccessFrequency="0"/>
<Index Id="REL3_IDX1" IndexNum="1" IndexType="Nonpartitioned Primary" OrderBy="false" QCFIndexFlag="false" RelationId="REL3" UniqueFlag="true">
<FieldRef Ref="REL3_FLD1025"/>
<FieldRef Ref="REL3_FLD1026"/>
</Index>
</Relation>
</Database>
</ObjectDefs>
</Query>
</QryPlanXML>

Requried Output:
DatabaseName TableName FieldName Id
ABC_123 Transform Process_Name REL1_FLD1025
ABC_123 Transform Business_Date REL1_FLD1026
ABC_123 Transform Business_Date_Cycle_Num REL1_FLD1027
ABC_123 Transform PROCESS_ID REL1_FLD1030
ABC_123 Process_Id_Log Process_Name REL2_FLD1025
ABC_123 Process_Id_Log PROCESS_ID REL2_FLD1026
ABC_123 Process_Id_Log STREAM_KEY REL2_FLD1028
ABC_456 Domain Key_Set_Id REL3_FLD1025
ABC_456 Domain Domain_Id REL3_FLD1026

need your help..! trying since 4-5 days but not getting the correct out.

Thanks,
Vijay Singh