Convert MultiLayer XML to DataFrame using Pandas - Printable Version +- Python Forum (https://python-forum.io) +-- Forum: Python Coding (https://python-forum.io/forum-7.html) +--- Forum: General Coding Help (https://python-forum.io/forum-8.html) +--- Thread: Convert MultiLayer XML to DataFrame using Pandas (/thread-33309.html) |
Convert MultiLayer XML to DataFrame using Pandas - vsingh17 - Apr-14-2021 Hi Team, Need your help, i'm noob in Python and trying to learn. i have a Multilayer XML file and wanna convert into Dataframe using pandas. example available online are very basic XML to Dataframe. XML is big but i have got the extract required and is as below. <?xml version="1.0" encoding="UTF-16" standalone="no" ?> <QryPlanXML xmlns="http://schemas.teradata.com/queryplan" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schemas.teradata.com/queryplan http://schemas.teradata.com/queryplan/queryplan.xsd"> <Query CacheFlag="False" CollectTimeStamp="2021-04-08T07:40:23" QueryID="307177244358366240"> <ObjectDefs> <Database DatabaseName="ABC_123" Id="DBABC_123"> <Relation Cardinality="2248788" Confidence="High" DatabaseId="DBABC_123" Id="REL1" Partitioned="false" RelationKind="Permanent" TableName="Transform" Version="13"> <Field CharType="1" DataLength="30" FieldID="1025" FieldName="Process_Name" FieldType="CV" Id="REL1_FLD1025" JoinAccessFrequency="0" RangeAccessFrequency="0" RelationId="REL1" ValueAccessFrequency="0"/> <Field DataLength="4" FieldID="1026" FieldName="Business_Date" FieldType="DA" Id="REL1_FLD1026" JoinAccessFrequency="0" RangeAccessFrequency="0" RelationId="REL1" ValueAccessFrequency="0"/> <Field DataLength="4" FieldID="1027" FieldName="Business_Date_Cycle_Num" FieldType="I" Id="REL1_FLD1027" JoinAccessFrequency="0" RangeAccessFrequency="0" RelationId="REL1" ValueAccessFrequency="0"/> <Field FieldID="1030" FieldName="PROCESS_ID" Id="REL1_FLD1030" JoinAccessFrequency="0" RangeAccessFrequency="0" RelationId="REL1" ValueAccessFrequency="0"/> <Index Id="REL1_IDX1" IndexNum="1" IndexType="Nonpartitioned Primary" OrderBy="false" QCFIndexFlag="false" RelationId="REL1" UniqueFlag="false"> <FieldRef Ref="REL1_FLD1025"/> <FieldRef Ref="REL1_FLD1026"/> <FieldRef Ref="REL1_FLD1027"/> </Index> </Relation> <Relation Cardinality="28558918" Confidence="High" DatabaseId="DBABC_123" Id="REL2" Partitioned="false" RelationKind="Permanent" TableName="Process_Id_Log" Version="10"> <Field CharType="1" DataLength="30" FieldID="1025" FieldName="Process_Name" FieldType="CV" Id="REL2_FLD1025" JoinAccessFrequency="0" RangeAccessFrequency="0" RelationId="REL2" ValueAccessFrequency="0"/> <Field FieldID="1026" FieldName="PROCESS_ID" Id="REL2_FLD1026" JoinAccessFrequency="0" RangeAccessFrequency="0" RelationId="REL2" ValueAccessFrequency="0"/> <Field FieldID="1028" FieldName="STREAM_KEY" Id="REL2_FLD1028" JoinAccessFrequency="0" RangeAccessFrequency="0" RelationId="REL2" ValueAccessFrequency="0"/> <Index Id="REL2_IDX1" IndexNum="1" IndexType="Nonpartitioned Primary" OrderBy="false" QCFIndexFlag="false" RelationId="REL2" UniqueFlag="false"> <FieldRef Ref="REL2_FLD1025"/> </Index> </Relation> <Relation Cardinality="215" Confidence="High" DatabaseId="DBABC_123" Id="REL4" Partitioned="false" RelationKind="Permanent" TableName="Stream" Version="7"> <Field DataLength="2" FieldID="1025" FieldName="Stream_Key" FieldType="I2" Id="REL4_FLD1025" JoinAccessFrequency="0" RangeAccessFrequency="0" RelationId="REL4" ValueAccessFrequency="0"/> <Index Id="REL4_IDX1" IndexNum="1" IndexType="Nonpartitioned Primary" OrderBy="false" QCFIndexFlag="false" RelationId="REL4" UniqueFlag="true"> <FieldRef Ref="REL4_FLD1025"/> </Index> </Relation> </Database> <Database DatabaseName="ABC_456" Id="DBABC_123"> <Relation Cardinality="185" Confidence="High" DatabaseId="ABC_456" Id="REL3" Partitioned="false" RelationKind="Permanent" TableName="Domain" Version="1"> <Field DataLength="2" FieldID="1025" FieldName="Key_Set_Id" FieldType="I2" Id="REL3_FLD1025" JoinAccessFrequency="0" RangeAccessFrequency="0" RelationId="REL3" ValueAccessFrequency="0"/> <Field DataLength="2" FieldID="1026" FieldName="Domain_Id" FieldType="I2" Id="REL3_FLD1026" JoinAccessFrequency="0" RangeAccessFrequency="0" RelationId="REL3" ValueAccessFrequency="0"/> <Index Id="REL3_IDX1" IndexNum="1" IndexType="Nonpartitioned Primary" OrderBy="false" QCFIndexFlag="false" RelationId="REL3" UniqueFlag="true"> <FieldRef Ref="REL3_FLD1025"/> <FieldRef Ref="REL3_FLD1026"/> </Index> </Relation> </Database> </ObjectDefs> </Query> </QryPlanXML> Requried Output: DatabaseName TableName FieldName Id ABC_123 Transform Process_Name REL1_FLD1025 ABC_123 Transform Business_Date REL1_FLD1026 ABC_123 Transform Business_Date_Cycle_Num REL1_FLD1027 ABC_123 Transform PROCESS_ID REL1_FLD1030 ABC_123 Process_Id_Log Process_Name REL2_FLD1025 ABC_123 Process_Id_Log PROCESS_ID REL2_FLD1026 ABC_123 Process_Id_Log STREAM_KEY REL2_FLD1028 ABC_456 Domain Key_Set_Id REL3_FLD1025 ABC_456 Domain Domain_Id REL3_FLD1026 need your help..! trying since 4-5 days but not getting the correct out. Thanks, Vijay Singh |