Python Forum

Full Version: Splitting values in column in a pandas dataframe based on a condition
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
Hi All,

I have been trying to split data separated by a comma in the 'name' column into two columns, however not all of the rows in this column have commas.

Initially I tried to use the line below, but I think it tries to split every row, even the ones without commas, and so it fails:

I think I need a conditional statement that applies the split only to the cells in the 'name' column that contain a comma, but I couldn't work out how to do this.

Any help would be much appreciated.

# Attempted split of 'name' into two columns. str.split(',') returns a
# single-element list for a name without a comma, and zip(*...) stops at the
# shortest of those lists, so unpacking into two targets raises ValueError
# as soon as any name lacks a comma.
df1['new name'], df1['category'] = zip(*df1['name'].map(lambda x: x.split(',')))
Here is the full query that I want to apply the above to:

import requests
from lxml import etree
# Date bounds substituted into the request's <ToDate>/<FromDate> elements;
# both bounds are the same day.
fromDate = toDate = "2018-04-25"
# Value substituted into the request's <DateType> element.
dateType = "gasday"

def getXML(timeout=30):
    """Fetch the nominations publications from the MIPI public web service.

    POSTs a SOAP 1.2 ``GetPublicationDataWM`` envelope listing the
    nominations publication names, with the module-level ``toDate``,
    ``fromDate`` and ``dateType`` substituted into the request.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        The raw response body as bytes (``response.content``).

    Raises:
        requests.exceptions.RequestException: On connection errors, timeouts,
            or an HTTP error status (via ``raise_for_status``).
    """
    url = "http://marketinformation.natgrid.co.uk/MIPIws-public/public/publicwebservice.asmx"
    headers = {'content-type': 'application/soap+xml; charset=utf-8'}
    # Placeholders are filled positionally: ToDate, FromDate, DateType.
    body = """<soap12:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap12="http://www.w3.org/2003/05/soap-envelope">
        <soap12:Body>
            <GetPublicationDataWM xmlns="http://www.NationalGrid.com/MIPI/">
                <reqObject>
                    <LatestFlag>Y</LatestFlag>
                    <ApplicableForFlag>Y</ApplicableForFlag>
                    <ToDate>%s</ToDate>
                    <FromDate>%s</FromDate>
                    <DateType>%s</DateType>
                    <PublicationObjectNameList>
                    <string>Nominations, Aggregate Imbalance</string>
                    <string>Nominations, Aggregate Prevailing Nomination, Boiloff to LDZ Total</string>
                    <string>Nominations, Aggregate Prevailing Nomination, Interconnector Entry Total</string>
                    <string>Nominations, Aggregate Prevailing Nomination, Interconnector Exit Total</string>
                    <string>Nominations, Aggregate Prevailing Nomination, LDZ Direct Total</string>
                    <string>Nominations, Aggregate Prevailing Nomination, LNG Importation Total</string>
                    <string>Nominations, Aggregate Prevailing Nomination, NBP Trade Entry Total</string>
                    <string>Nominations, Aggregate Prevailing Nomination, NBP Trade Exit Total</string>
                    <string>Nominations, Aggregate Prevailing Nomination, Non Daily Meters Total</string>
                    <string>Nominations, Aggregate Prevailing Nomination, NTS Industrial Total</string>
                    <string>Nominations, Aggregate Prevailing Nomination, NTS Powerstation Total</string>
                    <string>Nominations, Aggregate Prevailing Nomination, Onshore field Total</string>
                    <string>Nominations, Aggregate Prevailing Nomination, Shrinkage (LDZ ) Total</string>
                    <string>Nominations, Aggregate Prevailing Nomination, Storage Entry Total</string>
                    <string>Nominations, Aggregate Prevailing Nomination, Storage Exit Total</string>
                    <string>Nominations, Aggregate Prevailing Nomination, Subterminals Total</string>
                    <string>Nominations, Aggregate Prevailing Nomination, Total Aggregate System Entry</string>
                    <string>Nominations, Aggregate Prevailing Nomination, Total Aggregate System Exit</string>
                    <string>Nominations, Prevailing Nomination, Aldbrough, Storage Entry</string>
                    <string>Nominations, Prevailing Nomination, Aldbrough, Storage Exit</string>
                    <string>Nominations, Prevailing Nomination, Avonmouth, Storage Entry</string>
                    <string>Nominations, Prevailing Nomination, Avonmouth, Storage Exit</string>
                    <string>Nominations, Prevailing Nomination, AvonmouthBL, Storage Boiloff</string>
                    <string>Nominations, Prevailing Nomination, Bacton, Interconnector Entry</string>
                    <string>Nominations, Prevailing Nomination, Bacton, Interconnector Exit</string>
                    <string>Nominations, Prevailing Nomination, Bacton-BBL, Interconnector Entry</string>
                    <string>Nominations, Prevailing Nomination, Bacton-BBL, Interconnector Exit</string>
                    <string>Nominations, Prevailing Nomination, Bacton-Perenco, Sub Terminal</string>
                    <string>Nominations, Prevailing Nomination, Bacton-Seal, Sub Terminal</string>
                    <string>Nominations, Prevailing Nomination, Bacton-Shell, Sub Terminal</string>
                    <string>Nominations, Prevailing Nomination, Bacton-Tullow, Sub Terminal</string>
                    <string>Nominations, Prevailing Nomination, Barrow, Sub Terminal</string>
                    <string>Nominations, Prevailing Nomination, BurtonPoint, Sub Terminal</string>
                    <string>Nominations, Prevailing Nomination, Dragon, LNG Importation</string>
                    <string>Nominations, Prevailing Nomination, DynevorArms, Storage Entry</string>
                    <string>Nominations, Prevailing Nomination, DynevorArms, Storage Exit</string>
                    <string>Nominations, Prevailing Nomination, DynevorArmsBN, Storage Boiloff</string>
                    <string>Nominations, Prevailing Nomination, Easington-Amethyst, Sub Terminal</string>
                    <string>Nominations, Prevailing Nomination, Easington-Dimlington, Sub Terminal</string>
                    <string>Nominations, Prevailing Nomination, Easington-Langeled, Sub Terminal</string>
                    <string>Nominations, Prevailing Nomination, Easington-WestSole, Sub Terminal</string>
                    <string>Nominations, Prevailing Nomination, Easington-York, Sub Terminal</string>
                    <string>Nominations, Prevailing Nomination, Glenmavis, Storage Entry</string>
                    <string>Nominations, Prevailing Nomination, Glenmavis, Storage Exit</string>
                    <string>Nominations, Prevailing Nomination, GlenmavisBL, Storage Boiloff</string>
                    <string>Nominations, Prevailing Nomination, GrainNTS1, LNG Importation</string>
                    <string>Nominations, Prevailing Nomination, GrainNTS2, LNG Importation</string>
                    <string>Nominations, Prevailing Nomination, Hatfield Moor, Storage Exit</string>
                    <string>Nominations, Prevailing Nomination, Hatfield, OnshoreField</string>
                    <string>Nominations, Prevailing Nomination, HatfieldMoor, Storage Entry</string>
                    <string>Nominations, Prevailing Nomination, Hilltop, Storage Entry</string>
                    <string>Nominations, Prevailing Nomination, Hilltop, Storage Exit</string>
                    <string>Nominations, Prevailing Nomination, HoleHouseFarm, Storage Entry</string>
                    <string>Nominations, Prevailing Nomination, HoleHousFm, Storage Exit</string>
                    <string>Nominations, Prevailing Nomination, Holford, OnshoreField</string>
                    <string>Nominations, Prevailing Nomination, Holford, Storage Entry</string>
                    <string>Nominations, Prevailing Nomination, Holford, Storage Exit</string>
                    <string>Nominations, Prevailing Nomination, Hornsea, Storage Entry</string>
                    <string>Nominations, Prevailing Nomination, Hornsea, Storage Exit</string>
                    <string>Nominations, Prevailing Nomination, HumblyGrov, Storage Entry</string>
                    <string>Nominations, Prevailing Nomination, HumblyGrov, Storage Exit</string>
                    <string>Nominations, Prevailing Nomination, IsleOfGrain, Storage Entry</string>
                    <string>Nominations, Prevailing Nomination, IsleOfGrainBL, Storage Boiloff</string>
                    <string>Nominations, Prevailing Nomination, Moffat, Interconnector Entry</string>
                    <string>Nominations, Prevailing Nomination, Moffat, Interconnector Exit</string>
                    <string>Nominations, Prevailing Nomination, Partington, Storage Entry</string>
                    <string>Nominations, Prevailing Nomination, Partington, Storage Exit</string>
                    <string>Nominations, Prevailing Nomination, PartingtonBL, Storage Boiloff</string>
                    <string>Nominations, Prevailing Nomination, Rough, Storage Exit</string>
                    <string>Nominations, Prevailing Nomination, RoughStor, Storage Entry</string>
                    <string>Nominations, Prevailing Nomination, SouthHook, LNG Importation</string>
                    <string>Nominations, Prevailing Nomination, STFergus-Mobil, Sub Terminal</string>
                    <string>Nominations, Prevailing Nomination, STFergus-Shell, Sub Terminal</string>
                    <string>Nominations, Prevailing Nomination, STFergus-NSMP, Sub Terminal</string>
                    <string>Nominations, Prevailing Nomination, Stublach, Storage Entry</string>
                    <string>Nominations, Prevailing Nomination, Stublach, Storage Exit</string>
                    <string>Nominations, Prevailing Nomination, Teesside-BP, Sub Terminal</string>
                    <string>Nominations, Prevailing Nomination, Teesside-PX, Sub Terminal</string>
                    <string>Nominations, Prevailing Nomination, Theddlethorpe, Sub Terminal</string>
                    <string>Nominations, Prevailing Nomination, Wytchfarm, OnshoreField</string>
                    </PublicationObjectNameList>
                </reqObject>
            </GetPublicationDataWM>
        </soap12:Body>
    </soap12:Envelope>""" % (toDate, fromDate, dateType)

    response = requests.post(url, data=body, headers=headers, timeout=timeout)
    # Surface HTTP-level failures here instead of handing an error page to
    # the XML parser downstream.
    response.raise_for_status()
    return response.content
# The response is fetched and parsed inside the date loop further down, which
# rebinds `root` before any use, so no request is issued here.
# map prefix 'd' to the default namespace URI
ns = {'d': 'http://www.NationalGrid.com/MIPI/'}
 

import pandas as pd
def _split_name(name):
    """Split a publication name at its first comma.

    Returns ``(new_name, category)`` with surrounding whitespace removed.
    ``category`` is ``None`` when the name contains no comma (or is ``None``),
    so names with and without a comma can fill the same two columns.
    """
    if name is None:
        return None, None
    head, comma, tail = name.partition(',')
    return head.strip(), (tail.strip() if comma else None)


# Column order of the result frame; every collected row follows it.
columns = ["applicable_at", "applicable_for", "name", "value",
           "quality_indicator", "substituted", "created_date"]

# map prefix 'd' to the default namespace URI
ns = {'d': 'http://www.NationalGrid.com/MIPI/'}

# Prefixes removed from each publication name, in the order they are applied.
name_prefixes = (
    'Nominations, Prevailing Nomination,',
    'Nominations, Aggregate Prevailing Nomination,',
    'Nominations,',
)

# Rows are gathered in a plain list and the frame is built once at the end;
# appending to a DataFrame row by row copies the frame on every insert.
rows = []
for _day in pd.date_range(fromDate, periods=1):
    # NOTE: getXML() builds its request from the module-level dates, so the
    # loop's date does not vary the query.
    root = etree.fromstring(getXML())

    publication_objects = root.xpath('//d:CLSMIPIPublicationObjectBE', namespaces=ns)

    for obj in publication_objects:
        name = obj.find('d:PublicationObjectName', ns).text
        if name is not None:
            # Strip the prefixes once per name instead of re-running the
            # replacement over the whole column after every inserted row.
            for prefix in name_prefixes:
                name = name.replace(prefix, '')

        for data in obj.findall('d:PublicationObjectData/d:CLSPublicationObjectDataBE', ns):
            applicable_at = pd.to_datetime(data.find('d:ApplicableAt', ns).text)
            applicable_for = pd.to_datetime(data.find('d:ApplicableFor', ns).text)
            value = float(data.find('d:Value', ns).text)
            # Previously read from 'd:Value', duplicating the value column.
            # NOTE(review): 'QualityIndicator' is assumed to be the element
            # name in the response -- confirm against a real reply. findtext
            # yields None if the element is absent.
            quality_indicator = data.findtext('d:QualityIndicator', namespaces=ns)
            substituted = data.find('d:Substituted', ns).text
            created_date = pd.to_datetime(data.find('d:CreatedDate', ns).text)

            rows.append((applicable_at, applicable_for, name, value,
                         quality_indicator, substituted, created_date))

# Row labels start at 1, matching the labels the row-by-row insert produced.
df1 = pd.DataFrame(rows, columns=columns, index=pd.RangeIndex(1, len(rows) + 1))

# Split 'name' into two columns; rows whose name has no comma get a category
# of None instead of breaking the split.
split_names = [_split_name(full_name) for full_name in df1['name']]
df1['new name'] = [new_name for new_name, _category in split_names]
df1['category'] = [category for _new_name, category in split_names]
          
Any help would be much appreciated :)