Nov-07-2023, 10:56 PM
Updated code to account for potential gaps:
import numpy as np
import pandas as pd


def extract_value(l, name):
    """Return the 'value' of the first entry in *l* whose 'name' equals *name*.

    Args:
        l: An iterable of dicts, each carrying 'name' and 'value' keys
            (one cell of the 'preferences' column). A missing cell
            (None / NaN / pd.NA) is accepted and treated as "no entries".
        name: The preference name to look up, e.g. 'fruit'.

    Returns:
        The matching entry's 'value', or ``np.nan`` when *l* is missing or
        contains no entry with the requested name.
    """
    # A whole cell can be absent (None/NaN) rather than an empty list; iterating
    # it would raise TypeError, so report it as a gap like any other miss.
    if pd.api.types.is_scalar(l) and pd.isna(l):
        return np.nan
    # next() stops at the first match instead of scanning the whole list.
    return next((item['value'] for item in l if item['name'] == name), np.nan)


data = {
    'name': ['Alice', 'Bob', 'Clark'],
    'preferences': [
        [
            {'name': 'fruit', 'value': 'apple'},
            {'name': 'drink', 'value': 'lemonade'},
            {'name': 'food', 'value': 'pizza'},
        ],
        [
            {'name': 'fruit', 'value': 'orange'},
            {'name': 'drink', 'value': 'soda'},
            {'name': 'food', 'value': 'soup'},
        ],
        # Clark has no 'drink' entry: this is the gap the lookup must survive.
        [
            {'name': 'fruit', 'value': 'pear'},
            {'name': 'food', 'value': 'chicken'},
        ],
    ],
}
df = pd.DataFrame(data)

# Extract values from 'preferences' column, one new column per preference name.
for key in ('fruit', 'drink', 'food'):
    df[key] = df['preferences'].apply(extract_value, args=(key,))

# Drop the 'preferences' column
df = df.drop(columns=['preferences'])