Jul-12-2023, 04:19 PM
Hi,
I am new here, so please bear with me, as I am a beginner.
The code below contains a function called compute_percentages.
I would like to ask why perc[-2] or perc[-1], for example, works perfectly inside that function, but when I do this:
I would be very grateful for ideas, thank you.
I am new here, so please bear with me, as I am a beginner.
The code below contains a function called compute_percentages.
I would like to ask why perc[-2] or perc[-1], for example, works perfectly inside that function, but when I do this:
perc = (source["value"] / source["value"].sum()) * 100
and then this:
perc[-2]
it throws an error: "ValueError: -2 is not in range".
I would be very grateful for ideas, thank you.
import pandas as pd
import altair as alt

# Survey answers in long format: one row per (question, answer type) with
# the number of respondents who chose that answer.
source = pd.DataFrame([
    {"question": "Question 1", "type": "Strongly disagree", "value": 24},
    {"question": "Question 1", "type": "Disagree", "value": 294},
    {"question": "Question 1", "type": "Neither agree nor disagree", "value": 594},
    {"question": "Question 1", "type": "Agree", "value": 1927},
    {"question": "Question 1", "type": "Strongly agree", "value": 376},
    {"question": "Question 2", "type": "Strongly disagree", "value": 2},
    {"question": "Question 2", "type": "Disagree", "value": 2},
    {"question": "Question 2", "type": "Neither agree nor disagree", "value": 0},
    {"question": "Question 2", "type": "Agree", "value": 7},
    {"question": "Question 2", "type": "Strongly agree", "value": 11},
    {"question": "Question 3", "type": "Strongly disagree", "value": 2},
    {"question": "Question 3", "type": "Disagree", "value": 0},
    {"question": "Question 3", "type": "Neither agree nor disagree", "value": 2},
    {"question": "Question 3", "type": "Agree", "value": 4},
    {"question": "Question 3", "type": "Strongly agree", "value": 2},
    {"question": "Question 4", "type": "Strongly disagree", "value": 0},
    {"question": "Question 4", "type": "Disagree", "value": 2},
    {"question": "Question 4", "type": "Neither agree nor disagree", "value": 1},
    {"question": "Question 4", "type": "Agree", "value": 7},
    {"question": "Question 4", "type": "Strongly agree", "value": 6},
    {"question": "Question 5", "type": "Strongly disagree", "value": 0},
    {"question": "Question 5", "type": "Disagree", "value": 1},
    {"question": "Question 5", "type": "Neither agree nor disagree", "value": 3},
    {"question": "Question 5", "type": "Agree", "value": 16},
    {"question": "Question 5", "type": "Strongly agree", "value": 4},
    {"question": "Question 6", "type": "Strongly disagree", "value": 1},
    {"question": "Question 6", "type": "Disagree", "value": 1},
    {"question": "Question 6", "type": "Neither agree nor disagree", "value": 2},
    {"question": "Question 6", "type": "Agree", "value": 9},
    {"question": "Question 6", "type": "Strongly agree", "value": 3},
    {"question": "Question 7", "type": "Strongly disagree", "value": 0},
    {"question": "Question 7", "type": "Disagree", "value": 0},
    {"question": "Question 7", "type": "Neither agree nor disagree", "value": 1},
    {"question": "Question 7", "type": "Agree", "value": 4},
    {"question": "Question 7", "type": "Strongly agree", "value": 0},
    {"question": "Question 8", "type": "Strongly disagree", "value": 0},
    {"question": "Question 8", "type": "Disagree", "value": 0},
    {"question": "Question 8", "type": "Neither agree nor disagree", "value": 0},
    {"question": "Question 8", "type": "Agree", "value": 0},
    {"question": "Question 8", "type": "Strongly agree", "value": 2},
])

# Add type_code that we can sort by
source["type_code"] = source["type"].map({
    "Strongly disagree": -2,
    "Disagree": -1,
    "Neither agree nor disagree": 0,
    "Agree": 1,
    "Strongly agree": 2,
})
# Bare expression kept from the original; presumably there to display the
# frame in a notebook cell. It has no effect when run as a script.
source


def compute_percentages(df):
    """Add centred percentage columns to the rows of one question.

    Args:
        df: Rows belonging to a single question. Must contain the columns
            ``value`` (answer count) and ``type_code`` (signed answer code,
            negative = disagree, 0 = neutral, positive = agree).

    Returns:
        A new DataFrame indexed by ``type_code`` (sorted ascending) with
        three added columns: ``percentage`` (share of the question's total
        ``value``), ``percentage_end`` and ``percentage_start`` (cumulative
        bar edges shifted so the middle of the neutral answer sits at 0).
        If the question's values sum to 0 the division yields NaN in all
        three columns.
    """
    # Set type_code as index and sort
    df = df.set_index("type_code").sort_index()

    # Compute percentage of value within the question group
    perc = (df["value"] / df["value"].sum()) * 100
    df["percentage"] = perc

    # NOTE: ``perc`` is indexed by type_code here, so an integer key such as
    # perc[-2] is a *label* lookup (the row whose type_code is -2), not
    # "second from the end".  The same expression on a Series with the
    # default 0..N-1 index fails because no row is labelled -2.
    #
    # Select by index mask instead of by individual labels: this equals
    # perc[-2] + perc[-1] + perc[0] / 2 on a complete 5-point scale, and it
    # does not raise when a question lacks one of the answer categories.
    codes = perc.index
    offset = perc[codes < 0].sum() + perc[codes == 0].sum() / 2

    # Compute percentage end, centered on "Neither agree nor disagree" (type_code 0)
    df["percentage_end"] = perc.cumsum() - offset

    # Compute percentage start by subtracting percent
    df["percentage_start"] = df["percentage_end"] - perc
    return df


# Process each question separately and stack the results.  Iterating over
# the groups hands every sub-frame to the function with its "question"
# column intact; relying on groupby(...).apply(...) for that is deprecated
# since pandas 2.2.  ignore_index=True discards the type_code index, just
# as reset_index(drop=True) did.
source = pd.concat(
    [compute_percentages(group) for _, group in source.groupby("question")],
    ignore_index=True,
)