Python Forum

Full Version: IndexError: index out of bounds
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
Hi all, first post to the forum!

I'm getting an index error(shown after script) when running the following code, I would be very grateful if someone could explain why.

import time
import pandas as pd
import numpy as np

CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }
months = ['jan', 'feb', 'mar', 'apr',
        'may', 'jun', 'jul',
        'aug', 'sep', 'oct', 'nov', 'dec']
days = ['mon', 'tues', 'wed', 'thur', 'fri',
        'sat', 'sun']

def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.
    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')

#Get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs
    x = ''
    for item in CITY_DATA:
        if x == '':
            x += item.title()
        else:
            x += ', ' + item.title()

    city = input('Please enter one of the following cities: {}\n'.format(x)).lower()
    while city not in CITY_DATA:
        city = input('Invalid input. ' +
                    'please enter one of the following cities:\n{}\n'.format(x)).lower()

#Get user input for month (all, january, february, ... , june)
    x = ''
    for item in months:
        if x == '':
            x += item
        else:
            x += ', ' + item.title()

    month = input('Please enter one of the following to filter by month:\n{}\n'.format(x)).lower()
    while month not in months and month != 'all':
        month = input("\nInvalid selection. " +
                      "Please enter one of the following options to filter by month. " +
                      "\n(All, {})\n".format(x)).lower()

#Get user input for day of week (all, monday, tuesday, ... sunday)
    x = ''
    for item in days:
        if x == '':
            x += item.title()
        else:
            x += ', ' + item.title()

    day = input('Please enter one of the following to filter by day:\nAll, {}\n'.format(x)).lower()
    while day not in days and day != 'all':
        day = input("\nInvalid selection. " +
                    "Please enter one of the following options to filter by day." +
                    "\n(All, {})\n".format(x)).lower()

    print('-'*40)
    return city, month, day

def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.
    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """

    df = pd.read_csv(CITY_DATA.get(city))

    if 'Start Time' in df.columns:
        df['Start Time'] = pd.to_datetime(df['Start Time'])
        df['month'] = df['Start Time'].dt.month
        df['day_of_week'] = df['Start Time'].dt.weekday_name
        if month != 'all':
            df = df[df['month'] == (months.index(month) + 1) ]

        if day != 'all':
            df = df[df['day_of_week'] == day.title()]

    if 'End Time' in df.columns:
        df['End Time'] = pd.to_datetime(df['End Time'])

    print('\nFILTER SECTION:\nCity: {}\nMonth: {}\n Day: {}'.format(city, month, day))

    return df

def time_stats(df):
    """Displays statistics on the most frequent times of travel."""

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    # TO DO: display the most common month
    print('The most common month is: ', months[df['month'].mode()[0] - 1].title())

    # TO DO: display the most common day of week
    print('The most common day of the week is: ', df['day_of_week'].mode()[0])

    # TO DO: display the most common start hour
    df['start_hour'] = df['Start Time'].dt.hour
    print('Most common start hour: ', df['start_hour'].mode()[0])

    print('\n This took %s seconds.' % (time.time() - start_time))
    print('-'*40)

def station_stats(df):
    """Displays statistics on the most popular stations and trip."""

    print('\nCALCULATING THE MOST POPULAR STATION AND TRIP...\n')
    start_time = time.time()

#Display most commonly used start station
    df2 = df[df['start_hour'] == df['start_hour'].mode()[0]]
    print('The most commonly used start station: ', df2['Start Station'].mode()[0])

#Display most commonly used end station
    print('The most commonly used end station: ', df['End Station'].mode()[0])

#Display most frequent combination of start station and end station trip
    x = str(df.groupby(['Start Station', 'End Station']).size().sort_values(ascending=False).head(1).index[0])
    x = x.replace('(', '').replace(')', '').replace("'", '')
    print('Most frequent combination of start and end station: ',x.strip())

    print('\n This took %s seconds.' % (time.time() - start_time))
    print('-'*40)

def seconds_to_datestamp(seconds):
    """Format seconds to days, hours, minutes, seconds"""

    seconds_in_day = 86400
    seconds_in_hour = 3600
    seconds_in_minuet = 60

    day = seconds // seconds_in_day
    seconds = seconds - (days * seconds_in_day)

    hours = seconds // seconds_in_hour
    seconds = seconds - (minuets * seconds_in_hour)

    minuets = seconds // seconds_in_minuet
    seconds = secinds - (minuets * seconds_in_minuet)
    return'{0:.0f} days, {1:.0f} hours, {2:.0f} minuets, {3:0f} seconds'.format(days, hours, minuets, seconds)

def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration."""

    print('\nCALCULATING TRIP DURATION...\n')
    start_time = time.time()

#Display total travel time
    print('Total Travel Time: ', seconds_to_datestamp(df['Trip Duration'].sum()))

#Display mean travel time
    print('Mean Travel Time: ', seconds_to_datestamp(df['Trip Duration'].mean()))

    print('\nThis took %s seconds.' % (time.time() - start_time))
    print('-'*40)

def user_stats(df):
    """Displays statistics on bikeshare users."""

    print('CALCULATING USER STATS...\n')
    start_time = time.time()

#Display counts of user types
    if 'User Type' in df.columns:
        x = '\n' + df['User Type'].value_counts().to_string()
    else:
        x = 'N/A'
    print('Count of user types: {}'.format(x))

#Display counts of gender
    if 'Gender' in df.columns:
        x = '\n' + df['Gender'].value_counts().to_string()
    else:
        x = 'N/A'
    print('\nCount of Genders: {}'.format(x))

#Display earliest, most recent, and most common year of birth
    if 'Birth Year' in df.columns:
        x = int(df['Birth Year'].min())
    else:
        x = 'N/A'
    print('\nEarliest year of birth: {}'.format(x))

    if 'Birth Year' in df.columns:
        x = int(df['Birth Year'].max())
    else:
        x = 'N/A'
    print('\n Most recent birth year: {}'.format(x))

    if 'Birth Year' in df.columns:
        x = int(df['Birth Year'].mode()[0])
    else:
        x = 'N/A'
    print('Most common year of birth: {}'.format(x))

    print('\nThis took %s seconds.' % (time.time() - start_time))
    print('-'*40)

def main():

    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)

        counter = 1
        for i, (index,row) in enumerate(df.iterrows()):
            print('\n', row)
            if counter%5 == 0:
                if input('\nWould you like to view more records? (Yes or No)').lower() == 'no':
                    break
                elif (i == df.shape[0]):
                    break
                counter += 1

        restart = input('\nWould you like to restart? (Yes or No)').lower()
        if restart.lower() != 'yes':
            break
            

if __name__ == '__main__':
    main()
Error:
Traceback (most recent call last): File "/opt/conda/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 3118, in get_value tz=getattr(series.dtype, 'tz', None)) File "pandas/_libs/index.pyx", line 106, in pandas._libs.index.IndexEngine.get_value File "pandas/_libs/index.pyx", line 114, in pandas._libs.index.IndexEngine.get_value File "pandas/_libs/index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc File "pandas/_libs/hashtable_class_helper.pxi", line 958, in pandas._libs.hashtable.Int64HashTable.get_item File "pandas/_libs/hashtable_class_helper.pxi", line 964, in pandas._libs.hashtable.Int64HashTable.get_item KeyError: 0
During handling of the above exception, another exception occurred:
Error:
Traceback (most recent call last): File "bikeshare.py", line 239, in <module> main() File "bikeshare.py", line 218, in main time_stats(df) File "bikeshare.py", line 105, in time_stats print('The most common month is: ', months[df['month'].mode()[0] - 1].title()) File "/opt/conda/lib/python3.6/site-packages/pandas/core/series.py", line 767, in __getitem__ result = self.index.get_value(self, key) File "/opt/conda/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 3124, in get_value return libindex.get_value_box(s, key) File "pandas/_libs/index.pyx", line 55, in pandas._libs.index.get_value_box File "pandas/_libs/index.pyx", line 70, in pandas._libs.index.get_value_box IndexError: index out of bounds
I have replaced:

days = ['mon', 'tues', 'wed', 'thur', 'fri', 'sat', 'sun']
with:
days = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
However, I am getting the following ErrorMessage:

Error:
Traceback (most recent call last): File "bikeshare.py", line 238, in <module> main() File "bikeshare.py", line 219, in main trip_duration_stats(df) File "bikeshare.py", line 161, in trip_duration_stats print('Total Travel Time: ', seconds_to_datestamp(df['Trip Duration'].sum())) File "bikeshare.py", line 145, in seconds_to_datestamp seconds = seconds - (days * seconds_in_day) TypeError: ufunc 'subtract' did not contain a loop with signature matching types dtype('<U10') dtype('<U10') dtype('<U10')
    # TO DO: display the most common month
    print('The most common month is: ', months[df['month'].mode()[0] - 1].title())
This is the line that throws the error "IndexError: index out of bounds"
so you need to have a closer look at that line.
Hi ThiomasL. Many thanks for your message. Having converted the days from days -[mon,tue,wed...ect] to days[monday, tuesday,wednesday...etc]I am not seeing the error as posted above. I was wondering if you could see why?