Python Forum

Hi all, first post to the forum!

I'm getting an index error(shown after script) when running the following code, I would be very grateful if someone could explain why.

import time
import pandas as pd
import numpy as np

CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }
months = ['jan', 'feb', 'mar', 'apr',
        'may', 'jun', 'jul',
        'aug', 'sep', 'oct', 'nov', 'dec']
days = ['mon', 'tues', 'wed', 'thur', 'fri',
        'sat', 'sun']

def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.
    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')

#Get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs
    x = ''
    for item in CITY_DATA:
        if x == '':
            x += item.title()
        else:
            x += ', ' + item.title()

    city = input('Please enter one of the following cities: {}\n'.format(x)).lower()
    while city not in CITY_DATA:
        city = input('Invalid input. ' +
                    'please enter one of the following cities:\n{}\n'.format(x)).lower()

#Get user input for month (all, january, february, ... , june)
    x = ''
    for item in months:
        if x == '':
            x += item
        else:
            x += ', ' + item.title()

    month = input('Please enter one of the following to filter by month:\n{}\n'.format(x)).lower()
    while month not in months and month != 'all':
        month = input("\nInvalid selection. " +
                      "Please enter one of the following options to filter by month. " +
                      "\n(All, {})\n".format(x)).lower()

#Get user input for day of week (all, monday, tuesday, ... sunday)
    x = ''
    for item in days:
        if x == '':
            x += item.title()
        else:
            x += ', ' + item.title()

    day = input('Please enter one of the following to filter by day:\nAll, {}\n'.format(x)).lower()
    while day not in days and day != 'all':
        day = input("\nInvalid selection. " +
                    "Please enter one of the following options to filter by day." +
                    "\n(All, {})\n".format(x)).lower()

    print('-'*40)
    return city, month, day

def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.
    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """

    df = pd.read_csv(CITY_DATA.get(city))

    if 'Start Time' in df.columns:
        df['Start Time'] = pd.to_datetime(df['Start Time'])
        df['month'] = df['Start Time'].dt.month
        df['day_of_week'] = df['Start Time'].dt.weekday_name
        if month != 'all':
            df = df[df['month'] == (months.index(month) + 1) ]

        if day != 'all':
            df = df[df['day_of_week'] == day.title()]

    if 'End Time' in df.columns:
        df['End Time'] = pd.to_datetime(df['End Time'])

    print('\nFILTER SECTION:\nCity: {}\nMonth: {}\n Day: {}'.format(city, month, day))

    return df

def time_stats(df):
    """Displays statistics on the most frequent times of travel."""

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    # TO DO: display the most common month
    print('The most common month is: ', months[df['month'].mode()[0] - 1].title())

    # TO DO: display the most common day of week
    print('The most common day of the week is: ', df['day_of_week'].mode()[0])

    # TO DO: display the most common start hour
    df['start_hour'] = df['Start Time'].dt.hour
    print('Most common start hour: ', df['start_hour'].mode()[0])

    print('\n This took %s seconds.' % (time.time() - start_time))
    print('-'*40)

def station_stats(df):
    """Displays statistics on the most popular stations and trip."""

    print('\nCALCULATING THE MOST POPULAR STATION AND TRIP...\n')
    start_time = time.time()

#Display most commonly used start station
    df2 = df[df['start_hour'] == df['start_hour'].mode()[0]]
    print('The most commonly used start station: ', df2['Start Station'].mode()[0])

#Display most commonly used end station
    print('The most commonly used end station: ', df['End Station'].mode()[0])

#Display most frequent combination of start station and end station trip
    x = str(df.groupby(['Start Station', 'End Station']).size().sort_values(ascending=False).head(1).index[0])
    x = x.replace('(', '').replace(')', '').replace("'", '')
    print('Most frequent combination of start and end station: ',x.strip())

    print('\n This took %s seconds.' % (time.time() - start_time))
    print('-'*40)

def seconds_to_datestamp(seconds):
    """Format seconds to days, hours, minutes, seconds"""

    seconds_in_day = 86400
    seconds_in_hour = 3600
    seconds_in_minuet = 60

    day = seconds // seconds_in_day
    seconds = seconds - (days * seconds_in_day)

    hours = seconds // seconds_in_hour
    seconds = seconds - (minuets * seconds_in_hour)

    minuets = seconds // seconds_in_minuet
    seconds = secinds - (minuets * seconds_in_minuet)
    return'{0:.0f} days, {1:.0f} hours, {2:.0f} minuets, {3:0f} seconds'.format(days, hours, minuets, seconds)

def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration."""

    print('\nCALCULATING TRIP DURATION...\n')
    start_time = time.time()

#Display total travel time
    print('Total Travel Time: ', seconds_to_datestamp(df['Trip Duration'].sum()))

#Display mean travel time
    print('Mean Travel Time: ', seconds_to_datestamp(df['Trip Duration'].mean()))

    print('\nThis took %s seconds.' % (time.time() - start_time))
    print('-'*40)

def user_stats(df):
    """Displays statistics on bikeshare users."""

    print('CALCULATING USER STATS...\n')
    start_time = time.time()

#Display counts of user types
    if 'User Type' in df.columns:
        x = '\n' + df['User Type'].value_counts().to_string()
    else:
        x = 'N/A'
    print('Count of user types: {}'.format(x))

#Display counts of gender
    if 'Gender' in df.columns:
        x = '\n' + df['Gender'].value_counts().to_string()
    else:
        x = 'N/A'
    print('\nCount of Genders: {}'.format(x))

#Display earliest, most recent, and most common year of birth
    if 'Birth Year' in df.columns:
        x = int(df['Birth Year'].min())
    else:
        x = 'N/A'
    print('\nEarliest year of birth: {}'.format(x))

    if 'Birth Year' in df.columns:
        x = int(df['Birth Year'].max())
    else:
        x = 'N/A'
    print('\n Most recent birth year: {}'.format(x))

    if 'Birth Year' in df.columns:
        x = int(df['Birth Year'].mode()[0])
    else:
        x = 'N/A'
    print('Most common year of birth: {}'.format(x))

    print('\nThis took %s seconds.' % (time.time() - start_time))
    print('-'*40)

def main():

    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)

        counter = 1
        for i, (index,row) in enumerate(df.iterrows()):
            print('\n', row)
            if counter%5 == 0:
                if input('\nWould you like to view more records? (Yes or No)').lower() == 'no':
                    break
                elif (i == df.shape[0]):
                    break
                counter += 1

        restart = input('\nWould you like to restart? (Yes or No)').lower()
        if restart.lower() != 'yes':
            break
            

if __name__ == '__main__':
    main()

Error:Traceback (most recent call last):
  File "/opt/conda/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 3118, in get_value
    tz=getattr(series.dtype, 'tz', None))
  File "pandas/_libs/index.pyx", line 106, in pandas._libs.index.IndexEngine.get_value
  File "pandas/_libs/index.pyx", line 114, in pandas._libs.index.IndexEngine.get_value
  File "pandas/_libs/index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 958, in pandas._libs.hashtable.Int64HashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 964, in pandas._libs.hashtable.Int64HashTable.get_item
KeyError: 0

During handling of the above exception, another exception occurred:

Error:Traceback (most recent call last):
  File "bikeshare.py", line 239, in <module>
    main()
  File "bikeshare.py", line 218, in main
    time_stats(df)
  File "bikeshare.py", line 105, in time_stats
    print('The most common month is: ', months[df['month'].mode()[0] - 1].title())
  File "/opt/conda/lib/python3.6/site-packages/pandas/core/series.py", line 767, in __getitem__
    result = self.index.get_value(self, key)
  File "/opt/conda/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 3124, in get_value
    return libindex.get_value_box(s, key)
  File "pandas/_libs/index.pyx", line 55, in pandas._libs.index.get_value_box
  File "pandas/_libs/index.pyx", line 70, in pandas._libs.index.get_value_box
IndexError: index out of bounds

I have replaced:

days = ['mon', 'tues', 'wed', 'thur', 'fri', 'sat', 'sun']

with:

days = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']

However, I am getting the following ErrorMessage:

Error:Traceback (most recent call last):
  File "bikeshare.py", line 238, in <module>
    main()
  File "bikeshare.py", line 219, in main
    trip_duration_stats(df)
  File "bikeshare.py", line 161, in trip_duration_stats
    print('Total Travel Time: ', seconds_to_datestamp(df['Trip Duration'].sum()))
  File "bikeshare.py", line 145, in seconds_to_datestamp
    seconds = seconds - (days * seconds_in_day)
TypeError: ufunc 'subtract' did not contain a loop with signature matching types dtype('<U10') dtype('<U10') dtype('<U10')

    # TO DO: display the most common month
    print('The most common month is: ', months[df['month'].mode()[0] - 1].title())

This is the line that throws the error "IndexError: index out of bounds"
so you need to have a closer look at that line.

Hi ThiomasL. Many thanks for your message. Having converted the days from days -[mon,tue,wed...ect] to days[monday, tuesday,wednesday...etc]I am not seeing the error as posted above. I was wondering if you could see why?

LeoGER

LeoGER

ThomasL

LeoGER