May-23-2020, 03:30 PM
I'm doing some experiments with the classic Auto MPG dataset, building a model to predict the fuel efficiency of late-1970s and early-1980s automobiles.
I wrote two versions of the code:
one that downloads the dataset from the UCI Machine Learning repository (auto-mpg.data), and another that reads the file from my computer (auto-mpg.csv).
When I run the program using the dataset from UCI Machine Learning, it works well. When I run the program using the dataset from my computer (auto-mpg.csv), it doesn't work.
Both codes can be seen below.

1. Using the dataset from UCI Machine Learning
!pip install -q seaborn

from __future__ import absolute_import, division, print_function, unicode_literals

import pathlib

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

print(tf.__version__)

# download the dataset from the UCI repository
dataset_path = tf.keras.utils.get_file("auto-mpg.data", "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data")
dataset_path

column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
                'Acceleration', 'Model Year', 'Origin']
raw_dataset = pd.read_csv(dataset_path, names=column_names,
                          na_values="?", comment='\t',
                          sep=" ", skipinitialspace=True)

dataset = raw_dataset.copy()
dataset.tail(5)

dataset.isna().sum()

dataset = dataset.dropna()

# one-hot encode the Origin column
origin = dataset.pop('Origin')

dataset['USA'] = (origin == 1) * 1.0
dataset['Europe'] = (origin == 2) * 1.0
dataset['Japan'] = (origin == 3) * 1.0
dataset.tail()

# split into training and test sets
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(train_dataset.index)

sns.pairplot(train_dataset[["MPG", "Cylinders", "Displacement", "Weight"]], diag_kind="kde")

train_stats = train_dataset.describe()
train_stats.pop("MPG")
train_stats = train_stats.transpose()
train_stats

# separate the target (MPG) from the features
train_labels = train_dataset.pop('MPG')
test_labels = test_dataset.pop('MPG')

# normalize the features using the training statistics
def norm(x):
    return (x - train_stats['mean']) / train_stats['std']

normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)
2. When I use the dataset from my computer:

dataset_path = pd.read_csv(r'C:\Users\ee0547\Documents\DISSERTAÇÃO DALPIAZ\EXEMPLOS DE REDES_22_05_2020\auto-mpg.csv')
dataset_path

column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
                'Acceleration', 'Model Year', 'Origin']
raw_dataset = pd.read_csv(dataset_path, names=column_names,
                          na_values="?", comment='\t',
                          sep=" ", skipinitialspace=True)

dataset = raw_dataset.copy()
dataset.tail(5)
Error:---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-6-3381f4da9f77> in <module>
3 raw_dataset =pd.read_csv(dataset_path, names=column_names,
4 na_values = "?", comment='\t',
----> 5 sep=" ", skipinitialspace=True)
6
7 dataset = raw_dataset.copy()
~\Anaconda3\lib\site-packages\pandas\io\parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
674 )
675
--> 676 return _read(filepath_or_buffer, kwds)
677
678 parser_f.__name__ = name
~\Anaconda3\lib\site-packages\pandas\io\parsers.py in _read(filepath_or_buffer, kwds)
429 # See https://github.com/python/mypy/issues/1297
430 fp_or_buf, _, compression, should_close = get_filepath_or_buffer(
--> 431 filepath_or_buffer, encoding, compression
432 )
433 kwds["compression"] = compression
~\Anaconda3\lib\site-packages\pandas\io\common.py in get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode)
198 if not is_file_like(filepath_or_buffer):
199 msg = f"Invalid file path or buffer object type: {type(filepath_or_buffer)}"
--> 200 raise ValueError(msg)
201
202 return filepath_or_buffer, None, compression, False
ValueError: Invalid file path or buffer object type: <class 'pandas.core.frame.DataFrame'>
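From the error message, I think the problem in the second version is that dataset_path already holds a DataFrame (the result of the first pd.read_csv), and that DataFrame is then passed to pd.read_csv again. Below is a minimal sketch of what I believe the local-file version should look like; the csv_path name is mine, and I'm assuming auto-mpg.csv is comma separated and has no header row, so the tutorial's sep=" " and comment='\t' options are dropped:

import pandas as pd

# Hypothetical rewrite of the local-file version: keep the path as a plain string
# and call pd.read_csv only once, so a DataFrame is never passed back into read_csv.
csv_path = r'C:\Users\ee0547\Documents\DISSERTAÇÃO DALPIAZ\EXEMPLOS DE REDES_22_05_2020\auto-mpg.csv'

column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
                'Acceleration', 'Model Year', 'Origin']

# Assumption: auto-mpg.csv has no header row of its own; if it does,
# use header=0 instead of names=column_names.
raw_dataset = pd.read_csv(csv_path, names=column_names,
                          na_values="?", skipinitialspace=True)

dataset = raw_dataset.copy()
dataset.tail(5)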