# Evaluate model on the Test Set

NIRAV_RAJ · December 8, 2020, 4:57pm

I do not understand why this happened.

X_test_prepared=full_pipeline.transform(X_test)

KeyError Traceback (most recent call last)
in
1 #Run full pipeline to transform the data
----> 2 X_test_prepared=full_pipeline.transform(X_test)
3 final_predictions = final_model.predict(X_test_prepared)

~\anaconda3\lib\site-packages\sklearn\pipeline.py in transform(self, X)
998 sum of n_components (output dimension) over transformers.
999 """
-> 1000 Xs = Parallel(n_jobs=self.n_jobs)(
1001 delayed(_transform_one)(trans, X, None, weight)
1002 for name, trans, weight in self._iter())

~\anaconda3\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
1027 # remaining jobs.
1028 self._iterating = False
-> 1029 if self.dispatch_one_batch(iterator):
1030 self._iterating = self._original_iterator is not None
1031

~\anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
845 return False
846 else:
–> 847 self._dispatch(tasks)
848 return True
849

~\anaconda3\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
763 with self._lock:
764 job_idx = len(self._jobs)
–> 765 job = self._backend.apply_async(batch, callback=cb)
766 # A job can complete so quickly than its callback is
767 # called before we get here, causing self._jobs to

~\anaconda3\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
206 def apply_async(self, func, callback=None):
207 """Schedule a func to be run"""
–> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)

~\anaconda3\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
570 # Don't delay the application, to avoid keeping the input
571 # arguments in memory
–> 572 self.results = batch()
573
574 def get(self):

~\anaconda3\lib\site-packages\joblib\parallel.py in __call__(self)
250 # change the default number of processes to -1
251 with parallel_backend(self._backend, n_jobs=self._n_jobs):
–> 252 return [func(*args, **kwargs)
253 for func, args, kwargs in self.items]
254

~\anaconda3\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
250 # change the default number of processes to -1
251 with parallel_backend(self._backend, n_jobs=self._n_jobs):
–> 252 return [func(*args, **kwargs)
253 for func, args, kwargs in self.items]
254

~\anaconda3\lib\site-packages\sklearn\pipeline.py in _transform_one(transformer, X, y, weight, **fit_params)
717
718 def _transform_one(transformer, X, y, weight, **fit_params):
–> 719 res = transformer.transform(X)
720 # if we have a weight for this transformer, multiply output
721 if weight is None:

~\anaconda3\lib\site-packages\sklearn\pipeline.py in _transform(self, X)
547 Xt = X
548 for _, _, transform in self._iter():
–> 549 Xt = transform.transform(Xt)
550 return Xt
551

in transform(self, X)
9 return self
10 def transform(self, X):
—> 11 return X[self.attribute_names].values

~\anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2804 if is_iterator(key):
2805 key = list(key)
-> 2806 indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1]
2807
2808 # take() does not accept boolean indexers

~\anaconda3\lib\site-packages\pandas\core\indexing.py in _get_listlike_indexer(self, key, axis, raise_missing)
1550 keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
1551
-> 1552 self._validate_read_indexer(
1553 keyarr, indexer, o._get_axis_number(axis), raise_missing=raise_missing
1554 )

~\anaconda3\lib\site-packages\pandas\core\indexing.py in _validate_read_indexer(self, key, indexer, axis, raise_missing)
1644 if not (self.name == "loc" and not raise_missing):
1645 not_found = list(set(key) - set(ax))
-> 1646 raise KeyError(f"{not_found} not in index")
1647
1648 # we skip the warning on Categorical/Interval

KeyError: "['median_house_value'] not in index"

I have dropped the column as Sandeep Giri sir said, but it still shows me an error.

#Get predictors and labels from the test set
#Predictors
X_test=strat_test_set.drop("median_house_value",axis=1)
#labels
y_test=strat_test_set["median_house_value"].copy()

NIRAV_RAJ · December 8, 2020, 4:57pm

How can i fix it??

rajtilakb · December 9, 2020, 5:51am

As suggested, please refer to the code given in the Jupyter notebook at our GitHub repository from the below link:

ml/end_to_end_project.ipynb at master · cloudxlab/ml (github.com)

NIRAV_RAJ · December 9, 2020, 12:39pm

I copied and pasted the code from the GitHub repository, but it is still giving that error.

How can I fix it??

rajtilakb · December 10, 2020, 4:53am

Nirav, instead of copying and pasting the code from our repository, please clone it to your lab using the command below on a web console in our lab and work on the local copy.

git clone https://github.com/cloudxlab/ml ~/ml

A lot of information often gets lost while copy/pasting. Please follow this process and let me know if you have any further challenges.

NIRAV_RAJ · December 10, 2020, 4:52pm

It says it already exists

fatal: destination path '/home/niravpandit00027136/ml' already exists and is not an empty directory.

but when i do

housing=pd.read_csv("housing.csv")

FileNotFoundError Traceback (most recent call last)
in
----> 1 housing=pd.read_csv('housing.csv')

/usr/local/anaconda/lib/python3.6/site-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
683 )
684
–> 685 return _read(filepath_or_buffer, kwds)
686
687 parser_f.__name__ = name

/usr/local/anaconda/lib/python3.6/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
455
456 # Create the parser.
–> 457 parser = TextFileReader(fp_or_buf, **kwds)
458
459 if chunksize or iterator:

/usr/local/anaconda/lib/python3.6/site-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
893 self.options["has_index_names"] = kwds["has_index_names"]
894
–> 895 self._make_engine(self.engine)
896
897 def close(self):

/usr/local/anaconda/lib/python3.6/site-packages/pandas/io/parsers.py in _make_engine(self, engine)
1133 def _make_engine(self, engine="c"):
1134 if engine == "c":
-> 1135 self._engine = CParserWrapper(self.f, **self.options)
1136 else:
1137 if engine == "python":

/usr/local/anaconda/lib/python3.6/site-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
1915 kwds["usecols"] = self.usecols
1916
-> 1917 self._reader = parsers.TextReader(src, **kwds)
1918 self.unnamed_cols = self._reader.unnamed_cols
1919

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()

FileNotFoundError: [Errno 2] File b'housing.csv' does not exist: b'housing.csv'

It says it does not exist.

rajtilakb · December 11, 2020, 5:07am

Please try the following line of code instead:

housing=pd.read_csv('../ml/machine_learning/datasets/housing/housing.csv')

NIRAV_RAJ · December 11, 2020, 12:21pm

I am still getting same error.

housing=pd.read_csv(’…/ml/machine_learning/datasets/housing/housing.csv’)

FileNotFoundError Traceback (most recent call last)
in
----> 1 housing=pd.read_csv(’…/ml/machine_learning/datasets/housing/housing.csv’)

/usr/local/anaconda/lib/python3.6/site-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
683 )
684
–> 685 return _read(filepath_or_buffer, kwds)
686
687 parser_f.__name__ = name

/usr/local/anaconda/lib/python3.6/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
455
456 # Create the parser.
–> 457 parser = TextFileReader(fp_or_buf, **kwds)
458
459 if chunksize or iterator:

/usr/local/anaconda/lib/python3.6/site-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
893 self.options["has_index_names"] = kwds["has_index_names"]
894
–> 895 self._make_engine(self.engine)
896
897 def close(self):

/usr/local/anaconda/lib/python3.6/site-packages/pandas/io/parsers.py in _make_engine(self, engine)
1133 def _make_engine(self, engine="c"):
1134 if engine == "c":
-> 1135 self._engine = CParserWrapper(self.f, **self.options)
1136 else:
1137 if engine == "python":

/usr/local/anaconda/lib/python3.6/site-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
1915 kwds["usecols"] = self.usecols
1916
-> 1917 self._reader = parsers.TextReader(src, **kwds)
1918 self.unnamed_cols = self._reader.unnamed_cols
1919

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()

FileNotFoundError: [Errno 2] File b’…/ml/machine_learning/datasets/housing/housing.csv’ does not exist: b’…/ml/machine_learning/datasets/housing/housing.csv’

In [ ]:

I tried on both Python 3 and Python 2.

How can I fix this??

NIRAV_RAJ · December 11, 2020, 12:22pm

python 2 error