I am trying to read a JSON file that I uploaded on CloudxLab. I am using the following commands:
spark = SparkSession.builder.appName('Basics').getOrCreate()
df = spark.read.json('people.json')
people.json is a file from my local machine that I uploaded.
Error:
AnalysisException Traceback (most recent call last)
in ()
----> 1 df = spark.read.json('people.json')
/usr/spark2.0.1/python/pyspark/sql/readwriter.py in json(self, path, schema, primitivesAsString, prefersDecimal, allowComments, allowUnquotedFieldNames, allowSingleQuotes, allowNumericLeadingZero, allowBackslashEscapingAnyCharacter, mode, columnNameOfCorruptRecord, dateFormat, timestampFormat)
227 path = [path]
228 if type(path) == list:
--> 229 return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))
230 elif isinstance(path, RDD):
231 def func(iterator):
/usr/spark2.0.1/python/lib/py4j-0.10.3-src.zip/py4j/java_gateway.py in call(self, *args)
1131 answer = self.gateway_client.send_command(command)
1132 return_value = get_return_value(
-> 1133 answer, self.gateway_client, self.target_id, self.name)
1134
1135 for temp_arg in temp_args:
/usr/spark2.0.1/python/pyspark/sql/utils.py in deco(*a, **kw)
67 e.java_exception.getStackTrace()))
68 if s.startswith('org.apache.spark.sql.AnalysisException: '):
--> 69 raise AnalysisException(s.split(': ', 1)[1], stackTrace)
70 if s.startswith('org.apache.spark.sql.catalyst.analysis'):
71 raise AnalysisException(s.split(': ', 1)[1], stackTrace)
AnalysisException: u'Path does not exist: hdfs://ip-172-31-53-48.ec2.internal:8020/user/saptadeepkchanda5373/people.json;'