Hi, can anyone help me fix the error I get when running the script below in a Jupyter Notebook? Thanks.
Errors:
Name: Compile Error
Message: :24: error: value toDF is not a member of org.apache.spark.rdd.RDD[LogRecord]
val accessDf = accessLog.toDF()
^
StackTrace:
Scripts:
/**
 * One parsed entry of a web-server access log.
 *
 * @param host      host field of the log line
 * @param timeStamp timestamp text, kept as a raw string
 * @param url       requested URL
 * @param httpCode  HTTP status code
 */
case class LogRecord(
  host: String,
  timeStamp: String,
  url: String,
  httpCode: Int
)
// Regex for one access-log line. Capture groups:
//   1     host
//   2, 3  two whitespace-free fields
//   4     timestamp text found between literal square brackets
//   5, 6  first two tokens of the double-quoted request (6 is the URL)
//   7     remainder of the quoted request
//   8     three-digit status code
//   9     final whitespace-free field
// The square brackets around the timestamp are escaped so they match literally;
// unescaped, the whole bracketed part collapses into a single character class,
// group 4 disappears and every later group number shifts. The literal is
// delimited with plain ASCII quotes (typographic quotes do not compile).
val PATTERN: scala.util.matching.Regex =
  """^(\S+) (\S+) (\S+) \[([\w:/]+\s[+\-]\d{4})\] "(\S+) (\S+)(.*)" (\d{3}) (\S+)""".r
/**
 * Converts one raw access-log line into a [[LogRecord]].
 *
 * @param log the raw log line
 * @return a record built from capture groups 1 (host), 4 (timestamp), 6 (URL) and
 *         8 (status code) of `PATTERN`; when the line does not match, the line is
 *         printed and the sentinel `LogRecord("Empty", "", "", -1)` is returned
 */
def parseLogLine(log: String): LogRecord =
  PATTERN.findFirstMatchIn(log) match {
    case Some(m) =>
      // Group 8 is expected to be PATTERN's three-digit status group, so `toInt` is safe.
      LogRecord(m.group(1), m.group(4), m.group(6), m.group(8).toInt)
    case None =>
      println("Rejected Log Line: " + log)
      // Plain ASCII quotes: the typographic quotes used before do not compile.
      LogRecord("Empty", "", "", -1)
  }
// Read the access log as an RDD of lines and parse every line into a LogRecord.
val logFile = sc.textFile("/data/spark/project/NASA_access_log_Aug95.gz")
val accessLog = logFile.map(parseLogLine)

// `toDF()` is not a method of RDD; it is added by the implicit conversions of the
// active SparkSession. Without this import the compiler reports
// "value toDF is not a member of org.apache.spark.rdd.RDD[LogRecord]".
import spark.implicits._
val accessDf = accessLog.toDF()
accessDf.printSchema

// Register the DataFrame for Spark SQL, re-expose the query result under a second
// view name and cache that second view. String literals use plain ASCII quotes.
accessDf.createOrReplaceTempView("nasalog")
val output = spark.sql("select * from nasalog")
output.createOrReplaceTempView("nasa_log")
spark.sql("cache TABLE nasa_log")