Samouczek Spark Mllib
from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext
Sc = SparkContext()
sqlContext = SQLContext(sc)
Terrible Tarantula
from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext
Sc = SparkContext()
sqlContext = SQLContext(sc)
# Use the Spark CSV datasource with options specifying:
# - First line of file is a header
# - Automatically infer the schema of the data
data = spark.read.format("csv")
.option("header", "true")
.option("inferSchema", "true")
.load("/databricks-datasets/samples/population-vs-price/data_geo.csv")
data.cache() # Cache data for faster reuse