Save a PySpark DataFrame as CSV

 

from pyspark.sql import SparkSession

Create a SparkSession

# Build (or reuse) a Spark session running locally with a single worker thread.
# The chain is parenthesised so it can span several lines without backslashes.
spark = (
    SparkSession.builder
    .master("local[1]")
    .appName("npblue.com")
    .getOrCreate()
)

# Sample rows in the order given by `columns` below; an empty string marks a
# missing middle or last name.
data = [
    ('Narender', '', 'Paul', '1991-04-01', 'M', 3000),
    ('John ', 'Rose', '', '2000-05-19', 'M', 4000),
    ('Hello', '', 'Williams', '1978-09-05', 'M', 4000),
    ('Maria', 'Anne', 'Jones', '1967-12-01', 'F', 4000),
    ('Jeo', 'Mary', 'Brown', '1980-02-17', 'F', -1),
]

columns = ["firstname", "middlename", "lastname", "dob", "gender", "salary"]
df = spark.createDataFrame(data=data, schema=columns)

# Print every row of the DataFrame.
df.foreach(print)

# Write the DataFrame as CSV. The target path is a directory: Spark places the
# part file(s) and a _SUCCESS marker inside it rather than creating one file.
df.write.csv('/Users/npblue/PycharmProjects/learning/data/output')

## Output

 


/data
  /output
    ._SUCCESS.crc
    .part-00000-961c8e77-bff4-4165-a941-a3d5d88f6530-c000.csv.crc
    _SUCCESS
    part-00000-961c8e77-bff4-4165-a941-a3d5d88f6530-c000.csv