Saving a DataFrame as Parquet

 
from pyspark.sql import SparkSession

# Create SparkSession
# Create (or reuse) the SparkSession — the entry point for DataFrame work.
# local[1] runs Spark locally with a single worker thread.
spark = SparkSession.builder \
  .master("local[1]") \
  .appName("npblue.com") \
  .getOrCreate()

# Sample rows: (firstname, middlename, lastname, dob, gender, salary).
data = [
    ('Narender', '', 'Paul', '1991-04-01', 'M', 3000),
    ('John ', 'Rose', '', '2000-05-19', 'M', 4000),
    ('Hello', '', 'Williams', '1978-09-05', 'M', 4000),
    ('Maria', 'Anne', 'Jones', '1967-12-01', 'F', 4000),
    ('Jeo', 'Mary', 'Brown', '1980-02-17', 'F', -1),
]

columns = ["firstname", "middlename", "lastname", "dob", "gender", "salary"]
df = spark.createDataFrame(data=data, schema=columns)

# Write the DataFrame as Parquet. mode("overwrite") replaces any existing
# output directory so the script is safe to re-run; the default save mode
# ("errorifexists") would raise an AnalysisException on the second run
# because the path already exists.
df.write.mode("overwrite").parquet('/Users/npblue/PycharmProjects/learning/data/output')

 

Directory layout produced by the write — Parquet output is a directory of
part files plus a `_SUCCESS` marker, each with a hidden `.crc` checksum file:

/data
	/output
		._SUCCESS.crc
		.part-00000-0decc3e8-94a0-4932-97e6-ccc531c1477e-c000.snappy.parquet.crc
		_SUCCESS
		part-00000-0decc3e8-94a0-4932-97e6-ccc531c1477e-c000.snappy.parquet