Create a Spark DataFrame from a CSV File
from pyspark.context import SparkContext
from pyspark.sql.session import SparkSession
# Load the insurance CSV into a Spark DataFrame and print its rows.

# Reuse the running SparkContext if there is one; otherwise start a new one.
sc = SparkContext.getOrCreate()
# Obtain the session through the builder rather than calling the SparkSession
# constructor directly: getOrCreate() hands back an existing session when one
# is available and only creates a new one otherwise.
spark = SparkSession.builder.getOrCreate()

df1 = (
    spark.read.format("csv")
    .option("inferschema", "True")  # let Spark infer each column's type
    .option("header", "True")  # treat the first line as column names
    .load("/FileStore/tables/insurance.csv")
)

# show(n) prints at most the first n rows to stdout.
df1.show(5000)
Create a Temporary View on a Spark DataFrame
Create a Pandas DataFrame from a CSV File
import pandas as pd
# Load the insurance CSV into a pandas DataFrame and print it.
# NOTE(review): '/FileStore/tables/...' looks like a Databricks DBFS location;
# pandas reads from the local filesystem, so on Databricks this path may need
# the '/dbfs' prefix -- confirm against the environment this runs in.
pd_df = pd.read_csv(
    filepath_or_buffer='/FileStore/tables/insurance.csv',
)
print(pd_df)
# Read an Excel workbook with pandas, then convert it to a Spark DataFrame.
import pandas as pd
from pyspark.context import SparkContext
from pyspark.sql.session import SparkSession

# Read the workbook from disk into a pandas DataFrame.
pd_df = pd.read_excel('C:\\Move\\test.xlsx')
# print(pd_df)  # uncomment to inspect the pandas DataFrame

# Reuse the running SparkContext if there is one; otherwise start a new one.
sc = SparkContext.getOrCreate()
# Obtain the session through the builder rather than calling the SparkSession
# constructor directly: getOrCreate() hands back an existing session when one
# is available and only creates a new one otherwise.
spark = SparkSession.builder.getOrCreate()

# Enable Arrow-based pandas -> Spark conversion. PyArrow must be installed
# first:
#     pip install pyarrow
# NOTE(review): Spark 3.0+ documents this setting under the name
# "spark.sql.execution.arrow.pyspark.enabled" and treats the key below as
# deprecated -- confirm the target Spark version before renaming it.
spark.conf.set("spark.sql.execution.arrow.enabled", "true")

sparkDF = spark.createDataFrame(pd_df)
# sparkDF.printSchema()  # uncomment to inspect the resulting schema
# show(n) prints at most the first n rows to stdout.
sparkDF.show(500)
0 Comments