[Spark By Example] Spark SQL – TempView

With Spark SQL, you can use the familiar SQL syntax to query the data.

TempView

A Temporary view in Spark is similar to a real SQL table or view that contains rows and columns. But the temp view will be lazily evaluated when it is queried against.

  • Create or replace a view from DataFrame
    • createOrReplaceTempView(viewName)

[Note] When the underlying DataFrame schema is changed, the view should be updated again.


Data File

  • Create a csv file with the following content.
first name,last name,age
Paul,Henderson,20
Grace,Carr,32
Julia,Jackson,14
Alexandra,May,43
Anna,Jones,24

Result


Python Application

from pyspark.sql import SparkSession
from pyspark.sql.types import *

if __name__ == "__main__":

  spark = (SparkSession
        .builder
        .appName("people-sql")
        .getOrCreate())

  data_file = "test/data/people.csv"

  # schema
  custom_schema = StructType([
        StructField("first name", StringType(), False),
        StructField("last name", StringType(), False),
        StructField("age", IntegerType(), False)])

  data_df = (spark.read.format("csv")
          .option("header", True)
          .schema(custom_schema)
          .load(data_file))
  data_df = data_df.withColumnRenamed("first name", "firstname")
  data_df = data_df.withColumnRenamed("last name", "lastname")

  # create a view
  data_df.createOrReplaceTempView("people_view")

  # spark sql - projection
  query = """SELECT firstname, lastname, age
        FROM people_view
        ORDER BY lastname
  """
  spark.sql(query).show()

  # spark sql - filtering
  query = """SELECT firstname, lastname, age
        FROM people_view
        WHERE age > 20
        ORDER BY age DESC
  """
  spark.sql(query).show()

  # spark sql - complex projection
  query = """SELECT firstname, lastname, age, 
                CASE 
                  WHEN age >= 20 THEN 'Adult'
                  WHEN age >= 13 THEN 'Teen' 
                  ELSE 'Child'
                END AS who
             FROM people_view
             ORDER BY age DESC
  """
  spark.sql(query).show()

  spark.stop()

C# Application

using Microsoft.Spark.Sql;
using Microsoft.Spark.Sql.Types;

namespace MySpark.Examples
{
    internal class PeopleSql
    {
        public static void Run()
        {
            SparkSession spark =
                SparkSession
                    .Builder()
                    .AppName("column-test")
                    .GetOrCreate();

            string filePath = "data/people.csv";

            // schema
            StructField[] fields = {
                new StructField("first name", new StringType(), false),
                new StructField("last name", new StringType(), false),
                new StructField("age", new IntegerType(), false)
            };
            StructType schema = new StructType(fields);

            // initial data frame
            DataFrame dataDF = spark.Read()
                .Format("csv")
                .Option("header", true)
                .Schema(schema)
                .Load(filePath);
            dataDF = dataDF
                .WithColumnRenamed("first name", "firstname")
                .WithColumnRenamed("last name", "lastname");

            // create a view
            dataDF.CreateOrReplaceTempView("people_view");

            // spark sql - projection
            string query = @"
                SELECT firstname, lastname, age
                FROM people_view
                ORDER BY lastname";
            spark.Sql(query).Show();

            // spark sql - filtering
            query = @"
                SELECT firstname, lastname, age
                FROM people_view
                WHERE age > 20
                ORDER BY age DESC";
            spark.Sql(query).Show();

            // spark sql - complex projection
            query = @"
                SELECT firstname, lastname, age, 
                CASE 
                  WHEN age >= 20 THEN 'Adult'
                  WHEN age >= 13 THEN 'Teen' 
                  ELSE 'Child'
                END AS who
                FROM people_view
                ORDER BY age DESC";
            spark.Sql(query).Show();

            spark.Stop();
        }
    }
}

Leave a Comment

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s