Package Spark Scala Code and Deploy it on Kubernetes using Spark-on-k8s-Operator

Versions used in this walkthrough:

  1. Spark: 3.0.0
  2. Scala: 2.12
  3. SBT: 1.3.13
  4. Docker on Mac: 2.2.0.0
  5. Kubernetes: v1.15.5
  6. spark-on-k8s-operator: sparkoperator.k8s.io/v1beta2
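This also assumes the spark-on-k8s-operator itself is already running in the cluster. If it isn't, it is typically installed from its Helm chart; the repo URL and release name below follow the operator's README from around this time and may have changed since:

```bash
# Install the operator via Helm (chart coordinates may differ for newer versions;
# --create-namespace requires Helm 3.2+)
helm repo add spark-operator https://googlecloudplatform.github.io/spark-on-k8s-operator
helm install my-release spark-operator/spark-operator --namespace spark-operator --create-namespace
```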
  1. The plugins.sbt file inside the project folder is required for building a fat (assembly) jar:

```scala
// project/plugins.sbt
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.10")
```

Pin the SBT version in project/build.properties:

```
sbt.version=1.3.13
```

  2. In build.sbt, define a merge strategy so that duplicate files pulled in by different dependencies don't fail the assembly. Concatenating the DataSourceRegister service file matters: it is how Spark discovers data source implementations, so the entries from every jar must survive in the fat jar:

```scala
assemblyMergeStrategy in assembly := {
  case "META-INF/services/org.apache.spark.sql.sources.DataSourceRegister" => MergeStrategy.concat
  case PathList("META-INF", xs @ _*) => MergeStrategy.discard
  case "application.conf" => MergeStrategy.concat
  case x => MergeStrategy.first
}
```
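For completeness, here is what the rest of build.sbt might look like. The project name and version are inferred from the assembly jar name used later (spark-scala-k8-app-assembly-0.1.jar); the exact Scala patch version and dependency list are assumptions. Spark itself ships inside the base image, so the Spark artifacts are marked "provided" to keep them out of the fat jar:

```scala
// build.sbt -- a sketch; name/version are inferred from the jar name,
// the dependency list is an assumption
name := "spark-scala-k8-app"
version := "0.1"
scalaVersion := "2.12.12"

// Spark is already on the image's classpath, so exclude it from the assembly
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % "3.0.0" % "provided",
  "org.apache.spark" %% "spark-sql"  % "3.0.0" % "provided"
)
```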
  3. The base Dockerfile extends the operator's Spark image with SBT. Note that SBT must be installed while we are still root, because it unpacks into /usr/local; the switch to the non-root spark user is deferred to the application image:

```dockerfile
ARG SPARK_IMAGE=gcr.io/spark-operator/spark:v3.0.0
FROM ${SPARK_IMAGE}

ENV SBT_VERSION 1.3.13

# Switch to user root so we can add additional jars, packages and configuration files.
USER root

RUN apt-get -y update && apt-get install -y curl

WORKDIR /app

# Install SBT (requires root, since it unpacks into /usr/local)
RUN curl -fsL https://github.com/sbt/sbt/releases/download/v$SBT_VERSION/sbt-$SBT_VERSION.tgz | tar xfz - -C /usr/local
ENV PATH /usr/local/sbt/bin:${PATH}

# Pre-fetch the SBT launcher and core dependencies so application builds are faster
RUN sbt update

ENTRYPOINT ["/opt/entrypoint.sh"]
```
  4. Build the base image:

```bash
docker build -t test/spark-operator:latest .
```
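Optionally, sanity-check that SBT is on the image's PATH before building on top of it:

```bash
# Run SBT inside the freshly built base image; should print 1.3.13
docker run --rm --entrypoint sbt test/spark-operator:latest sbtVersion
```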
  5. The application Dockerfile (Dockerfile-app) builds on the base image, copies the project in, and assembles the fat jar. The build runs as root; only at the end do we drop to the non-root spark user (UID 185 in the upstream Spark images) that the entrypoint expects:

```dockerfile
FROM test/spark-operator:latest

# Add project files
ADD build.sbt /app/
ADD project/plugins.sbt /app/project/
ADD project/build.properties /app/project/
ADD src/. /app/src/

# Build the project into a single fat jar
RUN sbt clean assembly

# Run as the non-root spark user from here on
ARG spark_uid=185
USER ${spark_uid}

ENTRYPOINT ["/opt/entrypoint.sh"]
```
Build the application image:

```bash
docker build -f Dockerfile-app -t test/spark-scala-k8-app:latest .
```
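Optionally, confirm the assembled jar landed where the manifest below expects it:

```bash
# Should list spark-scala-k8-app-assembly-0.1.jar
docker run --rm --entrypoint ls test/spark-scala-k8-app:latest /app/target/scala-2.12/
```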
  6. Finally, the SparkApplication manifest (examples/spark-scala-k8-app.yaml). A few fields worth calling out:

  1. spec.image: "test/spark-scala-k8-app:latest". Provide the image name built in Step 5.
  2. spec.imagePullPolicy: Never, since we are using a locally built image rather than pulling one from a registry.
  3. spec.mainClass: com.AppK8Demo. Provide the fully qualified name of your job class. AppK8Demo is a sample job class from my GitHub repo; a minimal sketch of what such a class looks like follows the manifest below.
```yaml
apiVersion: "sparkoperator.k8s.io/v1beta2"
kind: SparkApplication
metadata:
  name: spark-scala-k8-app
  namespace: default
spec:
  type: Scala
  mode: cluster
  image: "test/spark-scala-k8-app:latest"
  imagePullPolicy: Never
  mainClass: com.AppK8Demo
  mainApplicationFile: "local:///app/target/scala-2.12/spark-scala-k8-app-assembly-0.1.jar"
  sparkVersion: "3.0.0"
  restartPolicy:
    type: Never
  volumes:
    - name: "test-volume"
      hostPath:
        path: "/tmp"
        type: Directory
  driver:
    cores: 1
    coreLimit: "1200m"
    memory: "512m"
    labels:
      version: 3.0.0
    serviceAccount: spark
    volumeMounts:
      - name: "test-volume"
        mountPath: "/tmp"
  executor:
    cores: 1
    instances: 1
    memory: "512m"
    labels:
      version: 3.0.0
    volumeMounts:
      - name: "test-volume"
        mountPath: "/tmp"
```
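For reference, a job class like com.AppK8Demo can be as simple as the sketch below. This is a guess at its shape rather than the code from my repo; only the package and object name come from the manifest above:

```scala
package com

import org.apache.spark.sql.SparkSession

// Minimal sketch of a job class; the real AppK8Demo lives in the GitHub repo.
object AppK8Demo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("spark-scala-k8-app")
      .getOrCreate()

    import spark.implicits._

    // Trivial sanity-check job: build a small Dataset and count it
    val data = Seq("spark", "on", "kubernetes").toDS()
    println(s"Record count: ${data.count()}")

    spark.stop()
  }
}
```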
  7. Deploy the application:

```bash
kubectl apply -f examples/spark-scala-k8-app.yaml
```
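Once applied, the operator creates the driver and executor pods; the driver pod is named after the application with a -driver suffix. The manifest's driver.serviceAccount: spark must already exist and be allowed to manage pods; if it doesn't, the standard Spark-on-Kubernetes one-liners below create it:

```bash
# Create the service account referenced by driver.serviceAccount (if missing)
kubectl create serviceaccount spark
kubectl create clusterrolebinding spark-role --clusterrole=edit \
  --serviceaccount=default:spark --namespace=default

# Watch the application's progress and read the driver logs
kubectl get sparkapplications
kubectl describe sparkapplication spark-scala-k8-app
kubectl logs -f spark-scala-k8-app-driver
```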
(Screenshots: the Kubernetes dashboard showing the running pods, and the application logs.)
