
Commit

Merge pull request #5 from aagumin/feature/mtls
Feature/mtls
aagumin authored Oct 31, 2024
2 parents ce9e9f8 + a4ed512 commit 6e7312a
Showing 11 changed files with 174 additions and 59 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -166,3 +166,4 @@ cython_debug/

# custom values file
charts/values.yaml
charts/values-st2.yaml
2 changes: 1 addition & 1 deletion README.md
@@ -14,7 +14,7 @@ Spark Connect is an application that allows you to perform data processing tasks
- [X] Dynamic package installation
- [X] Apache Celeborn support
- [ ] Optional mTLS.
- [ ] Istio support
- [ ] Istio support

## Requirements

3 changes: 3 additions & 0 deletions charts/spark-connect/templates/executor-pod-template-cm.yaml
@@ -12,6 +12,9 @@ data:
name: spark-connect-kubernetes-executor
labels:
app.kubernetes.io/instance: {{ include "spark.name" . }}-executor
{{ if .Values.mtls.enabled }}
sidecar.istio.io/inject: "true"
{{- end }}
spark-role: executor
{{- with .Values.spark.executor.labels }}
{{- toYaml . | nindent 16 }}
9 changes: 9 additions & 0 deletions charts/spark-connect/templates/spark-properties-cm.yaml
@@ -82,6 +82,15 @@ data:
spark.kubernetes.executor.request.cores {{ .Values.spark.executor.requestCoresMilliCPU }}m
spark.kubernetes.local.dirs.tmpfs false
spark.kubernetes.namespace {{ .Release.Namespace }}
{{ if .Values.mtls.enabled }}
spark.kubernetes.executor.service true
spark.driver.bindAddress 0.0.0.0
spark.driver.port 7078
spark.blockManager.port 7079
spark.executor.bindAddress 0.0.0.0
spark.kubernetes.post.stop.script {{ .Values.mtls.poststopscript }}
spark.kubernetes.pre.start.script {{ .Values.mtls.prestartscript }}
{{- end }}
{{- with .Values.spark.sparkConfig }}
{{- range $key, $value := . }}
{{ printf "%s %s" $key (toString $value) }}
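The added properties pin the driver and block manager to fixed ports and 0.0.0.0 bind addresses so their traffic can be routed through the Istio sidecar, and register pre-start/post-stop hooks that stock Spark does not define (presumably supplied by the bundled spark-40909.patch). A quick way to confirm the block only renders when mTLS is enabled is to template the chart locally; the release name below is an assumption:

# Render only the Spark properties ConfigMap with mTLS switched on (release name "spark" is hypothetical)
helm template spark charts/spark-connect \
  --set mtls.enabled=true \
  --show-only templates/spark-properties-cm.yaml \
  | grep -E 'spark\.(driver|blockManager|kubernetes\.(pre|post))'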
3 changes: 3 additions & 0 deletions charts/spark-connect/templates/stateful-set.yaml
@@ -19,6 +19,9 @@ spec:
labels:
{{- include "spark.selectorLabels" . | nindent 8 }}
spark-driver-affinity-id: {{ uuidv4 | quote }}
{{ if .Values.mtls.enabled }}
sidecar.istio.io/inject: "true"
{{- end }}
spec:
dnsConfig:
options:
4 changes: 2 additions & 2 deletions charts/spark-connect/values.yaml
@@ -79,8 +79,8 @@ extraEnv: []

mtls:
enabled: false
pre.start.script: ""
post.stop.script: ""
prestartscript: ""
poststopscript: ""

containerPorts:
sparkUi: 4040
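With the keys renamed to plain prestartscript/poststopscript, mTLS can be toggled from the command line; the script paths below are where mtls/Dockerfile installs them, while the release name is an assumption:

# Sketch: enable Istio-backed mTLS at install time (release name "spark" is hypothetical)
helm upgrade --install spark charts/spark-connect \
  --set mtls.enabled=true \
  --set mtls.prestartscript=/opt/scripts/wait_for_istio_sidecar.sh \
  --set mtls.poststopscript=/opt/scripts/stop_istio_sidecar.sh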
7 changes: 7 additions & 0 deletions docker/pom.xml
@@ -18,6 +18,13 @@
<artifactId>celeborn-client-spark-3-shaded_2.12</artifactId>
<version>0.5.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-aws</artifactId>
<version>3.3.4</version>
</dependency>

</dependencies>
<build>
<plugins>
84 changes: 84 additions & 0 deletions mtls/Dockerfile
@@ -0,0 +1,84 @@
FROM eclipse-temurin:17 as builder

ENV MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g"
ENV SPARK_VERSION=3.5.3
ENV SPARK_HOME=/opt/spark
ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}.tgz

WORKDIR /opt

RUN apt-get update; \
apt-get install -y wget patch gettext-base gnupg2 bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \
rm -rf /var/lib/apt/lists/*


RUN set -ex; \
mkdir -p $SPARK_HOME; \
wget -nv -O /opt/spark.tgz "$SPARK_TGZ_URL"; \
tar -zxf /opt/spark.tgz --strip-components=1 --directory=$SPARK_HOME; \
rm /opt/spark.tgz


WORKDIR $SPARK_HOME

COPY mtls/spark-40909.patch .
RUN patch -p1 < $SPARK_HOME/spark-40909.patch

RUN ./dev/make-distribution.sh \
--name spark-mtls \
-Pconnect \
-Pkubernetes \
-Phadoop-3 \
-Phadoop-cloud \
-Pparquet-provided \
-Phive \
-Phive-thriftserver

## IMPORTANT! We must delete the spark-connect-common jar from the jars directory!
## see: https://issues.apache.org/jira/browse/SPARK-45201
#RUN rm "${SPARK_HOME}/jars/spark-connect-common_${SCALA_VERSION}-${SPARK_VERSION}.jar"
FROM python:3.10.14-slim-bookworm
ARG spark_uid=185
ENV SPARK_HOME=/opt/spark

RUN apt-get update; \
apt-get install -y --no-install-recommends openjdk-17-jre tini procps gettext-base maven curl; \
rm -rf /var/lib/apt/lists/*


RUN groupadd --system --gid=${spark_uid} spark && \
useradd --system --uid=${spark_uid} --gid=spark spark

COPY --from=builder /opt/spark/dist/ ${SPARK_HOME}/

RUN chown -R spark:spark ${SPARK_HOME}/

RUN cp ${SPARK_HOME}/kubernetes/dockerfiles/spark/entrypoint.sh /opt/entrypoint.sh; \
chmod a+x /opt/entrypoint.sh; \
cp ${SPARK_HOME}/kubernetes/dockerfiles/spark/decom.sh /opt/decom.sh; \
chmod a+x /opt/decom.sh

COPY mtls/scripts/wait_for_istio_sidecar.sh /opt/scripts/wait_for_istio_sidecar.sh
RUN chmod +x /opt/scripts/wait_for_istio_sidecar.sh

COPY mtls/scripts/stop_istio_sidecar.sh /opt/scripts/stop_istio_sidecar.sh
RUN chmod +x /opt/scripts/stop_istio_sidecar.sh

# Pre-fetch the Maven dependencies from docker/pom.xml before switching to the spark user

WORKDIR /opt
COPY docker/pom.xml .

RUN mvn validate

RUN mvn install

RUN mvn dependency:copy-dependencies package

USER spark
WORKDIR /home/spark

COPY docker/requirements.txt .
RUN pip install -r requirements.txt

ENTRYPOINT ["/opt/entrypoint.sh"]
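Since the Dockerfile copies files from both mtls/ and docker/, it has to be built with the repository root as the build context; the image tag below is an assumption:

# Sketch: build the mTLS-enabled Spark image from the repository root (tag is hypothetical)
docker build -f mtls/Dockerfile -t spark-connect-mtls:3.5.3 .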
20 changes: 20 additions & 0 deletions mtls/scripts/stop_istio_sidecar.sh
@@ -0,0 +1,20 @@
#!/bin/bash

# Gracefully shuts down the Envoy (istio-proxy) sidecar running alongside this pod

# Function to stop the Istio sidecar
stop_istio_sidecar() {
echo "Stopping Istio sidecar for pod"

# Send a termination signal to the istio-proxy sidecar
curl -sf -XPOST 127.0.0.1:15000/quitquitquit

if [ $? -eq 0 ]; then
echo "Istio sidecar stopped successfully."
else
echo "Failed to stop Istio sidecar."
fi
}

# Call the function to stop the sidecar
stop_istio_sidecar
16 changes: 16 additions & 0 deletions mtls/scripts/wait_for_istio_sidecar.sh
@@ -0,0 +1,16 @@
#!/bin/bash

# Function to check if the Istio sidecar is ready
is_sidecar_ready() {
# Check if the sidecar proxy (Envoy) is running and ready
curl -s http://localhost:15000/ready | grep "LIVE"
}

# Wait for the sidecar proxy to be ready
echo "Waiting for Istio sidecar to be ready..."
until is_sidecar_ready; do
echo "Sidecar not ready yet. Waiting..."
sleep 3
done

echo "Istio sidecar is ready."
