Skip to content

Commit

Permalink
try simplifying levels
Browse files Browse the repository at this point in the history
  • Loading branch information
mattkappel committed Oct 16, 2023
1 parent 674f2dd commit fb14205
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 18 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# NOTE(review): the lines below are a rendered git diff (no +/- markers), so the
# pre-change and post-change versions of each edited line appear side by side;
# this is a diff transcript, not a runnable Dockerfile.
# Stage 1: Start with the Apache Spark base image
# NOTE(review): old/new pair for this commit — the active `FROM apache/spark`
# stage header was commented out here.
FROM apache/spark as spark-base
#FROM apache/spark as spark-base

# Stage 2: Start with the miniconda3 base image
# NOTE(review): `continuumio/miniconda3` is untagged, so it resolves to :latest;
# pin a specific tag or digest for reproducible builds (hadolint DL3006/DL3007).
FROM continuumio/miniconda3 as conda-base
# NOTE(review): "Expand All" marker — context lines 2..17 of the old file are
# hidden in this view; the @@ header says this hunk starts at old line 18.
Expand All @@ -18,7 +18,8 @@ RUN conda init bash \
&& conda create -n pvs_like_case_study_spark_local --file=pvs_like_case_study_spark_local_lock_no_jupyter.txt

# Final Stage: Copy from the previous stages
# NOTE(review): old/new pair — `FROM spark-base` (reuse of the stage named at
# the top) was replaced by re-pulling `apache/spark` and re-declaring the same
# stage name, presumably the "simplifying levels" change in the commit message.
FROM spark-base
#FROM spark-base
FROM apache/spark as spark-base

# COPY --from=conda-base /opt/conda/envs/pvs_like_case_study_spark_local /opt/conda/envs/pvs_like_case_study_spark_local
COPY --from=conda-base /opt/conda /opt/conda
Expand Down
Original file line number Diff line number Diff line change
@@ -1,19 +1,27 @@
# NOTE(review): rendered git diff of the second Dockerfile (@@ -1,19 +1,27 @@ —
# the entire file on both sides). Removed and added lines are interleaved with
# no +/- markers; the old/new groupings marked below are inferred from the hunk
# arithmetic (19 old lines, 27 new) — verify against the repository.
# -- old version (removed): ubuntu base with hand-installed miniconda/Spark ----
FROM ubuntu:23.04
ENV PATH="/root/miniconda3/bin:${PATH}"
ARG PATH="/root/miniconda3/bin:${PATH}"
# NOTE(review): `apt-get update` in its own RUN layer is the stale-apt-cache
# pattern (hadolint DL3009) — moot if these lines are indeed the removed side.
RUN apt-get update
# -- new version (added): multi-stage copy of conda into the Spark image ------
# Stage 1: Start with the Apache Spark base image
# NOTE(review): both images are untagged (resolve to :latest) — pin tags or
# digests for reproducible builds (hadolint DL3006/DL3007).
FROM continuumio/miniconda3 as conda-base
FROM apache/spark as spark-base

# old side: wget installed for the manual downloads below
RUN apt-get install -y wget && rm -rf /var/lib/apt/lists/*
# new side: the whole conda tree is copied from the miniconda3 stage instead
COPY --from=conda-base /opt/conda /opt/conda

# -- old version (removed): manual Spark + Java + miniconda installation ------
# Install Spark and Java
# NOTE(review): un-checksummed wget of the Spark tarball; also, superseded Spark
# releases are removed from downloads.apache.org (they move to
# archive.apache.org), so this 3.0.1 URL has likely gone stale — confirm.
RUN apt-get install -y openjdk-8-jdk-headless -qq > /dev/null && \
wget -q https://downloads.apache.org/spark/spark-3.0.1/spark-3.0.1-bin-hadoop2.7.tgz && \
tar xf spark-3.0.1-bin-hadoop2.7.tgz && \
rm spark-3.0.1-bin-hadoop2.7.tgz

RUN wget \
https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
&& mkdir /root/.conda \
&& bash Miniconda3-latest-Linux-x86_64.sh -b \
&& rm -f Miniconda3-latest-Linux-x86_64.sh
RUN conda --version

# -- new version (added): runtime setup on top of apache/spark ----------------
# Setup I/O directories and copy the environment and Python script
RUN mkdir -p /input_data
RUN mkdir -p /results
# VOLUME is declared after the directories are created, so content written
# before the VOLUME instruction is preserved in the image — correct ordering.
VOLUME /results
VOLUME /input_data
COPY pvs_like_case_study_sample_data_spark_local.py pvs_like_case_study_spark_local_lock_no_jupyter.txt ./

# Create a new conda environment
SHELL ["/bin/bash", "--login", "-c"]
RUN conda init bash \
&& . ~/.bashrc \
&& conda create -n pvs_like_case_study_spark_local --file=pvs_like_case_study_spark_local_lock_no_jupyter.txt

# Set PATH for conda environment and conda itself
ENV PATH=/opt/conda/envs/pvs_like_case_study_spark_local/bin:/opt/conda/condabin:${PATH}

# Run your script with Spark on startup
# NOTE(review): the shell wrapper is needed for the `&&` chain, but without
# `exec` bash remains PID 1 in front of the python process; also no USER
# directive appears anywhere in the new 27-line file, so the container runs as
# root — confirm whether that is acceptable for this case study image.
CMD ["/bin/bash", "-c", "conda run -n pvs_like_case_study_spark_local python pvs_like_case_study_sample_data_spark_local.py \
&& mv census_2030_with_piks_sample.parquet /results/"]

0 comments on commit fb14205

Please sign in to comment.