Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Detect multiple jars on the classpath when init plugin [databricks] #9654

Merged
merged 35 commits into from
Nov 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
2f1fae2
Detect multiple jars on the classpath when init plugin
thirtiseven Nov 7, 2023
0c5df62
clean up
thirtiseven Nov 7, 2023
742b508
Apply suggestions from code review
thirtiseven Nov 8, 2023
47ef387
print version info and also check jni/cudf
thirtiseven Nov 8, 2023
171c016
print version info and also check jni/cudf
thirtiseven Nov 8, 2023
e29e37d
add config for allowing multiple jars
thirtiseven Nov 8, 2023
0441f53
keep jar path in error messages
thirtiseven Nov 8, 2023
ded091e
address comments
thirtiseven Nov 9, 2023
42fc474
Update sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala
thirtiseven Nov 9, 2023
6bbedfb
Update sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala
thirtiseven Nov 9, 2023
d9264d8
address comments
thirtiseven Nov 9, 2023
5c108bb
Merge branch 'NVIDIA:branch-23.12' into detect_multiple_jars
thirtiseven Nov 13, 2023
c2a0402
Use unique properties for intermediate jars
thirtiseven Nov 13, 2023
14c536d
clean up
thirtiseven Nov 13, 2023
b099358
address comment
thirtiseven Nov 13, 2023
74010e0
Apply suggestions from code review
thirtiseven Nov 14, 2023
fd129c9
address comments
thirtiseven Nov 14, 2023
5e292c4
add the project.artifactId to build-info and check it
thirtiseven Nov 14, 2023
15022d7
remove unnecessary copyright update
thirtiseven Nov 14, 2023
28c8dcd
remove log
thirtiseven Nov 14, 2023
3dc12e4
Add 2.13 support
thirtiseven Nov 14, 2023
14fe6fc
use revision to check duplicate jars
thirtiseven Nov 15, 2023
5dcc15f
fix 2.13 build
thirtiseven Nov 15, 2023
08f4088
support both SAME_REVISION and NEVER mode
thirtiseven Nov 22, 2023
0ca06b5
Avoid CI change and filter out test
thirtiseven Nov 22, 2023
c7ca0ad
check values for config
thirtiseven Nov 23, 2023
a833c73
use enum
thirtiseven Nov 23, 2023
f5bd0c9
fix two nits
thirtiseven Nov 24, 2023
405bf05
Merge branch 'branch-23.12' into detect_multiple_jars
thirtiseven Nov 24, 2023
7d11a4f
Do not print log if no multiple jar
thirtiseven Nov 24, 2023
c4eef25
ignore subdir when checking multiple jars
thirtiseven Nov 27, 2023
bf4477a
Update sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala
thirtiseven Nov 28, 2023
39e0b56
wip ut
thirtiseven Nov 27, 2023
c3279f4
address comment
thirtiseven Nov 28, 2023
70b1247
Merge branch 'branch-24.02' into detect_multiple_jars
thirtiseven Nov 28, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package com.nvidia.spark.rapids

import java.lang.reflect.InvocationTargetException
import java.net.URL
import java.time.ZoneId
import java.util.Properties

Expand All @@ -25,6 +26,7 @@ import scala.sys.process._
import scala.util.Try

import ai.rapids.cudf.{Cuda, CudaException, CudaFatalException, CudfException, MemoryCleaner}
import com.nvidia.spark.rapids.RapidsConf.AllowMultipleJars
import com.nvidia.spark.rapids.filecache.{FileCache, FileCacheLocalityManager, FileCacheLocalityMsg}
import com.nvidia.spark.rapids.python.PythonWorkerSemaphore
import org.apache.commons.lang3.exception.ExceptionUtils
Expand Down Expand Up @@ -111,6 +113,67 @@ object RapidsPluginUtils extends Logging {
}
}

/**
 * Looks up every copy of `propName` visible through the shim classloader, groups the jars
 * that contain it by the `revision=` entry of that properties file, and then warns or fails
 * depending on `conf.allowMultipleJars`:
 *  - ALWAYS: only logs a warning when there is not exactly one jar.
 *  - SAME_REVISION: fails when there is not exactly one revision, and logs a warning when
 *    several jars share that revision.
 *  - NEVER: fails unless there is exactly one revision provided by exactly one jar.
 *
 * @param propName name of the version-info properties resource to search for
 * @param jarName  jar name used only when building the warning/error message
 * @param conf     configuration supplying the `allowMultipleJars` mode
 * @throws IllegalArgumentException (raised by `require`) when the mode forbids what was found
 */
private def detectMultipleJar(propName: String, jarName: String, conf: RapidsConf): Unit = {
  // Reads the resource eagerly and always closes the underlying stream. The lines must be
  // materialized (toList) before close(), because getLines() is a lazy iterator.
  def readLines(url: URL): Seq[String] = {
    val source = scala.io.Source.fromURL(url)
    try {
      source.getLines().toList
    } finally {
      source.close()
    }
  }

  val classloader = ShimLoader.getShimClassLoader()
  val possibleRapidsJarURLs = classloader.getResources(propName).asScala.toSet.toSeq.filter {
    url =>
      val urlPath = url.toString
      // Filter out submodule jars, e.g. rapids-4-spark-aggregator_2.12-23.12.0-spark341.jar,
      // and files stored under subdirs of '!/', e.g.
      // rapids-4-spark_2.12-23.12.0-cuda11.jar!/spark330/rapids4spark-version-info.properties
      // We only want to find the main jar, e.g.
      // rapids-4-spark_2.12-23.12.0-cuda11.jar!/rapids4spark-version-info.properties
      !urlPath.contains("rapids-4-spark-") && urlPath.endsWith("!/" + propName)
  }

  val revisionRegex = "revision=(.*)".r
  // Each properties file is read exactly once; its lines are kept so that the message below
  // does not have to open the URL a second time.
  val jarsByRevision: Map[String, Seq[(URL, Seq[String])]] = possibleRapidsJarURLs.map { url =>
    val versionInfo = readLines(url)
    val revision = versionInfo.collectFirst {
      case revisionRegex(rev) => rev
    }.getOrElse("UNKNOWN")
    (revision, url, versionInfo)
  }.groupBy(_._1).map {
    case (revision, jars) => revision -> jars.map(jar => (jar._2, jar._3))
  }

  // Built lazily: only needed when a warning is logged or a requirement fails.
  lazy val rapidsJarsVersMsg = jarsByRevision.map {
    case (revision, jars) =>
      s"revision: $revision" + jars.map {
        case (url, versionInfo) =>
          "\n\tjar URL: " + url.toString.split("!").head + "\n\t" +
            versionInfo.mkString("\n\t")
      }.mkString + "\n"
  }.mkString
  // scalastyle:off line.size.limit
  lazy val msg = s"""Multiple $jarName jars found in the classpath:
      |$rapidsJarsVersMsg
      |Please make sure there is only one $jarName jar in the classpath.
      |If it is impossible to fix the classpath you can suppress the error by setting ${RapidsConf.ALLOW_MULTIPLE_JARS.key} to SAME_REVISION or ALWAYS.
      """.stripMargin
  // scalastyle:on line.size.limit

  // NOTE(review): when no URL survives the filter above, jarsByRevision is empty, so
  // SAME_REVISION and NEVER fail (and ALWAYS warns) with the "Multiple ... jars" message.
  // Behavior is kept as is; confirm whether the zero-jar case should be tolerated.
  conf.allowMultipleJars match {
    case AllowMultipleJars.ALWAYS =>
      if (jarsByRevision.size != 1 || jarsByRevision.values.exists(_.size != 1)) {
        logWarning(msg)
      }
    case AllowMultipleJars.SAME_REVISION =>
      require(jarsByRevision.size == 1, msg)
      if (jarsByRevision.values.exists(_.size != 1)) {
        logWarning(msg)
      }
    case AllowMultipleJars.NEVER =>
      require(jarsByRevision.size == 1 && jarsByRevision.values.forall(_.size == 1), msg)
  }
}

/**
 * Runs the multiple-jar check for the plugin jar, the spark-rapids-jni jar and the cudf jar,
 * in that order. A failure on one jar stops the remaining checks.
 *
 * @param conf configuration handed through to each individual check
 */
def detectMultipleJars(conf: RapidsConf): Unit = {
  Seq(
    PLUGIN_PROPS_FILENAME -> "rapids-4-spark",
    JNI_PROPS_FILENAME -> "spark-rapids-jni",
    CUDF_PROPS_FILENAME -> "cudf"
  ).foreach {
    case (propsFileName, jarName) => detectMultipleJar(propsFileName, jarName, conf)
  }
}

// This assumes Apache Spark logic, if CSPs are setting defaults differently, we may need
// to handle.
def estimateCoresOnExec(conf: SparkConf): Int = {
Expand Down Expand Up @@ -309,6 +372,7 @@ class RapidsDriverPlugin extends DriverPlugin with Logging {
val sparkConf = pluginContext.conf
RapidsPluginUtils.fixupConfigsOnDriver(sparkConf)
val conf = new RapidsConf(sparkConf)
RapidsPluginUtils.detectMultipleJars(conf)
RapidsPluginUtils.logPluginMode(conf)
GpuCoreDumpHandler.driverInit(sc, conf)

Expand Down Expand Up @@ -363,6 +427,9 @@ class RapidsExecutorPlugin extends ExecutorPlugin with Logging {
val numCores = RapidsPluginUtils.estimateCoresOnExec(sparkConf)
val conf = new RapidsConf(extraConf.asScala.toMap)

// Fail if there are multiple plugin jars in the classpath.
RapidsPluginUtils.detectMultipleJars(conf)

// Compare if the cudf version mentioned in the classpath is equal to the version which
// plugin expects. If there is a version mismatch, throw error. This check can be disabled
// by setting this config spark.rapids.cudfVersionOverride=true
Expand Down
27 changes: 27 additions & 0 deletions sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1840,6 +1840,22 @@ object RapidsConf {
.booleanConf
.createWithDefault(false)

/**
 * Modes accepted by the multiple-jars configuration entry. The declaration order
 * (ALWAYS, SAME_REVISION, NEVER) determines the enumeration ids.
 */
object AllowMultipleJars extends Enumeration {
  val ALWAYS = Value
  val SAME_REVISION = Value
  val NEVER = Value
}

// Internal, startup-only entry selecting how duplicate jars on the classpath are handled.
// The value is upper-cased before being validated against the AllowMultipleJars names, and
// defaults to SAME_REVISION.
val ALLOW_MULTIPLE_JARS = conf("spark.rapids.sql.allowMultipleJars")
  .internal()
  .startupOnly()
  .doc("Allow multiple rapids-4-spark, spark-rapids-jni, and cudf jars on the classpath. " +
    "Spark will take the first one it finds, so the version may not be expected. Possible " +
    "values are ALWAYS: allow all jars, SAME_REVISION: only allow jars with the same " +
    "revision, NEVER: do not allow multiple jars at all.")
  .stringConf
  .transform(_.toUpperCase(java.util.Locale.ROOT))
  .checkValues(AllowMultipleJars.values.map(_.toString))
  .createWithDefault(AllowMultipleJars.SAME_REVISION.toString)

val ALLOW_DISABLE_ENTIRE_PLAN = conf("spark.rapids.allowDisableEntirePlan")
.internal()
.doc("The plugin has the ability to detect possibe incompatibility with some specific " +
Expand Down Expand Up @@ -2641,6 +2657,17 @@ class RapidsConf(conf: Map[String, String]) extends Logging {

// Value of the CUDF_VERSION_OVERRIDE entry, resolved on first access.
lazy val cudfVersionOverride: Boolean = get(CUDF_VERSION_OVERRIDE)

// Resolves the configured ALLOW_MULTIPLE_JARS string to the AllowMultipleJars value with the
// same name; any other string is reported as an internal error.
lazy val allowMultipleJars: AllowMultipleJars.Value = {
  val configured = get(ALLOW_MULTIPLE_JARS)
  AllowMultipleJars.values.find(_.toString == configured).getOrElse {
    throw new IllegalArgumentException(s"Internal Error $configured is not supported for " +
      s"${ALLOW_MULTIPLE_JARS.key}")
  }
}

// Value of the ALLOW_DISABLE_ENTIRE_PLAN entry, resolved on first access.
lazy val allowDisableEntirePlan: Boolean = get(ALLOW_DISABLE_ENTIRE_PLAN)

// Value of the USE_ARROW_OPT entry, resolved on first access.
lazy val useArrowCopyOptimization: Boolean = get(USE_ARROW_OPT)
Expand Down