Skip to content

Commit

Permalink
[CELEBORN-1116] Read authentication configs from HADOOP_CONF_DIR
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
1. Make Celeborn read configs from HADOOP_CONF_DIR.
2. Remove unnecessary Kerberos configs.

### Why are the changes needed?
To support HDFS with Kerberos.

### Does this PR introduce _any_ user-facing change?
NO.

### How was this patch tested?
GA and cluster.

Closes #2082 from FMX/B1116.

Lead-authored-by: mingji <[email protected]>
Co-authored-by: Fu Chen <[email protected]>
Co-authored-by: Cheng Pan <[email protected]>
Co-authored-by: Ethan Feng <[email protected]>
Signed-off-by: zky.zhoukeyong <[email protected]>
  • Loading branch information
3 people authored and waitinfuture committed Nov 9, 2023
1 parent 78c5b37 commit 2ee95cc
Show file tree
Hide file tree
Showing 7 changed files with 19 additions and 27 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,9 @@ celeborn.rpc.askTimeout 240s
celeborn.worker.flusher.hdfs.buffer.size 4m
celeborn.storage.hdfs.dir hdfs://<namenode>/celeborn
celeborn.worker.replicate.fastFail.duration 240s
# Either principal/keytab or valid TGT cache is required to access kerberized HDFS
celeborn.storage.hdfs.kerberos.principal user@REALM
celeborn.storage.hdfs.kerberos.keytab /path/to/user.keytab

# If your hosts have disk raid or use lvm, set celeborn.worker.monitor.disk.enabled to false
celeborn.worker.monitor.disk.enabled false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1066,7 +1066,6 @@ class CelebornConf(loadDefaults: Boolean) extends Cloneable with Logging with Se
// //////////////////////////////////////////////////////
// kerberos //
// //////////////////////////////////////////////////////
def hdfsStorageKerberosEnabled = get(HDFS_STORAGE_TYPE_KERBEROS_ENABLED)
def hdfsStorageKerberosPrincipal = get(HDFS_STORAGE_KERBEROS_PRINCIPAL)
def hdfsStorageKerberosKeytab = get(HDFS_STORAGE_KERBEROS_KEYTAB)
}
Expand Down Expand Up @@ -3993,14 +3992,6 @@ object CelebornConf extends Logging {
.intConf
.createWithDefault(64)

val HDFS_STORAGE_TYPE_KERBEROS_ENABLED: ConfigEntry[Boolean] =
buildConf("celeborn.storage.hdfs.kerberos.enabled")
.categories("master", "worker")
.version("0.3.2")
.doc("Whether to enable kerberos authentication for HDFS storage connection.")
.booleanConf
.createWithDefault(false)

val HDFS_STORAGE_KERBEROS_PRINCIPAL: OptionalConfigEntry[String] =
buildConf("celeborn.storage.hdfs.kerberos.principal")
.categories("master", "worker")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,21 +77,20 @@ object CelebornHadoopUtils extends Logging {
}

def initKerberos(conf: CelebornConf, hadoopConf: Configuration): Unit = {
// If we are accessing HDFS and it has Kerberos enabled, we have to login
// from a keytab file so that we can access HDFS beyond the kerberos ticket expiration.
UserGroupInformation.setConfiguration(hadoopConf)
if (conf.hdfsStorageKerberosEnabled) {
val principal = conf.hdfsStorageKerberosPrincipal
.getOrElse(throw new NoSuchElementException(
CelebornConf.HDFS_STORAGE_KERBEROS_PRINCIPAL.key))
val keytab = conf.hdfsStorageKerberosKeytab
.getOrElse(throw new NoSuchElementException(CelebornConf.HDFS_STORAGE_KERBEROS_KEYTAB.key))
if (!new File(keytab).exists()) {
throw new CelebornException(s"Keytab file: ${keytab} does not exist")
} else {
logInfo("Attempting to login to Kerberos " +
s"using principal: ${principal} and keytab: ${keytab}")
UserGroupInformation.loginUserFromKeytab(principal, keytab)
// Log in only when the Hadoop configuration selects Kerberos authentication.
// The literal is the receiver of equalsIgnoreCase so that a missing
// "hadoop.security.authentication" entry (Configuration.get returns null for an
// unknown key) is treated as "not Kerberos" instead of raising a
// NullPointerException, and so the comparison does not depend on the JVM
// default locale the way toLowerCase does.
if ("kerberos".equalsIgnoreCase(hadoopConf.get("hadoop.security.authentication"))) {
  (conf.hdfsStorageKerberosPrincipal, conf.hdfsStorageKerberosKeytab) match {
    case (Some(principal), Some(keytab)) =>
      logInfo(
        s"Attempting to login to Kerberos using principal: $principal and keytab: $keytab")
      // Fail with a descriptive error before attempting the login with a missing keytab file.
      if (!new File(keytab).exists()) {
        throw new CelebornException(s"Keytab file: $keytab does not exist")
      }
      UserGroupInformation.loginUserFromKeytab(principal, keytab)
    case _ =>
      // Reached when the principal, the keytab, or both are not configured.
      logInfo("Kerberos is enabled without principal and keytab supplied," +
        " assuming keytab is managed externally")
      // NOTE(review): presumably invoked so UGI picks up externally obtained
      // credentials (e.g. a TGT cache); the returned user is discarded - confirm.
      UserGroupInformation.getCurrentUser()
  }
}
}
Expand Down
1 change: 0 additions & 1 deletion docs/configuration/master.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ license: |
| celeborn.master.workerUnavailableInfo.expireTimeout | 1800s | Worker unavailable info would be cleared when the retention period is expired | 0.3.1 |
| celeborn.storage.availableTypes | HDD | Enabled storages. Available options: MEMORY,HDD,SSD,HDFS. Note: HDD and SSD would be treated as identical. | 0.3.0 |
| celeborn.storage.hdfs.dir | &lt;undefined&gt; | HDFS base directory for Celeborn to store shuffle data. | 0.2.0 |
| celeborn.storage.hdfs.kerberos.enabled | false | Whether to enable kerberos authentication for HDFS storage connection. | 0.3.2 |
| celeborn.storage.hdfs.kerberos.keytab | &lt;undefined&gt; | Kerberos keytab file path for HDFS storage connection. | 0.3.2 |
| celeborn.storage.hdfs.kerberos.principal | &lt;undefined&gt; | Kerberos principal for HDFS storage connection. | 0.3.2 |
<!--end-include-->
1 change: 0 additions & 1 deletion docs/configuration/worker.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ license: |
| celeborn.shuffle.chunk.size | 8m | Max chunk size of reducer's merged shuffle data. For example, if a reducer's shuffle data is 128M and the data will need 16 fetch chunk requests to fetch. | 0.2.0 |
| celeborn.storage.availableTypes | HDD | Enabled storages. Available options: MEMORY,HDD,SSD,HDFS. Note: HDD and SSD would be treated as identical. | 0.3.0 |
| celeborn.storage.hdfs.dir | &lt;undefined&gt; | HDFS base directory for Celeborn to store shuffle data. | 0.2.0 |
| celeborn.storage.hdfs.kerberos.enabled | false | Whether to enable kerberos authentication for HDFS storage connection. | 0.3.2 |
| celeborn.storage.hdfs.kerberos.keytab | &lt;undefined&gt; | Kerberos keytab file path for HDFS storage connection. | 0.3.2 |
| celeborn.storage.hdfs.kerberos.principal | &lt;undefined&gt; | Kerberos principal for HDFS storage connection. | 0.3.2 |
| celeborn.worker.activeConnection.max | &lt;undefined&gt; | If the number of active connections on a worker exceeds this configuration value, the worker will be marked as high-load in the heartbeat report, and the master will not include that node in the response of RequestSlots. | 0.3.1 |
Expand Down
3 changes: 3 additions & 0 deletions docs/deploy.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ celeborn.rpc.askTimeout 240s
celeborn.worker.flusher.hdfs.buffer.size 4m
celeborn.storage.hdfs.dir hdfs://<namenode>/celeborn
celeborn.worker.replicate.fastFail.duration 240s
# Either principal/keytab or valid TGT cache is required to access kerberized HDFS
celeborn.storage.hdfs.kerberos.principal user@REALM
celeborn.storage.hdfs.kerberos.keytab /path/to/user.keytab

# If your hosts have disk raid or use lvm, set celeborn.worker.monitor.disk.enabled to false
celeborn.worker.monitor.disk.enabled false
Expand Down
2 changes: 0 additions & 2 deletions sbin/load-celeborn-env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
# limitations under the License.
#

unset HADOOP_CONF_DIR

# included in all the celeborn scripts with source command
# should not be executable directly
# also should not be passed any arguments, since we need original $*
Expand Down

0 comments on commit 2ee95cc

Please sign in to comment.