Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CELEBORN-1116] Read authentication configs from HADOOP_CONF_DIR #2082

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,10 @@ celeborn.rpc.askTimeout 240s
celeborn.worker.flusher.hdfs.buffer.size 4m
celeborn.storage.hdfs.dir hdfs://<namenode>/celeborn
celeborn.worker.replicate.fastFail.duration 240s
# If your HDFS is enabled with Kerberos,
# you will need to set the following configs or use kinit to get a valid TGT.
celeborn.storage.hdfs.kerberos.principal user@REALM
celeborn.storage.hdfs.kerberos.keytab /path/to/user.keytab

# If your hosts have disk raid or use lvm, set celeborn.worker.monitor.disk.enabled to false
celeborn.worker.monitor.disk.enabled false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1069,7 +1069,6 @@ class CelebornConf(loadDefaults: Boolean) extends Cloneable with Logging with Se
// //////////////////////////////////////////////////////
// kerberos //
// //////////////////////////////////////////////////////
def hdfsStorageKerberosEnabled = get(HDFS_STORAGE_TYPE_KERBEROS_ENABLED)
def hdfsStorageKerberosPrincipal = get(HDFS_STORAGE_KERBEROS_PRINCIPAL)
def hdfsStorageKerberosKeytab = get(HDFS_STORAGE_KERBEROS_KEYTAB)
}
Expand Down Expand Up @@ -4017,14 +4016,6 @@ object CelebornConf extends Logging {
.intConf
.createWithDefault(64)

val HDFS_STORAGE_TYPE_KERBEROS_ENABLED: ConfigEntry[Boolean] =
buildConf("celeborn.storage.hdfs.kerberos.enabled")
.categories("master", "worker")
.version("0.3.2")
.doc("Whether to enable kerberos authentication for HDFS storage connection.")
.booleanConf
.createWithDefault(false)

val HDFS_STORAGE_KERBEROS_PRINCIPAL: OptionalConfigEntry[String] =
buildConf("celeborn.storage.hdfs.kerberos.principal")
.categories("master", "worker")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,21 +77,22 @@ object CelebornHadoopUtils extends Logging {
}

def initKerberos(conf: CelebornConf, hadoopConf: Configuration): Unit = {
// If we are accessing HDFS and it has Kerberos enabled, we have to login
// from a keytab file so that we can access HDFS beyond the kerberos ticket expiration.
UserGroupInformation.setConfiguration(hadoopConf)
if (conf.hdfsStorageKerberosEnabled) {
val principal = conf.hdfsStorageKerberosPrincipal
.getOrElse(throw new NoSuchElementException(
CelebornConf.HDFS_STORAGE_KERBEROS_PRINCIPAL.key))
val keytab = conf.hdfsStorageKerberosKeytab
.getOrElse(throw new NoSuchElementException(CelebornConf.HDFS_STORAGE_KERBEROS_KEYTAB.key))
if (!new File(keytab).exists()) {
throw new CelebornException(s"Keytab file: ${keytab} does not exist")
} else {
logInfo("Attempting to login to Kerberos " +
s"using principal: ${principal} and keytab: ${keytab}")
UserGroupInformation.loginUserFromKeytab(principal, keytab)
if ("kerberos".equals(hadoopConf.get("hadoop.security.authentication").toLowerCase)) {
val principalOpt = conf.hdfsStorageKerberosPrincipal
val keytabOpt = conf.hdfsStorageKerberosKeytab
(principalOpt, keytabOpt) match {
pan3793 marked this conversation as resolved.
Show resolved Hide resolved
case (Some(principal), Some(keytab)) =>
logInfo("Attempting to login to Kerberos " +
s"using principal: ${principal} and keytab: ${keytab}")
cfmcgrady marked this conversation as resolved.
Show resolved Hide resolved
if (!new File(keytab).exists()) {
throw new CelebornException(s"Keytab file: ${keytab} does not exist")
cfmcgrady marked this conversation as resolved.
Show resolved Hide resolved
}
UserGroupInformation.loginUserFromKeytab(principal, keytab)
case _ =>
logInfo("Kerberos is enabled without principal and keytab supplied," +
" assuming keytab is managed externally")
UserGroupInformation.getCurrentUser()
}
}
}
Expand Down
1 change: 0 additions & 1 deletion docs/configuration/master.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ license: |
| celeborn.master.workerUnavailableInfo.expireTimeout | 1800s | Worker unavailable info would be cleared when the retention period is expired | 0.3.1 |
| celeborn.storage.availableTypes | HDD | Enabled storages. Available options: MEMORY,HDD,SSD,HDFS. Note: HDD and SSD would be treated as identical. | 0.3.0 |
| celeborn.storage.hdfs.dir | &lt;undefined&gt; | HDFS base directory for Celeborn to store shuffle data. | 0.2.0 |
| celeborn.storage.hdfs.kerberos.enabled | false | Whether to enable kerberos authentication for HDFS storage connection. | 0.3.2 |
| celeborn.storage.hdfs.kerberos.keytab | &lt;undefined&gt; | Kerberos keytab file path for HDFS storage connection. | 0.3.2 |
| celeborn.storage.hdfs.kerberos.principal | &lt;undefined&gt; | Kerberos principal for HDFS storage connection. | 0.3.2 |
<!--end-include-->
1 change: 0 additions & 1 deletion docs/configuration/worker.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ license: |
| celeborn.shuffle.chunk.size | 8m | Max chunk size of reducer's merged shuffle data. For example, if a reducer's shuffle data is 128M and the data will need 16 fetch chunk requests to fetch. | 0.2.0 |
| celeborn.storage.availableTypes | HDD | Enabled storages. Available options: MEMORY,HDD,SSD,HDFS. Note: HDD and SSD would be treated as identical. | 0.3.0 |
| celeborn.storage.hdfs.dir | &lt;undefined&gt; | HDFS base directory for Celeborn to store shuffle data. | 0.2.0 |
| celeborn.storage.hdfs.kerberos.enabled | false | Whether to enable kerberos authentication for HDFS storage connection. | 0.3.2 |
| celeborn.storage.hdfs.kerberos.keytab | &lt;undefined&gt; | Kerberos keytab file path for HDFS storage connection. | 0.3.2 |
| celeborn.storage.hdfs.kerberos.principal | &lt;undefined&gt; | Kerberos principal for HDFS storage connection. | 0.3.2 |
| celeborn.worker.activeConnection.max | &lt;undefined&gt; | If the number of active connections on a worker exceeds this configuration value, the worker will be marked as high-load in the heartbeat report, and the master will not include that node in the response of RequestSlots. | 0.3.1 |
Expand Down
4 changes: 4 additions & 0 deletions docs/deploy.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ celeborn.rpc.askTimeout 240s
celeborn.worker.flusher.hdfs.buffer.size 4m
celeborn.storage.hdfs.dir hdfs://<namenode>/celeborn
celeborn.worker.replicate.fastFail.duration 240s
# If your HDFS is enabled with Kerberos,
# you will need to set the following configs or use kinit to get a valid TGT.
FMX marked this conversation as resolved.
Show resolved Hide resolved
celeborn.storage.hdfs.kerberos.principal user@REALM
celeborn.storage.hdfs.kerberos.keytab /path/to/user.keytab

# If your hosts have disk raid or use lvm, set celeborn.worker.monitor.disk.enabled to false
celeborn.worker.monitor.disk.enabled false
Expand Down
2 changes: 0 additions & 2 deletions sbin/load-celeborn-env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
# limitations under the License.
#

unset HADOOP_CONF_DIR

# included in all the celeborn scripts with source command
# should not be executable directly
# also should not be passed any arguments, since we need original $*
Expand Down
Loading