Skip to content

Commit

Permalink
[HUDI-8855] Add bucket properties for spark bucket index query pruning
Browse files Browse the repository at this point in the history
  • Loading branch information
xicm committed Jan 10, 2025
1 parent dc001ea commit 88aa176
Showing 1 changed file with 8 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ import org.apache.hudi.util.JFunction
import org.apache.hadoop.fs.{FileStatus, Path}
import org.apache.spark.internal.Logging
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
import org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable
import org.apache.spark.sql.catalyst.expressions.{Expression, Literal}
import org.apache.spark.sql.execution.datasources.{FileIndex, FileStatusCache, NoopCache, PartitionDirectory}
import org.apache.spark.sql.hudi.HoodieSqlCommonUtils
Expand Down Expand Up @@ -534,6 +535,12 @@ object HoodieFileIndex extends Logging {
properties.setProperty(RECORDKEY_FIELD.key, tableConfig.getRecordKeyFields.orElse(Array.empty).mkString(","))
properties.setProperty(PRECOMBINE_FIELD.key, Option(tableConfig.getPreCombineField).getOrElse(""))
properties.setProperty(PARTITIONPATH_FIELD.key, HoodieTableConfig.getPartitionFieldPropForKeyGenerator(tableConfig).orElse(""))

// For the simple bucket index, we need to set INDEX_TYPE, BUCKET_INDEX_HASH_FIELD, and BUCKET_INDEX_NUM_BUCKETS.
val dataBase = tableConfig.getDatabaseName
val tableIdentifier = if (dataBase != null && dataBase.nonEmpty) TableIdentifier(tableConfig.getTableName, Some(dataBase)) else TableIdentifier(tableConfig.getTableName)
val table = HoodieCatalogTable(spark, tableIdentifier)
table.catalogProperties.foreach(kv => properties.setProperty(kv._1, kv._2))
}

properties
Expand Down

0 comments on commit 88aa176

Please sign in to comment.