Merge branch 'master' into vignette

dmlc · Jan 13, 2025 · da4325d · da4325d
2 parents 6b921f0 + c3aa7fe
commit da4325d
Show file tree

Hide file tree

Showing 53 changed files with 541 additions and 584 deletions.
diff --git a/.github/workflows/jvm_tests.yml b/.github/workflows/jvm_tests.yml
@@ -247,10 +247,10 @@ jobs:
       matrix:
         variant:
           - name: cpu
-            container_id: xgb-ci.jvm
+            image_repo: xgb-ci.jvm
             artifact_from: build-test-jvm-packages
           - name: gpu
-            container_id: xgb-ci.jvm_gpu_build
+            image_repo: xgb-ci.jvm_gpu_build
             artifact_from: build-jvm-gpu
         scala_version: ['2.12', '2.13']
     steps:
@@ -272,4 +272,4 @@ jobs:
       - name: Deploy JVM packages to S3
         run: |
           bash ops/pipeline/deploy-jvm-packages.sh ${{ matrix.variant.name }} \
-            ${{ matrix.variant.container_id }} ${{ matrix.scala_version }}
+            ${{ matrix.variant.image_repo }} ${{ matrix.scala_version }}
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -220,22 +220,22 @@ jobs:
       matrix:
         include:
           - description: single-gpu
-            container: xgb-ci.gpu
+            image_repo: xgb-ci.gpu
             suite: gpu
             runner: linux-amd64-gpu
             artifact_from: build-cuda
           - description: multiple-gpu
-            container: xgb-ci.gpu
+            image_repo: xgb-ci.gpu
             suite: mgpu
             runner: linux-amd64-mgpu
             artifact_from: build-cuda
           - description: cpu-amd64
-            container: xgb-ci.cpu
+            image_repo: xgb-ci.cpu
             suite: cpu
             runner: linux-amd64-cpu
             artifact_from: build-cuda
           - description: cpu-arm64
-            container: xgb-ci.aarch64
+            image_repo: xgb-ci.aarch64
             suite: cpu-arm64
             runner: linux-arm64-cpu
             artifact_from: build-cpu-arm64
@@ -257,4 +257,4 @@ jobs:
           mv -v wheelhouse/xgboost .
           chmod +x ./xgboost
       - name: Run Python tests, ${{ matrix.description }}
-        run: bash ops/pipeline/test-python-wheel.sh ${{ matrix.suite }} ${{ matrix.container }}
+        run: bash ops/pipeline/test-python-wheel.sh ${{ matrix.suite }} ${{ matrix.image_repo }}
diff --git a/R-package/R/utils.R b/R-package/R/utils.R
@@ -560,6 +560,29 @@ deprecated_train_params <- list(
   ),
   removed = character()
 )
+deprecated_xgboost_params <- list(
+  renamed = list(
+    'data' = 'x',
+    'label' = 'y',
+    'eta' = 'learning_rate',
+    'gamma' = 'min_split_loss',
+    'lambda' = 'reg_lambda',
+    'alpha' = 'reg_alpha',
+    'min.split.loss' = 'min_split_loss',
+    'reg.lambda' = 'reg_lambda',
+    'reg.alpha' = 'reg_alpha',
+    'watchlist' = 'evals'
+  ),
+  removed = c(
+    'params',
+    'save_period',
+    'save_name',
+    'xgb_model',
+    'callbacks',
+    'missing',
+    'maximize'
+  )
+)
 deprecated_dttree_params <- list(
   renamed = list('n_first_tree' = 'trees'),
   removed = c("feature_names", "text")

diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R
@@ -1066,6 +1066,7 @@ check.early.stopping.rounds <- function(early_stopping_rounds, eval_set) {
 #' - `"shotgun"`: Parallel coordinate descent algorithm based on shotgun algorithm. Uses 'hogwild' parallelism and therefore produces a nondeterministic solution on each run.
 #' - `"coord_descent"`: Ordinary coordinate descent algorithm. Also multithreaded but still produces a deterministic solution. When the `device` parameter is set to `"cuda"` or `"gpu"`, a GPU variant would be used.
 #' @inheritParams xgb.params
+#' @inheritParams xgb.train
 #' @return A model object, inheriting from both `xgboost` and `xgb.Booster`. Compared to the regular
 #'   `xgb.Booster` model class produced by [xgb.train()], this `xgboost` class will have an
 #'
@@ -1163,9 +1164,11 @@ xgboost <- function(
   tweedie_variance_power = NULL,
   huber_slope = NULL,
   quantile_alpha = NULL,
-  aft_loss_distribution = NULL
+  aft_loss_distribution = NULL,
+  ...
 ) {
 # nolint end
+  check.deprecation(deprecated_xgboost_params, match.call(), ...)
   params <- as.list(environment())
   params <- params[
     (names(params) %in% formalArgs(xgb.params))

diff --git a/R-package/configure b/R-package/configure
@@ -3338,7 +3338,7 @@ printf "%s\n" "${ac_pkg_openmp}" >&6; }
     OPENMP_LIB=''
     echo '*****************************************************************************************'
     echo '         OpenMP is unavailable on this Mac OSX system. Training speed may be suboptimal.'
-    echo '         To use all CPU cores for training jobs, you should install OpenMP by running\n'
+    echo '         To use all CPU cores for training jobs, you should install OpenMP by running'
     echo '             brew install libomp'
     echo '*****************************************************************************************'
   fi

diff --git a/R-package/configure.ac b/R-package/configure.ac
@@ -108,7 +108,7 @@ then
     OPENMP_LIB=''
     echo '*****************************************************************************************'
     echo '         OpenMP is unavailable on this Mac OSX system. Training speed may be suboptimal.'
-    echo '         To use all CPU cores for training jobs, you should install OpenMP by running\n'
+    echo '         To use all CPU cores for training jobs, you should install OpenMP by running'
     echo '             brew install libomp'
     echo '*****************************************************************************************'
   fi

diff --git a/R-package/man/xgboost.Rd b/R-package/man/xgboost.Rd
diff --git a/README.md b/README.md
@@ -49,8 +49,8 @@ Become a sponsor and get a logo here. See details at [Sponsoring the XGBoost Pro
 [[Become a sponsor](https://opencollective.com/xgboost#sponsor)]
 
 <a href="https://www.nvidia.com/en-us/" target="_blank"><img src="https://raw.githubusercontent.com/xgboost-ai/xgboost-ai.github.io/master/images/sponsors/nvidia.jpg" alt="NVIDIA" width="72" height="72"></a>
-<a href="https://www.intel.com/" target="_blank"><img src="https://images.opencollective.com/intel-corporation/2fa85c1/logo/256.png" width="72" height="72"></a>
 <a href="https://www.comet.com/site/?utm_source=xgboost&utm_medium=github&utm_content=readme" target="_blank"><img src="https://cdn.comet.ml/img/notebook_logo.png" height="72"></a>
+<a href="https://opencollective.com/guest-f5ebfc79" target="_blank"><img src="https://images.opencollective.com/guest-f5ebfc79/avatar/256.png" height="72"></a>
 
 ### Backers
 [[Become a backer](https://opencollective.com/xgboost#backer)]

diff --git a/doc/contrib/ci.rst b/doc/contrib/ci.rst
@@ -44,16 +44,17 @@ To make changes to the CI container, carry out the following steps:
 4. Submit a pull request to `dmlc/xgboost-devops <https://github.com/dmlc/xgboost-devops>`_ with
    the proposed changes to the Dockerfile. Make note of the pull request number. Example: ``#204``
 5. Clone `dmlc/xgboost <https://github.com/dmlc/xgboost>`_ and update all references to the
-   old container to point to the new container. More specifically, all Docker tags of format
-   ``492475357299.dkr.ecr.us-west-2.amazonaws.com/[container_id]:main`` should have the last
-   component replaced with ``PR-#``, where ``#`` is the pull request number. For the example above,
+   old container to point to the new container. More specifically, all container image URIs of the form
+   ``492475357299.dkr.ecr.us-west-2.amazonaws.com/[image_repo]:main`` should have their image tag
+   (last component) replaced with ``PR-#``, where ``#`` is the pull request number.
+   For the example above,
    we'd replace ``492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.gpu:main`` with
    ``492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.gpu:PR-204``.
 6. Now submit a pull request to `dmlc/xgboost <https://github.com/dmlc/xgboost>`_. The CI will
    run tests using the new container. Verify that all tests pass.
 7. Merge the pull request in ``dmlc/xgboost-devops``. Wait until the CI completes on the ``main`` branch.
-8. Go back to the the pull request for ``dmlc/xgboost`` and change the container references back
-   to ``:main``.
+8. Go back to the pull request for ``dmlc/xgboost`` and revise all the container references to use
+   the old tag ``:main``.
 9. Merge the pull request in ``dmlc/xgboost``.
 
 .. _build_run_docker_locally:
@@ -83,11 +84,12 @@ and invoke ``containers/docker_build.sh`` as follows:
   # For local testing, set them to "main"
   export GITHUB_SHA="main"
   export BRANCH_NAME="main"
-  bash containers/docker_build.sh CONTAINER_ID
+  bash containers/docker_build.sh IMAGE_REPO
 
-where ``CONTAINER_ID`` identifies for the container. The wrapper script will look up the YAML file
-``containers/ci_container.yml``. For example, when ``CONTAINER_ID`` is set to ``xgb-ci.gpu``,
-the script will use the corresponding entry from ``containers/ci_container.yml``:
+where ``IMAGE_REPO`` is the name of the container image. The wrapper script will look up the
+YAML file ``containers/ci_container.yml``. For example, when ``IMAGE_REPO`` is set to
+``xgb-ci.gpu``, the script will use the corresponding entry from
+``containers/ci_container.yml``:
 
 .. code-block:: yaml
 
@@ -113,10 +115,11 @@ the build arguments are:
 
 The build arguments provide inputs to the ``ARG`` instructions in the Dockerfile.
 
-When ``containers/docker_build.sh`` completes, you will have access to the container with tag
-``492475357299.dkr.ecr.us-west-2.amazonaws.com/[container_id]:main``. The prefix
-``492475357299.dkr.ecr.us-west-2.amazonaws.com/`` was added so that the container could
-later be uploaded to AWS Elastic Container Registry (ECR), a private Docker registry.
+When ``containers/docker_build.sh`` completes, you will have access to the container with the
+(fully qualified) URI ``492475357299.dkr.ecr.us-west-2.amazonaws.com/[image_repo]:main``.
+The prefix ``492475357299.dkr.ecr.us-west-2.amazonaws.com/`` was added so that
+the container could later be uploaded to AWS Elastic Container Registry (ECR),
+a private Docker registry.
 
 -----------------------------------------
 To run commands within a Docker container
@@ -126,7 +129,7 @@ Invoke ``ops/docker_run.py`` from the main ``dmlc/xgboost`` repo as follows:
 .. code-block:: bash
 
   python3 ops/docker_run.py \
-    --container-tag 492475357299.dkr.ecr.us-west-2.amazonaws.com/[container_id]:main \
+    --image-uri 492475357299.dkr.ecr.us-west-2.amazonaws.com/[image_repo]:[image_tag] \
     [--use-gpus] \
     -- "command to run inside the container"
 
@@ -138,12 +141,12 @@ For example:
 
   # Run without GPU
   python3 ops/docker_run.py \
-    --container-tag 492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.cpu:main \
+    --image-uri 492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.cpu:main \
     -- bash ops/pipeline/build-cpu-impl.sh cpu
 
   # Run with NVIDIA GPU
   python3 ops/docker_run.py \
-    --container-tag 492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.gpu:main \
+    --image-uri 492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.gpu:main \
     --use-gpus \
     -- bash ops/pipeline/test-python-wheel-impl.sh gpu
 
@@ -154,7 +157,7 @@ Optionally, you can specify ``--run-args`` to pass extra arguments to ``docker r
   # Allocate extra space in /dev/shm to enable NCCL
   # Also run the container with elevated privileges
   python3 ops/docker_run.py \
-    --container-tag 492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.gpu:main \
+    --image-uri 492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.gpu:main \
     --use-gpus \
     --run-args='--shm-size=4g --privileged' \
     -- bash ops/pipeline/test-python-wheel-impl.sh gpu
@@ -171,7 +174,7 @@ Examples: useful tasks for local development
 
     export DOCKER_REGISTRY=492475357299.dkr.ecr.us-west-2.amazonaws.com
     python3 ops/docker_run.py \
-      --container-tag ${DOCKER_REGISTRY}/xgb-ci.gpu_build_rockylinux8:main \
+      --image-uri ${DOCKER_REGISTRY}/xgb-ci.gpu_build_rockylinux8:main \
       -- ops/pipeline/build-cuda-impl.sh
 
 * Run Python tests
@@ -180,7 +183,7 @@ Examples: useful tasks for local development
 
     export DOCKER_REGISTRY=492475357299.dkr.ecr.us-west-2.amazonaws.com
     python3 ops/docker_run.py \
-      --container-tag ${DOCKER_REGISTRY}/xgb-ci.cpu:main \
+      --image-uri ${DOCKER_REGISTRY}/xgb-ci.cpu:main \
       -- ops/pipeline/test-python-wheel-impl.sh cpu
 
 * Run Python tests with GPU algorithm
@@ -189,7 +192,7 @@ Examples: useful tasks for local development
 
     export DOCKER_REGISTRY=492475357299.dkr.ecr.us-west-2.amazonaws.com
     python3 ops/docker_run.py \
-      --container-tag ${DOCKER_REGISTRY}/xgb-ci.gpu:main \
+      --image-uri ${DOCKER_REGISTRY}/xgb-ci.gpu:main \
       --use-gpus \
       -- ops/pipeline/test-python-wheel-impl.sh gpu
 
@@ -199,7 +202,7 @@ Examples: useful tasks for local development
 
     export DOCKER_REGISTRY=492475357299.dkr.ecr.us-west-2.amazonaws.com
     python3 ops/docker_run.py \
-      --container-tag ${DOCKER_REGISTRY}/xgb-ci.gpu:main \
+      --image-uri ${DOCKER_REGISTRY}/xgb-ci.gpu:main \
       --use-gpus \
       --run-args='--shm-size=4g' \
       -- ops/pipeline/test-python-wheel-impl.sh mgpu
@@ -212,7 +215,7 @@ Examples: useful tasks for local development
     export DOCKER_REGISTRY=492475357299.dkr.ecr.us-west-2.amazonaws.com
     export SCALA_VERSION=2.12  # Specify Scala version (2.12 or 2.13)
     python3 ops/docker_run.py \
-      --container-tag ${DOCKER_REGISTRY}/xgb-ci.jvm:main \
+      --image-uri ${DOCKER_REGISTRY}/xgb-ci.jvm:main \
       --run-args "-e SCALA_VERSION" \
       -- ops/pipeline/build-test-jvm-packages-impl.sh
 
@@ -224,7 +227,7 @@ Examples: useful tasks for local development
     export SCALA_VERSION=2.12  # Specify Scala version (2.12 or 2.13)
     export USE_CUDA=1
     python3 ops/docker_run.py \
-      --container-tag ${DOCKER_REGISTRY}/xgb-ci.jvm_gpu_build:main \
+      --image-uri ${DOCKER_REGISTRY}/xgb-ci.jvm_gpu_build:main \
       --use-gpus \
       --run-args "-e SCALA_VERSION -e USE_CUDA --shm-size=4g" \
       -- ops/pipeline/build-test-jvm-packages-impl.sh
@@ -456,7 +459,7 @@ For example, when you run ``bash containers/docker_build.sh xgb-ci.gpu``, the lo
 
   # docker_build.sh calls docker_build.py...
   python3 containers/docker_build.py --container-def gpu \
-    --container-tag 492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.gpu:main \
+    --image-uri 492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.gpu:main \
     --build-arg CUDA_VERSION_ARG=12.4.1 --build-arg NCCL_VERSION_ARG=2.23.4-1 \
     --build-arg RAPIDS_VERSION_ARG=24.10
 
@@ -480,14 +483,14 @@ Here is an example with ``docker_run.py``:
 
   # Run without GPU
   python3 ops/docker_run.py \
-    --container-tag 492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.cpu:main \
+    --image-uri 492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.cpu:main \
     -- bash ops/pipeline/build-cpu-impl.sh cpu
 
   # Run with NVIDIA GPU
   # Allocate extra space in /dev/shm to enable NCCL
   # Also run the container with elevated privileges
   python3 ops/docker_run.py \
-    --container-tag 492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.gpu:main \
+    --image-uri 492475357299.dkr.ecr.us-west-2.amazonaws.com/xgb-ci.gpu:main \
     --use-gpus \
     --run-args='--shm-size=4g --privileged' \
     -- bash ops/pipeline/test-python-wheel-impl.sh gpu

diff --git a/include/xgboost/gbm.h b/include/xgboost/gbm.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2014-2023 by XGBoost Contributors
+ * Copyright 2014-2025, XGBoost Contributors
  * \file gbm.h
  * \brief Interface of gradient booster,
  *  that learns through gradient statistics.
@@ -15,10 +15,8 @@
 #include <xgboost/model.h>
 
 #include <vector>
-#include <utility>
 #include <string>
 #include <functional>
-#include <unordered_map>
 #include <memory>
 
 namespace xgboost {
@@ -42,13 +40,13 @@ class GradientBooster : public Model, public Configurable {
  public:
   /*! \brief virtual destructor */
   ~GradientBooster() override = default;
-  /*!
-   * \brief Set the configuration of gradient boosting.
+  /**
+   * @brief Set the configuration of gradient boosting.
    *  User must call configure once before InitModel and Training.
    *
-   * \param cfg configurations on both training and model parameters.
+   * @param cfg configurations on both training and model parameters.
    */
-  virtual void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) = 0;
+  virtual void Configure(Args const& cfg) = 0;
   /*!
    * \brief load model from stream
    * \param fi input stream.
@@ -117,21 +115,6 @@ class GradientBooster : public Model, public Configurable {
                               bst_layer_t) const {
     LOG(FATAL) << "Inplace predict is not supported by the current booster.";
   }
-  /*!
-   * \brief online prediction function, predict score for one instance at a time
-   *  NOTE: use the batch prediction interface if possible, batch prediction is usually
-   *        more efficient than online prediction
-   *        This function is NOT threadsafe, make sure you only call from one thread
-   *
-   * \param inst the instance you want to predict
-   * \param out_preds output vector to hold the predictions
-   * \param layer_begin Beginning of boosted tree layer used for prediction.
-   * \param layer_end   End of booster layer. 0 means do not limit trees.
-   * \sa Predict
-   */
-  virtual void PredictInstance(const SparsePage::Inst& inst,
-                               std::vector<bst_float>* out_preds,
-                               unsigned layer_begin, unsigned layer_end) = 0;
   /*!
    * \brief predict the leaf index of each tree, the output will be nsample * ntree vector
    *        this is only valid in gbtree predictor