Back to Ray

Using LightGBM with Tune

doc/source/tune/examples/lightgbm_example.ipynb

1.13.13.1 KB
Original Source

(tune-lightgbm-example)=

Using LightGBM with Tune

<a id="try-anyscale-quickstart-ray-tune-lightgbm_example" href="https://console.anyscale.com/register/ha?render_flow=ray&utm_source=ray_docs&utm_medium=docs&utm_campaign=ray-tune-lightgbm_example"> </a> <br/>
```{image} /images/lightgbm_logo.png
:align: center
:alt: LightGBM Logo
:height: 120px
:target: https://lightgbm.readthedocs.io
```
```{contents}
:backlinks: none
:local: true
```

This tutorial shows how to use Ray Tune to optimize hyperparameters for a LightGBM model. We'll use the breast cancer classification dataset from scikit-learn to demonstrate how to:

  1. Set up a LightGBM training function with Ray Tune
  2. Configure hyperparameter search spaces
  3. Use the ASHA scheduler for efficient hyperparameter tuning
  4. Report and checkpoint training progress

Installation

First, let's install the required dependencies:

```bash
pip install "ray[tune]" lightgbm scikit-learn numpy
```
python
import lightgbm as lgb
import numpy as np
import sklearn.datasets
import sklearn.metrics
from sklearn.model_selection import train_test_split

from ray import tune
from ray.tune.schedulers import ASHAScheduler
from ray.tune.integration.lightgbm import TuneReportCheckpointCallback


def train_breast_cancer(config):
    """Train a LightGBM binary classifier on the breast-cancer dataset.

    Per boosting round, the Tune callback streams ``binary_error`` and
    ``binary_logloss`` (plus a checkpoint) back to Tune; after training,
    the final hold-out accuracy is reported once.

    Args:
        config: LightGBM training parameters, as sampled by Ray Tune.
    """
    features, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
    x_train, x_valid, y_train, y_valid = train_test_split(
        features, labels, test_size=0.25
    )
    dtrain = lgb.Dataset(x_train, label=y_train)
    dvalid = lgb.Dataset(x_valid, label=y_valid)

    # The callback maps LightGBM's "eval-*" metric names onto the keys
    # that Tune records for scheduling and analysis.
    reporter = TuneReportCheckpointCallback(
        {
            "binary_error": "eval-binary_error",
            "binary_logloss": "eval-binary_logloss",
        }
    )
    booster = lgb.train(
        config,
        dtrain,
        valid_sets=[dvalid],
        valid_names=["eval"],
        callbacks=[reporter],
    )

    # Round predicted probabilities to hard 0/1 labels for accuracy.
    predicted = np.rint(booster.predict(x_valid))
    final_accuracy = sklearn.metrics.accuracy_score(y_valid, predicted)
    tune.report({"mean_accuracy": final_accuracy, "done": True})


if __name__ == "__main__":
    # Search space: fixed LightGBM objective/metric settings plus three
    # tunable knobs (booster type, tree complexity, learning rate).
    search_space = {
        "objective": "binary",
        "metric": ["binary_error", "binary_logloss"],
        "verbose": -1,
        "boosting_type": tune.grid_search(["gbdt", "dart"]),
        "num_leaves": tune.randint(10, 1000),
        "learning_rate": tune.loguniform(1e-8, 1e-1),
    }

    # ASHA stops under-performing trials early; we minimize the
    # per-round validation error reported by the training callback.
    scheduler = ASHAScheduler()
    tuner = tune.Tuner(
        train_breast_cancer,
        tune_config=tune.TuneConfig(
            metric="binary_error",
            mode="min",
            scheduler=scheduler,
            num_samples=2,
        ),
        param_space=search_space,
    )
    results = tuner.fit()

    best = results.get_best_result()
    print(f"Best hyperparameters found were: {best.config}")

This should give an output like:

```text
Best hyperparameters found were: {'objective': 'binary', 'metric': ['binary_error', 'binary_logloss'], 'verbose': -1, 'boosting_type': 'gbdt', 'num_leaves': 622, 'learning_rate': 0.003721286118355498}
```