Back to Ray

Using LightGBM with Tune

doc/source/tune/examples/lightgbm_example.ipynb

1.13.13.1 KB
Original Source

(tune-lightgbm-example)=

Using LightGBM with Tune

<a id="try-anyscale-quickstart-ray-tune-lightgbm_example" href="https://console.anyscale.com/register/ha?render_flow=ray&utm_source=ray_docs&utm_medium=docs&utm_campaign=ray-tune-lightgbm_example"> </a> <br/>
```{image} /images/lightgbm_logo.png
:align: center
:alt: LightGBM Logo
:height: 120px
:target: https://lightgbm.readthedocs.io
```
```{contents}
:backlinks: none
:local: true
```

This tutorial shows how to use Ray Tune to optimize hyperparameters for a LightGBM model. We'll use the breast cancer classification dataset from scikit-learn to demonstrate how to:

  1. Set up a LightGBM training function with Ray Tune
  2. Configure hyperparameter search spaces
  3. Use the ASHA scheduler for efficient hyperparameter tuning
  4. Report and checkpoint training progress

Installation

First, let's install the required dependencies:

```bash
pip install "ray[tune]" lightgbm scikit-learn numpy
```
python
import lightgbm as lgb
import numpy as np
import sklearn.datasets
import sklearn.metrics
from sklearn.model_selection import train_test_split

from ray import tune
from ray.tune.schedulers import ASHAScheduler
from ray.tune.integration.lightgbm import TuneReportCheckpointCallback


def train_breast_cancer(config):
    """Train a LightGBM binary classifier on the breast-cancer dataset.

    Per boosting round, the Tune callback streams ``binary_error`` and
    ``binary_logloss`` (plus a checkpoint) back to Tune; after training,
    the final hold-out accuracy is reported once.

    Args:
        config: LightGBM training parameters, as sampled by Ray Tune.
    """
    features, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
    x_train, x_valid, y_train, y_valid = train_test_split(
        features, labels, test_size=0.25
    )
    dtrain = lgb.Dataset(x_train, label=y_train)
    dvalid = lgb.Dataset(x_valid, label=y_valid)

    # The callback maps LightGBM's "eval-*" metric names onto the keys
    # that Tune records for scheduling and analysis.
    reporter = TuneReportCheckpointCallback(
        {
            "binary_error": "eval-binary_error",
            "binary_logloss": "eval-binary_logloss",
        }
    )
    booster = lgb.train(
        config,
        dtrain,
        valid_sets=[dvalid],
        valid_names=["eval"],
        callbacks=[reporter],
    )

    # Round predicted probabilities to hard 0/1 labels for accuracy.
    predicted = np.rint(booster.predict(x_valid))
    final_accuracy = sklearn.metrics.accuracy_score(y_valid, predicted)
    tune.report({"mean_accuracy": final_accuracy, "done": True})


if __name__ == "__main__":
    # Search space: fixed LightGBM objective/metric settings plus three
    # tunable knobs (booster type, tree complexity, learning rate).
    search_space = {
        "objective": "binary",
        "metric": ["binary_error", "binary_logloss"],
        "verbose": -1,
        "boosting_type": tune.grid_search(["gbdt", "dart"]),
        "num_leaves": tune.randint(10, 1000),
        "learning_rate": tune.loguniform(1e-8, 1e-1),
    }

    # ASHA stops under-performing trials early; we minimize the
    # per-round validation error reported by the training callback.
    scheduler = ASHAScheduler()
    tuner = tune.Tuner(
        train_breast_cancer,
        tune_config=tune.TuneConfig(
            metric="binary_error",
            mode="min",
            scheduler=scheduler,
            num_samples=2,
        ),
        param_space=search_space,
    )
    results = tuner.fit()

    best = results.get_best_result()
    print(f"Best hyperparameters found were: {best.config}")

This should give an output like:

```text
Best hyperparameters found were: {'objective': 'binary', 'metric': ['binary_error', 'binary_logloss'], 'verbose': -1, 'boosting_type': 'gbdt', 'num_leaves': 622, 'learning_rate': 0.003721286118355498}
```