pgml-cms/docs/open-source/pgml/guides/supervised-learning/regression.md
We currently support regression algorithms from scikit-learn, XGBoost, LightGBM and CatBoost.
This example trains models on the scikit-learn diabetes dataset, using multiple input features to predict a single output variable.
-- Step 1: load the 'diabetes' example dataset (queried below as pgml.diabetes).
SELECT pgml.load_dataset('diabetes');

-- Step 2: preview ten rows of the loaded table.
SELECT *
FROM pgml.diabetes
LIMIT 10;

-- Step 3: train a first model for the 'Diabetes Progression' project.
-- Positional arguments, in order: project name, task, source relation, label column.
SELECT *
FROM pgml.train(
    'Diabetes Progression',
    'regression',
    'pgml.diabetes',
    'target'
);

-- Step 4: put the stored target next to the model's prediction for the same row.
-- The ARRAY passes the ten feature columns of pgml.diabetes to pgml.predict.
SELECT
    target,
    pgml.predict(
        'Diabetes Progression',
        ARRAY[age, sex, bmi, bp, s1, s2, s3, s4, s5, s6]
    ) AS prediction
FROM pgml.diabetes
LIMIT 10;
| Algorithm | Reference |
|---|---|
xgboost | XGBRegressor |
xgboost_random_forest | XGBRFRegressor |
lightgbm | LGBMRegressor |
catboost | CatBoostRegressor |
-- Retrain the 'Diabetes Progression' project once per XGBoost / LightGBM / CatBoost
-- algorithm. Only the project name is passed positionally; the task, relation and
-- label column given in the first pgml.train call of this guide are not repeated.
-- n_estimators is kept small here (presumably so the examples finish quickly).
SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'xgboost',
    hyperparams => '{"n_estimators": 10}'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'xgboost_random_forest',
    hyperparams => '{"n_estimators": 10}'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'lightgbm',
    hyperparams => '{"n_estimators": 1}'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'catboost',
    hyperparams => '{"n_estimators": 10}'
);
| Algorithm | Reference |
|---|---|
ada_boost | AdaBoostRegressor |
bagging | BaggingRegressor |
extra_trees | ExtraTreesRegressor |
gradient_boosting_trees | GradientBoostingRegressor |
random_forest | RandomForestRegressor |
hist_gradient_boosting | HistGradientBoostingRegressor |
-- Retrain the 'Diabetes Progression' project once per ensemble algorithm.
-- Each call selects the algorithm by name and passes its hyperparameters as JSON;
-- hist_gradient_boosting is given "max_iter" where the others get "n_estimators".
SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'ada_boost',
    hyperparams => '{"n_estimators": 5}'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'bagging',
    hyperparams => '{"n_estimators": 5}'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'extra_trees',
    hyperparams => '{"n_estimators": 5}'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'gradient_boosting_trees',
    hyperparams => '{"n_estimators": 5}'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'random_forest',
    hyperparams => '{"n_estimators": 5}'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'hist_gradient_boosting',
    hyperparams => '{"max_iter": 10}'
);
| Algorithm | Reference |
|---|---|
svm | SVR |
nu_svm | NuSVR |
linear_svm | LinearSVR |
-- Retrain the 'Diabetes Progression' project with each support-vector algorithm.
-- Every call passes an explicit "max_iter" hyperparameter.
SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'svm',
    hyperparams => '{"max_iter": 100}'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'nu_svm',
    hyperparams => '{"max_iter": 10}'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'linear_svm',
    hyperparams => '{"max_iter": 100}'
);
| Algorithm | Reference |
|---|---|
linear | LinearRegression |
ridge | Ridge |
lasso | Lasso |
elastic_net | ElasticNet |
least_angle | Lars |
lasso_least_angle | LassoLars |
orthogonal_matching_pursuit | OrthogonalMatchingPursuit |
bayesian_ridge | BayesianRidge |
automatic_relevance_determination | ARDRegression |
stochastic_gradient_descent | SGDRegressor |
passive_aggressive | PassiveAggressiveRegressor |
ransac | RANSACRegressor |
theil_sen | TheilSenRegressor |
huber | HuberRegressor |
quantile | QuantileRegressor |
-- Retrain the 'Diabetes Progression' project once per linear-family algorithm.
-- All calls pass only the algorithm name, except theil_sen, which also sets
-- "max_iter" and "max_subpopulation".
SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'linear'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'ridge'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'lasso'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'elastic_net'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'least_angle'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'lasso_least_angle'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'orthogonal_matching_pursuit'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'bayesian_ridge'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'automatic_relevance_determination'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'stochastic_gradient_descent'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'passive_aggressive'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'ransac'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'theil_sen',
    hyperparams => '{"max_iter": 10, "max_subpopulation": 100}'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'huber'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'quantile'
);
| Algorithm | Reference |
|---|---|
kernel_ridge | KernelRidge |
gaussian_process | GaussianProcessRegressor |
-- Retrain the 'Diabetes Progression' project with the kernel ridge and
-- Gaussian process algorithms; no hyperparameters are passed in either call.
SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'kernel_ridge'
);

SELECT *
FROM pgml.train(
    'Diabetes Progression',
    algorithm => 'gaussian_process'
);