Commit 5db1ca57 authored by Gavin Lee's avatar Gavin Lee
Browse files

Auto-saving for lee.gavin.k@gmail.com on branch master from commit 5d3fe8b2

parent 333a9b29
Pipeline #310438 passed with stage
in 1 minute and 31 seconds
%% Cell type:code id:b8fa1017-812e-4a6b-9cf5-d69be1a718cb tags:
``` python
import numpy as np
```
%% Cell type:markdown id:e1a3af63-e023-4d31-b550-aede5d359569 tags:
# FROM
https://scikit-learn.org/stable/auto_examples/classification/plot_digits_classification.html#sphx-glr-auto-examples-classification-plot-digits-classification-py
%% Cell type:code id:1bc291cd-0192-4611-b77e-7e941238e7fc tags:
``` python
# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>
# License: BSD 3 clause
# Standard scientific Python imports
import matplotlib.pyplot as plt
# Import datasets, classifiers and performance metrics
from sklearn import datasets, svm, metrics
from sklearn.model_selection import train_test_split
```
%% Cell type:code id:c521b6ba-f363-4444-a929-915b3962ec9c tags:
``` python
# Load the digits dataset and preview the first four images, each titled with
# its target label.
digits = datasets.load_digits()

_, axes = plt.subplots(nrows=1, ncols=4, figsize=(10, 3))
# zip() stops at the shortest iterable, so only one image per axis is drawn.
for ax, image, label in zip(axes, digits.images, digits.target):
    ax.set_axis_off()
    ax.imshow(image, cmap=plt.cm.gray_r, interpolation="nearest")
    ax.set_title(f"Training: {label}")
```
%%%% Output: display_data
![]()
%% Cell type:code id:95670bd9-85b5-4f46-a8bb-68b9d8a7f210 tags:
``` python
# Turn every image into a single flat feature row (one row per sample).
n_samples = len(digits.images)
data = digits.images.reshape(n_samples, -1)

# 50% train / 50% test split, taken without shuffling the samples.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    digits.target,
    test_size=0.5,
    shuffle=False,
)

# Support vector classifier, fitted on the training subset only.
clf = svm.SVC(gamma=0.001)
clf.fit(X_train, y_train)

# Predicted digit for every sample in the test subset.
predicted = clf.predict(X_test)
```
%% Cell type:code id:b7e1ce4d-5b4b-43ba-97e4-f2c9e61339b4 tags:
``` python
# Show the first four test samples, each titled with the classifier's
# prediction for it.
_, axes = plt.subplots(nrows=1, ncols=4, figsize=(10, 3))
for ax, image, prediction in zip(axes, X_test, predicted):
    ax.set_axis_off()
    # X_test rows are flattened vectors; restore the 8x8 grid for display.
    image = image.reshape(8, 8)
    ax.imshow(image, cmap=plt.cm.gray_r, interpolation="nearest")
    ax.set_title(f"Prediction: {prediction}")
```
%%%% Output: display_data
![]()
%% Cell type:code id:13fe2601-b30e-4115-b783-7aef14622d6b tags:
``` python
from sklearn.metrics import accuracy_score
```
%% Cell type:code id:5170f2c1-d181-4b17-9d9f-cbbf25edfa3d tags:
``` python
from mlsconverters import export
```
%% Cell type:code id:21b21aec-5b1c-4b5c-8a2f-13ad9ff62ed0 tags:
``` python
## MLS to schema
# Score the test-set predictions, then pass the fitted classifier together
# with the (metric function, metric value) pair to mlsconverters.
acc = accuracy_score(y_true=y_test, y_pred=predicted)
export(
    clf,
    evaluation_measure=(accuracy_score, acc),
)
```
%% Cell type:code id:196c78ba-3f86-429d-914e-19be240023c6 tags:
``` python
!cd ../; renku mls leaderboard
```
%%%% Output: stream
+--------+-------+--------+----------+
| Run ID | Model | Inputs | accuracy |
+--------+-------+--------+----------+
+--------+-------+--------+----------+
%% Cell type:code id:a61d638b-6795-4a26-b376-69a7c868b0fb tags:
``` python
!renku --version
```
%%%% Output: stream
1.0.2
%% Cell type:code id:4489f54d-4297-4c21-99cf-9f794272afb5 tags:
``` python
!cd ../; renku mls params
```
%%%% Output: stream
+--------+-------+------------------+
| Run ID | Model | Hyper-Parameters |
+--------+-------+------------------+
+--------+-------+------------------+
%% Cell type:code id:512ef6ad-33ab-4170-95c3-766c148eceb6 tags:
``` python
import numpy as np
```
%% Cell type:markdown id:162a4d10-249d-492b-96b6-2b6a912a5c7f tags:
# FROM
https://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html#sphx-glr-auto-examples-linear-model-plot-ols-py
%% Cell type:code id:56b4295d-72e8-48ff-8337-800e958d2c96 tags:
``` python
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
# Load the diabetes dataset
diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)
```
%% Cell type:code id:51305061-20a6-4939-96c2-80b87e14113e tags:
``` python
# Keep a single feature (column 2) as a 2-D, one-column array.
diabetes_X = diabetes_X[:, np.newaxis, 2]

# The last 20 samples are held out for testing; everything before them is
# used for training. Features and targets are split at the same position.
diabetes_X_train, diabetes_X_test = diabetes_X[:-20], diabetes_X[-20:]
diabetes_y_train, diabetes_y_test = diabetes_y[:-20], diabetes_y[-20:]

# Fit an ordinary linear regression on the training portion.
regr = linear_model.LinearRegression()
regr.fit(diabetes_X_train, diabetes_y_train)

# Predictions for the held-out samples.
diabetes_y_pred = regr.predict(diabetes_X_test)
```
%% Cell type:code id:30ed4160-070f-4119-9394-dd4e84450764 tags:
``` python
from sklearn.metrics import mean_squared_error

# Mean squared error of the regression predictions on the held-out samples.
mse = mean_squared_error(
    y_true=diabetes_y_test,
    y_pred=diabetes_y_pred,
)
```
%% Cell type:code id:05afb8c6-71d7-4a84-be85-b3d4723e2329 tags:
``` python
## mlschema
# NOTE: this call fails with ValueError("unsupported evaluation measure").
# mlsconverters rejects mean_squared_error; the notebook lists accuracy_score,
# roc_auc_score and f1_score as the only supported metrics.
from mlsconverters import export
export(regr, evaluation_measure=(mean_squared_error, mse))
```
%%%% Output: error
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_513/4199068773.py in <module>
2 from mlsconverters import export
3
----> 4 export(regr, evaluation_measure=(mean_squared_error, mse))
/opt/conda/lib/python3.9/site-packages/mlsconverters/__init__.py in export(model, force, **kwargs)
27
28 def export(model, force=False, **kwargs):
---> 29 mls = _extract_mls(model, **kwargs)
30 io.log_renku_mls(mls, str(model.__hash__()), force)
/opt/conda/lib/python3.9/site-packages/mlsconverters/__init__.py in _extract_mls(model, **kwargs)
11 from . import sklearn
12
---> 13 return sklearn.to_mls(model, **kwargs)
14 elif model.__module__.startswith("xgboost"):
15 from . import xgboost
/opt/conda/lib/python3.9/site-packages/mlsconverters/sklearn.py in to_mls(sklearn_model, **kwargs)
110 if EVALUATION_MEASURE_KEY in kwargs:
111 eval_measure = kwargs[EVALUATION_MEASURE_KEY]
--> 112 output_values.append(evaluation_measure(eval_measure[0], eval_measure[1]))
113 model = Run(model_hash, implementation, input_values, output_values, algo)
114 return RunSchema().dumps(model)
/opt/conda/lib/python3.9/site-packages/mlsconverters/sklearn.py in evaluation_measure(func, value)
42 )
43 else:
---> 44 raise ValueError("unsupported evaluation measure")
45
46
ValueError: unsupported evaluation measure
%% Cell type:markdown id:8ecd75fc-a0da-402f-a4a0-81531511a4db tags:
# mlsconverters supports only the following metrics:
- accuracy_score (classification)
- roc_auc_score (classification)
- f1_score (classification)
%% Cell type:markdown id:cac88086-1bc3-4916-809f-85e7e94f8fe0 tags:
## Renku MLS Plug-in demo
%% Cell type:markdown id:2b928da5-636d-463c-a081-98586cf8c468 tags:
This plug-in allows you to compare across different `renku run` iterations in terms of pre-defined metrics. It supports the following frameworks:
- sklearn
- XGBoost
- keras
with the following metrics for classification:
- accuracy_score
- roc_auc_score
- f1_score.
%% Cell type:markdown id:a13b4e8a-660f-402a-8e53-72b68b198985 tags:
See `src/train.py` for the demonstration training file.
%% Cell type:code id:4b06791b-a3c2-45db-a788-69c8c39d0cf0 tags:
``` python
%%bash
cd ../ # Return to the main repository
renku run -- python src/train.py data/wine/wine.data label RandomForestClassifier models/RFC
```
%%%% Output: stream
You chose the RandomForestClassifier model.
Accuracy: 1.0
Info: Adding these files to Git LFS:
models/RFC
To disable this message in the future, run:
renku config set show_lfs_message False
%%%% Output: stream
/opt/conda/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
from pandas import MultiIndex, Int64Index
%% Cell type:code id:c6544915-fda6-4728-92a4-faa27acb2be0 tags:
``` python
%%bash
cd ../ # Return to the main repository
renku run -- python src/train.py data/wine/wine.data label LinearSVC models/SVC
```
%%%% Output: stream
You chose the LinearSVC model.
Accuracy: 0.8305084745762712
%%%% Output: stream
/opt/conda/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
from pandas import MultiIndex, Int64Index
/opt/conda/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
warnings.warn(
%% Cell type:code id:7d2b84bb-0c7d-4095-8af5-83ff05e50c4f tags:
``` python
%%bash
cd ../ # Return to the main repository
renku run -- python src/train.py data/wine/wine.data label XGBClassifier models/XGB
```
%%%% Output: stream
You chose the XGBClassifier model.
[02:52:22] WARNING: ../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'multi:softprob' was changed from 'merror' to 'mlogloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
Accuracy: 0.9830508474576272
Info: Adding these files to Git LFS:
models/XGB
To disable this message in the future, run:
renku config set show_lfs_message False
%%%% Output: stream
/opt/conda/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
from pandas import MultiIndex, Int64Index
/opt/conda/lib/python3.9/site-packages/xgboost/sklearn.py:1224: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].
warnings.warn(label_encoder_deprecation_msg, UserWarning)
%% Cell type:markdown id:9dab8308-6b91-4338-ab33-d44cb6e15459 tags:
## View the leaderboard
%% Cell type:code id:42914a63-4354-461f-8f1d-54a5c647d41d tags:
``` python
! cd ../; renku mls leaderboard
```
%%%% Output: stream
+----------------------------------+-------------------------------------------------+-----------------------------------------+--------------------+
| Run ID | Model | Inputs | accuracy |
+----------------------------------+-------------------------------------------------+-----------------------------------------+--------------------+
| 83dbed2912bd440e97681720ac2b588e | sklearn.ensemble._forest.RandomForestClassifier | ['data/wine/wine.data', 'src/train.py'] | 1.0 |
| 150a59441ca54dfcba3365db804fab99 | xgboost.sklearn.XGBClassifier | ['data/wine/wine.data', 'src/train.py'] | 0.9830508474576272 |
| 148fc15412a7430e9268849c8bb1df84 | sklearn.svm._classes.LinearSVC | ['data/wine/wine.data', 'src/train.py'] | 0.8305084745762712 |
+----------------------------------+-------------------------------------------------+-----------------------------------------+--------------------+
%% Cell type:markdown id:5e6e8e44-ce99-40fd-a11e-3225f28bb7ff tags:
## View the hyper-parameters in each of the models
%% Cell type:code id:879cf6b8-890a-43d3-8034-5ed3dfa2b71e tags:
``` python
! cd ../; renku mls params
```
%%%% Output: stream

| Run ID | Model | Hyper-Parameters |

| 148fc15412a7430e9268849c8bb1df84 | sklearn.svm._classes.LinearSVC | {"C": "1.0", "dual": "true", "fit_intercept": "true", "intercept_scaling": "1", "loss": "squared_hinge", "max_iter": "1000", "multi_class": "ovr", "penalty": "l2", "tol": "0.0001", "verbose": "0"} |
| 150a59441ca54dfcba3365db804fab99 | xgboost.sklearn.XGBClassifier | {"base_score": "0.5", "booster": "gbtree", "colsample_bylevel": "1", "colsample_bynode": "1", "colsample_bytree": "1", "enable_categorical": "false", "gamma": "0", "gpu_id": "-1", "interaction_constraints": "", "learning_rate": "0.300000012", "max_delta_step": "0", "max_depth": "6", "min_child_weight": "1", "missing": "nan", "monotone_constraints": "()", "n_estimators": "100", "n_jobs": "8", "num_parallel_tree": "1", "objective": "multi:softprob", "predictor": "auto", "random_state": "0", "reg_alpha": "0", "reg_lambda": "1", "subsample": "1", "tree_method": "exact", "use_label_encoder": "true", "validate_parameters": "1"} |
| 83dbed2912bd440e97681720ac2b588e | sklearn.ensemble._forest.RandomForestClassifier | {"bootstrap": "true", "ccp_alpha": "0.0", "criterion": "gini", "max_features": "auto", "min_impurity_decrease": "0.0", "min_samples_leaf": "1", "min_samples_split": "2", "min_weight_fraction_leaf": "0.0", "n_estimators": "100", "oob_score": "false", "verbose": "0", "warm_start": "false"} |

......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment