Commit 952e32fa authored by Gavin Lee
Browse files

update script and requirements

parent 40c0b977
Pipeline #310400 passed with stage
in 7 minutes and 55 seconds
{"http://www.w3.org/ns/mls#implements": {"http://www.w3.org/2000/01/rdf-schema#label": "sklearn.ensemble._forest.RandomForestClassifier", "@id": "sklearn.ensemble._forest.RandomForestClassifier", "@type": ["http://www.w3.org/ns/mls#Algorithm"]}, "http://purl.org/dc/terms/title": null, "http://www.w3.org/ns/mls#executes": {"http://www.w3.org/ns/mls#implements": {"http://www.w3.org/2000/01/rdf-schema#label": "sklearn.ensemble._forest.RandomForestClassifier", "@id": "sklearn.ensemble._forest.RandomForestClassifier", "@type": ["http://www.w3.org/ns/mls#Algorithm"]}, "http://purl.org/dc/terms/title": null, "http://purl.org/dc/terms/hasVersion": "1.0.2", "@id": "http://www.w3.org/ns/mls#Implementation.179843134", "http://www.w3.org/ns/mls#hasHyperParameter": [{"http://www.w3.org/2000/01/rdf-schema#label": "bootstrap", "@id": "http://www.w3.org/ns/mls#HyperParameter.bootstrap.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, {"http://www.w3.org/2000/01/rdf-schema#label": "ccp_alpha", "@id": "http://www.w3.org/ns/mls#HyperParameter.ccp_alpha.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, {"http://www.w3.org/2000/01/rdf-schema#label": "class_weight", "@id": "http://www.w3.org/ns/mls#HyperParameter.class_weight.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, {"http://www.w3.org/2000/01/rdf-schema#label": "criterion", "@id": "http://www.w3.org/ns/mls#HyperParameter.criterion.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, {"http://www.w3.org/2000/01/rdf-schema#label": "max_depth", "@id": "http://www.w3.org/ns/mls#HyperParameter.max_depth.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, {"http://www.w3.org/2000/01/rdf-schema#label": "max_features", "@id": "http://www.w3.org/ns/mls#HyperParameter.max_features.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, {"http://www.w3.org/2000/01/rdf-schema#label": "max_leaf_nodes", "@id": 
"http://www.w3.org/ns/mls#HyperParameter.max_leaf_nodes.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, {"http://www.w3.org/2000/01/rdf-schema#label": "max_samples", "@id": "http://www.w3.org/ns/mls#HyperParameter.max_samples.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, {"http://www.w3.org/2000/01/rdf-schema#label": "min_impurity_decrease", "@id": "http://www.w3.org/ns/mls#HyperParameter.min_impurity_decrease.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, {"http://www.w3.org/2000/01/rdf-schema#label": "min_samples_leaf", "@id": "http://www.w3.org/ns/mls#HyperParameter.min_samples_leaf.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, {"http://www.w3.org/2000/01/rdf-schema#label": "min_samples_split", "@id": "http://www.w3.org/ns/mls#HyperParameter.min_samples_split.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, {"http://www.w3.org/2000/01/rdf-schema#label": "min_weight_fraction_leaf", "@id": "http://www.w3.org/ns/mls#HyperParameter.min_weight_fraction_leaf.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, {"http://www.w3.org/2000/01/rdf-schema#label": "n_estimators", "@id": "http://www.w3.org/ns/mls#HyperParameter.n_estimators.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, {"http://www.w3.org/2000/01/rdf-schema#label": "n_jobs", "@id": "http://www.w3.org/ns/mls#HyperParameter.n_jobs.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, {"http://www.w3.org/2000/01/rdf-schema#label": "oob_score", "@id": "http://www.w3.org/ns/mls#HyperParameter.oob_score.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, {"http://www.w3.org/2000/01/rdf-schema#label": "random_state", "@id": "http://www.w3.org/ns/mls#HyperParameter.random_state.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, {"http://www.w3.org/2000/01/rdf-schema#label": "verbose", "@id": 
"http://www.w3.org/ns/mls#HyperParameter.verbose.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, {"http://www.w3.org/2000/01/rdf-schema#label": "warm_start", "@id": "http://www.w3.org/ns/mls#HyperParameter.warm_start.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}], "@type": ["http://www.w3.org/ns/mls#Implementation"]}, "http://purl.org/dc/terms/hasVersion": null, "@id": "8781733584740", "http://www.w3.org/ns/mls#hasOutput": [{"http://www.w3.org/ns/mls#hasValue": 1.0, "@id": "http://www.w3.org/ns/mls#ModelEvaluation.179849348", "http://www.w3.org/ns/mls#specifiedBy": {"@id": "http://www.w3.org/ns/mls#accuracy", "@type": ["http://www.w3.org/ns/mls#EvaluationMeasure"]}, "@type": ["http://www.w3.org/ns/mls#ModelEvaluation"]}], "http://www.w3.org/ns/mls#hasInput": [{"http://www.w3.org/ns/mls#hasValue": {"@type": "xsd:boolean", "@value": true}, "@id": "http://www.w3.org/ns/mls#HyperParameterSetting.bootstrap.8781733584740", "http://www.w3.org/ns/mls#specifiedBy": {"@id": "http://www.w3.org/ns/mls#HyperParameter.bootstrap.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, "@type": ["http://www.w3.org/ns/mls#HyperParameterSetting"]}, {"http://www.w3.org/ns/mls#hasValue": {"@type": "xsd:float", "@value": 0.0}, "@id": "http://www.w3.org/ns/mls#HyperParameterSetting.ccp_alpha.8781733584740", "http://www.w3.org/ns/mls#specifiedBy": {"@id": "http://www.w3.org/ns/mls#HyperParameter.ccp_alpha.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, "@type": ["http://www.w3.org/ns/mls#HyperParameterSetting"]}, {"http://www.w3.org/ns/mls#hasValue": {"@type": "xsd:string", "@value": "gini"}, "@id": "http://www.w3.org/ns/mls#HyperParameterSetting.criterion.8781733584740", "http://www.w3.org/ns/mls#specifiedBy": {"@id": "http://www.w3.org/ns/mls#HyperParameter.criterion.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, "@type": ["http://www.w3.org/ns/mls#HyperParameterSetting"]}, 
{"http://www.w3.org/ns/mls#hasValue": {"@type": "xsd:string", "@value": "auto"}, "@id": "http://www.w3.org/ns/mls#HyperParameterSetting.max_features.8781733584740", "http://www.w3.org/ns/mls#specifiedBy": {"@id": "http://www.w3.org/ns/mls#HyperParameter.max_features.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, "@type": ["http://www.w3.org/ns/mls#HyperParameterSetting"]}, {"http://www.w3.org/ns/mls#hasValue": {"@type": "xsd:float", "@value": 0.0}, "@id": "http://www.w3.org/ns/mls#HyperParameterSetting.min_impurity_decrease.8781733584740", "http://www.w3.org/ns/mls#specifiedBy": {"@id": "http://www.w3.org/ns/mls#HyperParameter.min_impurity_decrease.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, "@type": ["http://www.w3.org/ns/mls#HyperParameterSetting"]}, {"http://www.w3.org/ns/mls#hasValue": {"@type": "xsd:int", "@value": 1}, "@id": "http://www.w3.org/ns/mls#HyperParameterSetting.min_samples_leaf.8781733584740", "http://www.w3.org/ns/mls#specifiedBy": {"@id": "http://www.w3.org/ns/mls#HyperParameter.min_samples_leaf.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, "@type": ["http://www.w3.org/ns/mls#HyperParameterSetting"]}, {"http://www.w3.org/ns/mls#hasValue": {"@type": "xsd:int", "@value": 2}, "@id": "http://www.w3.org/ns/mls#HyperParameterSetting.min_samples_split.8781733584740", "http://www.w3.org/ns/mls#specifiedBy": {"@id": "http://www.w3.org/ns/mls#HyperParameter.min_samples_split.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, "@type": ["http://www.w3.org/ns/mls#HyperParameterSetting"]}, {"http://www.w3.org/ns/mls#hasValue": {"@type": "xsd:float", "@value": 0.0}, "@id": "http://www.w3.org/ns/mls#HyperParameterSetting.min_weight_fraction_leaf.8781733584740", "http://www.w3.org/ns/mls#specifiedBy": {"@id": "http://www.w3.org/ns/mls#HyperParameter.min_weight_fraction_leaf.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, "@type": 
["http://www.w3.org/ns/mls#HyperParameterSetting"]}, {"http://www.w3.org/ns/mls#hasValue": {"@type": "xsd:int", "@value": 100}, "@id": "http://www.w3.org/ns/mls#HyperParameterSetting.n_estimators.8781733584740", "http://www.w3.org/ns/mls#specifiedBy": {"@id": "http://www.w3.org/ns/mls#HyperParameter.n_estimators.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, "@type": ["http://www.w3.org/ns/mls#HyperParameterSetting"]}, {"http://www.w3.org/ns/mls#hasValue": {"@type": "xsd:boolean", "@value": false}, "@id": "http://www.w3.org/ns/mls#HyperParameterSetting.oob_score.8781733584740", "http://www.w3.org/ns/mls#specifiedBy": {"@id": "http://www.w3.org/ns/mls#HyperParameter.oob_score.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, "@type": ["http://www.w3.org/ns/mls#HyperParameterSetting"]}, {"http://www.w3.org/ns/mls#hasValue": {"@type": "xsd:int", "@value": 0}, "@id": "http://www.w3.org/ns/mls#HyperParameterSetting.verbose.8781733584740", "http://www.w3.org/ns/mls#specifiedBy": {"@id": "http://www.w3.org/ns/mls#HyperParameter.verbose.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, "@type": ["http://www.w3.org/ns/mls#HyperParameterSetting"]}, {"http://www.w3.org/ns/mls#hasValue": {"@type": "xsd:boolean", "@value": false}, "@id": "http://www.w3.org/ns/mls#HyperParameterSetting.warm_start.8781733584740", "http://www.w3.org/ns/mls#specifiedBy": {"@id": "http://www.w3.org/ns/mls#HyperParameter.warm_start.8781733584740", "@type": ["http://www.w3.org/ns/mls#HyperParameter"]}, "@type": ["http://www.w3.org/ns/mls#HyperParameterSetting"]}], "@type": ["http://www.w3.org/ns/mls#Run"]}
\ No newline at end of file
......@@ -2,4 +2,6 @@ renku-mls
mlschema-converters
sklearn
numpy
matplotlib
\ No newline at end of file
matplotlib
pandas
xgboost
\ No newline at end of file
####
#### Credits: Viktor Gal
####
import sys
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from mlsconverters import export
import pickle
def build_model(model_name, registry):
    """Instantiate the estimator registered under ``model_name``.

    The model name comes straight from the command line, so it is looked
    up in an explicit allow-list instead of being passed to ``eval``,
    which would execute arbitrary expressions supplied by the caller.

    Args:
        model_name: Key identifying the requested model.
        registry: Mapping of allowed names to zero-argument factories.

    Returns:
        The object produced by calling the selected factory with no
        arguments.

    Raises:
        ValueError: If ``model_name`` is not a key of ``registry``.
    """
    try:
        factory = registry[model_name]
    except KeyError:
        known = ", ".join(sorted(registry))
        raise ValueError(
            "Unknown model " + repr(model_name) + "; choose one of: " + known
        ) from None
    return factory()


def main(argv):
    """Train the chosen classifier on a CSV file, export and pickle it.

    Args:
        argv: Command-line vector laid out as
            ``[prog, csv_path, label_column, model_name, output_path]``.
            Anything after the fifth entry is ignored.

    Raises:
        SystemExit: If fewer than four arguments follow the program name.
        ValueError: If ``model_name`` is not one of the supported models.
    """
    if len(argv) < 5:
        prog = argv[0] if argv else "script"
        raise SystemExit(
            "usage: " + prog
            + " <csv_path> <label_column> <model_name> <output_path>"
        )
    csv_path, label_column, model_name, output_path = argv[1:5]

    # input processing
    cat = LabelEncoder()
    df = pd.read_csv(csv_path)
    # every column except the label column is used as a feature
    X = df.loc[:, df.columns != label_column].values
    y = cat.fit_transform(df[label_column])
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42
    )

    # model creation
    print("You chose the " + model_name + " model.")
    # built here so the classes are only resolved when the script runs
    supported_models = {
        "RandomForestClassifier": RandomForestClassifier,
        "LinearSVC": LinearSVC,
        "XGBClassifier": XGBClassifier,
    }
    model = build_model(model_name, supported_models)
    model.fit(X_train, y_train)

    # model eval
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print("Accuracy: ", acc)
    export(model, force=True, evaluation_measure=(accuracy_score, acc))

    # persist the fitted model to the path given as the fourth argument
    with open(output_path, "wb") as f:
        pickle.dump(model, f)


if __name__ == "__main__":
    main(sys.argv)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment