Creating an end-to-end pipeline

Let’s create an end-to-end machine learning pipeline.

Importing the required packages

import numpy as np

from pjml.data.communication.report import Report
from pjml.data.evaluation.metric import Metric
from pjml.data.flow.file import File
from pjml.operator.pipeline import Pipeline
from pjml.stream.expand.partition import Partition
from pjml.stream.reduce.reduce import Reduce
from pjml.stream.reduce.summ import Summ
from pjml.stream.transform.map import Map
from pjpy.modeling.supervised.classifier.svmc import SVMC
from pjpy.processing.feature.reductor.pca import PCA
from pjpy.processing.feature.scaler.minmax import MinMax

np.random.seed(0)

First, we create a machine learning expression.

# Combine the MinMax scaler, the PCA feature reductor and the SVMC classifier,
# in that order, into a single Pipeline object.
exp = Pipeline(MinMax(), PCA(), SVMC())

Let’s look at the sequence of operations and the hyperparameter values.

print(exp)

Out:

{
    "info": {
        "_id": "MinMax@pjpy.processing.feature.scaler.minmax",
        "config": {
            "feature_range": [
                0,
                1
            ]
        }
    },
    "enhance": true,
    "model": true
}
{
    "info": {
        "_id": "PCA@pjpy.processing.feature.reductor.pca",
        "config": {
            "n": 2
        }
    },
    "enhance": true,
    "model": true
}
{
    "info": {
        "_id": "SVMC@pjpy.modeling.supervised.classifier.svmc",
        "config": {
            "C": 1.0,
            "kernel": "rbf",
            "degree": 3,
            "gamma": "scale",
            "coef0": 0.0,
            "shrinking": true,
            "probability": false,
            "tol": 0.001,
            "cache_size": 200,
            "class_weight": null,
            "verbose": false,
            "max_iter": -1,
            "decision_function_shape": "ovr",
            "break_ties": false,
            "random_state": null,
            "seed": 0
        }
    },
    "enhance": true,
    "model": true
}

Having defined our machine learning expression, we will now create an end-to-end pipeline.

# Build the end-to-end pipeline by passing each step, in order, as an argument
# to Pipeline. Every step other than File and Map is created with its default
# configuration.
pipeline = Pipeline(
    File("../data/iris.arff"),
    Partition(),
    Map(exp, Metric()),  # `exp` is the MinMax/PCA/SVMC expression built earlier
    Summ(),
    Reduce(),
    Report(),
)

Alternatively, the same pipeline can be written using only Python operators:

# Same sequence of steps, chained with the `*` operator instead of being passed
# as arguments to Pipeline. This assignment rebinds `pipeline`, so it is this
# object that is used from here on. Unlike the argument form, Summ and Report
# receive explicit arguments here.
pipeline = (
    File("../data/iris.arff")
    * Partition()
    * Map(exp * Metric())
    * Summ(function="mean")
    * Reduce()
    * Report("Mean S: $S")
)

This pipeline represents an end-to-end machine learning experiment.

print(pipeline)

Out:

{
    "info": {
        "_id": "File@pjml.data.flow.file",
        "config": {
            "name": "../data/iris.arff",
            "path": "./",
            "description": "No description.",
            "hashes": {
                "X": "0ǏǍɽĊũÊүȏŵҖSîҕ",
                "Y": "0ЄϒɐĵǏȂϗƽўýÎʃȆ",
                "Xd": "5ɫңɖŇǓήʼnÝʑΏƀЀǔ",
                "Yd": "5mϛǖͶƅĞOȁЎžʛѲƨ",
                "Xt": "5ȥΔĨӑËҭȨƬδſΧȰɩ",
                "Yt": "5έēPaӹЄźգǩȱɟǟǹ"
            }
        }
    },
    "enhance": true,
    "model": true
}
{
    "info": {
        "_id": "Partition@pjml.stream.expand.partition",
        "config": {
            "split_type": "cv",
            "partitions": 10,
            "seed": 0,
            "fields": "X,Y"
        }
    },
    "enhance": true,
    "model": true
}
Map>>
    {"info": {"_id": "MinMax@pjpy.processing.feature.scaler.minmax","config": {"feature_range": [0,1],"model": true,"enhance": true}},"enhance": true,"model": true}
    {"info": {"_id": "PCA@pjpy.processing.feature.reductor.pca","config": {"n": 2,"model": true,"enhance": true}},"enhance": true,"model": true}
    {"info": {"_id": "SVMC@pjpy.modeling.supervised.classifier.svmc","config": {"C": 1.0,"kernel": "rbf","degree": 3,"gamma": "scale","coef0": 0.0,"shrinking": true,"probability": false,"tol": 0.001,"cache_size": 200,"class_weight": null,"verbose": false,"max_iter": -1,"decision_function_shape": "ovr","break_ties": false,"random_state": null,"seed": 0,"model": true,"enhance": true}},"enhance": true,"model": true}
    {"info": {"_id": "Metric@pjml.data.evaluation.metric","config": {"functions": ["accuracy"],"target": "Y","prediction": "Z","model": true,"enhance": true}},"enhance": true,"model": true}
<<Map
{
    "info": {
        "_id": "Summ@pjml.stream.reduce.summ",
        "config": {
            "field": "R",
            "function": "mean"
        }
    },
    "enhance": true,
    "model": true
}
{
    "info": {
        "_id": "Reduce@pjml.stream.reduce.reduce",
        "config": {}
    },
    "enhance": true,
    "model": true
}
{
    "info": {
        "_id": "Report@pjml.data.communication.report",
        "config": {
            "text": "Mean S: $S"
        }
    },
    "enhance": true,
    "model": true
}

Total running time of the script: ( 0 minutes 0.086 seconds)

Gallery generated by Sphinx-Gallery