Object-Oriented Programming (OOP) for Machine Learning

Data scientists who come to the career without a software background tend to use a procedural style of programming rather than taking an object oriented approach. Changing styles is a paradigm shift and really takes some time to wrap your mind around. Many of us who have been doing this for years still have trouble envisioning how objects can improve things.

The objective of this notebook is to apply Object Oriented Programming (OOP) via Machine Learning approach (by using Linear Regression models as an example here) for Data Science problems.

drawing

In [ ]:
# Base libraries
import os
import time
import datetime
import json
import logging

# Scientific libraries
import numpy as np
import pandas as pd
from sklearn import svm # or any other function
from sklearn.datasets import load_boston
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression as LR
from sklearn.linear_model import SGDRegressor as SGDR
from sklearn.preprocessing import StandardScaler
from sklearn.utils.validation import check_X_y, check_array

# Visual libraries
import matplotlib.pyplot as plt
from matplotlib import rcParams
import seaborn as sns
# most definitely plotly

# Helper libraries
from tqdm.notebook import tqdm, trange
from colorama import Fore, Back, Style
import warnings
warnings.filterwarnings('ignore')

# Visual setup
# plt.style.use('fivethirtyeight')   # alternatives below
plt.style.use("ggplot")
sns.set_style('darkgrid')
rcParams['axes.spines.right'] = False
rcParams['axes.spines.top'] = False
# plt.rcParams['figure.figsize'] = [10,6]
# rcParams["figure.autolayout"] = True
plt.rc('xtick', labelsize=11)
plt.rc('ytick', labelsize=11)
custom_colors = ['#74a09e','#86c1b2','#98e2c6','#f3c969','#f2a553', '#d96548', '#c14953']
sns.set_palette(custom_colors)
# custom_palette = ["#221f1f", "#b20710", "#e50914", "#f5f5f1"]
# sns.set_palette(custom_palette)
%config InlineBackend.figure_format = 'retina'

# Pandas options
# pd.set_option("max_colwidth", 100)
pd.set_option('max_colwidth', 40)
pd.set_option("display.precision", 4)
# pd.set_option("float_format", "{:.5f}".format)
# pd.options.display.max_columns = 12
# pd.options.display.max_columns = None  # Possible to limit
# pd.options.display.max_columns = None  # Possible to limit

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

import logging
logging.basicConfig(
    format="%(asctime)s - %(message)s", datefmt="%d-%b-%y %H:%M:%S", level=logging.INFO
)

# Seed value for numpy.random
np.random.seed(14)
In [ ]:
# Load the Boston Housing dataset
boston = load_boston()
X = boston.data
y = boston.target


# Load the Boston Housing dataset
boston = load_boston()
X = boston.data
y = boston.target

# Check for and handle missing values (NaN)
X = np.nan_to_num(X)

# Standardize the features to have mean=0 and std=1
scaler = StandardScaler()
X = scaler.fit_transform(X)
In [ ]:
from abc import ABC, abstractmethod
import inspect

class BasePredictor (ABC):
    
    @abstractmethod
    def fit(self, X, y):
        """ Fit predictor.
        Parameters 
        ----------
        X: (ndarray, sparse matrix) of shape (n_samples,, n_features)

        y: ndarray of shape (n_samples,)
           The input target values.
        
        Returns
        -------
        self : object 
               Fitted estimator.
        """
        pass

    @abstractmethod
    def predict(self, X):
        """
        Parameters
        ----------
        X: (array-like, sparse matrix) of shape (n_samples, n_features)

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)

        """
        pass

    # copied from sklearn
    @classmethod
    def _get_param_names(cls):
        """Get parameter names for the estimator"""
        # fetch the constructor or the original constructor before
        # deprecation wrapping if any
        init = getattr(cls.__init__, "deprecated_original", cls.__init__)
        if init is object.__init__:
            # No explicit constructor to introspect
            return []

        # introspect the constructor arguments to find the model parameters
        # to represent
        init_signature = inspect.signature(init)
        # Consider the constructor parameters excluding 'self'
        parameters = [
            p
            for p in init_signature.parameters.values()
            if p.name != "self" and p.kind != p.VAR_KEYWORD
        ]
        for p in parameters:
            if p.kind == p.VAR_POSITIONAL:
                raise RuntimeError(
                    "scikit-learn estimators should always "
                    "specify their parameters in the signature"
                    " of their __init__ (no varargs)."
                    " %s with constructor %s doesn't "
                    " follow this convention." % (cls, init_signature)
                )
        # Extract and sort argument names excluding 'self'
        return sorted([p.name for p in parameters])
    
    # copied from sklearn
    def get_params(self, deep=True):
        """
        Get parameters for this estimator.
        Parameters
        ----------
        deep : bool, default=True
            If True, will return the parameters for this estimator and
            contained subobjects that are estimators.
        Returns
        -------
        params : dict
            Parameter names mapped to their values.
        """
        out = dict()
        for key in self._get_param_names():
            value = getattr(self, key)
            if deep and hasattr(value, "get_params"):
                deep_items = value.get_params().items()
                out.update((key + "__" + k, val) for k, val in deep_items)
            out[key] = value
        return out    
    
    def reset(self):
        """ A method for reseting the predictor """
        new = self.__class__(**self.get_params())
    
    def load_params(self, params):
        """ A method to load model configuration. 
        
        Parameters
        ----------
        param : dict of parameters

        Returns
        -------
        A new model instance with the new parameters.
        
        """
        self = self.__class__(**params)
        print("params loaded")

        return self
In [ ]:
class Data_plots():
    
    def __init__():
        pass
    
    def pairplot(self):
        '''Creates pairplot of all variables and the target using the Seaborn library'''
        
        print ("This may take a little time. Have patience...")
        from seaborn import pairplot
        from pandas import DataFrame
        df = DataFrame(np.hstack((X,y.reshape(-1,1))))
        pairplot(df)
        plt.show()
    
    def plot_fitted(self,reference_line=False):
        """
        Plots fitted values against the true output values from the data
        
        Arguments:
        reference_line: A Boolean switch to draw a 45-degree reference line on the plot
        """
        plt.title("True vs. fitted values",fontsize=14)
        plt.scatter(y,self.fitted,s=100,alpha=0.75,color='red',edgecolor='k')
        if reference_line:
            plt.plot(y,y,c='k',linestyle='dotted')
        plt.xlabel("True values")
        plt.ylabel("Fitted values")
        plt.grid(True)
        plt.show()
In [ ]:
class Metrics():
    
    def mean_squared_err(self, y, y_pred):
        error = np.subtract(y, y_pred)
        squared_error = error ** 2
        return np.mean(squared_error)
    
    def root_mean_squared_err(self, y, y_pred):
        return np.sqrt(mean_squared_err)
        
    def mean_absolute_err(self, y, y_pred):
        error = np.subtract(y, y_pred)
        abs_error = np.abs(error)
        return np.mean(abs_error)

Linear Regression in OOP¶

In linear regression we want to model the relationship between a scalar dependent variable $y$ and one or more independent (predictor) variables $\boldsymbol{x}$.

Given:

  • Dataset $D = \{(\pmb{x}^{(1)}, y^{(1)}), ..., (\pmb{x}^{(m)}, y^{(m)})\}$
  • With $\pmb{x}^{(i)}$ being a $d-$dimensional vector $\pmb{x}^{(i)} = (x^{(i)}_1, ..., x^{(i)}_d)$ and
  • $y^{(i)}$ being a scalar target variable

Assumptions:

  • We assume that dataset was produced by some unknown function $f$ which maps features $\pmb{x}$ to labels $y$
  • We further assume that the labels $y^{(i)}$ are noisy
  • In other words: the labels have been produced by adding some noise $\epsilon$ to the true function value: $y^{(i)} = f(\pmb{x}^{(i)}) + \epsilon$

Model:
The linear regression model can be interpreted as a very simple neural network:

  • It has a real-valued weight vector $\pmb{w}= (w_{1}, ..., w_{d})$
  • It has a real-valued bias $b$
  • It uses the identity function as its activation function
  • Note: in most books the parameter vector $\pmb{w}$ is denoted in a more general form, namely as $\pmb{\theta}$

Approach:
Our goal is to learn the underlying function $f$ such that we can predict function values at new input locations. In linear regression, we model $f$ using a linear combination of the input features:

$$ \begin{split} y^{(i)} &= b + w_1 x_1^{(i)} + w_2 x_2^{(i)} + ... + w_d x_d^{(i)} \\ &= \sum_{j=1}^{d} b + w_j x_j^{(i)} \\ &= \pmb{w}^T \pmb{x}^{(i)} + b \end{split} $$

Our goal is to learn a linear model $\hat{y}$ that models $y$ given $X$.

$\hat{y} = XW + b$

  • $\hat{y}$ = predictions | $\in \mathbb{R}^{NX1}$ ($N$ is the number of samples)
  • $X$ = inputs | $\in \mathbb{R}^{NXD}$ ($D$ is the number of features)
  • $W$ = weights | $\in \mathbb{R}^{DX1}$
  • $b$ = bias | $\in \mathbb{R}^{1}$

The normal equation (closed-form solution): $\boldsymbol{w} = (\boldsymbol{X}^T \boldsymbol{X})^{-1} \boldsymbol{X}^T \boldsymbol{y}$

In [ ]:
import numpy as np
from sklearn.utils.validation import check_X_y, check_array

class LinearRegression(BasePredictor, Data_plots, Metrics): # Class Inheritance
    def __init__(self, set_intercept = True):
        self.set_intercept = set_intercept
        self.intercept = None
        self.coefficients = None
    
    def fit(self, X, y):
        X, y = check_X_y(X, y) # checking if both X & y has correct shape 
                               # and converting X, y into 2-d amd 1-d array
                               # even pandas dataframe get coverted into arrays
        if self.set_intercept == True:
            X_ = np.c_[np.ones((X.shape[0], 1)), X] # adding x0 = 1
        else: 
            X_ = X
        self.beta = np.linalg.inv(X_.T.dot(X_)).dot(X_.T).dot(y)
        if self.set_intercept == True:
            self.coefficients = self.beta[1:]
            self.intercept = self.beta[0]
        else:
            self.coefficients = self.beta
        return self
    def predict(self, X):
        X_ = check_array(X) # validate the input, convert into 2-d numpy array
        self.fitted = X_@self.coefficients + self.intercept
        return self.fitted                    

Compare the predictions $\hat{y}$ with the actual target values $y$ using the objective (cost) function to determine the loss $J$. A common objective function for linear regression is mean squarred error (MSE). This function calculates the difference between the predicted and target values and squares it.

  • $J(\theta) = MSE = \frac{1}{N} \sum_{i-1}^{N} (y_i - \hat{y}_i)^2 $
    • ${y}$ = ground truth | $\in \mathbb{R}^{NX1}$
    • $\hat{y}$ = predictions | $\in \mathbb{R}^{NX1}$
In [ ]:
lr = LinearRegression()
lr.fit(X, y)
pred = lr.predict(X)
lr.mean_squared_err(y, pred)
Out[ ]:
<__main__.LinearRegression at 0x1f7bcc10a90>
Out[ ]:
21.894831181729202

Compare with Sklearn LinearRegression model

In [ ]:
lr_2 = LR()
lr_2.fit(X, y)
pred2 = lr_2.predict(X)
mean_squared_error(pred2, y)
Out[ ]:
LinearRegression()
Out[ ]:
21.894831181729206
In [ ]:
lr.plot_fitted(reference_line=True)
In [ ]:
lr.get_params()
lr.intercept
lr.coefficients
Out[ ]:
{'set_intercept': True}
Out[ ]:
22.532806324110677
Out[ ]:
array([-0.92814606,  1.08156863,  0.1409    ,  0.68173972, -2.05671827,
        2.67423017,  0.01946607, -3.10404426,  2.66221764, -2.07678168,
       -2.06060666,  0.84926842, -3.74362713])

Calculate the gradient of loss $J(\theta)$ w.r.t to the model weights.

  • $J(\theta) = \frac{1}{N} \sum_i (y_i - \hat{y}_i)^2 = \frac{1}{N}\sum_i (y_i - X_iW)^2 $
    • $\frac{\partial{J}}{\partial{W}} = -\frac{2}{N} \sum_i (y_i - X_iW) X_i = -\frac{2}{N} \sum_i (y_i - \hat{y}_i) X_i$
    • $\frac{\partial{J}}{\partial{b}} = -\frac{2}{N} \sum_i (y_i - X_iW)1 = -\frac{2}{N} \sum_i (y_i - \hat{y}_i)1$

Update the weights $W$ using a small learning rate $\alpha$.

  • $W = W - \alpha\frac{\partial{J}}{\partial{W}}$
  • $b = b - \alpha\frac{\partial{J}}{\partial{b}}$
In [ ]:
class LinearRegressionGD(BasePredictor, Data_plots, Metrics): # Class Inheritance
    def __init__(self, learning_rate=0.01, n_iterations=1000):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.coefficients = None
        self.intercept = None
        self.cost_history = []

    def fit(self, X, y):
        X, y = check_X_y(X, y)
        n_samples, n_features = X.shape
        self.coefficients = np.zeros(n_features)
        self.intercept = 0

        for i in range(self.n_iterations):
            # Predictions
            y_pred = np.dot(X, self.coefficients) + self.intercept

            # Calculate the gradient with respect to weights and bias
            dw = (1 / n_samples) * np.dot(X.T, (y_pred - y))
            db = (1 / n_samples) * np.sum(y_pred - y)

            # Update weights and bias
            self.coefficients -= self.learning_rate * dw
            self.intercept -= self.learning_rate * db

            # Calculate the cost (Mean Squared Error)
            cost = (1 / (2 * n_samples)) * np.sum((y_pred - y) ** 2)
            # if i  % 100 == 0: 
            #     self.cost_history.append(cost)
            self.cost_history.append(cost)

    def predict(self, X):
        X = check_array(X)
        self.fitted = np.dot(X, self.coefficients) + self.intercept
        return self.fitted
In [ ]:
lrgd = LinearRegressionGD(learning_rate=0.1, n_iterations=10000)
lrgd.fit(X, y)
pred = lrgd.predict(X)
lrgd.mean_squared_err(y, pred)
Out[ ]:
21.894831181729202

Compare with Sklearn SGDRegressor model

In [ ]:
lrgd_2 = SGDR()
lrgd_2.fit(X, y)
pred_2 = lrgd.predict(X)
mean_squared_error(pred_2, y)
Out[ ]:
SGDRegressor()
Out[ ]:
21.894831181729202
In [ ]:
lrgd.get_params()
lrgd.intercept
lrgd.coefficients
Out[ ]:
{'learning_rate': 0.1, 'n_iterations': 10000}
Out[ ]:
22.532806324110666
Out[ ]:
array([-0.92814606,  1.08156863,  0.1409    ,  0.68173972, -2.05671827,
        2.67423017,  0.01946607, -3.10404426,  2.66221764, -2.07678168,
       -2.06060666,  0.84926842, -3.74362713])
In [ ]:
lrgd.plot_fitted(reference_line=True)