# Base libraries
import os
import time
import datetime
import json
import logging

# Scientific libraries
import numpy as np
import pandas as pd
from sklearn import svm # or any other function
from sklearn.datasets import load_boston
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression as LR
from sklearn.linear_model import SGDRegressor as SGDR
from sklearn.preprocessing import StandardScaler
from sklearn.utils.validation import check_X_y, check_array

# Visual libraries
import matplotlib.pyplot as plt
from matplotlib import rcParams
import seaborn as sns
# most definitely plotly

# Helper libraries
from tqdm.notebook import tqdm, trange
from colorama import Fore, Back, Style
import warnings
warnings.filterwarnings('ignore')

# Visual setup
# plt.style.use('fivethirtyeight')   # alternatives below
plt.style.use("ggplot")
sns.set_style('darkgrid')
rcParams['axes.spines.right'] = False
rcParams['axes.spines.top'] = False
# plt.rcParams['figure.figsize'] = [10,6]
# rcParams["figure.autolayout"] = True
plt.rc('xtick', labelsize=11)
plt.rc('ytick', labelsize=11)
custom_colors = ['#74a09e','#86c1b2','#98e2c6','#f3c969','#f2a553', '#d96548', '#c14953']
sns.set_palette(custom_colors)
# custom_palette = ["#221f1f", "#b20710", "#e50914", "#f5f5f1"]
# sns.set_palette(custom_palette)
%config InlineBackend.figure_format = 'retina'

# Pandas options
# pd.set_option("max_colwidth", 100)
pd.set_option('max_colwidth', 40)
pd.set_option("display.precision", 4)
# pd.set_option("float_format", "{:.5f}".format)
# pd.options.display.max_columns = 12
# pd.options.display.max_columns = None  # Possible to limit
# pd.options.display.max_columns = None  # Possible to limit

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

import logging
logging.basicConfig(
    format="%(asctime)s - %(message)s", datefmt="%d-%b-%y %H:%M:%S", level=logging.INFO
)

# Seed value for numpy.random
np.random.seed(14)


# Load the Boston Housing dataset
boston = load_boston()
X = boston.data
y = boston.target


# Load the Boston Housing dataset
boston = load_boston()
X = boston.data
y = boston.target

# Check for and handle missing values (NaN)
X = np.nan_to_num(X)

# Standardize the features to have mean=0 and std=1
scaler = StandardScaler()
X = scaler.fit_transform(X)


from abc import ABC, abstractmethod
import inspect

class BasePredictor (ABC):
    
    @abstractmethod
    def fit(self, X, y):
        """ Fit predictor.
        Parameters 
        ----------
        X: (ndarray, sparse matrix) of shape (n_samples,, n_features)

        y: ndarray of shape (n_samples,)
           The input target values.
        
        Returns
        -------
        self : object 
               Fitted estimator.
        """
        pass

    @abstractmethod
    def predict(self, X):
        """
        Parameters
        ----------
        X: (array-like, sparse matrix) of shape (n_samples, n_features)

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)

        """
        pass

    # copied from sklearn
    @classmethod
    def _get_param_names(cls):
        """Get parameter names for the estimator"""
        # fetch the constructor or the original constructor before
        # deprecation wrapping if any
        init = getattr(cls.__init__, "deprecated_original", cls.__init__)
        if init is object.__init__:
            # No explicit constructor to introspect
            return []

        # introspect the constructor arguments to find the model parameters
        # to represent
        init_signature = inspect.signature(init)
        # Consider the constructor parameters excluding 'self'
        parameters = [
            p
            for p in init_signature.parameters.values()
            if p.name != "self" and p.kind != p.VAR_KEYWORD
        ]
        for p in parameters:
            if p.kind == p.VAR_POSITIONAL:
                raise RuntimeError(
                    "scikit-learn estimators should always "
                    "specify their parameters in the signature"
                    " of their __init__ (no varargs)."
                    " %s with constructor %s doesn't "
                    " follow this convention." % (cls, init_signature)
                )
        # Extract and sort argument names excluding 'self'
        return sorted([p.name for p in parameters])
    
    # copied from sklearn
    def get_params(self, deep=True):
        """
        Get parameters for this estimator.
        Parameters
        ----------
        deep : bool, default=True
            If True, will return the parameters for this estimator and
            contained subobjects that are estimators.
        Returns
        -------
        params : dict
            Parameter names mapped to their values.
        """
        out = dict()
        for key in self._get_param_names():
            value = getattr(self, key)
            if deep and hasattr(value, "get_params"):
                deep_items = value.get_params().items()
                out.update((key + "__" + k, val) for k, val in deep_items)
            out[key] = value
        return out    
    
    def reset(self):
        """ A method for reseting the predictor """
        new = self.__class__(**self.get_params())
    
    def load_params(self, params):
        """ A method to load model configuration. 
        
        Parameters
        ----------
        param : dict of parameters

        Returns
        -------
        A new model instance with the new parameters.
        
        """
        self = self.__class__(**params)
        print("params loaded")

        return self


class Data_plots():
    
    def __init__():
        pass
    
    def pairplot(self):
        '''Creates pairplot of all variables and the target using the Seaborn library'''
        
        print ("This may take a little time. Have patience...")
        from seaborn import pairplot
        from pandas import DataFrame
        df = DataFrame(np.hstack((X,y.reshape(-1,1))))
        pairplot(df)
        plt.show()
    
    def plot_fitted(self,reference_line=False):
        """
        Plots fitted values against the true output values from the data
        
        Arguments:
        reference_line: A Boolean switch to draw a 45-degree reference line on the plot
        """
        plt.title("True vs. fitted values",fontsize=14)
        plt.scatter(y,self.fitted,s=100,alpha=0.75,color='red',edgecolor='k')
        if reference_line:
            plt.plot(y,y,c='k',linestyle='dotted')
        plt.xlabel("True values")
        plt.ylabel("Fitted values")
        plt.grid(True)
        plt.show()


class Metrics():
    
    def mean_squared_err(self, y, y_pred):
        error = np.subtract(y, y_pred)
        squared_error = error ** 2
        return np.mean(squared_error)
    
    def root_mean_squared_err(self, y, y_pred):
        return np.sqrt(mean_squared_err)
        
    def mean_absolute_err(self, y, y_pred):
        error = np.subtract(y, y_pred)
        abs_error = np.abs(error)
        return np.mean(abs_error)


import numpy as np
from sklearn.utils.validation import check_X_y, check_array

class LinearRegression(BasePredictor, Data_plots, Metrics): # Class Inheritance
    def __init__(self, set_intercept = True):
        self.set_intercept = set_intercept
        self.intercept = None
        self.coefficients = None
    
    def fit(self, X, y):
        X, y = check_X_y(X, y) # checking if both X & y has correct shape 
                               # and converting X, y into 2-d amd 1-d array
                               # even pandas dataframe get coverted into arrays
        if self.set_intercept == True:
            X_ = np.c_[np.ones((X.shape[0], 1)), X] # adding x0 = 1
        else: 
            X_ = X
        self.beta = np.linalg.inv(X_.T.dot(X_)).dot(X_.T).dot(y)
        if self.set_intercept == True:
            self.coefficients = self.beta[1:]
            self.intercept = self.beta[0]
        else:
            self.coefficients = self.beta
        return self
    def predict(self, X):
        X_ = check_array(X) # validate the input, convert into 2-d numpy array
        self.fitted = X_@self.coefficients + self.intercept
        return self.fitted


lr = LinearRegression()
lr.fit(X, y)
pred = lr.predict(X)
lr.mean_squared_err(y, pred)

<__main__.LinearRegression at 0x1f7bcc10a90>

21.894831181729202


lr_2 = LR()
lr_2.fit(X, y)
pred2 = lr_2.predict(X)
mean_squared_error(pred2, y)

LinearRegression()

21.894831181729206


lr.plot_fitted(reference_line=True)


lr.get_params()
lr.intercept
lr.coefficients

{'set_intercept': True}

22.532806324110677

array([-0.92814606,  1.08156863,  0.1409    ,  0.68173972, -2.05671827,
        2.67423017,  0.01946607, -3.10404426,  2.66221764, -2.07678168,
       -2.06060666,  0.84926842, -3.74362713])


class LinearRegressionGD(BasePredictor, Data_plots, Metrics): # Class Inheritance
    def __init__(self, learning_rate=0.01, n_iterations=1000):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.coefficients = None
        self.intercept = None
        self.cost_history = []

    def fit(self, X, y):
        X, y = check_X_y(X, y)
        n_samples, n_features = X.shape
        self.coefficients = np.zeros(n_features)
        self.intercept = 0

        for i in range(self.n_iterations):
            # Predictions
            y_pred = np.dot(X, self.coefficients) + self.intercept

            # Calculate the gradient with respect to weights and bias
            dw = (1 / n_samples) * np.dot(X.T, (y_pred - y))
            db = (1 / n_samples) * np.sum(y_pred - y)

            # Update weights and bias
            self.coefficients -= self.learning_rate * dw
            self.intercept -= self.learning_rate * db

            # Calculate the cost (Mean Squared Error)
            cost = (1 / (2 * n_samples)) * np.sum((y_pred - y) ** 2)
            # if i  % 100 == 0: 
            #     self.cost_history.append(cost)
            self.cost_history.append(cost)

    def predict(self, X):
        X = check_array(X)
        self.fitted = np.dot(X, self.coefficients) + self.intercept
        return self.fitted


lrgd = LinearRegressionGD(learning_rate=0.1, n_iterations=10000)
lrgd.fit(X, y)
pred = lrgd.predict(X)
lrgd.mean_squared_err(y, pred)

21.894831181729202


lrgd_2 = SGDR()
lrgd_2.fit(X, y)
pred_2 = lrgd.predict(X)
mean_squared_error(pred_2, y)

SGDRegressor()

21.894831181729202


lrgd.get_params()
lrgd.intercept
lrgd.coefficients

{'learning_rate': 0.1, 'n_iterations': 10000}

22.532806324110666

array([-0.92814606,  1.08156863,  0.1409    ,  0.68173972, -2.05671827,
        2.67423017,  0.01946607, -3.10404426,  2.66221764, -2.07678168,
       -2.06060666,  0.84926842, -3.74362713])


lrgd.plot_fitted(reference_line=True)

Object-Oriented Programming (OOP) for Machine Learning

Linear Regression in OOP¶