Skip to content

Commit

Permalink
Add R2 metric + test
Browse files Browse the repository at this point in the history
  • Loading branch information
chuvalniy committed Feb 6, 2024
1 parent f40c70b commit 58920c8
Show file tree
Hide file tree
Showing 11 changed files with 140 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
src/__pycache__
build
dist
tulia.egg-info
tulia.egg-info
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from setuptools import setup, find_packages

VERSION = '0.2.1'
VERSION = '0.3.0'
DESCRIPTION = 'numpy based machine learning package with sklearn-like API'

with open("README.md", "r") as fn:
Expand Down
Binary file modified src/ensemble/__pycache__/__init__.cpython-310.pyc
Binary file not shown.
Binary file modified src/ensemble/__pycache__/boosting.cpython-310.pyc
Binary file not shown.
Binary file modified src/ensemble/__pycache__/xgboost.cpython-310.pyc
Binary file not shown.
56 changes: 56 additions & 0 deletions src/ensemble/catboost.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from typing import Union

import numpy as np

from src.base import Model


# 1. Ordered Target encoding
# 2. Bootstrap data
# 3. Symmetric tree
# 4.


class CatBoostClassifier(Model):
    """
    CatBoost for classification tasks.

    Work in progress: only the ordered target encoding of categorical
    features is implemented; fit / predict / _predict are still placeholders.
    """

    def __init__(
            self,
            learning_rate: float = 3e-1,
            n_steps: int = 100,
            max_depth: int = 3,
            cat_features: list = None
    ):
        """
        Store the hyperparameters on the instance.
        :param learning_rate: Presumably the boosting step size (not used by any method yet).
        :param n_steps: Presumably the number of boosting iterations (not used by any method yet).
        :param max_depth: Presumably the depth limit of each tree (not used by any method yet).
        :param cat_features: Column indices of the categorical features, or None if there are none.
        """
        # NOTE(review): Model.__init__ is deliberately not called because its
        # signature is not visible from this file - confirm against src.base.
        self.learning_rate = learning_rate
        self.n_steps = n_steps
        self.max_depth = max_depth
        self.cat_features = cat_features

    def fit(self, x: np.ndarray, y: np.ndarray):
        """
        Placeholder, not implemented yet.
        :param x: Training data.
        :param y: Target labels.
        """
        pass

    def predict(self, x: np.ndarray) -> np.ndarray:
        """
        Placeholder, not implemented yet (currently returns None).
        :param x: Data to predict on.
        """
        pass

    def _predict(self, x: np.ndarray) -> Union[np.ndarray, float, int]:
        """
        Placeholder, not implemented yet (currently returns None).
        :param x: Data to predict on.
        """
        pass

    def _encode_cat_features(self, x: np.ndarray, y: np.ndarray) -> np.ndarray:
        """
        Replace categorical columns with an ordered target statistic.

        Rows are processed in order; the value written for a row only uses the
        labels of the rows that precede it, so a row never sees its own target.
        :param x: Data (n_examples, n_features).
        :param y: Target labels (n_examples, ); a label equal to 1 counts as positive.
        :return: x itself when there are no categorical features, otherwise a copy
                 of x whose categorical columns hold the encoded values.
        """
        cat_features = getattr(self, "cat_features", None)
        cat_feature_idxs = [] if cat_features is None else list(cat_features)
        if not cat_feature_idxs:
            return x

        encoded_features = []
        for idx in cat_feature_idxs:
            encoded_feature = []
            option_count = {}  # category -> positives seen so far
            total_count = {}  # category -> rows seen so far
            for x_sample, label in zip(x[:, idx], y):
                # Statistic is computed BEFORE the counters are updated with
                # this row; 0.05 and 1 smooth categories that have no history.
                ctr = (option_count.get(x_sample, 0) + 0.05) / (total_count.get(x_sample, 0) + 1)
                encoded_feature.append(ctr)

                if label == 1:
                    option_count[x_sample] = option_count.get(x_sample, 0) + 1
                total_count[x_sample] = total_count.get(x_sample, 0) + 1

            encoded_features.append(np.array(encoded_feature))

        # Work on a copy so the caller's array is left untouched, and promote
        # integer/bool data to float so the fractional statistics are not truncated.
        # NOTE(review): string-typed arrays would still stringify the values;
        # float or object dtype is assumed - confirm with callers.
        encoded_x = x.astype(float) if x.dtype.kind in "iub" else x.copy()
        # Each encoded feature is one COLUMN, so stack them side by side to get
        # the (n_examples, n_cat_features) shape the slice on the left expects.
        encoded_x[:, cat_feature_idxs] = np.column_stack(encoded_features)
        return encoded_x
Binary file modified src/metrics/__pycache__/classification.cpython-310.pyc
Binary file not shown.
Binary file modified src/metrics/__pycache__/regression.cpython-310.pyc
Binary file not shown.
23 changes: 19 additions & 4 deletions src/metrics/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
def mean_squared_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
"""
Calculate mean-squared error.
:param y_true: Target labels.
:param y_pred: Target predictions.
:param y_true: Target labels (n_examples, ).
:param y_pred: Target predictions (n_examples, ).
:return: Loss.
"""
n_examples = len(y_true)
Expand All @@ -17,9 +17,24 @@ def mean_squared_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
def mean_absolute_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """
    Calculate mean-absolute error.
    :param y_true: Target labels (n_examples, ).
    :param y_pred: Target predictions (n_examples, ).
    :return: Loss.
    """
    # Cast to float before subtracting: unsigned integer arrays would wrap
    # around on negative differences (e.g. uint8 1 - 2 == 255), and plain
    # Python lists do not support element-wise subtraction at all.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    error = np.mean(np.abs(y_true - y_pred))
    return error


def r2_score(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """
    Calculate R-squared.

    R-squared = 1 - RSS / TSS, where RSS is the residual sum of squares and
    TSS is the total sum of squares of the targets around their mean.
    When the targets are constant (TSS == 0) the ratio is undefined; in that
    case the score is 1.0 for a perfect prediction and 0.0 otherwise.
    :param y_true: Target labels (n_examples, ).
    :param y_pred: Target predictions (n_examples, ).
    :return: R-squared score.
    """
    # Cast to float so list inputs work and unsigned integers cannot wrap
    # around in the subtraction / squaring below.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    tss = np.sum((y_true - np.mean(y_true)) ** 2)
    rss = np.sum((y_true - y_pred) ** 2)

    if tss == 0:
        # Avoid 0/0 -> NaN and x/0 -> -inf for constant targets.
        return np.float64(1.0 if rss == 0 else 0.0)

    r_squared = 1 - rss / tss
    return r_squared
Binary file not shown.
63 changes: 63 additions & 0 deletions tests/metrics/regression/test_r2_score.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import numpy as np

from src.metrics import r2_score


def test_r2_identical_arrays():
    """A prediction equal to the targets has zero residuals, so R2 must be 1."""
    targets = np.array([1, 2, 3, 4, 5])
    predictions = np.array([1, 2, 3, 4, 5])

    score = r2_score(targets, predictions)

    assert np.isclose(1.0, score, atol=1e-5, rtol=1e-5)


def test_r2_shifted_arrays():
    """Predictions offset by +1: RSS = 5 and TSS = 10, so R2 must be 0.5."""
    targets = np.array([1, 2, 3, 4, 5])
    predictions = targets + 1

    score = r2_score(targets, predictions)

    assert np.isclose(0.5, score, atol=1e-5, rtol=1e-5)


def test_r2_reversed_arrays():
    """Predictions in reverse order: RSS = 40 and TSS = 10, so R2 must be -3."""
    targets = np.array([1, 2, 3, 4, 5])
    predictions = np.array([5, 4, 3, 2, 1])

    score = r2_score(targets, predictions)

    assert np.isclose(-3.0, score, atol=1e-5, rtol=1e-5)


def test_r2_large_numbers():
    """Targets in tens with a +5 offset: RSS = 125 and TSS = 1000, so R2 must be 0.875."""
    targets = np.array([10, 20, 30, 40, 50])
    predictions = targets + 5

    score = r2_score(targets, predictions)

    assert np.isclose(0.875, score, atol=1e-5, rtol=1e-5)


def test_mse_decimal_numbers():
    """Fractional targets with a +0.1 offset: R2 must be 0.5."""
    # NOTE(review): despite the "mse" in the name this exercises r2_score;
    # consider renaming to test_r2_decimal_numbers.
    targets = np.array([0.5, 0.6, 0.7, 0.8, 0.9])
    predictions = np.array([0.6, 0.7, 0.8, 0.9, 1.0])

    score = r2_score(targets, predictions)

    assert np.isclose(0.5, score, atol=1e-5, rtol=1e-5)


def test_mse_with_outliers():
    """One outlying target (135): RSS = 6500 and TSS = 10180."""
    # NOTE(review): despite the "mse" in the name this exercises r2_score;
    # consider renaming to test_r2_with_outliers.
    targets = np.array([10, 20, 30, 40, 135])
    predictions = np.array([15, 25, 35, 45, 55])

    residual_sum, total_sum = 6500, 10180
    score = r2_score(targets, predictions)

    assert np.isclose(1 - residual_sum / total_sum, score, atol=1e-5, rtol=1e-5)

0 comments on commit 58920c8

Please sign in to comment.