-
Notifications
You must be signed in to change notification settings - Fork 0
/
MultiUserLoanEligibilityPredictor.py
88 lines (65 loc) · 3.81 KB
/
MultiUserLoanEligibilityPredictor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import joblib
import os
import numpy as np
import pandas as pd
# Importing for logging purpose
import logging
from log_config import configure_logger
# Apply the project's logging configuration (log_config.configure_logger) once at
# import time, before the module-level logger below is obtained.
configure_logger()
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
# Directory that predictor() joins with each '.pkl' filename when it loads the
# fitted preprocessing objects and the model.
UPLOAD_FOLDER = 'output'
def _load_artifact(filename, label):
    """Load one joblib-pickled object from UPLOAD_FOLDER and log that it was loaded."""
    # NOTE: joblib.load unpickles the file, and unpickling can execute arbitrary
    # code. The files under UPLOAD_FOLDER must come from a trusted source.
    artifact = joblib.load(os.path.join(UPLOAD_FOLDER, filename))
    logger.debug('Loaded the %s object', label)
    return artifact


def predictor(test_df, singleuser=False):
    """Run the saved preprocessing pipeline and model on ``test_df``.

    The pipeline objects are loaded from UPLOAD_FOLDER and applied in this
    order: data preprocessing, numeric coercion, imputation, outlier
    treatment, categorical encoding, model prediction.

    Args:
        test_df: Input data passed to the preprocessor's ``transform``. When
            ``singleuser`` is False it must also support ``.copy()`` and
            column assignment (it is used as the base of the returned frame).
        singleuser: When True, return only the first prediction instead of
            an annotated copy of ``test_df``.

    Returns:
        * ``singleuser=True``: the first element of the model's prediction,
          exactly as the model returned it (not mapped to a label).
        * ``singleuser=False``: a copy of ``test_df`` with an added
          ``'Loan Status'`` column in which 1 is replaced by
          ``'Loan Approved'`` and 0 by ``'Loan Rejected'``.
        * ``None`` if any step raised; the exception is logged with its
          traceback and not propagated.
    """
    # Columns that must be numeric before imputation / outlier treatment.
    # This is the ordered union of the outlier-treatment and imputation column
    # lists; each column only needs to be coerced once.
    numeric_columns = [
        'Current Loan Amount', 'Credit Score', 'Annual Income', 'Monthly Debt',
        'Current Credit Balance', 'Maximum Open Credit', 'Years in current job',
        'Months since last delinquent', 'Bankruptcies', 'Tax Liens',
    ]

    try:
        logger.debug('In the MultiUserLoanEligibilityPredictor.predictor')

        # Data preprocessing with the saved preprocessor.
        preprocessor = _load_artifact('dataPreProcessing.pkl', 'preprocessor')
        preprocessed_df = preprocessor.transform(test_df)

        # Coerce to numeric; values that cannot be parsed become NaN.
        for col in numeric_columns:
            preprocessed_df[col] = pd.to_numeric(preprocessed_df[col], errors='coerce')

        # Impute missing values.
        imputer = _load_artifact('imputeNumericalValues.pkl', 'imputer')
        imputer_output_df = imputer.transform(preprocessed_df)

        # Treat outlier values.
        outlier_processor = _load_artifact('outlierTreatment.pkl', 'outlierProcessor')
        outlier_treatment_df = outlier_processor.transform(imputer_output_df)

        # Encode categorical values so the data can be used for inference.
        encoder = _load_artifact('categoricalEncoding.pkl', 'encoder')
        encoded_df = encoder.transform(outlier_treatment_df)

        # Predict the target variable with the saved model.
        model = _load_artifact('xgb_threshold_model.pkl', 'model')
        test_prediction = model.predict(encoded_df)

        if singleuser:
            return test_prediction[0]

        output_df = test_df.copy()
        output_df['Loan Status'] = test_prediction
        output_df['Loan Status'] = output_df['Loan Status'].replace(
            {1: 'Loan Approved', 0: 'Loan Rejected'}
        )
        return output_df
    except Exception as e:
        # Best-effort contract is kept (callers receive None on failure), but the
        # failure is now logged at ERROR level with the traceback instead of
        # being hidden at DEBUG level.
        logger.exception('Exception in MultiUserLoanEligibilityPrediction.predictor %s', e)
        return None