-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathols_regression.py
84 lines (67 loc) · 2.46 KB
/
ols_regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import numpy as np
import pandas as pd
import seaborn as sns
import warnings
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.compat import lzip
import statsmodels.formula.api as smf
# Silence FutureWarning messages so they do not clutter the console output.
warnings.simplefilter("ignore", FutureWarning)
print()
print("STATS MODELS | OLS Linear Regression")
# ----------------------------- DATASET
# Load the "Guerry" dataset of the R package "HistData" through statsmodels'
# Rdatasets helper and keep only its DataFrame payload.
df = sm.datasets.get_rdataset(dataname="Guerry", package="HistData").data
# Show the available column names, followed by a blank line.
print(df.columns, end="\n\n")
# ----------------------------- COLUMNS
dep = 'Suicides'  # dependent (response) variable
# Explanatory variables; each keeps its own name so later sections can pick one.
col1, col2, col3, col4 = 'Wealth', 'Prostitutes', 'Distance', 'Literacy'
# ----------------------------- MODEL
# Keep only the modelled columns and discard rows with missing values.
df2 = df[[dep, col1, col2, col3, col4]].dropna()
# Response as a one-column frame; regressors are every remaining column.
y = df2.loc[:, [dep]]
x = df2.drop(columns=dep)
# Summary statistics rounded to whole numbers, followed by a blank line.
print(df2.describe().round(), end="\n\n")
# ----------------------------- FIT
# sm.OLS does not add an intercept on its own: fitting on the raw regressors
# forces the regression through the origin and makes the summary report an
# uncentered R-squared. Prepend an explicit constant column so the model
# estimates an intercept; the regressors keep their column names, so the
# plotting sections can still select them by name.
mod = sm.OLS(y, sm.add_constant(x)).fit()
print(mod.summary())
# ----------------------------- GRAPHS | PLOTS
exog_idx = col1  # Change col to drive the diagnostics from 1-3
# Report the regressor actually selected (not a hard-coded column), so the
# banner stays correct when exog_idx is changed above.
print(f"\nSelected Regression Exog: {exog_idx}\n")
# The menu is built with f-strings so the selected regressor and the pair-grid
# columns are shown instead of literal "{exog}" / "{df2}" placeholders.
# Option 3 names no regressor because the influence plot covers the whole model.
menu_text = (
    "Choose plot category:\n"
    f"1. Single Variable Regression Diagnostics {exog_idx}\n"
    f"2. Plot fit against one regressor {exog_idx}\n"
    "3. Plot of influence in regression\n"
    f"4. Complete Pairgrid Plot {', '.join(df2.columns)}\n"
)
# Re-prompt until a valid menu number is entered, rather than crashing on
# non-numeric input or silently doing nothing for an out-of-range choice.
while True:
    try:
        plot_choose = int(input(menu_text))
    except ValueError:
        print("Please enter a number between 1 and 4.\n")
        continue
    if 1 <= plot_choose <= 4:
        break
    print("Please enter a number between 1 and 4.\n")
# "1" prints the docstring of the chosen plotting function after the plot;
# surrounding whitespace is ignored.
doc_choose = input("Show DOC (0/1):\n").strip().lower()
# Shared matplotlib defaults for every figure produced below.
plt.rc("figure", figsize=(12, 8))
plt.rc("font", size=10)
# ----------------------------- Single Variable Regression Diagnostics
if plot_choose == 1:
    # Bind the plotting function once: it is used for the figure and its doc.
    regress_exog_plot = sm.graphics.plot_regress_exog
    fig = regress_exog_plot(mod, exog_idx)
    fig.tight_layout(pad=1.0)
    plt.show()
    # The documentation is printed only after the figure has been shown.
    if doc_choose == "1":
        print(regress_exog_plot.__doc__)
# ----------------------------- Fit Plot
if plot_choose == 2:
    # Bind the plotting function once: it is used for the figure and its doc.
    fit_plot = sm.graphics.plot_fit
    fig = fit_plot(mod, exog_idx)
    fig.tight_layout(pad=1.0)
    plt.show()
    # The documentation is printed only after the figure has been shown.
    if doc_choose == "1":
        print(fit_plot.__doc__)
# ----------------------------- Influence Plot
if plot_choose == 3:
    # influence_plot describes the fitted model as a whole and takes no
    # regressor argument: its second positional parameter is the boolean
    # `external` (externally vs. internally studentized residuals). Passing a
    # column name there only worked because a non-empty string is truthy,
    # which equals the default external=True, so the argument is omitted.
    fig = sm.graphics.influence_plot(mod)
    fig.tight_layout(pad=1.0)
    plt.show()
    # The documentation is printed only after the figure has been shown.
    if doc_choose == "1":
        print(sm.graphics.influence_plot.__doc__)
# ----------------------------- Complete Pairgrid Plot
if plot_choose == 4:
    # corner=True creates only the lower triangle and the diagonal, so there
    # are no upper-triangle axes: only map_lower / map_diag apply here
    # (a map_upper call would have nothing to draw on).
    g = sns.PairGrid(df2, diag_sharey=False, corner=True)
    g.fig.set_size_inches(12, 8)
    # Bivariate KDE for each variable pair, univariate KDE on the diagonal.
    g.map_lower(sns.kdeplot)
    g.map_diag(sns.kdeplot)
    plt.show()