-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtime_random_forest.py
109 lines (93 loc) · 2.77 KB
/
time_random_forest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
from multiprocessing import cpu_count
from adaXT.random_forest import RandomForest
from adaXT.criteria import Gini_index, Squared_error, Entropy
import matplotlib.pyplot as plt
import time
import numpy as np
def plot_running_time(n_jobs, running_time, ax, title):
    """Plot running time against the number of jobs on ``ax`` and title it.

    Args:
        n_jobs: x-values, the job counts that were benchmarked.
        running_time: y-values, the running time measured per job count.
        ax: Axes-like object providing ``plot`` and ``set_title``.
        title: Title to put on ``ax``.
    """
    ax.plot(n_jobs, running_time)
    # ``plot`` forwards extra keyword arguments to the line artist, which has
    # no ``title`` property, so the title must be set on the axes itself.
    ax.set_title(title)
def get_regression_data(
    n,
    m,
    random_state: np.random.RandomState,
    lowx=0,
    highx=100,
    lowy=0,
    highy=5,
):
    """Draw a random data set with continuous targets.

    Args:
        n: Number of rows to generate.
        m: Number of feature columns to generate.
        random_state: Generator the values are drawn from; its state advances.
        lowx: Lower bound passed to ``uniform`` for the features.
        highx: Upper bound passed to ``uniform`` for the features.
        lowy: Lower bound passed to ``uniform`` for the targets.
        highy: Upper bound passed to ``uniform`` for the targets.

    Returns:
        Tuple ``(X, Y)`` where ``X`` has shape ``(n, m)`` and ``Y`` has
        shape ``(n,)``, both drawn with ``random_state.uniform``.
    """
    # Features are drawn before targets; the order matters for reproducibility
    # because both draws consume the same generator.
    features = random_state.uniform(low=lowx, high=highx, size=(n, m))
    targets = random_state.uniform(low=lowy, high=highy, size=n)
    return features, targets
def get_classification_data(
    n,
    m,
    random_state: np.random.RandomState,
    lowx=0,
    highx=100,
    lowy=0,
    highy=5,
):
    """Draw a random data set with integer class labels.

    Args:
        n: Number of rows to generate.
        m: Number of feature columns to generate.
        random_state: Generator the values are drawn from; its state advances.
        lowx: Lower bound passed to ``uniform`` for the features.
        highx: Upper bound passed to ``uniform`` for the features.
        lowy: Lower bound passed to ``randint`` for the labels.
        highy: Upper bound passed to ``randint`` for the labels.

    Returns:
        Tuple ``(X, Y)`` where ``X`` has shape ``(n, m)`` drawn with
        ``random_state.uniform`` and ``Y`` has shape ``(n,)`` drawn with
        ``random_state.randint``.
    """
    # Features are drawn before labels; the order matters for reproducibility
    # because both draws consume the same generator.
    features = random_state.uniform(low=lowx, high=highx, size=(n, m))
    labels = random_state.randint(low=lowy, high=highy, size=n)
    return features, labels
def run_gini_index(X, Y, n_jobs, n_estimators):
    """Time fitting a classification forest that uses the Gini index.

    Only the ``fit`` call is timed; constructing the forest is excluded.

    Args:
        X: Feature data passed to ``forest.fit``.
        Y: Response data passed to ``forest.fit``.
        n_jobs: Forwarded to ``RandomForest`` as ``n_jobs``.
        n_estimators: Forwarded to ``RandomForest`` as ``n_estimators``.

    Returns:
        Elapsed time of ``forest.fit(X, Y)`` in seconds.
    """
    forest = RandomForest(
        forest_type="Classification",
        criteria=Gini_index,
        n_estimators=n_estimators,
        n_jobs=n_jobs,
    )
    # perf_counter is monotonic and high-resolution; time.time() follows the
    # wall clock and can jump if the system clock is adjusted mid-benchmark.
    start = time.perf_counter()
    forest.fit(X, Y)
    return time.perf_counter() - start
def run_entropy(X, Y, n_jobs, n_estimators):
    """Time fitting a classification forest that uses the entropy criterion.

    Only the ``fit`` call is timed; constructing the forest is excluded.

    Args:
        X: Feature data passed to ``forest.fit``.
        Y: Response data passed to ``forest.fit``.
        n_jobs: Forwarded to ``RandomForest`` as ``n_jobs``.
        n_estimators: Forwarded to ``RandomForest`` as ``n_estimators``.

    Returns:
        Elapsed time of ``forest.fit(X, Y)`` in seconds.
    """
    forest = RandomForest(
        forest_type="Classification",
        criteria=Entropy,
        n_estimators=n_estimators,
        n_jobs=n_jobs,
    )
    # perf_counter is monotonic and high-resolution; time.time() follows the
    # wall clock and can jump if the system clock is adjusted mid-benchmark.
    start = time.perf_counter()
    forest.fit(X, Y)
    return time.perf_counter() - start
def run_squared_error(X, Y, n_jobs, n_estimators):
    """Time fitting a regression forest that uses the squared-error criterion.

    Only the ``fit`` call is timed; constructing the forest is excluded.

    Args:
        X: Feature data passed to ``forest.fit``.
        Y: Response data passed to ``forest.fit``.
        n_jobs: Forwarded to ``RandomForest`` as ``n_jobs``.
        n_estimators: Forwarded to ``RandomForest`` as ``n_estimators``.

    Returns:
        Elapsed time of ``forest.fit(X, Y)`` in seconds.
    """
    forest = RandomForest(
        forest_type="Regression",
        criteria=Squared_error,
        n_estimators=n_estimators,
        n_jobs=n_jobs,
    )
    # perf_counter is monotonic and high-resolution; time.time() follows the
    # wall clock and can jump if the system clock is adjusted mid-benchmark.
    start = time.perf_counter()
    forest.fit(X, Y)
    return time.perf_counter() - start
def running_time(n, m, random_state, n_jobs, n_estimators):
    """Benchmark all three criteria once on freshly drawn data.

    Args:
        n: Number of rows in each generated data set.
        m: Number of feature columns in each generated data set.
        random_state: Generator used to draw both data sets; it is consumed
            first for the classification data, then for the regression data.
        n_jobs: Forwarded to every benchmark as ``n_jobs``.
        n_estimators: Forwarded to every benchmark as ``n_estimators``.

    Returns:
        List ``[entropy, gini_index, squared_error]`` with the value returned
        by ``run_entropy``, ``run_gini_index`` and ``run_squared_error``.
    """
    cla_features, cla_labels = get_classification_data(
        n, m, random_state=random_state
    )
    reg_features, reg_targets = get_regression_data(
        n, m, random_state=random_state
    )

    forest_kwargs = {"n_jobs": n_jobs, "n_estimators": n_estimators}
    # The two classification criteria share one data set; the benchmarks run
    # in the same order in which their timings are returned.
    entropy_time = run_entropy(cla_features, cla_labels, **forest_kwargs)
    gini_time = run_gini_index(cla_features, cla_labels, **forest_kwargs)
    squared_error_time = run_squared_error(
        reg_features, reg_targets, **forest_kwargs
    )
    return [entropy_time, gini_time, squared_error_time]
if __name__ == "__main__":
    # Seeded generator shared by every iteration: each run draws fresh data,
    # but the overall stream is reproducible between script invocations.
    random_state = np.random.RandomState(2024)
    n_jobs = []
    mean_running_times = []
    n = 10000  # rows per generated data set
    m = 4  # feature columns per generated data set
    n_estimators = 100
    n_repeats = 1  # timings averaged per n_jobs value

    # Include cpu_count() itself: stopping one short never measures the
    # all-cores case and produces no data at all on a single-core machine.
    for i in range(1, cpu_count() + 1):
        print(f"njobs = {i}")
        n_jobs.append(i)
        running_times = [
            running_time(n, m, random_state, i, n_estimators)
            for _ in range(n_repeats)
        ]
        # Average over repetitions, keeping one value per criterion.
        mean_running_times.append(np.mean(running_times, axis=0))

    # Each column of mean_running_times becomes one curve, in the order
    # returned by running_time(); markers keep a single data point visible.
    lines = plt.plot(n_jobs, mean_running_times, marker="o")
    plt.legend(lines, ["Entropy", "Gini index", "Squared error"])
    plt.xlabel("n_jobs")
    plt.ylabel("Mean fit time (s)")
    plt.show()