-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathSpeaR.py
129 lines (103 loc) · 4.6 KB
/
SpeaR.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# -*- coding: utf-8 -*-
#!/usr/bin/python
# Author: Niam Moltta
# UY - 2017
# Spearman Correlation Coefficient
import numpy as np
from scipy.stats import spearmanr
import matplotlib.pylab as plt
import re
import seaborn
from sklearn import preprocessing
import pandas as pd
# ---------------------------------------------------------------------------
# SpeaR: interactive Spearman rank-correlation explorer for two CSV columns.
#
# The code below runs on Python 2 (the original target) and on Python 3:
# every print call passes exactly one argument, so it produces the same text
# whether `print` is a statement or a function.
# ---------------------------------------------------------------------------

# Always read the answer as plain text. On Python 2 that is raw_input
# (input() would evaluate what the user types); Python 3 only has input().
try:
    read_line = raw_input
except NameError:
    read_line = input

# Answers that end the column-selection loop.
QUIT_ANSWERS = ('', 'ya')

# Raw string: the ASCII art contains backslashes that are not escape sequences.
BANNER = (
    ' ',
    ' ',
    ' Welcome to SpeaR.py',
    ' - by Niam Moltta -',
    r' ~~/\//V\ ',
    ' ',
    ' ',
    ' ',
)

INSTRUCTIONS = "Application: SPEARMAN'S CORRELATION COEFFICIENT.\n\nINSTRUCTIONS:\n\n- Select file, select two columns.\n- Returns Spearman's correlation coefficient and p-value.\n- Returns graph of correlation relationship.\n\n"

RULE = ' --------------------------------------------------------- '

SAVE_HINT = 'To continue, you must save the figure and close it, or just close it. You can also zoom in it or move the graph to see it better, use the buttons.\n'


def dataset_label(filename):
    """Return *filename* without a trailing ``.csv`` extension.

    The previous pattern ``'(.+?).csv'`` left the dot unescaped, so a name
    such as ``mycsv.csv`` was reported as ``['m']``. Names that do not end in
    ``.csv`` are returned unchanged.
    """
    return re.sub(r'\.csv$', '', filename, flags=re.IGNORECASE)


def missing_columns(available, requested):
    """Return the names in *requested* that are not in *available*, in order."""
    known = set(available)
    return [name for name in requested if name not in known]


def analyse(x, y):
    """Compute the Spearman correlation of *x* and *y* plus the plot curves.

    The Spearman correlation is a nonparametric measure of the monotonicity
    of the relationship between two datasets. Unlike the Pearson correlation,
    the Spearman correlation does not assume that both datasets are normally
    distributed. Like other correlation coefficients, this one varies between
    -1 and +1 with 0 implying no correlation. Correlations of -1 or +1 imply
    an exact monotonic relationship. Positive correlations imply that as x
    increases, so does y. Negative correlations imply that as x increases,
    y decreases.

    The p-value roughly indicates the probability of an uncorrelated system
    producing datasets that have a Spearman correlation at least as extreme
    as the one computed from these datasets. The p-values are not entirely
    reliable but are probably reasonable for datasets larger than 500 or so.

    Returns:
        A tuple ``(coef, pval, x_curve, y_curve)``. ``x_curve`` is the
        cumulative sum of *x*. ``y_curve`` is the cumulative sum of *y*,
        sorted in descending order when ``coef`` is negative so the drawn
        line falls while the x line rises.

    Raises:
        TypeError, ValueError: propagated from scipy/numpy when the values
            cannot be ranked or summed (for example text columns).
    """
    result = spearmanr(x, y)
    coef, pval = result[0], result[1]
    x_curve = np.cumsum(x)
    y_curve = np.cumsum(y)
    if coef < 0:
        y_curve = sorted(y_curve, reverse=True)
    return coef, pval, x_curve, y_curve


def correlation_report(coef, pval):
    """Return the console lines that describe the result.

    The p-value lines are omitted when the coefficient is exactly +1 or -1,
    matching the original output.
    """
    lines = ["Spearman's Correlation = %s" % coef, ' ']
    if abs(coef) != 1:
        lines.extend(['p-value = %s' % pval, ' '])
    return lines


def show_correlation_plot(x_curve, y_curve, coef, pval, column1, column2):
    """Draw both curves, titled with the coefficient, and block until closed."""
    plt.plot(x_curve, 'g', label=column1)
    plt.plot(y_curve, 'b', label=column2)
    plt.title("Spearman's = " + str(coef))
    plt.xlabel('p-value = ' + str(pval))
    plt.ylabel("Correlation")
    print(SAVE_HINT)
    plt.legend()
    plt.show()
    print(' ')


def farewell(message):
    """Print *message* framed by blank lines and end the program (status 0)."""
    print(' ')
    print(message)
    print(' ')
    # Same effect as exit(), without relying on the site-provided helper.
    raise SystemExit


def main():
    """Run the interactive session: pick a CSV file, then correlate columns.

    An empty file name quits immediately. In the column loop, an empty answer
    or ``ya`` quits. Unknown column headers and columns that cannot be
    analysed are reported and the prompt is shown again instead of crashing.
    """
    for line in BANNER:
        print(line)
    print(INSTRUCTIONS)

    filecsv = str(read_line('Enter .csv file name: '))
    print(' ')
    if filecsv == '':
        farewell('Arrivederci!')

    try:
        data = pd.read_csv(filecsv)
    except (IOError, OSError, ValueError) as err:
        # Missing/unreadable file, or content pandas cannot parse.
        print('Could not read %s: %s' % (filecsv, err))
        raise SystemExit(1)
    print(' ')
    columns = np.asarray(data.columns)

    while True:
        print(' ')
        print('Columns in %s are:\n' % dataset_label(filecsv))
        print(columns)
        print(' ')

        column1 = str(read_line('Enter column header for variable x: '))
        print(' ')
        if column1 in QUIT_ANSWERS:
            break

        column2 = str(read_line('Enter column header for variable y: '))
        print(' ')
        if column2 in QUIT_ANSWERS:
            break

        unknown = missing_columns(columns, (column1, column2))
        if unknown:
            # A typo used to raise KeyError and end the whole session.
            print('Unknown column header(s): %s' % ', '.join(unknown))
            continue

        print(RULE)
        print('Calculating correlation for:\n%s and %s' % (column1, column2))
        print(RULE)

        x = np.asarray(data[column1])
        y = np.asarray(data[column2])
        try:
            coef, pval, x_curve, y_curve = analyse(x, y)
        except (TypeError, ValueError) as err:
            print('Cannot correlate %s and %s: %s' % (column1, column2, err))
            continue

        for line in correlation_report(coef, pval):
            print(line)
        show_correlation_plot(x_curve, y_curve, coef, pval, column1, column2)

    farewell('Hasta la vista, human.')


if __name__ == '__main__':
    main()