add a datasets option to specify the dataset you want, add a plot script
This commit is contained in:
48
analyze.py
Normal file
48
analyze.py
Normal file
@@ -0,0 +1,48 @@
|
||||
import csv
|
||||
import matplotlib.pyplot as plt
|
||||
from scipy import stats
|
||||
import pandas as pd
|
||||
|
||||
def plot(l, total=15625, filename='plot.png'):
    """Save a bar chart of the share of entries in each 10k-wide score bin.

    Args:
        l: Iterable of per-bin counts; position ``k`` is drawn under the
            k-th label (``0-10k``, ``10k-20k``, ...). Only the first seven
            bins are drawn; a shorter input draws only the bins it provides.
        total: Divisor that turns each count into a fraction. Defaults to
            15625, the value previously hard-coded here.
            NOTE(review): presumably the total number of scored entries --
            confirm against the dataset being analysed.
        filename: Path of the image file to write.
    """
    labels = ['0-10k', '10k-20k', '20k-30k', '30k-40k', '40k-50k', '50k-60k', '60k-70k']
    fractions = [count / total for count in l][:len(labels)]

    # Draw on a dedicated figure and close it afterwards so repeated calls
    # neither stack bars on the global pyplot figure nor leak figures.
    fig, ax = plt.subplots()
    # Trim the labels as well: bar() requires both sequences to match in length.
    ax.bar(labels[:len(fractions)], fractions)
    fig.savefig(filename)
    plt.close(fig)
|
||||
|
||||
def analyse(filename):
    """Correlate swap scores with validation accuracy from a CSV results file.

    The file's first row is treated as a header and skipped. Every other
    non-blank row must hold the swap score, the validation accuracy and an
    index in its first three columns. The best swap score is printed, the
    10k-wide score histogram is passed to ``plot``, and the Spearman rank
    correlation between score and accuracy is returned.

    Args:
        filename: Path to the CSV results file.

    Returns:
        The Spearman rank correlation coefficient between the ``swap_score``
        and ``valid_acc`` columns.

    Raises:
        ValueError: If the file has no data rows, or a score/accuracy cell
            is not numeric.
    """
    bin_width = 10000
    bins = [0] * 10
    records = []

    # newline='' lets the csv module do its own line-ending handling.
    with open(filename, newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # the first row is a header, not data
        for row in reader:
            if not row:  # csv.reader yields [] for blank lines
                continue
            # Convert once here so every later max/ranking is numeric
            # rather than a lexicographic comparison of strings.
            score = float(row[0])
            records.append((score, float(row[1]), row[2]))
            # Clamp so out-of-range scores land in the first/last bin instead
            # of raising IndexError or wrapping around via a negative index.
            slot = min(max(int(score // bin_width), 0), len(bins) - 1)
            bins[slot] += 1

    if not records:
        raise ValueError(f'no data rows found in {filename!r}')

    # One record per CSV row; the frame must not be built from the characters
    # of a single row's strings.
    results = pd.DataFrame(records, columns=['swap_score', 'valid_acc', 'index'])
    print(results['swap_score'].max())
    plot(bins)
    return stats.spearmanr(results.swap_score, results.valid_acc)[0]
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: analyse the default results file and print the
    # value returned by analyse().
    correlation = analyse('output/swap_results.csv')
    print(correlation)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user