This repository was archived by the owner on Jun 16, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
Statistics Playground
Ahmed Elsafty edited this page Sep 10, 2018
·
2 revisions
import os
def find_csv_files(root_dir):
    """Return the paths of every ``.csv`` file found beneath *root_dir*.

    The directory tree is walked recursively with ``os.walk``.  The result
    is sorted so that the order in which files are later read does not
    depend on the order the filesystem happens to list directory entries.

    Args:
        root_dir: Directory to search.

    Returns:
        A sorted list of paths, each formed by joining the containing
        directory and the file name.  Empty if nothing matches.
    """
    return sorted(
        os.path.join(dirpath, filename)
        for dirpath, _, filenames in os.walk(root_dir)
        for filename in filenames
        if filename.endswith('.csv')
    )


# All CSV logs stored under the "No Videos" directory.
files = find_csv_files("No Videos")
%%capture
import pandas as pd
# Read every CSV log into its own DataFrame, tag it with the participant it
# belongs to, then stack all of them into one combined frame.
list_ = []
for f in files:
    fpd = pd.read_csv(f, index_col=None, header=0)
    # The participant ID is the name of the directory that contains the CSV.
    # os.path.basename handles the platform's path separator (splitting on
    # '/' does not on Windows), and item assignment always writes a real
    # column, whereas attribute assignment only does so if the column
    # already exists.
    fpd["participantId"] = os.path.basename(os.path.dirname(f))
    list_.append(fpd)
frame = pd.concat(list_)
frame.head()
index | participantId | condition | timeSinceStartup | correctNodeHit | keypressed | calibrationData | bubbleSize | numberNodes | targetNode | currentSelectedNode | currentState | correctedEyeX | correctedEyeY | rawEyeX | rawEyeY |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | NaN | 3.825378 | NaN | HAPRING_TIP | noCalibrationDataSet | NaN | 0.0 | NaN | -1.0 | Introduction | 0.0 | 0.0 | 960.0 | 540.0 |
1 | 1 | NaN | 7.018346 | NaN | HAPRING_TIP | noCalibrationDataSet | NaN | 0.0 | NaN | -1.0 | Introduction | 0.0 | 0.0 | 960.0 | 540.0 |
2 | 1 | NaN | 7.033599 | NaN | NaN | noCalibrationDataSet | NaN | 0.0 | NaN | -1.0 | Introduction | 0.0 | 0.0 | 960.0 | 540.0 |
3 | 1 | NaN | 7.034281 | NaN | NaN | noCalibrationDataSet | NaN | 0.0 | NaN | -1.0 | Introduction | 0.0 | 0.0 | 960.0 | 540.0 |
4 | 1 | NaN | 7.036498 | NaN | NaN | noCalibrationDataSet | NaN | 0.0 | NaN | -1.0 | Introduction | 0.0 | 0.0 | 960.0 | 540.0 |
# Show the distinct values found in the "condition" column.
frame["condition"].unique()
array([nan, 'WITHCUSTOMCALIB', 'MOUSE', 'EYE', 'noCalibrationDataSet'], dtype=object)
import numpy as np
# Group the log rows by participant and condition.
# NOTE: despite its name, ``df`` is a GroupBy object, not a DataFrame; the
# name is kept so anything that refers to it keeps working.
df = frame[['participantId', 'condition', 'timeSinceStartup']].groupby(['participantId', 'condition'])

# For each (participant, condition) group, compute the span between the
# largest and smallest timeSinceStartup value.  Series.max()/min() skip NaN,
# whereas the built-in max()/min() give results that depend on where a NaN
# happens to sit in the data.
total_time_df = df.agg({
    'condition': 'first',
    'timeSinceStartup': lambda x: x.max() - x.min(),
})

# Keep only the three conditions of interest.
total_time_df = total_time_df[total_time_df['condition'].isin(['WITHCUSTOMCALIB', 'MOUSE', 'EYE'])]
import matplotlib.pyplot as plt
plt.style.use('ggplot')
# total_time_df has "condition" both as an index level and as a column, which
# makes by='condition' ambiguous for pandas.  Dropping the index leaves only
# the column, so the grouping is unambiguous.
total_time_df.reset_index(drop=True).boxplot(column='timeSinceStartup', by='condition')
plt.show()
So science, much wiki.