Skip to content
This repository was archived by the owner on Jun 16, 2023. It is now read-only.

Statistics Playground

Ahmed Elsafty edited this page Sep 10, 2018 · 2 revisions
import os
# Recursively gather the path of every CSV log stored under "No Videos".
files = [
    os.path.join(folder, name)
    for folder, _, names in os.walk("No Videos")
    for name in names
    if name.endswith('.csv')
]
%%capture
import pandas as pd
# Read every participant CSV and stamp its rows with the participant id, which
# is taken from the name of the directory that holds the file.
participant_logs = []
for csv_path in files:
    log = pd.read_csv(csv_path, index_col=None, header=0)
    # Item assignment always writes a real column. Attribute assignment
    # (`log.participantId = ...`) only does so when the column already exists;
    # otherwise it sets a plain Python attribute that is not part of the data.
    # basename() also copes with Windows separators, unlike split('/').
    log['participantId'] = os.path.basename(os.path.dirname(csv_path))
    participant_logs.append(log)

# Stack all logs into one table. Row labels restart for each file, so the
# resulting index is not unique.
frame = pd.concat(participant_logs)
frame.head()
| | participantId | condition | timeSinceStartup | correctNodeHit | keypressed | calibrationData | bubbleSize | numberNodes | targetNode | currentSelectedNode | currentState | correctedEyeX | correctedEyeY | rawEyeX | rawEyeY |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | NaN | 3.825378 | NaN | HAPRING_TIP | noCalibrationDataSet | NaN | 0.0 | NaN | -1.0 | Introduction | 0.0 | 0.0 | 960.0 | 540.0 |
| 1 | 1 | NaN | 7.018346 | NaN | HAPRING_TIP | noCalibrationDataSet | NaN | 0.0 | NaN | -1.0 | Introduction | 0.0 | 0.0 | 960.0 | 540.0 |
| 2 | 1 | NaN | 7.033599 | NaN | NaN | noCalibrationDataSet | NaN | 0.0 | NaN | -1.0 | Introduction | 0.0 | 0.0 | 960.0 | 540.0 |
| 3 | 1 | NaN | 7.034281 | NaN | NaN | noCalibrationDataSet | NaN | 0.0 | NaN | -1.0 | Introduction | 0.0 | 0.0 | 960.0 | 540.0 |
| 4 | 1 | NaN | 7.036498 | NaN | NaN | noCalibrationDataSet | NaN | 0.0 | NaN | -1.0 | Introduction | 0.0 | 0.0 | 960.0 | 540.0 |
# List the distinct values found in the `condition` column.
frame['condition'].unique()
array([nan, 'WITHCUSTOMCALIB', 'MOUSE', 'EYE', 'noCalibrationDataSet'], dtype=object)
import numpy as np
# Group the log rows per participant and condition. Rows whose condition is
# NaN fall out here because groupby skips NaN keys by default.
df = frame[['participantId', 'condition', 'timeSinceStartup']].groupby(['participantId', 'condition'])

# Total time per group = largest timestamp minus smallest. Series.max()/min()
# skip NaN, whereas the builtin max()/min() give order-dependent results when
# NaN is present. Only `condition` is moved back into a column, so it is never
# both an index level and a column label at the same time (pandas treats that
# as ambiguous when a later step groups or plots by 'condition').
total_time_df = (
    df['timeSinceStartup']
    .agg(lambda times: times.max() - times.min())
    .reset_index(level='condition')
)

# Keep only the three experiment conditions; any other value that ended up in
# the column (e.g. 'noCalibrationDataSet') is discarded.
total_time_df = total_time_df[total_time_df['condition'].isin(['WITHCUSTOMCALIB', 'MOUSE', 'EYE'])]
import matplotlib.pyplot as plt
# Switch matplotlib to the ggplot style sheet before drawing.
plt.style.use('ggplot')

# Draw one box per condition from the aggregated totals, then display it.
total_time_df.boxplot(by='condition')
plt.show()

[Figure: box plot of total time (range of timeSinceStartup) grouped by condition]

Clone this wiki locally