Skip to content

Commit 935cfa6

Browse files
authored
Update demo.py
1 parent 28013a0 commit 935cfa6

File tree

1 file changed

+350
-24
lines changed

1 file changed

+350
-24
lines changed

demo.py

Lines changed: 350 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,354 @@
1-
import os
2-
import pandas as pd
3-
4-
folderBase = os.getcwd()
5-
dataFolderName = "cleanData"
6-
dataFolderPath = os.path.join(folderBase,dataFolderName)
7-
outputData_clean = pd.DataFrame(columns=["Participant ID","Participant Group","ListID","Presented Word","Present Position","Recalled Word","Recall Position","Reaction Time","Hit"])
8-
outputData_intrusion = pd.DataFrame(columns=["Participant ID","Participant Group","ListID","Presented Word","Present Position","Recalled Word","Recall Position","Reaction Time","Hit"])
9-
10-
for file in os.listdir(dataFolderPath):
11-
participantId = file.split("-")[0]
12-
participantGroup = file.split("-")[1][:3]
13-
inputData = pd.read_csv(os.path.join(dataFolderPath,file))
14-
15-
if "clean" in file:
16-
outputData_clean = pd.concat([inputData, outputData_clean], ignore_index=True)
17-
outputData_clean["Participant ID"].fillna(f"{participantId}",inplace=True)
18-
outputData_clean["Participant Group"].fillna(f"{participantGroup}",inplace=True)
1+
import os
import pandas as pd
import Levenshtein as lv
import unicodedata
import numpy as np
from openpyxl.utils import column_index_from_string as cifs
import configparser
import logging
import time

# Global scratch list: filled by convertLetterstoNumbers() and read by
# dropColumns(); cleared on every conversion call.
columnsNumbers = []

logger = logging.getLogger(__name__)
logging.basicConfig(
    # FIX: the original nested double quotes inside a double-quoted f-string
    # (f"{time.strftime("%d_%H_%M", ...)}_demo.log"), which is a SyntaxError
    # on Python < 3.12. Build the file name with plain concatenation instead.
    filename=time.strftime("%d_%H_%M", time.localtime()) + "_demo.log",
    encoding="utf-8",
    filemode="a",
    format="{asctime} - {levelname} - {message}",
    style="{",
    datefmt="%Y-%m-%d %H:%M",
    level=logging.DEBUG,  # was the bare magic number 10 (== DEBUG)
)

# ANSI escape codes for colored console output.
RED = '\033[91m'
GREEN = '\033[92m'
BLUE = '\033[94m'
RESET = '\033[0m'
27+
28+
29+
def normalize(text):
    """Upper-case *text* and reduce Turkish letters to plain ASCII latin.

    E.g. 'İyilik' -> 'IYILIK'. NFKD decomposition splits any remaining
    accented letter into base letter + combining mark, and the ASCII encode
    with 'ignore' then drops everything non-latin.
    """
    turkish_to_ascii = str.maketrans("İŞĞÜÖÇ", "ISGUOC")
    upper = text.upper().translate(turkish_to_ascii)
    decomposed = unicodedata.normalize('NFKD', upper)
    return decomposed.encode('ASCII', 'ignore').decode('utf-8')
41+
def listCSV(lst):
    """Return only the entries of *lst* whose name ends in '.csv'."""
    return [name for name in lst if name.endswith(".csv")]
49+
50+
def levenshtein(word, target):
    """Return True when *word* is within ``numTolerate`` edit operations of
    *target* after both are normalized, False otherwise.

    Relies on the module-level ``numTolerate`` read from config.ini.

    FIX: the original returned True explicitly but fell off the end for
    non-matches (implicitly returning None); a predicate should always
    return a bool.
    """
    word = normalize(word)
    target = normalize(target)  # compare in normalized ASCII form
    return lv.distance(word, target) <= numTolerate
56+
57+
58+
def convertLetterstoNumbers(lst):
    """Convert spreadsheet column letters (e.g. "A", "BC") to 0-based column
    indexes and store them in the global ``columnsNumbers`` list, so they
    match the positional indexing of the data frame.

    The global list is cleared first, so it always reflects the latest call.
    """
    columnsNumbers.clear()
    for letters in lst:  # FIX: loop variable no longer shadows builtin `str`
        columnsNumbers.append(cifs(letters) - 1)  # openpyxl indexes are 1-based
    logger.info(f"Columns converted to index numbers. \n Numbers:: {columnsNumbers}")
65+
66+
def dropColumns(x):
    """Return *x* with every column whose position is NOT listed in the
    global ``columnsNumbers`` removed.

    FIX: drops all unwanted columns in a single ``drop`` call instead of one
    call per column, and passes column labels through unchanged — the
    original wrapped each label in an f-string, which breaks for any
    non-string column label.
    """
    keep = set(columnsNumbers)
    toRemove = [col for i, col in enumerate(x.columns) if i not in keep]
    x = x.drop(columns=toRemove)
    logger.info("Columns Dropped!")
    return x
77+
78+
def columnStrip(df, col, char):
    """Strip the characters in *char* from both ends of every value in
    column *col* of *df* (in place) and return *df*.

    FIX: uses the vectorised ``.str.strip`` accessor. The original wrote via
    ``df.loc[i, col]`` for ``i in range(len(...))``, which silently assumed a
    default 0..n-1 RangeIndex and raised AttributeError on any non-string
    cell (e.g. NaN).
    """
    df[col] = df[col].str.strip(char)
    return df
83+
84+
def noneFound():
    """Placeholder hook for the 'no reaction-time columns found' case.

    Currently does nothing (returns None); kept so callers have a stable
    name to invoke once the handling is implemented.
    """
86+
87+
def findColumnTypes(df, searchedType):
    """Return the names of all columns in *df* whose name ends with
    *searchedType* (e.g. ".text" or ".rt").

    Logs an error when fewer than 4 matches are found (reaction-time
    columns may be absent entirely), but still returns whatever was
    collected in either case.
    """
    temp = [column for column in df.columns if column.endswith(searchedType)]
    if len(temp) < 4:
        logger.error(f"{temp}, unsatisfactory column count.")
        return temp
    logger.info(f"Columns found for: {searchedType}, \n and are {temp}")
    return temp
98+
99+
def isOneCellMP(cell):
    """Return True when *cell* is a string holding several space-separated
    responses (a "one cell, multiple responses" cell), False otherwise.

    A single trailing character after the space (e.g. "word x") does not
    count as a second response — this matches the original heuristic.

    FIX: non-string cells (NaN floats etc.) now explicitly return False
    instead of falling off the end and implicitly returning None; both call
    sites guard with ``type(cell) == str`` so behavior there is unchanged.
    """
    if not isinstance(cell, str):
        return False
    parts = cell.split(" ")
    return len(parts) > 1 and len(parts[1]) > 1
19107

20-
if "intrusion" in file:
21-
outputData_intrusion = pd.concat([inputData, outputData_intrusion], ignore_index=True)
22-
outputData_intrusion["Participant ID"].fillna(f"{participantId}",inplace=True)
23-
outputData_intrusion["Participant Group"].fillna(f"{participantGroup}",inplace=True)
108+
def arrayCombine(array):
    """Flatten a recall column into a flat list of words.

    Multi-response cells ("a b") are split on spaces into individual words;
    plain string cells are kept whole; non-string cells (NaN) are skipped.

    Returns:
        (combined, rows): the flat word list, plus — for each word — the
        row index of the cell it came from (repeated for words that shared
        a cell). The row ids are used later to look up reaction times.
    """
    combined = []
    rows = []
    for idx, cell in enumerate(array):
        if isOneCellMP(cell):
            # one cell, several responses: one output entry per word
            for word in cell.split(" "):
                combined.append(word)
                rows.append(idx)
        elif type(cell) == str:
            # single-response cell
            combined.append(cell)
            rows.append(idx)
    return combined, rows
126+
# def indexOneCellMultipleResponse(array):
127+
# tmp = array
128+
# output = []
129+
# for i in range(len(tmp)):
130+
# cell = tmp[i]
131+
# if isOneCellMP(cell) == True:
132+
# output.append(i)
133+
# else: continue
27134

135+
# return output
136+
137+
# def OneCellDeconstruct(array):
138+
# newArray = []
139+
# for cellnum in indexOneCellMultipleResponse(array):
140+
# cellSplit = array[cellnum].split(" ")
141+
# for word in cellSplit:
142+
# newArray.append(word)
143+
# return newArray
144+
145+
def createRecallRtimeTable():
    """Return an empty DataFrame shaped for recall / reaction-time rows."""
    columns = ["ListID", "Recalled Word", "Recall Position", "Reaction Time", "RT Viable"]
    return pd.DataFrame(columns=columns)
148+
149+
def fillRecallRtimeTable(df,table,RecallEnd,ReactionEnd):
    """Populate *table* with one row per recalled word, then resolve each
    row's reaction time.

    Pass 1: for every recall column (name ending in *RecallEnd*), flatten
    its cells with arrayCombine() and write ListID / word / position.
    The "Reaction Time" column temporarily stores the SOURCE ROW INDEX of
    the word — not a time yet.
    Pass 2: use that stored row index to look up the real value in the
    matching reaction-time column (name ending in *ReactionEnd*),
    overwriting the placeholder. When fewer than 5 reaction columns exist,
    a dummy all-zero sequence stands in so the lookup cannot raise.

    Returns the filled table with "Recalled Word" stripped of newlines.
    """
    temp = table
    recallColumnNames = findColumnTypes(df, RecallEnd)
    pushtoRow = 0  # next free row of the output table
    for ncol in range(len(recallColumnNames)):  # ncol doubles as the ListID
        recallCol = recallColumnNames[ncol]
        array = df[recallCol]
        recallWords, rows = arrayCombine(array)  # flat words + their source row ids
        for i in range(len(recallWords)):
            temp.loc[pushtoRow, "ListID"] = ncol
            temp.loc[pushtoRow, "Recalled Word"] = recallWords[i]
            temp.loc[pushtoRow, "Recall Position"] = i+1
            # NOTE: source row index parked here; overwritten in pass 2 below
            temp.loc[pushtoRow, "Reaction Time"] = rows[i]
            logger.info(f"To {pushtoRow}:: {ncol,recallWords[i],i+1,rows[i]} pushed.")
            pushtoRow += 1

    reactionColumnNames = findColumnTypes(df, ReactionEnd)
    # findColumnTypes logs an error when too few are found; the <5 fallback
    # below keeps this function from raising in that case.

    for i in range(len(temp["Recalled Word"])):
        gettable = temp["ListID"][i]        # which reaction column to read
        getindex = temp["Reaction Time"][i]  # source row index stored in pass 1
        # Fallback for files with no (or too few) reaction-time columns:
        # an all-zero dummy keeps the positional lookup below safe.
        if len(reactionColumnNames) <5:
            reactionCol = [0 for _ in range(500)]
        else:
            reactionCol = df[reactionColumnNames[gettable]]
        temp.loc[i,"Reaction Time"] = reactionCol[getindex]

    temp = columnStrip(temp, "Recalled Word", "\n")

    return temp
184+
185+
def whichRowstoLists(df, columnName: str, listNum: int):
    """
    Return the row indexes where each word list starts and ends, as a flat
    list: [start, end, start, end, ...].

    A NaN -> value transition marks a list start; a value -> NaN transition
    marks a list end.

    Args:
        columnName: name of the column to scan.
        listNum: number of lists to look for; caps collection to work around
            the one-extra-list overflow. Exclude a trial list from this
            count if you have one.
    """
    array = df[columnName]
    boundaries = []
    listsClosed = 0
    for i in range(len(array) - 1):
        leftIsNan = pd.isna(array[i])
        rightIsNan = pd.isna(array[i + 1])
        if leftIsNan and not rightIsNan and listsClosed <= listNum:
            boundaries.append(i + 1)  # first row of a new list
        if not leftIsNan and rightIsNan and listsClosed <= listNum:
            boundaries.append(i)      # last row of the current list
            listsClosed += 1
    return boundaries
215+
216+
def createWordPresentTable():
    """Return an empty DataFrame shaped for presented-word rows."""
    columns = ["ListID", "Words Presented", "Present Position"]
    return pd.DataFrame(columns=columns)
218+
219+
def fillWordPresentTable(df, columnName: str, listNum: int, table):
    """Fill *table* with every presented word, its list id, and its 1-based
    position inside that list, using the [start, end, ...] row pairs found
    by whichRowstoLists(). Returns the filled table.
    """
    rowNums = whichRowstoLists(df, columnName, listNum)
    array = df[columnName]

    outRow = 0  # next row of *table* to write via .loc
    for listId in range(listNum + 1):  # +1 covers the overflow extra list
        startRow = int(rowNums[2 * listId])      # even slots: list starts
        endRow = int(rowNums[2 * listId + 1])    # odd slots: list ends
        for rowIdx in range(startRow, endRow + 1):
            table.loc[outRow, "ListID"] = listId
            table.loc[outRow, "Words Presented"] = array[rowIdx]
            table.loc[outRow, "Present Position"] = (rowIdx - startRow) + 1
            outRow += 1
    return table
241+
242+
def mergeTables(wordPresent, recall):
    """Join the presented-word table with the recall table into one table.

    For every presented word, scan the whole recall table for a normalized
    exact match and label the output row:
        Hit = 1  recalled from the same list (true hit)
        Hit = 2  recalled but from a different list (list intrusion)
        Hit = 0  never recalled (miss)
    Only the first match per presented word is recorded (the `found` latch).
    Returns the merged DataFrame.
    """
    newTable = pd.DataFrame(columns=["ListID","Presented Word","Present Position","Recalled Word","Recall Position","Reaction Time","Hit"])
    # remainderRecall = recall
    currentRow = 0  # write cursor into newTable
    wordsCol = wordPresent["Words Presented"]
    recallCol = recall["Recalled Word"]

    for i in range(len(wordsCol)):
        presented = normalize(str(wordsCol[i]))  # compare in normalized form
        presentedListRelation = wordPresent["ListID"][i]

        # copy the presented-word fields into the output row first;
        # the matching recall fields (if any) are filled in below
        newTable.loc[currentRow, "ListID"] = wordPresent["ListID"][i]
        newTable.loc[currentRow, "Presented Word"] = wordPresent["Words Presented"][i]
        newTable.loc[currentRow, "Present Position"] = wordPresent["Present Position"][i]

        found = False  # latch: only the first recall match is recorded

        # NOTE(review): collected but never used — leftover from the
        # disabled remainderRecall bookkeeping kept commented out below
        toDrop = []

        for x in range(len(recallCol)):
            recalled = normalize(str(recallCol[x]))
            recallListRelation = recall["ListID"][x]

            # same word, same list -> true hit
            if recalled == presented and recallListRelation == presentedListRelation and found == False:
                newTable.loc[currentRow, "Recalled Word"] = recall["Recalled Word"][x]
                newTable.loc[currentRow, "Recall Position"] = recall["Recall Position"][x]
                newTable.loc[currentRow, "Reaction Time"] = recall["Reaction Time"][x]
                newTable.loc[currentRow, "Hit"] = 1 # true hit
                toDrop.append(x)
                # try:
                #     remainderRecall = remainderRecall.drop(index = toDrop).reset_index(drop=True) # to take in remaining values
                # except Exception as e:
                #     logger.error(f"While on File: {file}, \n and on function merge \n index = x and array = {remainderRecall}\n faced with Exception: {e}")

                currentRow += 1
                found = True

            # same word, different list -> list intrusion
            if recalled == presented and recallListRelation != presentedListRelation and found == False:
                newTable.loc[currentRow, "Recalled Word"] = recall["Recalled Word"][x]
                newTable.loc[currentRow, "Recall Position"] = recall["Recall Position"][x]
                newTable.loc[currentRow, "Reaction Time"] = recall["Reaction Time"][x]
                newTable.loc[currentRow, "Hit"] = 2 # list intrusion
                # remainderRecall = remainderRecall.drop(index = x) # to take in remaining values
                currentRow += 1
                found = True

        # no recall matched this presented word -> miss
        # (the original comment here said "list intrusion" — copy-paste slip)
        if found == False:
            newTable.loc[currentRow, "Hit"] = 0  # miss
            currentRow += 1

    return newTable
294+
295+
### Define folders
folderBase = os.getcwd()  # expected to be the project root (psychopy-recall-suffix)
folderDataRaw = os.path.join(folderBase, "data")
folderRawCSV = listCSV(os.listdir(folderDataRaw))

# Load configuration
pathConfig = os.path.join(folderBase, "config.ini")
config = configparser.ConfigParser()
config.read(pathConfig)

numTolerate = int(config["DEFAULT"]["numTolerate"])  # Levenshtein edit tolerance
numLists = int(config["DEFAULT"]["numLists"])        # number of word lists per file
columnsExtract = config["DEFAULT"].get("columnsExtract").replace(" ", "").split(",")
trialyes = int(config["DEFAULT"]["trialyes"])        # NOTE(review): read but currently unused

if "cleanData" not in os.listdir(folderBase):  # create the output folder once
    os.mkdir("cleanData")

folderDataClean = os.path.join(folderBase, "cleanData")

for file in folderRawCSV:  # process each raw CSV that is not already cleaned

    fileCleaned = file.replace(".csv", "") + "_clean.csv"
    fileRemainder = file.replace(".csv", "") + "_intrusions.csv"
    filePath = os.path.join(folderDataRaw, file)

    if fileCleaned not in os.listdir(folderDataClean):
        try:
            dataInput = pd.read_csv(filePath)

            # Translate the configured column letters into indexes (fills
            # the global columnsNumbers) and keep only those columns.
            convertLetterstoNumbers(columnsExtract)
            dataInput = dropColumns(dataInput)

            recallTable = fillRecallRtimeTable(dataInput, createRecallRtimeTable(), ".text", ".rt")
            # FIX: the list count now comes from config (numLists) instead of
            # the hard-coded 4 the original passed despite reading the config.
            wordsTable = fillWordPresentTable(dataInput, "Words", numLists, createWordPresentTable())

            table = mergeTables(wordsTable, recallTable)

            # save the cleaned file
            output_path = os.path.join(folderDataClean, fileCleaned)
            table.to_csv(output_path, index=False, encoding="utf-8-sig")
            # remainder.to_csv(os.path.join(folderDataClean,fileRemainder))

            print(f"{GREEN}File created!{fileCleaned}{RESET}")
        except Exception as e:
            # best-effort per-file processing: log and move on to the next file
            print(f"{RED}Faulty file: {file}, passed.{RESET}")
            logger.error(f"RAISED EXCEPTION. {e} for file: {file}")
            continue

    if fileCleaned in os.listdir(folderDataClean):
        print(f"{BLUE}File with name: {fileCleaned}, already exists in cleanData as {file}_clean.csv{RESET}")
        continue
354+

0 commit comments

Comments
 (0)