@@ -117,29 +117,29 @@ library(excluder)
117
117
df <- qualtrics_text %>%
118
118
mark_preview() %>%
119
119
mark_duration(min_duration = 200)
120
- # > i 2 rows were collected as previews. It is highly recommended to exclude these rows before further processing .
121
- # > i 23 out of 100 rows took less time than 200.
120
+ # > 2 out of 100 rows were collected as previews. It is highly recommended to exclude these rows before further checking .
121
+ # > 23 out of 100 rows took less time than the minimum duration of 200 seconds .
122
122
tibble::glimpse(df)
123
123
# > Rows: 100
124
124
# > Columns: 18
125
- # > $ StartDate <dttm> 2020-12-11 12:06:52, 2020-12-11 12:06:43, 202~
126
- # > $ EndDate <dttm> 2020-12-11 12:10:30, 2020-12-11 12:11:27, 202~
127
- # > $ Status <chr> "Survey Preview", "Survey Preview", "IP Addres~
128
- # > $ IPAddress <chr> NA, NA, "73.23.43.0", "16.140.105.0", "107.57.~
129
- # > $ Progress <dbl> 100, 100, 100, 100, 100, 100, 100, 100, 100, 1~
130
- # > $ `Duration (in seconds)` <dbl> 465, 545, 651, 409, 140, 213, 177, 662, 296, 2~
131
- # > $ Finished <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE~
132
- # > $ RecordedDate <dttm> 2020-12-11 12:10:30, 2020-12-11 12:11:27, 202~
133
- # > $ ResponseId <chr> "R_xLWiuPaNuURSFXY", "R_Q5lqYw6emJQZx2o", "R_f~
134
- # > $ LocationLatitude <dbl> 29.73694, 39.74107, 34.03852, 44.96581, 27.980~
135
- # > $ LocationLongitude <dbl> -94.97599, -121.82490, -118.25739, -93.07187, ~
136
- # > $ UserLanguage <chr> "EN", "EN", "EN", "EN", "EN", "EN", "EN", "EN"~
137
- # > $ Browser <chr> "Chrome", "Chrome", "Chrome", "Chrome", "Chrom~
138
- # > $ Version <chr> "88.0.4324.41", "88.0.4324.50", "87.0.4280.88"~
139
- # > $ `Operating System` <chr> "Windows NT 10.0", "Macintosh", "Windows NT 10~
140
- # > $ Resolution <chr> "1366x768", "1680x1050", "1366x768", "1536x864~
141
- # > $ exclusion_preview <chr> "preview", "preview", "", "", "", "", "", "", ~
142
- # > $ exclusion_duration <chr> "", "", "", "", "duration_quick ", "", "duratio~
125
+ # > $ StartDate <dttm> 2020-12-11 12:06:52, 2020-12-11 12:06:43, 202…
126
+ # > $ EndDate <dttm> 2020-12-11 12:10:30, 2020-12-11 12:11:27, 202…
127
+ # > $ Status <chr> "Survey Preview", "Survey Preview", "IP Addres…
128
+ # > $ IPAddress <chr> NA, NA, "73.23.43.0", "16.140.105.0", "107.57.…
129
+ # > $ Progress <dbl> 100, 100, 100, 100, 100, 100, 100, 100, 100, 1…
130
+ # > $ `Duration (in seconds)` <dbl> 465, 545, 651, 409, 140, 213, 177, 662, 296, 2…
131
+ # > $ Finished <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE…
132
+ # > $ RecordedDate <dttm> 2020-12-11 12:10:30, 2020-12-11 12:11:27, 202…
133
+ # > $ ResponseId <chr> "R_xLWiuPaNuURSFXY", "R_Q5lqYw6emJQZx2o", "R_f…
134
+ # > $ LocationLatitude <dbl> 29.73694, 39.74107, 34.03852, 44.96581, 27.980…
135
+ # > $ LocationLongitude <dbl> -94.97599, -121.82490, -118.25739, -93.07187, …
136
+ # > $ UserLanguage <chr> "EN", "EN", "EN", "EN", "EN", "EN", "EN", "EN"…
137
+ # > $ Browser <chr> "Chrome", "Chrome", "Chrome", "Chrome", "Chrom…
138
+ # > $ Version <chr> "88.0.4324.41", "88.0.4324.50", "87.0.4280.88"…
139
+ # > $ `Operating System` <chr> "Windows NT 10.0", "Macintosh", "Windows NT 10…
140
+ # > $ Resolution <chr> "1366x768", "1680x1050", "1366x768", "1536x864…
141
+ # > $ exclusion_preview <chr> "preview", "preview", "", "", "", "", "", "", …
142
+ # > $ exclusion_duration <chr> "", "", "", "", "duration ", "", "duration", ""…
143
143
```
144
144
145
145
Use the
@@ -152,28 +152,28 @@ df <- qualtrics_text %>%
152
152
mark_preview() %>%
153
153
mark_duration(min_duration = 200) %>%
154
154
unite_exclusions()
155
- # > i 2 rows were collected as previews. It is highly recommended to exclude these rows before further processing .
156
- # > i 23 out of 100 rows took less time than 200.
155
+ # > 2 out of 100 rows were collected as previews. It is highly recommended to exclude these rows before further checking .
156
+ # > 23 out of 100 rows took less time than the minimum duration of 200 seconds .
157
157
tibble::glimpse(df)
158
158
# > Rows: 100
159
159
# > Columns: 17
160
- # > $ StartDate <dttm> 2020-12-11 12:06:52, 2020-12-11 12:06:43, 202~
161
- # > $ EndDate <dttm> 2020-12-11 12:10:30, 2020-12-11 12:11:27, 202~
162
- # > $ Status <chr> "Survey Preview", "Survey Preview", "IP Addres~
163
- # > $ IPAddress <chr> NA, NA, "73.23.43.0", "16.140.105.0", "107.57.~
164
- # > $ Progress <dbl> 100, 100, 100, 100, 100, 100, 100, 100, 100, 1~
165
- # > $ `Duration (in seconds)` <dbl> 465, 545, 651, 409, 140, 213, 177, 662, 296, 2~
166
- # > $ Finished <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE~
167
- # > $ RecordedDate <dttm> 2020-12-11 12:10:30, 2020-12-11 12:11:27, 202~
168
- # > $ ResponseId <chr> "R_xLWiuPaNuURSFXY", "R_Q5lqYw6emJQZx2o", "R_f~
169
- # > $ LocationLatitude <dbl> 29.73694, 39.74107, 34.03852, 44.96581, 27.980~
170
- # > $ LocationLongitude <dbl> -94.97599, -121.82490, -118.25739, -93.07187, ~
171
- # > $ UserLanguage <chr> "EN", "EN", "EN", "EN", "EN", "EN", "EN", "EN"~
172
- # > $ Browser <chr> "Chrome", "Chrome", "Chrome", "Chrome", "Chrom~
173
- # > $ Version <chr> "88.0.4324.41", "88.0.4324.50", "87.0.4280.88"~
174
- # > $ `Operating System` <chr> "Windows NT 10.0", "Macintosh", "Windows NT 10~
175
- # > $ Resolution <chr> "1366x768", "1680x1050", "1366x768", "1536x864~
176
- # > $ exclusions <chr> "preview", "preview", "", "", "duration_quick"~
160
+ # > $ StartDate <dttm> 2020-12-11 12:06:52, 2020-12-11 12:06:43, 202…
161
+ # > $ EndDate <dttm> 2020-12-11 12:10:30, 2020-12-11 12:11:27, 202…
162
+ # > $ Status <chr> "Survey Preview", "Survey Preview", "IP Addres…
163
+ # > $ IPAddress <chr> NA, NA, "73.23.43.0", "16.140.105.0", "107.57.…
164
+ # > $ Progress <dbl> 100, 100, 100, 100, 100, 100, 100, 100, 100, 1…
165
+ # > $ `Duration (in seconds)` <dbl> 465, 545, 651, 409, 140, 213, 177, 662, 296, 2…
166
+ # > $ Finished <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE…
167
+ # > $ RecordedDate <dttm> 2020-12-11 12:10:30, 2020-12-11 12:11:27, 202…
168
+ # > $ ResponseId <chr> "R_xLWiuPaNuURSFXY", "R_Q5lqYw6emJQZx2o", "R_f…
169
+ # > $ LocationLatitude <dbl> 29.73694, 39.74107, 34.03852, 44.96581, 27.980…
170
+ # > $ LocationLongitude <dbl> -94.97599, -121.82490, -118.25739, -93.07187, …
171
+ # > $ UserLanguage <chr> "EN", "EN", "EN", "EN", "EN", "EN", "EN", "EN"…
172
+ # > $ Browser <chr> "Chrome", "Chrome", "Chrome", "Chrome", "Chrom…
173
+ # > $ Version <chr> "88.0.4324.41", "88.0.4324.50", "87.0.4280.88"…
174
+ # > $ `Operating System` <chr> "Windows NT 10.0", "Macintosh", "Windows NT 10…
175
+ # > $ Resolution <chr> "1366x768", "1680x1050", "1366x768", "1536x864…
176
+ # > $ exclusions <chr> "preview, ", "preview, ", ", ", ", ", ",duration",…
177
177
```
178
178
179
179
### Checking
@@ -188,7 +188,7 @@ criterion.
188
188
# Check for preview rows
189
189
qualtrics_text %>%
190
190
check_preview()
191
- # > i 2 rows were collected as previews. It is highly recommended to exclude these rows before further processing .
191
+ # > 2 out of 100 rows were collected as previews. It is highly recommended to exclude these rows before further checking .
192
192
# > StartDate EndDate Status IPAddress Progress
193
193
# > 1 2020-12-11 12:06:52 2020-12-11 12:10:30 Survey Preview <NA> 100
194
194
# > 2 2020-12-11 12:06:43 2020-12-11 12:11:27 Survey Preview <NA> 100
@@ -216,8 +216,10 @@ of rows meeting the exclusion criteria.
216
216
df <- qualtrics_text %>%
217
217
exclude_duration(min_duration = 100) %>%
218
218
exclude_progress()
219
- # > i 4 out of 100 rows of short and/or long duration were excluded, leaving 96 rows.
220
- # > i 4 out of 96 rows with incomplete progress were excluded, leaving 92 rows.
219
+ # > 4 out of 100 rows took less time than the minimum duration of 100 seconds.
220
+ # > 4 out of 100 duplicate rows were excluded, leaving 96 rows.
221
+ # > 4 out of 96 rows did not complete the study.
222
+ # > 4 out of 96 duplicate rows were excluded, leaving 92 rows.
221
223
dim(df)
222
224
# > [1] 92 16
223
225
```
@@ -227,8 +229,10 @@ dim(df)
227
229
df <- qualtrics_text %>%
228
230
exclude_progress() %>%
229
231
exclude_duration(min_duration = 100)
230
- # > i 6 out of 100 rows with incomplete progress were excluded, leaving 94 rows.
231
- # > i 2 out of 94 rows of short and/or long duration were excluded, leaving 92 rows.
232
+ # > 6 out of 100 rows did not complete the study.
233
+ # > 6 out of 100 duplicate rows were excluded, leaving 94 rows.
234
+ # > 2 out of 94 rows took less time than the minimum duration of 100 seconds.
235
+ # > 2 out of 94 duplicate rows were excluded, leaving 92 rows.
232
236
dim(df)
233
237
# > [1] 92 16
234
238
```
@@ -248,13 +252,23 @@ df <- qualtrics_text %>%
248
252
exclude_resolution() %>%
249
253
exclude_ip() %>%
250
254
exclude_location()
251
- # > i 2 out of 100 preview rows were excluded, leaving 98 rows.
252
- # > i 6 out of 98 rows with incomplete progress were excluded, leaving 92 rows.
253
- # > i 9 out of 92 duplicate rows were excluded, leaving 83 rows.
254
- # > i 2 out of 83 rows of short and/or long duration were excluded, leaving 81 rows.
255
- # > i 4 out of 81 rows with unacceptable screen resolution were excluded, leaving 77 rows.
256
- # > i 2 out of 77 rows with IP addresses outside of the specified country were excluded, leaving 75 rows.
257
- # > i 4 out of 75 rows outside of the US were excluded, leaving 71 rows.
255
+ # > 2 out of 100 rows were collected as previews. It is highly recommended to exclude these rows before further checking.
256
+ # > 2 out of 100 duplicate rows were excluded, leaving 98 rows.
257
+ # > 6 out of 98 rows did not complete the study.
258
+ # > 6 out of 98 duplicate rows were excluded, leaving 92 rows.
259
+ # > 6 out of 92 rows have duplicate IP addresses.
260
+ # > 0 NAs were found in location.
261
+ # > 9 out of 91 rows have duplicate locations.
262
+ # > 9 out of 92 duplicate rows were excluded, leaving 83 rows.
263
+ # > 2 out of 83 rows took less time than the minimum duration of 100 seconds.
264
+ # > 2 out of 83 duplicate rows were excluded, leaving 81 rows.
265
+ # > 4 out of 81 rows have screen resolution width less than 1000 or height less than 0.
266
+ # > 4 out of 81 duplicate rows were excluded, leaving 77 rows.
267
+ # > 2 out of 77 rows have IP addresses outside of US.
268
+ # > 2 out of 77 duplicate rows were excluded, leaving 75 rows.
269
+ # > 1 out of 75 rows had no information on location.
270
+ # > 3 out of 75 rows were located outside of the US.
271
+ # > 4 out of 75 duplicate rows were excluded, leaving 71 rows.
258
272
```
259
273
260
274
## Citing this package
@@ -275,9 +289,8 @@ issue](https://github.com/ropensci/excluder/issues/new/choose) or
275
289
[contact the maintainer](mailto:jeffrey.r.stevens@gmail.com) first.
276
290
277
291
Please note that the excluder project is released with a [Contributor
278
- Code of
279
- Conduct] ( https://devguide.ropensci.org/collaboration.html#coc-file ) . By
280
- contributing to this project, you agree to abide by its terms.
292
+ Code of Conduct](https://ropensci.org/code-of-conduct/). By contributing
293
+ to this project, you agree to abide by its terms.
281
294
282
295
## Acknowledgements
283
296
0 commit comments