Update Survey Question Conversion and Analysis.R · stau.space/underrepresentation-theory@66acd2d

stau.space / underrepresentation-theory

fork atom

this repo has no description

fork atom

Update Survey Question Conversion and Analysis.R

authored by

Aden Mobley and committed by

GitHub 8 months ago 66acd2d6 d178a432

+183 -8

1 changed file

expand all

code

Survey Question Conversion and Analysis.R

+183 -8

code/Survey Question Conversion and Analysis.R

# NOTE(review): only part of the script is visible in this view. A "# ···"
# line marks a region that is not shown here and must be kept as it is.

# ···

CBMS_MSp2 <- cbmssurveyquestions |>
  filter(Type == "MS") |>
  pull(Question)

# Combine both multiple-select question sets into one character vector.
CBMS_all_multipleselect <- as.character(c(CBMS_MSp1, CBMS_MSp2))
CBMS_multiselect_count <- length(CBMS_all_multipleselect)

# ···

  # Tail of the call that builds `tagged_AMS`; its opening lines sit in the
  # region that is not shown above, so it is reproduced unchanged.
  if (length(matches) > 0) {
    paste("Tags:", paste(unique(matches), collapse = ", "))
  } else {
    NA
  }

})
tagged_AMS_df <- as.data.frame(tagged_AMS)


# Helpers ----

# Tag each question with the keywords it mentions.
#
# questions: vector of question texts, processed one element at a time.
# sep:       separator placed between the distinct keywords of one question.
#
# Returns a character vector with one element per question:
# "Tags: <kw><sep><kw>..." when at least one entry of the global `keywords`
# matches, `NA` otherwise. `vapply()` pins the result type to character even
# when nothing matches or `questions` is empty.
tag_questions <- function(questions, sep = ", ") {
  # Build the alternation pattern once per call instead of once per question.
  pattern <- paste(keywords, collapse = "|")
  vapply(
    questions,
    function(text) {
      matches <- unique(str_extract_all(text, pattern)[[1]])
      if (length(matches) == 0) {
        return(NA_character_)
      }
      paste("Tags:", paste(matches, collapse = sep))
    },
    character(1)
  )
}

# Count the rows of a tags data frame that contain no `NA`, i.e. the number
# of questions that received at least one tag.
count_tagged <- function(tags_df) {
  sum(complete.cases(tags_df))
}


# AMS: tags per question type ----

tagged_AMS_multiplechoice <- tag_questions(AMS_MC)
ams_mc_tagged <- as.data.frame(tagged_AMS_multiplechoice)

# NOTE(review): the next three use "|" between keywords while every other
# call uses ", ". Kept as written; confirm whether the difference is intended.
AMS_writtenresponse_tags <- tag_questions(AMS_WR, sep = "|")
ams_wr_tagged <- as.data.frame(AMS_writtenresponse_tags)

AMS_tableinput_tags <- tag_questions(AMS_TI, sep = "|")
ams_ti_tagged <- as.data.frame(AMS_tableinput_tags)

AMS_multipleselect_tags <- tag_questions(AMS_MS, sep = "|")
ams_ms_tagged <- as.data.frame(AMS_multipleselect_tags)

# Pair every AMS question set with its tags.
AMS_Question_Tags <- data.frame(
  questions = AMS_questions_list,
  tags = tagged_AMS_df
)

AMS_MC_Tags <- data.frame(
  questions = AMS_MC,
  tags = ams_mc_tagged
)

AMS_MS_Tags <- data.frame(
  questions = AMS_MS,
  tags = ams_ms_tagged
)

AMS_TI_Tags <- data.frame(
  questions = AMS_TI,
  tags = ams_ti_tagged
)

AMS_WR_Tags <- data.frame(
  questions = AMS_WR,
  tags = ams_wr_tagged
)


# CBMS: tags per question type ----

CBMS_all_questions_char <- as.character(CBMS_all_questions)

# ···

  # Tail of the call that builds `tagged_CBMS`; its opening lines sit in the
  # region that is not shown above, so it is reproduced unchanged.
  if (length(matches) > 0) {
    paste("Tags:", paste(unique(matches), collapse = ", "))
  } else {
    NA
  }
})

tagged_CBMS_df <- as.data.frame(tagged_CBMS)

CBMS_mc_tagged <- tag_questions(CBMSMC_full)
tagged_cbms_mc <- as.data.frame(CBMS_mc_tagged)

CBMS_ms_tagged <- tag_questions(CBMS_all_multipleselect)
tagged_cbms_ms <- as.data.frame(CBMS_ms_tagged)

CBMS_ti_tagged <- tag_questions(CBMS_all_tableinput)
tagged_cbms_ti <- as.data.frame(CBMS_ti_tagged)

CBMS_wr_tagged <- tag_questions(CBMS_all_writtenresponse)
tagged_cbms_wr <- as.data.frame(CBMS_wr_tagged)

CBMS_all_questions_df <- as.data.frame(CBMS_all_questions_char)

# Pair every CBMS question set with its tags.
CBMS_Question_Tags <- data.frame(
  questions = CBMS_all_questions_df,
  tags = tagged_CBMS_df
)

CBMS_MC_Tags <- data.frame(
  questions = CBMSMC_full,
  tags = CBMS_mc_tagged
)
all_multiselect_CBMS <- as.data.frame(CBMS_all_multipleselect)

CBMS_MS_Tags <- data.frame(
  questions = CBMS_all_multipleselect,
  tags = tagged_cbms_ms$CBMS_ms_tagged
)
rownames(CBMS_MS_Tags) <- NULL

CBMS_TI_Tags <- data.frame(
  questions = CBMS_all_tableinput,
  tags = tagged_cbms_ti$CBMS_ti_tagged
)
rownames(CBMS_TI_Tags) <- NULL

CBMS_WR_Tags <- data.frame(
  questions = CBMS_all_writtenresponse,
  tags = tagged_cbms_wr$CBMS_wr_tagged
)
# Reset the row names of the table just built; this previously reset
# `CBMS_TI_Tags` a second time and left `CBMS_WR_Tags` untouched.
rownames(CBMS_WR_Tags) <- NULL


# IPEDS ----

tagged_IPEDS <- tag_questions(IPEDS_all_questions)
tagged_IPEDS_df <- as.data.frame(tagged_IPEDS)

# There were no tagged questions in the IPEDS survey, so each question type
# is assigned a count of zero.
IPEDS_multiplechoice_tagged <- 0
IPEDS_multipleselect_tagged <- 0
# Misspelled name used previously; kept so existing references still resolve.
IPEDS_multipleslect_tagged <- IPEDS_multipleselect_tagged
IPEDS_tableinput_tagged <- 0
IPEDS_writtenresponse_tagged <- 0


# Number of tagged questions per survey and question type ----

tagged_AMS_count <- count_tagged(tagged_AMS_df)
tagged_ams_mc_count <- count_tagged(ams_mc_tagged)
tagged_ams_ms_count <- count_tagged(ams_ms_tagged)
tagged_ams_ti_count <- count_tagged(ams_ti_tagged)
tagged_ams_wr_count <- count_tagged(ams_wr_tagged)

tagged_CBMS_count <- count_tagged(tagged_CBMS_df)
tagged_cbms_mc_count <- count_tagged(tagged_cbms_mc)
tagged_cbms_ms_count <- count_tagged(tagged_cbms_ms)
tagged_cbms_wr_count <- count_tagged(tagged_cbms_wr)
# Stored under a `_count` name like its siblings; assigning the count back to
# `tagged_cbms_ti` would replace the tags data frame with a number.
tagged_cbms_ti_count <- count_tagged(tagged_cbms_ti)

tagged_IPEDS_count <- count_tagged(tagged_IPEDS_df)

# TODO(review): a bare `ggplot` symbol stood here, presumably the start of an
# unfinished plot of the counts above. Add the plotting code.