this repo has no description

Merge branch 'main' of github.com:sona-tau/underrepresentation-theory

+86
+86
code/interactive_wordcloud.R
# Interactive word clouds of survey question text, split by source
# (AMS, CBMS, IPEDS).
#
# Expects `code/big_question_tag_df.rda` to define a data frame named
# `big_question_tag_df` with (at least) the columns `Source`, `Tags` and
# `Questions`. The path is relative, so run this from the repository root.

# Packages ----
# Attach everything up front so the script also runs in a fresh session.
library(dplyr)     # filter(), count(), anti_join()
library(stringr)   # str_detect()
library(tibble)    # view()
library(tidytext)  # unnest_tokens(), stop_words
#install.packages("wordcloud2")
library(wordcloud2)
#install.packages("httpgd")

# Data ----
load("code/big_question_tag_df.rda")

head(big_question_tag_df)
summary(big_question_tag_df)
view(big_question_tag_df)

# Keep only tagged rows. Missing tags are dropped whether they are stored as
# a real NA or as the literal string "NA".
only_tag <- big_question_tag_df |>
  filter(!is.na(Tags), Tags != "NA")
view(only_tag)

tagged_ams <- only_tag |>
  filter(Source == "AMS")

tagged_cbms <- only_tag |>
  filter(Source == "CBMS")

view(tagged_ams)
view(tagged_cbms)

# All questions by source ----
ams_ques <- big_question_tag_df |>
  filter(Source == "AMS")

cbms_ques <- big_question_tag_df |>
  filter(Source == "CBMS")

ipeds_ques <- big_question_tag_df |>
  filter(Source == "IPEDS")

# Tokenization and word counts per source ----

# Split the `Questions` column into one lower-cased word per row, then drop
# stop words and tokens made up only of digits.
#
# questions_df: data frame with a character column `Questions`.
# Returns `questions_df` with `Questions` replaced by a `word` column.
tokenize_questions <- function(questions_df) {
  questions_df |>
    unnest_tokens(word, Questions) |>
    # Explicit key: avoids the "Joining with `by = ...`" message and an
    # accidental join on any other column the two tables might share.
    anti_join(stop_words, by = "word") |>
    filter(!str_detect(word, "^[0-9]+$"))
}

# AMS
tidy_ams_ques <- tokenize_questions(ams_ques)

word_counts_ams <- tidy_ams_ques |>
  count(word, sort = TRUE) |>
  # count() never yields n = 0, so this keeps every word; the threshold is
  # left in place as a tuning knob to match the other sources.
  filter(n > 0)

view(word_counts_ams)

# CBMS
tidy_cbms_ques <- tokenize_questions(cbms_ques)

word_counts_cbms <- tidy_cbms_ques |>
  count(word, sort = TRUE) |>
  filter(n > 5) |>
  # Drop questionnaire item labels and abbreviations. The dot in "e.g" is
  # escaped so the pattern no longer matches real words that merely contain
  # e<any char>g (e.g. "engineering", "english").
  filter(!str_detect(word, "_|b2|b1|e1|f1|e2|e\\.g|ii"))

view(word_counts_cbms)

# IPEDS
tidy_ipeds_ques <- tokenize_questions(ipeds_ques)

word_counts_ipeds <- tidy_ipeds_ques |>
  count(word, sort = TRUE) |>
  filter(n > 5) |>
  # Escaped dot: remove only the literal "e.g" token.
  filter(!str_detect(word, "e\\.g"))

view(word_counts_ipeds)

# Word clouds ----

# AMS
my_palette <- c(
  "#355070",
  "#6d597a",
  "#b56576",
  "#e56b6f",
  "#eaac8b"
)

# Cycle the five palette colours so every word gets a colour.
ams_wc <- wordcloud2(
  word_counts_ams,
  color = rep_len(my_palette, nrow(word_counts_ams))
)