this repo has no description
1library(shiny)
2library(shinydashboard)
3library(ggplot2)
4library(viridis)
5library(SnowballC)
6library(bslib)
7
8load("data/glove_100d_0.219_AMS.Rda")
9sema_count_glove_100d_ams <- sema_count
10load("data/glove_100d_0.219_CBMS.Rda")
11sema_count_glove_100d_cbms <- sema_count
12load("data/glove_100d_0.219_IPEDS.Rda")
13sema_count_glove_100d_ipeds <- sema_count
14load("data/glove_200d_0.164_AMS.Rda")
15sema_count_glove_200d_ams <- sema_count
16load("data/glove_200d_0.164_CBMS.Rda")
17sema_count_glove_200d_cbms <- sema_count
18load("data/glove_200d_0.164_IPEDS.Rda")
19sema_count_glove_200d_ipeds <- sema_count
20load("data/glove_300d_0.134_AMS.Rda")
21sema_count_glove_300d_ams <- sema_count
22load("data/glove_300d_0.134_CBMS.Rda")
23sema_count_glove_300d_cbms <- sema_count
24load("data/glove_300d_0.134_IPEDS.Rda")
25sema_count_glove_300d_ipeds <- sema_count
26load("data/glove_50d_0.273_AMS.Rda")
27sema_count_glove_50d_ams <- sema_count
28load("data/glove_50d_0.273_CBMS.Rda")
29sema_count_glove_50d_cbms <- sema_count
30load("data/glove_50d_0.273_IPEDS.Rda")
31sema_count_glove_50d_ipeds <- sema_count
32load("data/google_news_0.185_AMS.Rda")
33sema_count_google_ams <- sema_count
34load("data/google_news_0.185_CBMS.Rda")
35sema_count_google_cbms <- sema_count
36load("data/google_news_0.185_IPEDS.Rda")
37sema_count_google_ipeds <- sema_count
38
39get_df <- function(str) {
40 data.frame(
41 model = list("GloVe 50D", "GloVe 100D", "GloVe 200D", "GloVe 300D", "Google News", "GloVe 50D", "GloVe 100D", "GloVe 200D", "GloVe 300D", "Google News", "GloVe 50D", "GloVe 100D", "GloVe 200D", "GloVe 300D", "Google News") |> as.character(),
42 title = list("IPEDS", "IPEDS", "IPEDS", "IPEDS", "IPEDS", "CBMS", "CBMS", "CBMS", "CBMS", "CBMS", "AMS", "AMS", "AMS", "AMS", "AMS") |> as.character(),
43 n = list(
44 sema_count_glove_50d_ipeds[sema_count_glove_50d_ipeds $ word_category == str,] $ n |> sum(na.rm = TRUE),
45 sema_count_glove_300d_ipeds[sema_count_glove_300d_ipeds $ word_category == str,] $ n |> sum(na.rm = TRUE),
46 sema_count_glove_200d_ipeds[sema_count_glove_200d_ipeds $ word_category == str,] $ n |> sum(na.rm = TRUE),
47 sema_count_glove_100d_ipeds[sema_count_glove_100d_ipeds $ word_category == str,] $ n |> sum(na.rm = TRUE),
48 sema_count_google_ipeds[sema_count_google_ipeds $ word_category == str,] $ n |> sum(na.rm = TRUE),
49 sema_count_glove_50d_cbms[sema_count_glove_50d_cbms $ word_category == str,] $ n |> sum(na.rm = TRUE),
50 sema_count_glove_300d_cbms[sema_count_glove_300d_cbms $ word_category == str,] $ n |> sum(na.rm = TRUE),
51 sema_count_glove_200d_cbms[sema_count_glove_200d_cbms $ word_category == str,] $ n |> sum(na.rm = TRUE),
52 sema_count_glove_100d_cbms[sema_count_glove_100d_cbms $ word_category == str,] $ n |> sum(na.rm = TRUE),
53 sema_count_google_cbms[sema_count_google_cbms $ word_category == str,] $ n |> sum(na.rm = TRUE),
54 sema_count_glove_50d_ams[sema_count_glove_50d_ams $ word_category == str,] $ n |> sum(na.rm = TRUE),
55 sema_count_glove_300d_ams[sema_count_glove_300d_ams $ word_category == str,] $ n |> sum(na.rm = TRUE),
56 sema_count_glove_200d_ams[sema_count_glove_200d_ams $ word_category == str,] $ n |> sum(na.rm = TRUE),
57 sema_count_glove_100d_ams[sema_count_glove_100d_ams $ word_category == str,] $ n |> sum(na.rm = TRUE),
58 sema_count_google_ams[sema_count_google_ams $ word_category == str,] $ n |> sum(na.rm = TRUE)
59 ) |> as.numeric()
60 )
61}
62
63get_df_tag <- function(str) {
64 data.frame(
65 model = list("GloVe 50D", "GloVe 100D", "GloVe 200D", "GloVe 300D", "Google News", "GloVe 50D", "GloVe 100D", "GloVe 200D", "GloVe 300D", "Google News", "GloVe 50D", "GloVe 100D", "GloVe 200D", "GloVe 300D", "Google News") |> as.character(),
66 title = list("IPEDS", "IPEDS", "IPEDS", "IPEDS", "IPEDS", "CBMS", "CBMS", "CBMS", "CBMS", "CBMS", "AMS", "AMS", "AMS", "AMS", "AMS") |> as.character(),
67 n = list(
68 sema_count_glove_50d_ipeds[sema_count_glove_50d_ipeds $ tag == str,] $ n |> sum(na.rm = TRUE),
69 sema_count_glove_300d_ipeds[sema_count_glove_300d_ipeds $ tag == str,] $ n |> sum(na.rm = TRUE),
70 sema_count_glove_200d_ipeds[sema_count_glove_200d_ipeds $ tag == str,] $ n |> sum(na.rm = TRUE),
71 sema_count_glove_100d_ipeds[sema_count_glove_100d_ipeds $ tag == str,] $ n |> sum(na.rm = TRUE),
72 sema_count_google_ipeds[sema_count_google_ipeds $ tag == str,] $ n |> sum(na.rm = TRUE),
73 sema_count_glove_50d_cbms[sema_count_glove_50d_cbms $ tag == str,] $ n |> sum(na.rm = TRUE),
74 sema_count_glove_300d_cbms[sema_count_glove_300d_cbms $ tag == str,] $ n |> sum(na.rm = TRUE),
75 sema_count_glove_200d_cbms[sema_count_glove_200d_cbms $ tag == str,] $ n |> sum(na.rm = TRUE),
76 sema_count_glove_100d_cbms[sema_count_glove_100d_cbms $ tag == str,] $ n |> sum(na.rm = TRUE),
77 sema_count_google_cbms[sema_count_google_cbms $ tag == str,] $ n |> sum(na.rm = TRUE),
78 sema_count_glove_50d_ams[sema_count_glove_50d_ams $ tag == str,] $ n |> sum(na.rm = TRUE),
79 sema_count_glove_300d_ams[sema_count_glove_300d_ams $ tag == str,] $ n |> sum(na.rm = TRUE),
80 sema_count_glove_200d_ams[sema_count_glove_200d_ams $ tag == str,] $ n |> sum(na.rm = TRUE),
81 sema_count_glove_100d_ams[sema_count_glove_100d_ams $ tag == str,] $ n |> sum(na.rm = TRUE),
82 sema_count_google_ams[sema_count_google_ams $ tag == str,] $ n |> sum(na.rm = TRUE)
83 ) |> as.numeric()
84 )
85}
86
87# ------------------------------ lgbt ------------------------------
88
89
90
91# These are the tags that are used to categorize the data.
92lgbt_tags <- c("lgbt","lgbtq","sex","identity","gender","orientation","nonbinary") |> as.character()
93race_ethnicity_tags <- c("race","ethnicity","african","american","black","hispanic","asian","indigenous","native","latino","latina","latine") |> as.character()
94women_tags <- c("woman","women","girl","feminine","femeninity","ms","mrs") |> as.character()
95men_tags <- c("man", "men", "boy", "male", "masculine", "masculinity", "mr") |> as.character()
96disabilities_tags <- c("disabilities","disabled","disability","handicap","handicapped","neurodivergent") |> as.character()
97
98# This variable holds all of the tags. Additionally, tag_categories holds all
99# the tags together with their categories.
100tags <- c(lgbt_tags, race_ethnicity_tags, women_tags, men_tags, disabilities_tags)
101tag_categories <- list(
102 "lgbt",
103 "race/ethnicity",
104 "women",
105 "disabilities"
106) |> as.character()
107
108ui <- dashboardPage(
109 dashboardHeader(),
110 sidebarMenu(
111 menuItem(
112 "Home",
113 tabName = "home",
114 icon = icon("house")
115 ),
116 menuItem(
117 "Dashboard",
118 tabName = "dashboard",
119 icon = icon("dashboard")
120 ),
121 menuItem(
122 "About",
123 tabName = "about",
124 icon = icon("info")
125 )
126 ) |> dashboardSidebar(),
127 tabItems(
128 tabItem(
129 tabName = "home",
130 h2("Home information.")
131 ),
132 tabItem(
133 tabName = "dashboard",
134 fluidRow(
135 box(
136 h3("Options:"),
137 radioButtons(
138 inputId = "selected_tag",
139 label = "Select a tag category below:",
140 choices = tag_categories |> as.list()
141 ),
142 selectizeInput(
143 inputId = "selected_sources",
144 label = "Select a data source below:",
145 choices = list("AMS", "CBMS", "IPEDS") |> as.character(),
146 multiple = TRUE
147 ),
148 selectizeInput(
149 "selected_models",
150 label = "Select models to show:",
151 choices = list(
152 "Google News" = "Google News",
153 "GloVe 300D" = "GloVe 300D",
154 "GloVe 200D" = "GloVe 200D",
155 "GloVe 100D" = "GloVe 100D",
156 "GloVe 50D" = "GloVe 50D"
157 ),
158 multiple = TRUE
159 )
160 ),
161 plotOutput("occurrences_v_tags")
162 )
163 ),
164 tabItem(
165 tabName = "about",
166 h2("About information.")
167 )
168 ) |> dashboardBody()
169)
170
171get_rows <- function(tag_category, data_sources, models) {
172 df <- get_df(tag_category)
173
174 mask1 <- logical(length = nrow(df))
175 mask2 <- logical(length = nrow(df))
176 for (data_source in data_sources) {
177 mask1 <- mask1 | (df $ title == data_source)
178 }
179 for (model in models) {
180 mask2 <- mask2 | (df $ model == model)
181 }
182 df <- df[mask1 & mask2,]
183 df
184}
185
186server <- function(input, output) {
187 set.seed(122)
188
189 histdata <- rnorm(500)
190
191 output $ occurrences_v_tags <- renderPlot({
192 df <- get_rows(
193 tag_category = input $ selected_tag,
194 data_source = input $ selected_sources,
195 models = input $ selected_models
196 )
197
198 ggplot(df, aes(fill = model, x = title, y = n)) +
199 geom_bar(position = "dodge", stat = "identity") +
200 scale_fill_viridis_d() +
201 labs(
202 title = paste("Occurrences of", input $ selected_tag, "per data source"),
203 x = "Data Source",
204 y = "Occurrences"
205 ) +
206 theme_light() +
207 theme(aspect.ratio = 1)
208 })
209}
210
211shinyApp(ui, server, options = list(port = 6969))