Addressing the void of AI policies in education for students with specific learning disabilities
This website contains the supplemental materials and code used to analyze the text data in Shin et al. (2025).
Shin, M., Deniz, F., Watson, L., Dieterich, C., Ewoldt, K., Johnson, F., Kong, J., Lee, S. H., & Whitehurst, A. (in press). Addressing the void of AI policies in education for students with specific learning disabilities. Learning Disability Quarterly.
Flowchart of Data Collection and Analysis
%%{init: {
"flowchart": {"rankSpacing": 24},
"themeVariables": {"fontSize": "17px", "lineHeight": 1.1, "nodePadding": 6}
}}%%
flowchart TD
n1["Policy Documents Analysis<br/>N = 12 Documents"] --> D["Document Categorization"]
D --> E["Text Preprocessing"]
E --> F["Text Embeddings<br/>Sentence-BERT"] & G["Topic Modeling<br/>Structural Topic Model"]
F --> H["Similarity Analysis<br/>Cosine Similarity"]
G --> I["Topic Label Generation<br/>OpenAI's API"]
I --> V1["Research Team Validation<br/>Topic Labels"]
F --> V1
H --> P["Theme Comparison<br/>• SLD vs Other Disability<br/>• SLD vs General Policies"]
P --> Q["Gap Identification"]
n1 --> TP2["Text Preprocessing"]
TP2 --> TM2["Topic Modeling<br/>Structural Topic Model"]
TM2 --> J["Survey Item Generation<br/>OpenAI's API"]
J --> V3["Research Team Validation<br/>Survey Items"]
V3 --> B["Delphi Round 1<br/>N=17 Experts"]
B --> C["Delphi Round 2<br/>N=14 Experts"] & K1["Expert Validation<br/>After Delphi Round 1"]
C --> K2["Expert Validation<br/>After Delphi Round 2"]
K1 --> L["Content Validity<br/>Lawshe's CVR Method"]
K2 --> L
L --> STM3["Topic Modeling<br/>Structural Topic Model"]
STM3 --> M["Final Policy Items<br/>N=36 Items"]
M --> T["Theme Generation<br/>OpenAI's ChatGPT"]
T --> V2["Research Team Validation<br/>Theme Labels"]
classDef dataPhase fill:#f8f9ff,stroke:#4a5568,stroke-width:2px,color:#2d3748
classDef processPhase fill:#f7fdf7,stroke:#38a169,stroke-width:2px,color:#2d3748
classDef analysisPhase fill:#fffaf0,stroke:#ed8936,stroke-width:2px,color:#2d3748
classDef validationPhase fill:#fef5f7,stroke:#d53f8c,stroke-width:2px,color:#2d3748
classDef researchValidation fill:#f0f4ff,stroke:#6366f1,stroke-width:2px,color:#2d3748
classDef finalPhase fill:#e6fffa,stroke:#319795,stroke-width:2px,color:#2d3748
n1:::processPhase
D:::processPhase
B:::dataPhase
C:::dataPhase
K1:::validationPhase
E:::processPhase
F:::analysisPhase
G:::analysisPhase
H:::analysisPhase
I:::analysisPhase
T:::analysisPhase
TP2:::processPhase
TM2:::analysisPhase
V1:::researchValidation
V2:::researchValidation
V3:::researchValidation
P:::analysisPhase
Q:::analysisPhase
J:::analysisPhase
K2:::validationPhase
L:::validationPhase
STM3:::analysisPhase
M:::finalPhase
Load R Packages
# Reset any time limits from previous session state
setTimeLimit(cpu = Inf, elapsed = Inf, transient = FALSE)
suppressPackageStartupMessages({
library(tidyr)
library(dplyr)
library(readxl)
library(gtsummary)
library(officer)
library(flextable)
library(ggplot2)
library(quanteda)
library(reticulate)
library(TextAnalysisR)
library(htmlwidgets)
library(stm)
library(dotenv)
library(ggdendro)
library(plotly)
library(httr)
library(jsonlite)
library(progress)
library(openxlsx)
library(stringr)
library(purrr)
library(text2vec)
library(stringdist)
library(DT)
})
Sys.setenv(HF_HUB_DISABLE_SYMLINKS_WARNING = "1")
Sys.setenv(TOKENIZERS_PARALLELISM = "false")
options(timeout = 600)
setTimeLimit(cpu = Inf, elapsed = Inf, transient = FALSE)
Load Dataset
load("data/ai_ld_data.rda")Document-Level Analysis
data <- read_excel("data/policyData.xlsx")
data_long <- data |>
pivot_longer(
cols = matches("^coding_\\d+$"),
names_to = "coding_id",
names_prefix = "coding_",
values_to = "coding_text",
values_drop_na = TRUE
) |>
mutate(
coding_id = as.integer(coding_id),
document_id = paste(ID, coding_id, sep = "_")
)
united_tbl<- TextAnalysisR::unite_cols(data_long, listed_vars = "coding_text")
documents <- data_long %>%
group_by(ID, doc_name, type, disability, SLD) %>%
summarise(
document_text = paste(coding_text, collapse = " "),
n_chunks = n(),
.groups = 'drop'
) %>%
mutate(
category = case_when(
disability == "Yes" & SLD == "Yes" ~ "SLD",
disability == "Yes" & SLD == "No" ~ "Other Disability",
disability == "No" & SLD == "No" ~ "General",
TRUE ~ "Uncategorized"
),
display_name = paste("Doc", ID),
doc_numeric_id = as.numeric(str_extract(display_name, "\\d+"))
) %>%
arrange(doc_numeric_id)
documents_filtered <- documents %>%
mutate(
doc_numeric_id = as.numeric(str_extract(display_name, "\\d+"))
) %>%
arrange(doc_numeric_id)
if(nrow(documents_filtered) == 0) {
stop("No documents remain after filtering! Check document lengths and categories.")
}
document_summary <- documents_filtered %>%
select(
Document = display_name,
`Document Name` = doc_name,
Type = type,
Disability = disability,
SLD = SLD,
Category = category,
`Coding Units` = n_chunks
)
# setTimeLimit(cpu = Inf, elapsed = Inf, transient = FALSE)
documents <- data_long %>%
group_by(ID, doc_name, type, disability, SLD) %>%
summarise(
document_text = paste(coding_text, collapse = " "),
n_chunks = n(),
.groups = 'drop'
) %>%
mutate(
category = case_when(
disability == "Yes" & SLD == "Yes" ~ "SLD",
disability == "Yes" & SLD == "No" ~ "Other Disability",
disability == "No" & SLD == "No" ~ "General",
TRUE ~ "Uncategorized"
),
display_name = paste("Doc", ID),
doc_numeric_id = as.numeric(str_extract(display_name, "\\d+"))
) %>%
arrange(doc_numeric_id)
if(nrow(documents) == 0) {
stop("No documents remain after filtering! Check document lengths and categories.")
}
# Generate embeddings for all documents using TextAnalysisR
all_embeddings <- TextAnalysisR::generate_embeddings(
texts = documents$document_text,
model = "all-MiniLM-L6-v2",
verbose = TRUE
)
# Calculate full similarity matrix using TextAnalysisR
similarity_result <- TextAnalysisR::calculate_document_similarity(
texts = documents$document_text,
document_feature_type = "embeddings",
similarity_method = "cosine",
embedding_model = "all-MiniLM-L6-v2",
verbose = TRUE
)
full_sim_matrix <- similarity_result$similarity_matrix
# Create document metadata for filtering
doc_metadata <- documents %>%
mutate(row_idx = row_number()) %>%
select(row_idx, display_name, ID, doc_name, category)
# Extract cross-category similarities (SLD vs Other categories)
similarity_threshold <- 0.6
sld_indices <- doc_metadata %>% filter(category == "SLD") %>% pull(row_idx)
other_disability_indices <- doc_metadata %>% filter(category == "Other Disability") %>% pull(row_idx)
general_indices <- doc_metadata %>% filter(category == "General") %>% pull(row_idx)
# Build ld_similarities by extracting from full matrix
ld_similarities <- bind_rows(
# SLD vs Other Disability
expand_grid(
ld_idx = sld_indices,
other_idx = other_disability_indices
) %>%
mutate(
cosine_similarity = map2_dbl(ld_idx, other_idx, ~full_sim_matrix[.x, .y]),
other_category = "Other Disability"
),
# SLD vs General
expand_grid(
ld_idx = sld_indices,
other_idx = general_indices
) %>%
mutate(
cosine_similarity = map2_dbl(ld_idx, other_idx, ~full_sim_matrix[.x, .y]),
other_category = "General"
)
) %>%
left_join(doc_metadata %>% select(row_idx, display_name, ID, doc_name) %>%
rename(ld_doc_name = display_name, ld_doc_id = ID, ld_actual_name = doc_name),
by = c("ld_idx" = "row_idx")) %>%
left_join(doc_metadata %>% select(row_idx, display_name, ID, doc_name) %>%
rename(other_doc_name = display_name, other_doc_id = ID, other_actual_name = doc_name),
by = c("other_idx" = "row_idx")) %>%
select(-ld_idx, -other_idx) %>%
mutate(
other_category = factor(other_category, levels = c("Other Disability", "General"))
)
similarity_heatmap <- TextAnalysisR::plot_cross_category_heatmap(
similarity_data = ld_similarities,
row_var = "ld_doc_name",
col_var = "other_doc_name",
value_var = "cosine_similarity",
category_var = "other_category",
row_display_var = "ld_actual_name",
col_display_var = "other_actual_name",
row_label = "SLD Documents"
)
similarity_heatmap_plotly <- similarity_heatmap %>%
ggplotly(tooltip = "text", width = 800, height = 500) %>%
layout(hovermode = "closest") %>%
style(hoverinfo = "text")Document-Level Similarity Heatmap
Document Embedding Similarity Statistics Table
similarity_stats <- ld_similarities %>%
group_by(other_category) %>%
summarise(
mean_similarity = round(mean(cosine_similarity), 3),
median_similarity = round(median(cosine_similarity), 3),
sd_similarity = round(sd(cosine_similarity), 3),
min_similarity = round(min(cosine_similarity), 3),
max_similarity = round(max(cosine_similarity), 3),
.groups = 'drop'
)
doc_gap_analysis <- TextAnalysisR::analyze_similarity_gaps(
similarity_data = ld_similarities,
ref_var = "ld_doc_name",
other_var = "other_doc_name",
similarity_var = "cosine_similarity",
category_var = "other_category",
unique_threshold = similarity_threshold,
cross_policy_min = similarity_threshold,
cross_policy_max = 0.8
)
LD_Unique <- doc_gap_analysis$unique_items %>%
rename(match_doc = best_match, match_cat = best_match_category) %>%
mutate(
doc_numeric_id = as.numeric(str_extract(ld_doc_name, "\\d+")),
gap_type = "LD_Unique"
) %>%
arrange(doc_numeric_id)
missing_from_ld <- doc_gap_analysis$missing_items %>%
rename(match_sld = best_match) %>%
mutate(
doc_numeric_id = as.numeric(str_extract(other_doc_name, "\\d+")),
gap_type = "Missing_from_SLD"
) %>%
arrange(doc_numeric_id)
cross_policy <- doc_gap_analysis$cross_policy %>%
rename(match_doc = best_match, match_cat = best_match_category) %>%
mutate(
doc_numeric_id = as.numeric(str_extract(ld_doc_name, "\\d+")),
gap_type = "Cross-Policy_Document"
) %>%
arrange(doc_numeric_id)
Unique SLD Policy Documents
Lower similarity scores indicate greater uniqueness
Missing from SLD Policy Documents
Lower similarity scores indicate content missing from SLD policies
Cross-Policy Documents
Similarity scores between 0.6 and 0.8 indicate cross-policy learning potential
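These categories band each SLD document by its strongest cross-category match, mirroring the unique_threshold, cross_policy_min, and cross_policy_max arguments passed to TextAnalysisR::analyze_similarity_gaps() above. Below is a minimal sketch of that banding rule, assuming the best match is the maximum cosine similarity per document; the gap_bands object and the "Well_Covered" label are illustrative, not package outputs.
# Sketch of the banding rule only; analyze_similarity_gaps() above is the actual method
gap_bands <- ld_similarities %>%
  group_by(ld_doc_name) %>%
  summarise(best_match_similarity = max(cosine_similarity), .groups = "drop") %>%
  mutate(
    gap_type = case_when(
      best_match_similarity < 0.6 ~ "LD_Unique", # no close counterpart elsewhere
      best_match_similarity <= 0.8 ~ "Cross-Policy_Document", # partial overlap worth comparing
      TRUE ~ "Well_Covered" # illustrative label: a strong counterpart already exists
    )
  )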
cross_policy %>%
select(ld_doc_name, min_similarity, max_similarity, match_doc, match_cat, gap_type) %>%
DT::datatable(
options = list(
dom = 'Bfrtip',
buttons = c('copy', 'excel', 'pdf'),
pageLength = 10,
scrollX = TRUE
),
extensions = 'Buttons',
class = "cell-border stripe",
rownames = FALSE
) %>%
DT::formatRound(columns = c("min_similarity", "max_similarity"), digits = 3)
Topic-Level Analysis
# setTimeLimit(cpu = Inf, elapsed = Inf, transient = FALSE)
data <- data %>%
mutate(
category = case_when(
disability == "Yes" & SLD == "Yes" ~ "SLD",
disability == "Yes" & SLD == "No" ~ "Other Disability",
disability == "No" & SLD == "No" ~ "General",
TRUE ~ "Uncategorized"
)
)
ld_docs <- data %>%
filter(category == "SLD") %>%
pivot_longer(
cols = matches("^coding_\\d+$"),
names_to = "coding_id",
names_prefix = "coding_",
values_to = "coding_text",
values_drop_na = TRUE
) %>%
mutate(
coding_id = as.integer(coding_id),
document_id = paste(ID, coding_id, sep = "_")
)
disability_docs <- data %>%
filter(category == "Other Disability") %>%
pivot_longer(
cols = matches("^coding_\\d+$"),
names_to = "coding_id",
names_prefix = "coding_",
values_to = "coding_text",
values_drop_na = TRUE
) %>%
mutate(
coding_id = as.integer(coding_id),
document_id = paste(ID, coding_id, sep = "_")
)
general_docs <- data %>%
filter(category == "General") %>%
pivot_longer(
cols = matches("^coding_\\d+$"),
names_to = "coding_id",
names_prefix = "coding_",
values_to = "coding_text",
values_drop_na = TRUE
) %>%
mutate(
coding_id = as.integer(coding_id),
document_id = paste(ID, coding_id, sep = "_")
)
common_words <- c("na", "may", "can", "[''']s")
united_tbl_ld <- TextAnalysisR::unite_cols(ld_docs, listed_vars = "coding_text")
tokens_ld <- TextAnalysisR::prep_texts(
united_tbl_ld,
text_field = "united_texts",
remove_stopwords = TRUE,
custom_stopwords = common_words,
custom_valuetype = "regex",
verbose = FALSE
)
toks_lemmatized_ld <- quanteda::tokens_wordstem(tokens_ld)
dfm_ld <- dfm(toks_lemmatized_ld)
united_tbl_disability <- TextAnalysisR::unite_cols(disability_docs, listed_vars = "coding_text")
tokens_disability <- TextAnalysisR::prep_texts(
united_tbl_disability,
text_field = "united_texts",
remove_stopwords = TRUE,
custom_stopwords = common_words,
custom_valuetype = "regex",
verbose = FALSE
)
toks_lemmatized_disability <- quanteda::tokens_wordstem(tokens_disability)
dfm_disability <- dfm(toks_lemmatized_disability)
united_tbl_general <- TextAnalysisR::unite_cols(general_docs, listed_vars = "coding_text")
tokens_general <- TextAnalysisR::prep_texts(
united_tbl_general,
text_field = "united_texts",
remove_stopwords = TRUE,
custom_stopwords = common_words,
custom_valuetype = "regex",
verbose = FALSE
)
toks_lemmatized_general <- quanteda::tokens_wordstem(tokens_general)
dfm_general <- dfm(toks_lemmatized_general)
out_ld <- quanteda::convert(dfm_ld, to = "stm")
topic_model_ld <- stm(
data = out_ld$meta,
documents = out_ld$documents,
vocab = out_ld$vocab,
max.em.its = 75,
init.type = "Spectral",
K = 10,
prevalence = ~ 1,
verbose = FALSE,
seed = 2025
)
out_disability <- quanteda::convert(dfm_disability, to = "stm")
topic_model_disability <- stm(
data = out_disability$meta,
documents = out_disability$documents,
vocab = out_disability$vocab,
max.em.its = 75,
init.type = "Spectral",
K = 10,
prevalence = ~ 1,
verbose = FALSE,
seed = 2025
)
out_general <- quanteda::convert(dfm_general, to = "stm")
topic_model_general <- stm(
data = out_general$meta,
documents = out_general$documents,
vocab = out_general$vocab,
max.em.its = 75,
init.type = "Spectral",
K = 10,
prevalence = ~ 1,
verbose = FALSE,
seed = 2025
)
top_terms_ld <- TextAnalysisR::get_topic_terms(topic_model_ld, top_term_n = 10, verbose = FALSE)
top_terms_disability <- TextAnalysisR::get_topic_terms(topic_model_disability, top_term_n = 10, verbose = FALSE)
top_terms_general <- TextAnalysisR::get_topic_terms(topic_model_general, top_term_n = 10, verbose = FALSE)
top_labeled_ld <- TextAnalysisR::generate_topic_labels(top_terms_ld, model = "gpt-4o", temperature = 0.5, verbose = FALSE)
top_labeled_disability <- TextAnalysisR::generate_topic_labels(top_terms_disability, model = "gpt-4o", temperature = 0.5, verbose = FALSE)
top_labeled_general <- TextAnalysisR::generate_topic_labels(top_terms_general, model = "gpt-4o", temperature = 0.5, verbose = FALSE)
top_labeled_ld$category <- "SLD"
top_labeled_disability$category <- "Other Disability"
top_labeled_general$category <- "General"
top_labeled_ld <- top_labeled_ld %>% mutate(topic = as.numeric(topic)) %>% arrange(topic)
top_labeled_disability <- top_labeled_disability %>% mutate(topic = as.numeric(topic)) %>% arrange(topic)
top_labeled_general <- top_labeled_general %>% mutate(topic = as.numeric(topic)) %>% arrange(topic)
gamma_ld <- TextAnalysisR::get_topic_prevalence(topic_model_ld, category = "SLD")
gamma_disability <- TextAnalysisR::get_topic_prevalence(topic_model_disability, category = "Other Disability")
gamma_general <- TextAnalysisR::get_topic_prevalence(topic_model_general, category = "General")
all_gamma <- bind_rows(gamma_ld, gamma_disability, gamma_general)
all_labeled_topics <- bind_rows(top_labeled_ld, top_labeled_disability, top_labeled_general) %>%
left_join(all_gamma, by = c("topic", "category")) %>%
mutate(
topic_gamma = paste0("Topic ", topic, " (", round(gamma, 3), ")")
) %>%
arrange(category, topic)
all_labeled_topics <- all_labeled_topics %>%
mutate(
topic = as.numeric(topic),
topic_label_with_gamma = paste0(
topic, ". ", topic_label, " (", round(gamma, 3), ")"
)
)
ld_topic_texts <- TextAnalysisR::get_topic_texts(top_terms_ld)
disability_topic_texts <- TextAnalysisR::get_topic_texts(top_terms_disability)
general_topic_texts <- TextAnalysisR::get_topic_texts(top_terms_general)
# Generate topic embeddings using TextAnalysisR
ld_topic_emb <- TextAnalysisR::generate_embeddings(ld_topic_texts, model = "all-MiniLM-L6-v2", verbose = FALSE)
disability_topic_emb <- TextAnalysisR::generate_embeddings(disability_topic_texts, model = "all-MiniLM-L6-v2", verbose = FALSE)
general_topic_emb <- TextAnalysisR::generate_embeddings(general_topic_texts, model = "all-MiniLM-L6-v2", verbose = FALSE)
# Calculate cross-category topic similarity using TextAnalysisR
ld_labels <- top_labeled_ld %>% distinct(topic, .keep_all = TRUE) %>% arrange(topic) %>% pull(topic_label)
disability_labels <- top_labeled_disability %>% distinct(topic, .keep_all = TRUE) %>% arrange(topic) %>% pull(topic_label)
general_labels <- top_labeled_general %>% distinct(topic, .keep_all = TRUE) %>% arrange(topic) %>% pull(topic_label)
ld_vs_disability_result <- TextAnalysisR::calculate_cross_similarity(
ld_topic_emb, disability_topic_emb,
labels1 = ld_labels, labels2 = disability_labels
)
ld_vs_disability_cosine <- ld_vs_disability_result$similarity_df %>%
rename(topic1 = row_idx, topic2 = col_idx, cosine_similarity = similarity) %>%
mutate(cat1 = "SLD", cat2 = "Other Disability", comparison = "SLD vs Other Disability")
ld_vs_general_result <- TextAnalysisR::calculate_cross_similarity(
ld_topic_emb, general_topic_emb,
labels1 = ld_labels, labels2 = general_labels
)
ld_vs_general_cosine <- ld_vs_general_result$similarity_df %>%
rename(topic1 = row_idx, topic2 = col_idx, cosine_similarity = similarity) %>%
mutate(cat1 = "SLD", cat2 = "General", comparison = "SLD vs General")
ld_focused_similarities <- bind_rows(
ld_vs_disability_cosine,
ld_vs_general_cosine
)
topic_similarities <- ld_focused_similarities %>%
left_join(
top_labeled_ld %>%
arrange(topic) %>%
select(topic, topic_label) %>%
rename(ld_topic_label = topic_label),
by = c("topic1" = "topic")
) %>%
mutate(
ld_topic_name = paste("Topic", topic1),
other_topic_name = paste("Topic", topic2),
ld_topic_id = label1,
other_topic_id = label2,
other_category = cat2
) %>%
left_join(
top_labeled_disability %>%
arrange(topic) %>%
select(topic, topic_label) %>%
rename(disability_topic_label = topic_label),
by = c("topic2" = "topic")
) %>%
left_join(
top_labeled_general %>%
arrange(topic) %>%
select(topic, topic_label) %>%
rename(general_topic_label = topic_label),
by = c("topic2" = "topic")
) %>%
mutate(
other_topic_label = case_when(
cat2 == "Other Disability" ~ disability_topic_label,
cat2 == "General" ~ general_topic_label,
TRUE ~ as.character(topic2)
)
) %>%
select(
ld_topic_name, other_topic_name, ld_topic_id, other_topic_id,
ld_topic_label, other_topic_label, cosine_similarity, other_category
) %>%
mutate(
other_category = factor(other_category, levels = c("Other Disability", "General"))
)
topic_similarities <- topic_similarities %>%
distinct(ld_topic_name, other_topic_name, other_category, .keep_all = TRUE)
topic_similarity_heatmap <- TextAnalysisR::plot_cross_category_heatmap(
similarity_data = topic_similarities,
row_var = "ld_topic_name",
col_var = "other_topic_name",
value_var = "cosine_similarity",
category_var = "other_category",
row_display_var = "ld_topic_label",
col_display_var = "other_topic_label",
row_label = "SLD Topics"
) +
ggplot2::facet_wrap(~ col_category, scales = "free_x", nrow = 2) +
ggplot2::scale_fill_viridis_c(name = "Cosine\nSimilarity", option = "plasma")
topic_similarity_heatmap_plotly <- topic_similarity_heatmap %>%
ggplotly(tooltip = "text", width = 800, height = 1000) %>%
layout(
hovermode = "closest",
margin = list(b = 70)
) %>%
style(hoverinfo = "text")Topic-Level Similarity Heatmap
topic_similarity_stats <- topic_similarities %>%
group_by(other_category) %>%
summarise(
mean_similarity = round(mean(cosine_similarity), 3),
median_similarity = round(median(cosine_similarity), 3),
sd_similarity = round(sd(cosine_similarity), 3),
min_similarity = round(min(cosine_similarity), 3),
max_similarity = round(max(cosine_similarity), 3),
.groups = 'drop'
) %>%
arrange(other_category)
topic_similarity_threshold <- 0.6
topic_gap_analysis <- TextAnalysisR::analyze_similarity_gaps(
similarity_data = topic_similarities,
ref_var = "ld_topic_name",
other_var = "other_topic_name",
similarity_var = "cosine_similarity",
category_var = "other_category",
ref_label_var = "ld_topic_label",
other_label_var = "other_topic_label",
unique_threshold = topic_similarity_threshold,
cross_policy_min = topic_similarity_threshold,
cross_policy_max = 0.8
)
LD_Unique_topics <- topic_gap_analysis$unique_items %>%
rename(match_topic = best_match, match_cat = best_match_category) %>%
mutate(
topic_numeric_id = as.numeric(str_extract(ld_topic_name, "\\d+")),
gap_type = "LD_Unique_Strength"
) %>%
arrange(topic_numeric_id)
missing_from_ld_topics <- topic_gap_analysis$missing_items %>%
rename(match_sld = best_match) %>%
mutate(
topic_numeric_id = as.numeric(str_extract(other_topic_name, "\\d+")),
gap_type = "Missing_from_LD"
) %>%
arrange(topic_numeric_id)
cross_policy_topics <- topic_gap_analysis$cross_policy %>%
rename(match_topic = best_match, match_cat = best_match_category) %>%
mutate(
topic_numeric_id = as.numeric(str_extract(ld_topic_name, "\\d+")),
gap_type = "Cross-Policy_Learning"
) %>%
arrange(topic_numeric_id)
Topic-Level Similarity Statistics Table
Unique Topics from SLD Policies
Lower similarity scores indicate greater uniqueness
LD_Unique_topics %>%
select(-gap_type, -match_topic, -topic_numeric_id) %>%
DT::datatable(
options = list(
dom = 'Bfrtip',
buttons = c('copy', 'excel', 'pdf'),
pageLength = 10,
scrollX = TRUE
),
extensions = 'Buttons',
class = "cell-border stripe",
rownames = FALSE
) %>%
DT::formatRound(columns = c("min_similarity", "max_similarity"), digits = 3)
Missing Topics from SLD Policies
Lower similarity scores indicate content missing from SLD policies
missing_from_ld_topics %>%
select(-gap_type, -topic_numeric_id) %>%
DT::datatable(
options = list(
dom = 'Bfrtip',
buttons = c('copy', 'excel', 'pdf'),
pageLength = 10,
scrollX = TRUE
),
extensions = 'Buttons',
class = "cell-border stripe",
rownames = FALSE
) %>%
DT::formatRound(columns = c("min_similarity", "max_similarity"), digits = 3)
Cross-Policy Learning Opportunities
Similarity scores between 0.6 and 0.8 indicate cross-policy learning potential
cross_policy_topics %>%
select(-gap_type, -match_topic, -topic_numeric_id) %>%
DT::datatable(
options = list(
dom = 'Bfrtip',
buttons = c('copy', 'excel', 'pdf'),
pageLength = 10,
scrollX = TRUE
),
extensions = 'Buttons',
class = "cell-border stripe",
rownames = FALSE
) %>%
DT::formatRound(columns = c("min_similarity", "max_similarity"), digits = 3)
Participants
# Single Choice Variables
single_tbl <- delphiFirst %>%
select(spanish_hispanic_latino, sex) %>%
pivot_longer(everything(), names_to = "Variable", values_to = "Choice") %>%
count(Variable, Choice) %>%
mutate(
Percent = round(n / nrow(delphiFirst) * 100, 1),
`N (%)` = paste0(n, " (", Percent, "%)"),
Variable = case_when(
Variable == "spanish_hispanic_latino" ~ "Spanish/Hispanic/Latino",
Variable == "sex" ~ "Sex",
TRUE ~ Variable
)
) %>%
select(Variable, Choice, `N (%)`)
openxlsx::write.xlsx(single_tbl, "data/single_tbl.xlsx")
# Multiple Choice Variables
total_n <- nrow(delphiFirst)
summarize_multichoice <- function(data, var, var_label) {
data %>%
pull({{ var }}) %>%
str_split(",\\s*") %>%
unlist() %>%
str_trim() %>%
table() %>%
sort(decreasing = TRUE) %>%
as.data.frame() %>%
rename(Choice = ".", Count = "Freq") %>%
mutate(
Percent = round(Count / total_n * 100, 1),
`N (%)` = paste0(Count, " (", Percent, "%)"),
Variable = var_label
) %>%
select(Variable, Choice, `N (%)`)
}
position_tbl <- summarize_multichoice(delphiFirst, position, "Position")
school_tbl <- summarize_multichoice(delphiFirst, school_level, "School Level")
race_tbl <- summarize_multichoice(delphiFirst, race, "Race")
multi_tbl <- bind_rows(position_tbl, school_tbl, race_tbl)
openxlsx::write.xlsx(multi_tbl, "data/multi_tbl.xlsx")
Word Frequency
setTimeLimit(cpu = Inf, elapsed = Inf, transient = FALSE)
tokens <- TextAnalysisR::prep_texts(united_tbl, text_field = "united_texts", verbose = FALSE)
tokens_collocations <- TextAnalysisR::detect_multi_words(tokens, size = 2:5, min_count = 2)
tokens_dict <- quanteda::dictionary(
list(
custom = c(
"access to",
"ai systems",
"artificial intelligence",
"assistive technology",
"automated systems",
"best practices",
"civil rights",
"decision making",
"developers should",
"educational settings",
"federal government",
"intelligence systems",
"learning disability",
"learning disabilities",
"machine learning",
"safety institute",
"school district",
"students with disabilities",
"virtual environments"
)
)
)
compound_tokens <- function(tokens, dict) {
quanteda::tokens_compound(
tokens,
pattern = dict,
concatenator = "_",
valuetype = "glob",
window = 0,
case_insensitive = TRUE,
join = TRUE,
keep_unigrams = FALSE,
verbose = TRUE
)
}
tokens_compound <- compound_tokens(tokens, tokens_dict)
dfm_object <- dfm(tokens_compound)
word_frequency_plot <- TextAnalysisR::plot_word_frequency(dfm_object, n = 20)
Remove Predefined Stopwords
# setTimeLimit(cpu = Inf, elapsed = Inf, transient = FALSE)
toks_removed <- quanteda::tokens_remove(tokens_compound, pattern = stopwords::stopwords("en", source = "snowball"), verbose = FALSE)
dfm_init <- dfm(toks_removed)
word_frequency_remove_stopwords <- TextAnalysisR::plot_word_frequency(dfm_init, n = 20)
Remove Common Words in the Dataset
# setTimeLimit(cpu = Inf, elapsed = Inf, transient = FALSE)
common_words <- c("na", "may", "can", "[''']s")
toks_removed_common <- quanteda::tokens_remove(toks_removed,
pattern = common_words,
valuetype = "regex",
verbose = FALSE)
dfm_removed <- dfm(toks_removed_common)
word_frequency_remove_common <- TextAnalysisR::plot_word_frequency(dfm_removed, n = 20)
Lemmatize Tokens
# setTimeLimit(cpu = Inf, elapsed = Inf, transient = FALSE)
toks_lemmatized <- TextAnalysisR::lemmatize_tokens(toks_removed_common, batch_size = 50)
dfm <- dfm(toks_lemmatized)
word_frequency_lemmatized <- TextAnalysisR::plot_word_frequency(dfm, n = 20)
Model Diagnostics (Optimal Topic Numbers)
# setTimeLimit(cpu = Inf, elapsed = Inf, transient = FALSE)
toks_lemmatized <- TextAnalysisR::lemmatize_tokens(toks_removed_common, batch_size = 50)
dfm <- dfm(toks_lemmatized)
word_frequency_lemmatized <- TextAnalysisR::plot_word_frequency(dfm, n = 20)
optimal_k_result <- TextAnalysisR::find_optimal_k(
dfm_object = dfm,
topic_range = 10:45,
max.em.its = 50,
categorical_var = NULL,
continuous_var = NULL,
height = 600,
width = 800,
verbose = FALSE) 
Structural Topic Modeling
out <- quanteda::convert(dfm, to = "stm")
topic_model <- stm(
data = out$meta,
documents = out$documents,
vocab = out$vocab,
max.em.its = 75,
init.type = "Spectral",
K = 35,
prevalence = ~ c("disability", "SLD"),
verbose = FALSE,
seed = 1234)
top_topic_terms <- TextAnalysisR::get_topic_terms(
stm_model = topic_model,
top_term_n = 10,
verbose = FALSE
)
top_topic_terms %>%
mutate_if(is.numeric, ~ round(., 3)) %>%
DT::datatable(
options = list(
dom = 'Bfrtip',
buttons = c('copy', 'excel', 'pdf'),
pageLength = 10,
scrollX = TRUE
),
extensions = 'Buttons',
class = "cell-border stripe",
rownames = FALSE
)
Generate Policy Topic Labels Using OpenAI’s API
top_labeled_topic_terms <- TextAnalysisR::generate_topic_labels(
top_topic_terms,
model = "gpt-3.5-turbo",
temperature = 0.5,
verbose = FALSE)
Probability of Words Observed in Each Topic (Beta)
word_probability_ggplot <- TextAnalysisR::plot_word_probability(
top_labeled_topic_terms,
topic_label = "topic_label",
ncol = 2
) +
ggplot2::theme(
axis.text.x = ggplot2::element_text(size = 12),
axis.text.y = ggplot2::element_text(size = 12),
axis.title.x = ggplot2::element_text(size = 12),
axis.title.y = ggplot2::element_text(size = 12),
legend.position = "right",
legend.text = ggplot2::element_text(size = 12)
)
word_probability_plot <- plotly::ggplotly(word_probability_ggplot, height = 5000, width = 1500)
word_probability_plot$x$layout$annotations <- lapply(
word_probability_plot$x$layout$annotations,
\(x) { x$font$size <- 16; x }
)
word_probability_plotly <- word_probability_plot |>
plotly::layout(margin = list(t = 50, b = 50, l = 80, r = 20))
Generate Policy Survey Items Using OpenAI’s API
top_labeled_human_loop <- read_excel("data/top_labeled_topic_terms_human_loop.xlsx")
# Custom prompts for educational policy survey items
policy_system_prompt <- "
You are a highly skilled survey designer specializing in creating Likert-scale survey items for educational policy research. Your task is to generate clear, concise survey items that assess the importance of various AI policy aspects for students with learning disabilities.
Guidelines for generating survey items:
1. Format and Structure
- Create statements that can be rated on a 5-point Likert scale (1=Strongly Disagree to 5=Strongly Agree)
- Keep items clear, concise, and focused on a single concept
- Use active voice and present tense
- Avoid double-barreled questions or complex statements
2. Content Focus
- Focus on the importance or necessity of the policy aspect
- Use person-first language (e.g., 'students with learning disabilities' instead of 'disabled students')
- Ensure items are specific and actionable
- Avoid jargon unless necessary for technical accuracy
3. Clarity and Precision
- Use simple, direct language
- Avoid ambiguous terms or complex sentence structures
- Make sure each item measures exactly one concept
- Ensure items are relevant to educational settings
4. Response Scale Considerations
- Frame items to work well with a 5-point Likert scale
- Avoid extreme language that might force responses to one end of the scale
- Ensure items can be meaningfully rated on the agree-disagree continuum
Example
----------
Top Terms (highest to lowest beta score):
virtual manipulatives (.035)
manipulatives (.022)
mathematical (.014)
app (.013)
solving (.013)
learning disability (.012)
algebra (.012)
area (.011)
tool (.010)
concrete manipulatives (.010)
Generated Survey Item:
AI-powered visual manipulatives should be made available to support mathematical problem-solving for students with learning disabilities.
Focus on creating clear, measurable items that capture the essence of each topic while following these guidelines.
"
policy_user_template <- "You have a topic with keywords listed from most to least significant: {terms}
Please create a single survey item that:
1. Captures the essence of this topic
2. Can be rated on a 5-point Likert scale (1=Strongly Disagree to 5=Strongly Agree)
3. Focuses on the importance of this policy aspect
4. Uses person-first language
5. Is clear, concise, and specific
Survey item:"# Generate survey items using TextAnalysisR
policy_survey_items <- TextAnalysisR::generate_topic_content(
topic_terms_df = top_labeled_human_loop,
content_type = "survey_item",
topic_var = "topic",
term_var = "term",
weight_var = "beta",
provider = "openai",
model = "gpt-3.5-turbo",
temperature = 0,
system_prompt = policy_system_prompt,
user_prompt_template = policy_user_template,
max_tokens = 50,
verbose = TRUE
)
Round 1 Delphi Survey Results Table
Decision Rule Based on Experts’ Consensus and Content Validity Ratio (CVR):
- 75% or more of experts agree (Rating ≥ 4) & CVR ≥ 0.60 → Retain
- 70% or more of experts agree (Rating ≥ 4) & CVR ≥ 0.60 → Consider
- 75% or more of experts agree (Rating ≥ 4) & CVR < 0.60 → Consider
- Otherwise → Revise/Remove
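As a worked example of Lawshe's content validity ratio, CVR = (n_e − N/2) / (N/2), where n_e is the number of experts rating an item 4 or higher and N is the panel size: with the N = 17 Round 1 experts, an item needs at least 14 such ratings to clear the 0.60 cutoff. A minimal check follows; the cvr() helper is illustrative, not part of the analysis code below.
# Lawshe's CVR: (n_agree - N/2) / (N/2); helper name is illustrative
cvr <- function(n_agree, n_total) (n_agree - n_total / 2) / (n_total / 2)
cvr(14, 17) # 0.647 -> clears the 0.60 cutoff
cvr(13, 17) # 0.529 -> falls short despite 13/17 = 76.5% agreement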
delphiFirst <- read_excel("data/delphiFirst.xlsx")
likert <- c(
"Strongly Disagree" = 1,
"Disagree" = 2,
"Neutral" = 3,
"Agree" = 4,
"Strongly Agree" = 5
)
question_levels <- sort(as.numeric(gsub("Q", "", unique(delphiFirst %>%
select(matches("^Q[0-9]+$")) %>%
names()))))
question_levels <- paste0("Q", question_levels)
delphi_long <- delphiFirst %>%
pivot_longer(
cols = -c(
ID,
position:zip_code,
starts_with("Q33_"),
"Q34",
"state",
"census_region"
),
names_to = "Question",
values_to = "Rating"
) %>%
mutate(
Rating = likert[Rating],
Question = factor(Question, levels = question_levels)
)
round_1_results <- delphi_long %>%
group_by(Question) %>%
summarise(
Total = n(),
Strongly_Disagree = sum(Rating == 1),
Disagree = sum(Rating == 2),
Neutral = sum(Rating == 3),
Agree = sum(Rating == 4),
Strongly_Agree = sum(Rating == 5),
Agreement_Count = sum(Rating >= 4),
Percent_Agreement = round((Agreement_Count / Total) * 100, 1),
Mean = round(mean(Rating), 2),
SD = round(sd(Rating), 2),
Essential = sum(Rating >= 4),
CVR = round((Essential - (Total / 2)) / (Total / 2), 3),
IQR = IQR(Rating),
Consensus_Decision = case_when(
Percent_Agreement >= 75 ~ "Retain",
Percent_Agreement >= 70 ~ "Consider",
TRUE ~ "Revise/Remove"
),
CVR_Decision = ifelse(CVR >= 0.60, "Retain", "Revise/Remove"),
Final_Decision = case_when(
Percent_Agreement >= 75 & CVR >= 0.60 ~ "Retain",
Percent_Agreement >= 70 & CVR >= 0.60 ~ "Consider",
Percent_Agreement >= 75 & CVR < 0.60 ~ "Consider",
TRUE ~ "Revise/Remove"
),
.groups = 'drop'
) %>%
ungroup() %>%
mutate(
Question = factor(
Question,
levels = Question[order(as.numeric(gsub("Q", "", Question)))]
)
)
# openxlsx::write.xlsx(round_1_results, "results/round_1_results.xlsx", rowNames = FALSE)
round_1_results_label <- read_excel("results/round_1_results_label.xlsx")
Round 1 Delphi Survey Results Plot
round_1_results <- round_1_results %>%
mutate(Final_Decision = factor(Final_Decision, levels = c("Retain", "Consider", "Revise/Remove")))
round_1_results <- round_1_results %>%
mutate(
tooltip_label = paste0(
"Agreement: ", Percent_Agreement, "%\n",
"CVR: ", round(CVR, 2), "\n",
"Final Decision: ", Final_Decision
)
)
round_1_plot <- ggplot(round_1_results, aes(
x = reorder(Question, Percent_Agreement),
y = Percent_Agreement,
fill = Final_Decision,
text = tooltip_label
)) +
geom_col() +
geom_hline(yintercept = 75, linetype = "dashed", color = "blue") +
coord_flip() +
labs(
title = "",
subtitle = "",
x = "Question",
y = "Percent Agreement",
fill = "Final Decision"
) +
scale_fill_manual(values = c(
"Retain" = "darkgreen",
"Consider" = "#568203",
"Revise/Remove" = "orange"
)) +
theme_minimal(base_size = 12) +
theme(
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
legend.position = "right",
legend.text = element_text(size = 12)
)
round_1_plotly <- plotly::ggplotly(round_1_plot, width = 800, height = 700, tooltip = "text") %>%
plotly::layout(
annotations = list(
list(
x = 0,
y = -0.16,
text = "Blue line = 75% agreement threshold (Delphi consensus)",
showarrow = FALSE,
xref = "paper",
yref = "paper",
font = list(size = 16, color = "gray40")
)
),
margin = list(b = 100, t = 40, l = 40)
)
Round 1 Response Distribution by Question
round_1_dist <- delphi_long %>%
mutate(Response_Label = case_when(
Rating == 1 ~ "Strongly Disagree",
Rating == 2 ~ "Disagree",
Rating == 3 ~ "Neutral",
Rating == 4 ~ "Agree",
Rating == 5 ~ "Strongly Agree"
)) %>%
count(Question, Response_Label) %>%
group_by(Question) %>%
mutate(
Percentage = round(n / sum(n) * 100, 1),
Rating_Count = ifelse(Response_Label == "Strongly Agree", n, 0),
Max_Rating = max(Rating_Count)
)
round_1_dist <- round_1_dist %>%
mutate(Response_Label = factor(Response_Label, levels = c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree")))
round_1_dist <- round_1_dist %>%
left_join(
round_1_results %>% select(Question, Percent_Agreement),
by = "Question"
)
round_1_dist_plot <- ggplot(
round_1_dist,
aes(x = reorder(Question, Percent_Agreement), y = n, fill = Response_Label)
) +
geom_col(position = "stack") +
coord_flip() +
labs(
title = "Distribution of Responses by Question",
x = "Question",
y = "Number of Responses",
fill = "Rating"
) +
scale_fill_brewer(type = "div", palette = "RdYlGn") +
guides(fill = guide_legend(reverse = FALSE)) +
theme_minimal(base_size = 12) +
theme(
panel.grid.minor = element_blank(),
panel.grid.major.x = element_line(color = "gray90"),
panel.grid.major.y = element_line(color = "gray90"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
legend.position = "right",
legend.text = element_text(size = 12)
)
round_1_dist_plotly <- plotly::ggplotly(round_1_dist_plot, width = 800, height = 700)
Round 2 Delphi Survey Item Development
# setTimeLimit(cpu = Inf, elapsed = Inf, transient = FALSE)
common_words_round2 <- c("also")
united_tbl_round2 <- TextAnalysisR::unite_cols(delphiFirst, listed_vars = "Q34")
tokens_round2 <- TextAnalysisR::prep_texts(
united_tbl_round2,
text_field = "united_texts",
remove_stopwords = TRUE,
custom_stopwords = common_words_round2,
custom_valuetype = "regex",
verbose = FALSE
)
tokens_collocations_round2 <- TextAnalysisR::detect_multi_words(tokens_round2, size = 2:5, min_count = 2)
dfm_object_round2 <- dfm(tokens_round2)
toks_lemmatized_round2 <- TextAnalysisR::lemmatize_tokens(tokens_round2, batch_size = 50)
dfm_round2 <- dfm(toks_lemmatized_round2)
word_frequency_lemmatized_round2 <- TextAnalysisR::plot_word_frequency(dfm_round2, n = 20)
optimal_k_result_round2 <- TextAnalysisR::find_optimal_k(
dfm_object = dfm_round2,
topic_range = 5:20,
max.em.its = 75,
categorical_var = NULL,
continuous_var = "",
height = 600,
width = 800,
verbose = FALSE)
out_round2 <- quanteda::convert(dfm_round2, to = "stm")
topic_model_round_1 <- stm(
data = out_round2$meta,
documents = out_round2$documents,
vocab = out_round2$vocab,
max.em.its = 75,
init.type = "Spectral",
K = 11,
prevalence = ~ 1,
verbose = FALSE,
seed = 1234)
top_topic_terms_round_1 <- TextAnalysisR::get_topic_terms(
stm_model = topic_model_round_1,
top_term_n = 10,
verbose = FALSE
)
top_labeled_topic_terms_round_1 <- TextAnalysisR::generate_topic_labels(
top_topic_terms_round_1 ,
model = "gpt-3.5-turbo",
temperature = 0.5,
verbose = FALSE)
openxlsx::write.xlsx(top_labeled_topic_terms_round_1, "data/top_labeled_topic_terms_round_1.xlsx")
# Generate survey items for Round 1 using TextAnalysisR
policy_survey_items_round_1 <- TextAnalysisR::generate_topic_content(
topic_terms_df = top_labeled_topic_terms_round_1,
content_type = "survey_item",
topic_var = "topic",
term_var = "term",
weight_var = "beta",
provider = "openai",
model = "gpt-3.5-turbo",
temperature = 0,
system_prompt = policy_system_prompt,
user_prompt_template = policy_user_template,
max_tokens = 50,
verbose = TRUE
)
policy_survey_items_round_1_td <-
policy_survey_items_round_1 %>%
group_by(topic) %>%
summarise(
survey_item = first(survey_item),
term_beta = paste(term, "(", round(beta, 3), ")", collapse = ", ")
) %>%
ungroup()
openxlsx::write.xlsx(policy_survey_items_round_1_td, "data/policy_survey_items_round_1_td.xlsx")
Round 2 Delphi Survey Results Table
Decision Rule Based on Experts’ Consensus and Content Validity Ratio (CVR):
- 75% or more of experts agree (Rating ≥ 4) & CVR ≥ 0.60 → Retain
- 70% or more of experts agree (Rating ≥ 4) & CVR ≥ 0.60 → Consider
- 75% or more of experts agree (Rating ≥ 4) & CVR < 0.60 → Consider
- Otherwise → Revise/Remove
delphiSecond <- read_excel("data/delphiSecond.xlsx")
likert <- c(
"Strongly Disagree" = 1,
"Disagree" = 2,
"Neutral" = 3,
"Agree" = 4,
"Strongly Agree" = 5
)
question_levels <- sort(as.numeric(gsub("Q", "", unique(delphiSecond %>%
select(matches("^Q[0-9]+$")) %>%
names()))))
question_levels <- paste0("Q", question_levels)
delphi_long_2 <- delphiSecond %>%
pivot_longer(
cols = -c(
ID,
starts_with("Q15_"),
"Q16"
),
names_to = "Question",
values_to = "Rating"
) %>%
mutate(
Rating = likert[Rating],
Question = factor(Question, levels = question_levels)
)
round_2_results <- delphi_long_2 %>%
group_by(Question) %>%
summarise(
Total = n(),
Strongly_Disagree = sum(Rating == 1),
Disagree = sum(Rating == 2),
Neutral = sum(Rating == 3),
Agree = sum(Rating == 4),
Strongly_Agree = sum(Rating == 5),
Agreement_Count = sum(Rating >= 4),
Percent_Agreement = round((Agreement_Count / Total) * 100, 1),
Mean = round(mean(Rating), 2),
SD = round(sd(Rating), 2),
Essential = sum(Rating >= 4),
CVR = round((Essential - (Total / 2)) / (Total / 2), 3),
IQR = IQR(Rating),
Consensus_Decision = case_when(
Percent_Agreement >= 75 ~ "Retain",
Percent_Agreement >= 70 ~ "Consider",
TRUE ~ "Revise/Remove"
),
CVR_Decision = ifelse(CVR >= 0.60, "Retain", "Revise/Remove"),
Final_Decision = case_when(
Percent_Agreement >= 75 & CVR >= 0.60 ~ "Retain",
Percent_Agreement >= 70 & CVR >= 0.60 ~ "Consider",
Percent_Agreement >= 75 & CVR < 0.60 ~ "Consider",
TRUE ~ "Revise/Remove"
),
.groups = 'drop'
) %>%
ungroup() %>%
mutate(
Question = factor(
Question,
levels = Question[order(as.numeric(gsub("Q", "", Question)))]
)
)
# openxlsx::write.xlsx(round_2_results, "results/round_2_results.xlsx", rowNames = FALSE)
round_2_results_label <- read_excel("results/round_2_results_label.xlsx")
Round 2 Delphi Survey Results Plot
round_2_results <- round_2_results %>%
mutate(Final_Decision = factor(Final_Decision, levels = c("Retain", "Consider", "Revise/Remove")))
round_2_results <- round_2_results %>%
mutate(
tooltip_label = paste0(
"Agreement: ", Percent_Agreement, "%\n",
"CVR: ", round(CVR, 2), "\n",
"Final Decision: ", Final_Decision
)
)
round_2_plot <- ggplot(round_2_results, aes(
x = reorder(Question, Percent_Agreement),
y = Percent_Agreement,
fill = Final_Decision,
text = tooltip_label
)) +
geom_col() +
geom_hline(yintercept = 75, linetype = "dashed", color = "blue") +
coord_flip() +
labs(
title = "",
subtitle = "",
x = "Question",
y = "Percent Agreement",
fill = "Final Decision"
) +
scale_fill_manual(values = c(
"Retain" = "darkgreen",
"Consider" = "#568203",
"Revise/Remove" = "orange"
)) +
theme_minimal(base_size = 12) +
theme(
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
legend.position = "right",
legend.text = element_text(size = 12)
)
round_2_plotly <- plotly::ggplotly(round_2_plot, width = 800, height = 600, tooltip = "text") %>%
plotly::layout(
annotations = list(
list(
x = 0,
y = -0.22,
text = "Blue line = 75% agreement threshold (Delphi consensus)",
showarrow = FALSE,
xref = "paper",
yref = "paper",
font = list(size = 16, color = "gray40")
)
),
margin = list(b = 120, t = 40, l = 40)
)
Round 2 Response Distribution by Question
round_2_dist <- delphi_long_2 %>%
mutate(Response_Label = case_when(
Rating == 1 ~ "Strongly Disagree",
Rating == 2 ~ "Disagree",
Rating == 3 ~ "Neutral",
Rating == 4 ~ "Agree",
Rating == 5 ~ "Strongly Agree"
)) %>%
count(Question, Response_Label) %>%
group_by(Question) %>%
mutate(
Percentage = round(n / sum(n) * 100, 1),
Rating_Count = ifelse(Response_Label == "Strongly Agree", n, 0),
Max_Rating = max(Rating_Count)
)
round_2_dist <- round_2_dist %>%
mutate(Response_Label = factor(Response_Label, levels = c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree")))
round_2_dist <- round_2_dist %>%
left_join(
round_2_results %>% select(Question, Percent_Agreement),
by = "Question"
)
round_2_dist_plot <- ggplot(
round_2_dist,
aes(x = reorder(Question, Percent_Agreement), y = n, fill = Response_Label)
) +
geom_col(position = "stack") +
coord_flip() +
labs(
title = "Distribution of Responses by Question",
x = "Question",
y = "Number of Responses",
fill = "Rating"
) +
scale_fill_brewer(type = "div", palette = "RdYlGn") +
guides(fill = guide_legend(reverse = FALSE)) +
theme_minimal(base_size = 12) +
theme(
panel.grid.minor = element_blank(),
panel.grid.major.x = element_line(color = "gray90"),
panel.grid.major.y = element_line(color = "gray90"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
legend.position = "right",
legend.text = element_text(size = 12)
)
round_2_dist_plotly <- plotly::ggplotly(round_2_dist_plot, width = 800, height = 600)
Round 2 Delphi Survey Ranking of Policy Items
ranking_long <- delphiSecond %>%
pivot_longer(
cols = starts_with("Q15_"),
names_to = "rank_position",
values_to = "item_selected",
names_pattern = "Q15_(\\d+)"
) %>%
mutate(rank_position = as.numeric(rank_position)) %>%
filter(!is.na(item_selected))
item_percentages <- ranking_long %>%
group_by(item_selected) %>%
summarise(
Times_Selected = n(),
Percentage = round((Times_Selected / nrow(delphiSecond)) * 100, 1), # nrow(delphiSecond) = 14 Round 2 experts
.groups = 'drop'
) %>%
arrange(desc(Percentage))