The TextAnalysisR
package provides a supporting workflow
for text mining analysis. The TextAnalysisR.app()
function
allows users to launch and browse a Shiny app. This web app incorporates
‘quanteda’ (text preprocessing), ‘stm’ (structural topic modeling), and
‘ggraph’ as well as ‘widyr’ (network analysis). ‘tidytext’ is used to
convert non-tidy objects into a tidy format.
Installation
Install the development version from GitHub with:
# Install devtools only when it is not already available, then use it to
# install the development version of TextAnalysisR from GitHub.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
devtools::install_github("mshin77/TextAnalysisR")
Launch and Browse the Shiny app
Preprocess Text Data
# Use the SpecialEduTech object exported by TextAnalysisR as the input data.
data <- TextAnalysisR::SpecialEduTech

# Preprocess the text stored in the "abstract" field. The function is
# namespace-qualified, like the dataset above, so this snippet also runs when
# the package has not been attached with library().
preprocessed_data <- TextAnalysisR::preprocess_texts(
  data,
  text_field = "abstract"
)
Plot Word Frequency
# The input is a document-feature matrix (dfm) object created with the
# quanteda package.
# Plot word frequency for the top 20 terms.
dfm <- SpecialEduTech %>%
  preprocess_texts(text_field = "abstract") %>%
  quanteda::dfm()

dfm %>%
  plot_word_frequency(n = 20)
Examine Highest Per-Term Per-Topic Probabilities
# Build the document-feature matrix from the preprocessed abstracts.
dfm <- SpecialEduTech %>%
  preprocess_texts(text_field = "abstract") %>%
  quanteda::dfm()

# `data` is a tidy data frame that includes per-term per-topic probabilities
# (beta). log = FALSE requests probabilities rather than log probabilities.
# NOTE(review): stm_15 is not defined in this snippet; it is assumed to be a
# fitted stm model already available in the session - confirm.
data <- tidytext::tidy(
  stm_15,
  document_names = rownames(dfm),
  log = FALSE
)

# Examine the top 5 terms with the highest per-term per-topic probabilities.
# Number of top_n can be changed.
# Numeric columns are rounded to 3 decimals before display; across() replaces
# the superseded mutate_if().
data %>%
  examine_top_terms(top_n = 5) %>%
  dplyr::mutate(dplyr::across(where(is.numeric), ~ round(.x, 3))) %>%
  DT::datatable(rownames = FALSE)
Plot Topic Per-Term Per-Topic Probabilities
# Build the document-feature matrix from the preprocessed abstracts.
dfm <- SpecialEduTech %>%
  preprocess_texts(text_field = "abstract") %>%
  quanteda::dfm()

# `data` is a tidy data frame that includes per-term per-topic probabilities
# (beta).
# NOTE(review): stm_15 is not defined in this snippet; it is assumed to be a
# fitted stm model already available in the session - confirm.
data <- tidytext::tidy(
  stm_15,
  document_names = rownames(dfm),
  log = FALSE
)

# Plot per-term per-topic probabilities for the top 2 terms.
# Number of top_n can be changed.
data %>%
  examine_top_terms(top_n = 2) %>%
  plot_topic_term(ncol = 3)
Plot Per-Document Per-Topic Probabilities
# Build the document-feature matrix from the preprocessed abstracts.
dfm <- SpecialEduTech %>%
  preprocess_texts(text_field = "abstract") %>%
  quanteda::dfm()

# `data` is a tidy data frame that includes per-document per-topic
# probabilities (gamma).
data <- tidytext::tidy(
  stm_15,
  matrix = "gamma",
  document_names = rownames(dfm),
  log = FALSE
)

# Plot per-document per-topic probabilities for the top 15 topics and convert
# the plot with plotly::ggplotly(). Number of top_n can be changed.
data %>%
  topic_probability_plot(top_n = 15) %>%
  plotly::ggplotly()
Visualize a Table for Per-Document Per-Topic Probabilities
# Build the document-feature matrix from the preprocessed abstracts.
dfm <- SpecialEduTech %>%
  preprocess_texts(text_field = "abstract") %>%
  quanteda::dfm()

# `data` is a tidy data frame that includes per-document per-topic
# probabilities (gamma).
data <- tidytext::tidy(
  stm_15,
  matrix = "gamma",
  document_names = rownames(dfm),
  log = FALSE
)

# Create a table of per-document per-topic probabilities for the top 15 topics
# and display it with DT::datatable(). Number of top_n can be changed.
data %>%
  topic_probability_table(top_n = 15) %>%
  DT::datatable(rownames = FALSE)