Plot Highest Word Probabilities for Each Topic — word_probability

This function visualizes the top terms for each topic, ordered by their per-topic word probability (beta).

Usage

word_probability_plot(
  top_topic_terms,
  topic_label = NULL,
  ncol = 3,
  height = 1200,
  width = 800,
  ...
)

Arguments

top_topic_terms: A data frame containing the top terms for each topic.
topic_label: A character vector of topic labels, one per topic (for example, a named vector keyed by topic number, as in the examples). If NULL, the function uses the topic number.
ncol: The number of columns in the facet plot (default: 3).
height: The height of the resulting Plotly plot, in pixels (default: 1200).
width: The width of the resulting Plotly plot, in pixels (default: 800).
...: Additional arguments passed to plotly::layout.

Value

A Plotly object showing a facet-wrapped chart of top terms for each topic, ordered by their per-topic probability (beta). Each facet represents a topic.

Details

The function uses the ggplot2 package to create a facet-wrapped chart of top terms for each topic, ordered by their per-topic probability (beta), and returns it as a Plotly object.

Examples

if (interactive()) {
  # End-to-end example: fit a structural topic model on the bundled data,
  # extract the top terms per topic, and plot their word probabilities.
  #
  # NOTE: everything here runs inside a braced block, where R auto-prints
  # only the value of the LAST expression. Intermediate results that should
  # be shown are therefore wrapped in print() explicitly.

  # Example data exported by TextAnalysisR.
  df <- TextAnalysisR::SpecialEduTech

  # Combine the listed text columns; the next step reads the result from
  # the "united_texts" field.
  united_tbl <- TextAnalysisR::unite_text_cols(
    df,
    listed_vars = c("title", "keyword", "abstract")
  )

  tokens <- TextAnalysisR::preprocess_texts(
    united_tbl,
    text_field = "united_texts"
  )

  # Build a document-feature matrix and convert it to the input format
  # expected by the stm package.
  dfm_object <- quanteda::dfm(tokens)

  out <- quanteda::convert(dfm_object, to = "stm")

  # Fit a 15-topic structural topic model.
  stm_15 <- stm::stm(
    data = out$meta,
    documents = out$documents,
    vocab = out$vocab,
    max.em.its = 75,
    init.type = "Spectral",
    K = 15,
    prevalence = ~ reference_type + s(year),
    verbose = TRUE
  )

  top_topic_terms <- TextAnalysisR::select_top_topic_terms(
    stm_model = stm_15,
    top_term_n = 10,
    verbose = TRUE
  )

  # Generate topic labels with an OpenAI model. Replace the placeholder
  # with your own API key before running.
  top_labeled_topic_terms <- TextAnalysisR::generate_topic_labels(
    top_topic_terms = top_topic_terms,
    model = "gpt-3.5-turbo",
    temperature = 0.5,
    openai_api_key = "your_openai_api_key",
    verbose = TRUE
  )
  print(top_labeled_topic_terms)

  # Plot using the generated labels.
  # NOTE(review): "topic_label" is presumably the name of the label column
  # added by generate_topic_labels() -- confirm against that function.
  print(
    TextAnalysisR::word_probability_plot(
      top_labeled_topic_terms,
      topic_label = "topic_label",
      ncol = 3,
      height = 1200,
      width = 800
    )
  )

  # Plot without labels (topic_label defaults to NULL, so topic numbers
  # are used).
  print(
    TextAnalysisR::word_probability_plot(
      top_topic_terms,
      ncol = 3,
      height = 1200,
      width = 800
    )
  )

  # Plot with manually supplied labels, keyed by topic number.
  # NOTE(review): labels are given for topics 1-3 only while the model has
  # 15 topics -- confirm how unlabeled topics are displayed.
  manual_labels <- c(
    "1" = "Mathematical technology for students with LD",
    "2" = "STEM technology",
    "3" = "CAI for math problem solving"
  )

  # Store the plot under a name that does not shadow the function itself.
  labeled_plot <- TextAnalysisR::word_probability_plot(
    top_topic_terms,
    topic_label = manual_labels,
    ncol = 3,
    height = 1200,
    width = 800
  )
  print(labeled_plot)
}