This function detects multi-word expressions (collocations) of specified sizes that appear at least a specified number of times in the provided tokens.
See also
Other lexical:
calculate_dispersion_metrics(),
calculate_lexical_dispersion(),
calculate_log_odds_ratio(),
calculate_text_readability(),
clear_lexdiv_cache(),
extract_keywords_keyness(),
extract_keywords_tfidf(),
extract_morphology(),
extract_named_entities(),
extract_noun_chunks(),
extract_pos_tags(),
extract_subjects_objects(),
find_similar_words(),
get_sentences(),
get_spacy_embeddings(),
get_spacy_model_info(),
get_word_similarity(),
init_spacy_nlp(),
lexical_analysis,
lexical_diversity_analysis(),
lexical_frequency_analysis(),
parse_morphology_string(),
plot_keyness_keywords(),
plot_keyword_comparison(),
plot_lexical_diversity_distribution(),
plot_morphology_feature(),
plot_readability_by_group(),
plot_readability_distribution(),
plot_tfidf_keywords(),
plot_top_readability_documents(),
render_displacy_dep(),
render_displacy_ent(),
spacy_extract_entities(),
spacy_has_vectors(),
spacy_initialized(),
spacy_parse_full(),
summarize_morphology()
Examples
# Run the example only in an interactive R session.
if (interactive()) {
  # Example dataset shipped with the TextAnalysisR package.
  mydata <- TextAnalysisR::SpecialEduTech

  # Combine the listed text columns; the result is presumably exposed as a
  # "united_texts" field, since that is the name passed to prep_texts() below.
  united_tbl <- TextAnalysisR::unite_cols(
    mydata,
    listed_vars = c("title", "keyword", "abstract")
  )

  # Prepare the united text for analysis (assumed to yield a tokens object).
  tokens <- TextAnalysisR::prep_texts(united_tbl, text_field = "united_texts")

  # Detect multi-word expressions of 2 to 5 words that occur at least twice.
  collocations <- TextAnalysisR::detect_multi_words(
    tokens,
    size = 2:5,
    min_count = 2
  )

  print(collocations)
}
