Convert Word content to Excel in R: code examples
Example 1: Convert Word content to Excel in R
# Strip stand-alone page markers of the form "- <number> -" from a text whose
# markers each sit on their own line.
library(stringi)  # every stri_* helper below comes from stringi

sample_text_b <- "The quick brown fox named Seamus
- 1 -
jumps over the lazy dog also named Seamus, with
- 2 -
the newspaper from a boy named quick Seamus, in his mouth.
- 33 -
The quicker brown fox jumped over 2 lazy dogs."
sample_text_b
## [1] "The quick brown fox named Seamus \n- 1 - \njumps over the lazy dog also named Seamus, with \n- 2 - \nthe newspaper from a boy named quick Seamus, in his mouth. \n- 33 - \nThe quicker brown fox jumped over 2 lazy dogs."

# A marker must fill the whole line (surrounding whitespace allowed) and must
# contain at least one digit, so "- 2 -" inside a real sentence is left alone.
marker_pattern_b <- "^\\s*- \\d+ -\\s*$"

# Work line by line: split on newlines, blank out the marker lines, trim
# surrounding whitespace, then drop the lines that ended up empty.
sample_text_b2 <- unlist(
  stri_split_fixed(sample_text_b, "\n"),
  use.names = FALSE
)
sample_text_b2 <- stri_replace_all_regex(sample_text_b2, marker_pattern_b, "")
sample_text_b2 <- stri_trim_both(sample_text_b2)
sample_text_b2 <- sample_text_b2[sample_text_b2 != ""]

# Re-join the surviving lines into a single string.
stri_paste(sample_text_b2, collapse = "\n")
## [1] "The quick brown fox named Seamus\njumps over the lazy dog also named Seamus, with\nthe newspaper from a boy named quick Seamus, in his mouth.\nThe quicker brown fox jumped over 2 lazy dogs."
Example 2: Convert Word content to Excel in R
## Read in Word data (.docx)
# readtext() is provided by the readtext package, so load it before the call.
library(readtext)
# NOTE(review): DATA_DIR is not defined in this snippet; it must already hold
# the path of the data directory. The "*.docx" glob is handed to readtext(),
# which presumably expands it to every Word file under word/ (the output below
# lists two documents).
readtext(file.path(DATA_DIR, "word", "*.docx"))
## readtext object consisting of 2 documents and 0 docvars.
## # Description: df[,2] [2 × 2]
## doc_id text
##
## 1 UK_2015_EccentricParty.docx "\"The Eccent\"..."
## 2 UK_2015_LoonyParty.docx "\"The Offici\"..."
Example 3: Convert Word content to Excel in R
# Make some text with page numbers: each "page <number>" marker sits on its
# own line between the body lines.
library(stringi)  # every stri_* helper below comes from stringi

sample_text_a <- "The quick brown fox named Seamus jumps over the lazy dog also named Seamus,
page 1
with the newspaper from a boy named quick Seamus, in his mouth.
page 2
The quicker brown fox jumped over 2 lazy dogs."
sample_text_a
## [1] "The quick brown fox named Seamus jumps over the lazy dog also named Seamus, \npage 1 \nwith the newspaper from a boy named quick Seamus, in his mouth.\npage 2\nThe quicker brown fox jumped over 2 lazy dogs."

# Remove "page" and its digits. The marker must fill the whole line
# (surrounding whitespace allowed) and carry at least one digit, so the word
# "page" inside ordinary prose (e.g. "front page news") is never deleted.
marker_pattern_a <- "^\\s*page \\d+\\s*$"

# Work line by line: split on newlines, blank out the marker lines, trim
# surrounding whitespace, then drop the lines that ended up empty.
sample_text_a2 <- unlist(
  stri_split_fixed(sample_text_a, "\n"),
  use.names = FALSE
)
sample_text_a2 <- stri_replace_all_regex(sample_text_a2, marker_pattern_a, "")
sample_text_a2 <- stri_trim_both(sample_text_a2)
sample_text_a2 <- sample_text_a2[sample_text_a2 != ""]

# Re-join the surviving lines into a single string.
stri_paste(sample_text_a2, collapse = "\n")
## [1] "The quick brown fox named Seamus jumps over the lazy dog also named Seamus,\nwith the newspaper from a boy named quick Seamus, in his mouth.\nThe quicker brown fox jumped over 2 lazy dogs."
Example 4: Convert Word content to Excel in R
# Load readtext package
library(readtext)