This function counts positions in grapheme clusters rather than Unicode code points when extracting substrings of UTF-8 strings.
utf8_substr(x, start, stop)
Character vector of the same length as x, containing the requested substrings.
Other UTF-8 string manipulation: utf8_graphemes(), utf8_nchar()
# Example: a string made of five grapheme clusters, from which the middle
# three (clusters 2 through 4) are selected. Each cluster is a construction
# worker emoji written as several code points (U+200D is a zero-width joiner,
# U+FE0F a variation selector), so start/stop count clusters, not code points.
str <- paste0(
  c(
    "\U0001f477\U0001f3ff\u200d\u2640\ufe0f", # cluster 1
    "\U0001f477\U0001f3ff",                   # cluster 2
    "\U0001f477\u200d\u2640\ufe0f",           # cluster 3
    "\U0001f477\U0001f3fb",                   # cluster 4
    "\U0001f477\U0001f3ff"                    # cluster 5
  ),
  collapse = ""
)

# Print the full five-cluster string.
cat(str)
#> 👷🏿‍♀️👷🏿👷‍♀️👷🏻👷🏿

# Keep clusters 2 to 4 and print them.
str24 <- utf8_substr(x = str, start = 2, stop = 4)
cat(str24)
#> 👷🏿👷‍♀️👷🏻