“Analiza na Twitterze w R Clean Tweets” Kod odpowiedzi

Analiza na Twitterze w R Clean Tweets

  # Clean the raw tweet text in `unclean_tweet` and store it in `clean_tweet`.
  # gsub() is vectorised, so a whole character vector of tweets is handled.
  #
  # The steps are order-dependent: punctuation and digits are stripped BEFORE
  # the URL step, so by then a link such as "http://t.co/abc1" has collapsed
  # into the single token "httptcoabc", which "http\\w+" then removes.

  # Drop ampersands, including the HTML entity "&amp;" (otherwise the
  # punctuation step would leave a stray "amp" behind).
  clean_tweet <- gsub("&(amp;)?", "", unclean_tweet)
  # Drop retweet / via headers together with the handles that follow them.
  clean_tweet <- gsub("(RT|via)((?:\\b\\W*@\\w+)+)", "", clean_tweet)
  # Drop remaining @mentions.
  clean_tweet <- gsub("@\\w+", "", clean_tweet)
  clean_tweet <- gsub("[[:punct:]]", "", clean_tweet)
  clean_tweet <- gsub("[[:digit:]]", "", clean_tweet)
  # Drop what is left of URLs (see the ordering note above).
  clean_tweet <- gsub("http\\w+", "", clean_tweet)
  # Collapse runs of spaces/tabs to ONE space; replacing them with "" would
  # glue the neighbouring words together.
  clean_tweet <- gsub("[ \t]{2,}", " ", clean_tweet)
  # Trim leading and trailing whitespace.
  clean_tweet <- gsub("^\\s+|\\s+$", "", clean_tweet)

Analiza na Twitterze w R Clean Tweets

# Strip t.co short links from `clean_tweet3` (produced by steps not shown
# here): first the https form, then the http form.
# The character class is [A-Za-z0-9]; the commas of "[a-z,A-Z,0-9]" were
# literal members of the class, so a comma directly after a link was eaten too.
# The dot in "t.co" is escaped so it only matches a real dot.
clean_tweet4 <- str_replace_all(
  clean_tweet3, "https://t\\.co/[A-Za-z0-9]*", ""
)
clean_tweet5 <- str_replace_all(
  clean_tweet4, "http://t\\.co/[A-Za-z0-9]*", ""
)

Analiza na Twitterze w R Clean Tweets

# Lower-case every document in `df` (presumably a tm corpus -- confirm).
# tolower() is a plain base function, not a tm transformation: passed bare to
# tm_map() it returns character vectors and drops the document class, which
# breaks later steps such as DocumentTermMatrix(). content_transformer()
# wraps it so that only the document content is modified.
df <- tm_map(df, content_transformer(tolower))

Analiza na Twitterze w R Clean Tweets

# Apply tm's removePunctuation transformation to each document in `df`.
df <- tm_map(x = df, FUN = removePunctuation)

Analiza na Twitterze w R Clean Tweets

# Get rid of URLs (t.co short links, http or https).
# The previous pattern ended in "*{8}": two quantifiers stacked on one atom,
# which the ICU engine behind stringr rejects with U_REGEX_RULE_SYNTAX.
# A single "+" matches the whole link id regardless of its length. The commas
# are gone from the character class (they were literal members of it) and the
# dot in "t.co" is escaped.
clean_tweet <- str_replace_all(clean_tweet, "https?://t\\.co/[A-Za-z0-9]+", "")

Analiza na Twitterze w R Clean Tweets

# Apply tm's removeNumbers transformation to each document in `df`.
df <- tm_map(x = df, FUN = removeNumbers)

Analiza na Twitterze w R Clean Tweets

# Get rid of unnecessary spaces
# Collapse every run of two or more spaces into one. (Replacing " " with " "
# was a no-op.)
clean_tweet <- str_replace_all(clean_tweet, " {2,}", " ")
# Get rid of URLs (t.co short links, http or https).
# The previous pattern ended in "*{8}", a stacked quantifier that ICU rejects
# with U_REGEX_RULE_SYNTAX; "+" matches the link id whatever its length.
clean_tweet <- str_replace_all(clean_tweet, "https?://t\\.co/[A-Za-z0-9]+", "")
# Take out retweet header, there is only one (hence str_replace, not _all).
# \\w covers letters, digits and "_", all of which occur in screen names; the
# old class "[a-z,A-Z]" matched letters and commas only.
clean_tweet <- str_replace(clean_tweet, "RT @\\w+: ", "")
# Get rid of hashtags
clean_tweet <- str_replace_all(clean_tweet, "#\\w*", "")
# Get rid of references to other screennames
clean_tweet <- str_replace_all(clean_tweet, "@\\w*", "")

Analiza na Twitterze w R Clean Tweets

Error in stri_replace_all_regex(string, pattern, fix_replacement(replacement),  : 
 Syntax error in regexp pattern. (U_REGEX_RULE_SYNTAX)

Analiza na Twitterze w R Clean Tweets


    #' Clean raw tweet text
    #'
    #' Removes links, retweet markers, @mentions, #hashtags and punctuation,
    #' turns ampersands into "and", replaces newlines with spaces and
    #' lower-cases the result. Relies on stringr and the magrittr pipe being
    #' attached.
    #'
    #' @param x Character vector of tweets.
    #' @return Character vector of the same length as `x`.
    clean_tweets <- function(x) {
      x %>%
        # Remove only the link itself: the old "(.*)[.|/](.*)" tail was
        # greedy and deleted everything that followed a URL.
        str_remove_all(" ?(f|ht)tps?://\\S+") %>%
        # Handle the HTML entity "&amp;" as well as a bare "&".
        str_replace_all("&(amp;)?", "and") %>%
        str_remove_all("^RT:? ") %>%
        # Mentions and hashtags must go BEFORE punctuation is stripped:
        # "@" and "#" are punctuation, so the other order leaves the handle
        # and tag text in place.
        str_remove_all("@[[:alnum:]_]+") %>%
        str_remove_all("#[[:alnum:]_]+") %>%
        str_remove_all("[[:punct:]]") %>%
        str_replace_all("\\n", " ") %>%
        str_to_lower()
    }

    # Example: clean the character vector `tweets`.
    clean_tweets(tweets)

Odpowiedzi podobne do “Analiza na Twitterze w R Clean Tweets”

Pytania podobne do “Analiza na Twitterze w R Clean Tweets”

Więcej pokrewnych odpowiedzi na “Analiza na Twitterze w R Clean Tweets” w TypeScript

Przeglądaj popularne odpowiedzi na kod według języka

Przeglądaj inne języki kodu