For this tutorial, download the file SwissPoliticians-tweets.json.gz (~21MB) and SwissPoliticians.csv.
You can find the RMD file for this tutorial here.
Today, we will go together through the introductory part of the fourth exercise, where we will analyze the network of Swiss politicians on Twitter. Here, we will do the basics of loading and processing Twitter data from a file with tweets in JSON.
First load the packages we will use in this exercise: dplyr, tidygraph, jsonlite, and ggraph.
library(dplyr)
library(tidygraph)
library(jsonlite)
library(ggraph)
Download the file SwissPoliticians.csv and read it as a csv in R. Take into account that separators are tabs. Change the screen names of accounts to lower case and add a column with a sequential id from 1 to the number of politicians.
# Load the politicians table; the file is tab-separated despite its .csv name.
poldf <- read.csv(
  "SwissPoliticians.csv",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Normalise the account screen names to lower case.
poldf$screenName <- tolower(poldf$screenName)
# Sequential id from 1 to the number of politicians. seq_len() returns an
# empty vector for a 0-row table, whereas seq(1, 0) would yield c(1, 0).
poldf$id <- seq_len(nrow(poldf))
Read the politician tweets file taking into account that it is compressed. Print a random line and its content read as JSON. Check Exercise 2 (SIT on Twitter) if you need an example of how to do this.
# Read every line of the gzip-compressed tweets file.
# The connection is bound to a name so it can be closed (destroyed) explicitly:
# readLines() only closes a connection it opened itself, it does not destroy it,
# which otherwise leaves it for the garbage collector to clean up with a warning.
tweets_con <- gzfile("SwissPoliticians-tweets.json.gz")
lines <- readLines(tweets_con)
close(tweets_con)
# Pick one line at random and print its raw text.
line <- lines[sample(length(lines), 1)]
line
## [1] "{\"created_at\":\"Fri Sep 15 09:39:17 +0000 2017\",\"id\":908626265497850000,\"id_str\":\"908626265497849856\",\"full_text\":\"ist immer ein Erlebniss seinen Vortrag zu hören. Übrigens viel Glück bei den Wahlen. https://t.co/GKL8PU1dyU\",\"truncated\":false,\"display_text_range\":[0,84],\"entities\":{\"hashtags\":[],\"symbols\":[],\"user_mentions\":[],\"urls\":[{\"url\":\"https://t.co/GKL8PU1dyU\",\"expanded_url\":\"https://twitter.com/OrunP/status/908622041674387457\",\"display_url\":\"twitter.com/OrunP/status/9…\",\"indices\":[85,108]}],\"media\":{}},\"extended_entities\":{\"media\":{}},\"source\":\"<a href=\\\"http://twitter.com\\\" rel=\\\"nofollow\\\">Twitter Web Client</a>\",\"user\":{\"id\":428852601,\"id_str\":\"428852601\",\"name\":\"Michael Merkli\",\"screen_name\":\"MichaelMerkli\",\"location\":\"Wettingen Schweiz\",\"description\":\"Versicherungs-Fachmann mit eidg. Fachausweis, Treuhänder mit Fachausweis und eidg. dipl. Finanzplanungs-Experte\",\"url\":\"http://t.co/Z9cY6aucLy\",\"entities\":{\"url\":{\"urls\":[{\"url\":\"http://t.co/Z9cY6aucLy\",\"expanded_url\":\"http://michaelmerkli.ch\",\"display_url\":\"michaelmerkli.ch\",\"indices\":[0,22]}]},\"description\":{\"urls\":[]}},\"protected\":false,\"followers_count\":109,\"friends_count\":106,\"listed_count\":9,\"created_at\":\"Mon Dec 05 07:56:01 +0000 
2011\",\"favourites_count\":52,\"geo_enabled\":false,\"verified\":false,\"statuses_count\":470,\"contributors_enabled\":false,\"is_translator\":false,\"is_translation_enabled\":false,\"profile_background_color\":\"C0DEED\",\"profile_background_image_url\":\"http://abs.twimg.com/images/themes/theme1/bg.png\",\"profile_background_image_url_https\":\"https://abs.twimg.com/images/themes/theme1/bg.png\",\"profile_background_tile\":false,\"profile_image_url\":\"http://pbs.twimg.com/profile_images/794838274418372608/BNC6xlaU_normal.jpg\",\"profile_image_url_https\":\"https://pbs.twimg.com/profile_images/794838274418372608/BNC6xlaU_normal.jpg\",\"profile_banner_url\":\"https://pbs.twimg.com/profile_banners/428852601/1506321396\",\"profile_link_color\":\"1DA1F2\",\"profile_sidebar_border_color\":\"C0DEED\",\"profile_sidebar_fill_color\":\"DDEEF6\",\"profile_text_color\":\"333333\",\"profile_use_background_image\":true,\"has_extended_profile\":false,\"default_profile\":true,\"default_profile_image\":false,\"following\":false,\"follow_request_sent\":false,\"notifications\":false,\"translator_type\":\"none\",\"withheld_in_countries\":[]},\"retweeted_status\":{\"display_text_range\":{},\"entities\":{\"hashtags\":{},\"symbols\":{},\"user_mentions\":{},\"urls\":{},\"media\":{}},\"extended_entities\":{\"media\":{}},\"user\":{\"entities\":{\"description\":{\"urls\":{}},\"url\":{\"urls\":{}}},\"withheld_in_countries\":{}}},\"is_quote_status\":true,\"retweet_count\":0,\"favorite_count\":0,\"favorited\":false,\"retweeted\":false,\"possibly_sensitive\":false,\"lang\":\"de\",\"quoted_status_id\":908622041674387000,\"quoted_status_id_str\":\"908622041674387457\",\"quoted_status_permalink\":{\"url\":\"https://t.co/GKL8PU1dyU\",\"expanded\":\"https://twitter.com/OrunP/status/908622041674387457\",\"display\":\"twitter.com/OrunP/status/9…\"},\"quoted_status\":{\"display_text_range\":{},\"entities\":{\"hashtags\":{},\"symbols\":{},\"user_mentions\":{},\"urls\":{},\"media\":{}},\"user\":{\"enti
ties\":{\"url\":{\"urls\":{}},\"description\":{\"urls\":{}}},\"withheld_in_countries\":{}},\"extended_entities\":{\"media\":{}},\"quoted_status_permalink\":{}}}"
# Parse the sampled line as JSON; the resulting nested list is printed.
fromJSON(line)
## $created_at
## [1] "Fri Sep 15 09:39:17 +0000 2017"
##
## $id
## [1] 9.086263e+17
##
## $id_str
## [1] "908626265497849856"
##
## $full_text
## [1] "ist immer ein Erlebniss seinen Vortrag zu hören. Übrigens viel Glück bei den Wahlen. https://t.co/GKL8PU1dyU"
##
## $truncated
## [1] FALSE
##
## $display_text_range
## [1] 0 84
##
## $entities
## $entities$hashtags
## list()
##
## $entities$symbols
## list()
##
## $entities$user_mentions
## list()
##
## $entities$urls
## url expanded_url
## 1 https://t.co/GKL8PU1dyU https://twitter.com/OrunP/status/908622041674387457
## display_url indices
## 1 twitter.com/OrunP/status/9… 85, 108
##
## $entities$media
## named list()
##
##
## $extended_entities
## $extended_entities$media
## named list()
##
##
## $source
## [1] "<a href=\"http://twitter.com\" rel=\"nofollow\">Twitter Web Client</a>"
##
## $user
## $user$id
## [1] 428852601
##
## $user$id_str
## [1] "428852601"
##
## $user$name
## [1] "Michael Merkli"
##
## $user$screen_name
## [1] "MichaelMerkli"
##
## $user$location
## [1] "Wettingen Schweiz"
##
## $user$description
## [1] "Versicherungs-Fachmann mit eidg. Fachausweis, Treuhänder mit Fachausweis und eidg. dipl. Finanzplanungs-Experte"
##
## $user$url
## [1] "http://t.co/Z9cY6aucLy"
##
## $user$entities
## $user$entities$url
## $user$entities$url$urls
## url expanded_url display_url indices
## 1 http://t.co/Z9cY6aucLy http://michaelmerkli.ch michaelmerkli.ch 0, 22
##
##
## $user$entities$description
## $user$entities$description$urls
## list()
##
##
##
## $user$protected
## [1] FALSE
##
## $user$followers_count
## [1] 109
##
## $user$friends_count
## [1] 106
##
## $user$listed_count
## [1] 9
##
## $user$created_at
## [1] "Mon Dec 05 07:56:01 +0000 2011"
##
## $user$favourites_count
## [1] 52
##
## $user$geo_enabled
## [1] FALSE
##
## $user$verified
## [1] FALSE
##
## $user$statuses_count
## [1] 470
##
## $user$contributors_enabled
## [1] FALSE
##
## $user$is_translator
## [1] FALSE
##
## $user$is_translation_enabled
## [1] FALSE
##
## $user$profile_background_color
## [1] "C0DEED"
##
## $user$profile_background_image_url
## [1] "http://abs.twimg.com/images/themes/theme1/bg.png"
##
## $user$profile_background_image_url_https
## [1] "https://abs.twimg.com/images/themes/theme1/bg.png"
##
## $user$profile_background_tile
## [1] FALSE
##
## $user$profile_image_url
## [1] "http://pbs.twimg.com/profile_images/794838274418372608/BNC6xlaU_normal.jpg"
##
## $user$profile_image_url_https
## [1] "https://pbs.twimg.com/profile_images/794838274418372608/BNC6xlaU_normal.jpg"
##
## $user$profile_banner_url
## [1] "https://pbs.twimg.com/profile_banners/428852601/1506321396"
##
## $user$profile_link_color
## [1] "1DA1F2"
##
## $user$profile_sidebar_border_color
## [1] "C0DEED"
##
## $user$profile_sidebar_fill_color
## [1] "DDEEF6"
##
## $user$profile_text_color
## [1] "333333"
##
## $user$profile_use_background_image
## [1] TRUE
##
## $user$has_extended_profile
## [1] FALSE
##
## $user$default_profile
## [1] TRUE
##
## $user$default_profile_image
## [1] FALSE
##
## $user$following
## [1] FALSE
##
## $user$follow_request_sent
## [1] FALSE
##
## $user$notifications
## [1] FALSE
##
## $user$translator_type
## [1] "none"
##
## $user$withheld_in_countries
## list()
##
##
## $retweeted_status
## $retweeted_status$display_text_range
## named list()
##
## $retweeted_status$entities
## $retweeted_status$entities$hashtags
## named list()
##
## $retweeted_status$entities$symbols
## named list()
##
## $retweeted_status$entities$user_mentions
## named list()
##
## $retweeted_status$entities$urls
## named list()
##
## $retweeted_status$entities$media
## named list()
##
##
## $retweeted_status$extended_entities
## $retweeted_status$extended_entities$media
## named list()
##
##
## $retweeted_status$user
## $retweeted_status$user$entities
## $retweeted_status$user$entities$description
## $retweeted_status$user$entities$description$urls
## named list()
##
##
## $retweeted_status$user$entities$url
## $retweeted_status$user$entities$url$urls
## named list()
##
##
##
## $retweeted_status$user$withheld_in_countries
## named list()
##
##
##
## $is_quote_status
## [1] TRUE
##
## $retweet_count
## [1] 0
##
## $favorite_count
## [1] 0
##
## $favorited
## [1] FALSE
##
## $retweeted
## [1] FALSE
##
## $possibly_sensitive
## [1] FALSE
##
## $lang
## [1] "de"
##
## $quoted_status_id
## [1] 9.08622e+17
##
## $quoted_status_id_str
## [1] "908622041674387457"
##
## $quoted_status_permalink
## $quoted_status_permalink$url
## [1] "https://t.co/GKL8PU1dyU"
##
## $quoted_status_permalink$expanded
## [1] "https://twitter.com/OrunP/status/908622041674387457"
##
## $quoted_status_permalink$display
## [1] "twitter.com/OrunP/status/9…"
##
##
## $quoted_status
## $quoted_status$display_text_range
## named list()
##
## $quoted_status$entities
## $quoted_status$entities$hashtags
## named list()
##
## $quoted_status$entities$symbols
## named list()
##
## $quoted_status$entities$user_mentions
## named list()
##
## $quoted_status$entities$urls
## named list()
##
## $quoted_status$entities$media
## named list()
##
##
## $quoted_status$user
## $quoted_status$user$entities
## $quoted_status$user$entities$url
## $quoted_status$user$entities$url$urls
## named list()
##
##
## $quoted_status$user$entities$description
## $quoted_status$user$entities$description$urls
## named list()
##
##
##
## $quoted_status$user$withheld_in_countries
## named list()
##
##
## $quoted_status$extended_entities
## $quoted_status$extended_entities$media
## named list()
##
##
## $quoted_status$quoted_status_permalink
## named list()
Iterate over all the lines you read from the file, interpreting each one as a JSON object with the data of a tweet. For each tweet that is a retweet, save the screen name of the user who tweeted it and the screen name of the user who made the tweet being retweeted. Save these two in a data frame with two columns.
# Collect one (retweeting user, retweeted user) screen-name pair per retweet.
# Both vectors are preallocated for the worst case (every line is a retweet)
# and trimmed afterwards, instead of being grown one element at a time.
userName <- character(length(lines))
RTuserName <- character(length(lines))
n_retweets <- 0L
# The loop variable is not called `line`, so the sampled line is not clobbered.
for (json_line in lines) {
  tweet <- fromJSON(json_line)
  # A tweet is treated as a retweet when its retweeted_status has an id_str;
  # an empty retweeted_status object can be present on non-retweets.
  if (!is.null(tweet$retweeted_status$id_str)) {
    n_retweets <- n_retweets + 1L
    userName[n_retweets] <- tweet$user$screen_name
    RTuserName[n_retweets] <- tweet$retweeted_status$user$screen_name
  }
}
# Drop the unused tail of the preallocated vectors.
userName <- userName[seq_len(n_retweets)]
RTuserName <- RTuserName[seq_len(n_retweets)]
# Screen names are lower-cased; columns are kept as character (not factor) so
# they can be compared with != regardless of the R version in use.
tweetsdf <- data.frame(
  userName = tolower(userName),
  RTuserName = tolower(RTuserName),
  stringsAsFactors = FALSE
)
As a last step, filter the data frame to remove cases in which a politician was retweeting themselves. How many retweets did you have in the dataset before and after this filter?
# Number of retweets before self-retweets are removed.
nrow(tweetsdf)
## [1] 19047
# Remove rows where a user retweeted their own tweet, then count what is left.
tweetsdf <- tweetsdf %>%
  filter(userName != RTuserName)
nrow(tweetsdf)
## [1] 18959
Your turn
Which user has the highest degree? Which one is retweeted the most? Do you notice any difference in the type of accounts at the top of these two metrics?
#Your code here