For this tutorial, download the file SwissPoliticians-tweets.json.gz (~21MB) and SwissPoliticians.csv.

You can find the RMD file for this tutorial here.

Today, we will go together through the introductory part of the fourth exercise, where we will analyze the network of Swiss politicians on Twitter. Here, we will do the basics of loading and processing Twitter data from a file with tweets in JSON.

1. Load the timeline data of Twitter user accounts of Swiss politicians

First load the packages we will use in this exercise: dplyr, tidygraph, jsonlite, and ggraph.

library(dplyr)
library(tidygraph)
library(jsonlite)
library(ggraph)

Download the file SwissPoliticians.csv and read it as a csv in R. Take into account that separators are tabs. Change the screen names of accounts to lower case and add a column with a sequential id from 1 to the number of politicians.

# Load the politician table. The file is tab-separated with a header row;
# text columns are kept as character vectors rather than factors.
poldf <- read.csv(
  "SwissPoliticians.csv",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Lower-case the screen names so that they can be matched case-insensitively.
poldf$screenName <- tolower(poldf$screenName)
# Sequential id 1..n. seq_len() returns an empty vector for an empty table,
# whereas seq(1, 0) would produce c(1, 0) and break the assignment.
poldf$id <- seq_len(nrow(poldf))

Read the politician tweets file taking into account that it is compressed. Print a random line and its content read as JSON. Check Exercise 2 (SIT on Twitter) if you need an example of how to do this.

# Read the gzip-compressed file through an explicit connection so that the
# connection is released right after reading instead of being left for the
# garbage collector.
tweets_con <- gzfile("SwissPoliticians-tweets.json.gz", open = "rt")
lines <- readLines(tweets_con)
close(tweets_con)
# Pick one line at random and print its raw text.
line <- lines[sample(length(lines), 1)]
line
## [1] "{\"created_at\":\"Fri Sep 15 09:39:17 +0000 2017\",\"id\":908626265497850000,\"id_str\":\"908626265497849856\",\"full_text\":\"ist immer ein Erlebniss seinen Vortrag zu hören. Übrigens viel Glück bei den Wahlen. https://t.co/GKL8PU1dyU\",\"truncated\":false,\"display_text_range\":[0,84],\"entities\":{\"hashtags\":[],\"symbols\":[],\"user_mentions\":[],\"urls\":[{\"url\":\"https://t.co/GKL8PU1dyU\",\"expanded_url\":\"https://twitter.com/OrunP/status/908622041674387457\",\"display_url\":\"twitter.com/OrunP/status/9…\",\"indices\":[85,108]}],\"media\":{}},\"extended_entities\":{\"media\":{}},\"source\":\"<a href=\\\"http://twitter.com\\\" rel=\\\"nofollow\\\">Twitter Web Client</a>\",\"user\":{\"id\":428852601,\"id_str\":\"428852601\",\"name\":\"Michael Merkli\",\"screen_name\":\"MichaelMerkli\",\"location\":\"Wettingen Schweiz\",\"description\":\"Versicherungs-Fachmann mit eidg. Fachausweis,  Treuhänder mit Fachausweis und eidg. dipl. Finanzplanungs-Experte\",\"url\":\"http://t.co/Z9cY6aucLy\",\"entities\":{\"url\":{\"urls\":[{\"url\":\"http://t.co/Z9cY6aucLy\",\"expanded_url\":\"http://michaelmerkli.ch\",\"display_url\":\"michaelmerkli.ch\",\"indices\":[0,22]}]},\"description\":{\"urls\":[]}},\"protected\":false,\"followers_count\":109,\"friends_count\":106,\"listed_count\":9,\"created_at\":\"Mon Dec 05 07:56:01 +0000 
2011\",\"favourites_count\":52,\"geo_enabled\":false,\"verified\":false,\"statuses_count\":470,\"contributors_enabled\":false,\"is_translator\":false,\"is_translation_enabled\":false,\"profile_background_color\":\"C0DEED\",\"profile_background_image_url\":\"http://abs.twimg.com/images/themes/theme1/bg.png\",\"profile_background_image_url_https\":\"https://abs.twimg.com/images/themes/theme1/bg.png\",\"profile_background_tile\":false,\"profile_image_url\":\"http://pbs.twimg.com/profile_images/794838274418372608/BNC6xlaU_normal.jpg\",\"profile_image_url_https\":\"https://pbs.twimg.com/profile_images/794838274418372608/BNC6xlaU_normal.jpg\",\"profile_banner_url\":\"https://pbs.twimg.com/profile_banners/428852601/1506321396\",\"profile_link_color\":\"1DA1F2\",\"profile_sidebar_border_color\":\"C0DEED\",\"profile_sidebar_fill_color\":\"DDEEF6\",\"profile_text_color\":\"333333\",\"profile_use_background_image\":true,\"has_extended_profile\":false,\"default_profile\":true,\"default_profile_image\":false,\"following\":false,\"follow_request_sent\":false,\"notifications\":false,\"translator_type\":\"none\",\"withheld_in_countries\":[]},\"retweeted_status\":{\"display_text_range\":{},\"entities\":{\"hashtags\":{},\"symbols\":{},\"user_mentions\":{},\"urls\":{},\"media\":{}},\"extended_entities\":{\"media\":{}},\"user\":{\"entities\":{\"description\":{\"urls\":{}},\"url\":{\"urls\":{}}},\"withheld_in_countries\":{}}},\"is_quote_status\":true,\"retweet_count\":0,\"favorite_count\":0,\"favorited\":false,\"retweeted\":false,\"possibly_sensitive\":false,\"lang\":\"de\",\"quoted_status_id\":908622041674387000,\"quoted_status_id_str\":\"908622041674387457\",\"quoted_status_permalink\":{\"url\":\"https://t.co/GKL8PU1dyU\",\"expanded\":\"https://twitter.com/OrunP/status/908622041674387457\",\"display\":\"twitter.com/OrunP/status/9…\"},\"quoted_status\":{\"display_text_range\":{},\"entities\":{\"hashtags\":{},\"symbols\":{},\"user_mentions\":{},\"urls\":{},\"media\":{}},\"user\":{\"enti
ties\":{\"url\":{\"urls\":{}},\"description\":{\"urls\":{}}},\"withheld_in_countries\":{}},\"extended_entities\":{\"media\":{}},\"quoted_status_permalink\":{}}}"
# Parse the sampled line as JSON and print the resulting nested list.
# Note that the numeric `id` field is printed in floating point and does not
# carry all the digits of `id_str`; use the `*_str` fields as identifiers.
fromJSON(line)
## $created_at
## [1] "Fri Sep 15 09:39:17 +0000 2017"
## 
## $id
## [1] 9.086263e+17
## 
## $id_str
## [1] "908626265497849856"
## 
## $full_text
## [1] "ist immer ein Erlebniss seinen Vortrag zu hören. Übrigens viel Glück bei den Wahlen. https://t.co/GKL8PU1dyU"
## 
## $truncated
## [1] FALSE
## 
## $display_text_range
## [1]  0 84
## 
## $entities
## $entities$hashtags
## list()
## 
## $entities$symbols
## list()
## 
## $entities$user_mentions
## list()
## 
## $entities$urls
##                       url                                        expanded_url
## 1 https://t.co/GKL8PU1dyU https://twitter.com/OrunP/status/908622041674387457
##                   display_url indices
## 1 twitter.com/OrunP/status/9… 85, 108
## 
## $entities$media
## named list()
## 
## 
## $extended_entities
## $extended_entities$media
## named list()
## 
## 
## $source
## [1] "<a href=\"http://twitter.com\" rel=\"nofollow\">Twitter Web Client</a>"
## 
## $user
## $user$id
## [1] 428852601
## 
## $user$id_str
## [1] "428852601"
## 
## $user$name
## [1] "Michael Merkli"
## 
## $user$screen_name
## [1] "MichaelMerkli"
## 
## $user$location
## [1] "Wettingen Schweiz"
## 
## $user$description
## [1] "Versicherungs-Fachmann mit eidg. Fachausweis,  Treuhänder mit Fachausweis und eidg. dipl. Finanzplanungs-Experte"
## 
## $user$url
## [1] "http://t.co/Z9cY6aucLy"
## 
## $user$entities
## $user$entities$url
## $user$entities$url$urls
##                      url            expanded_url      display_url indices
## 1 http://t.co/Z9cY6aucLy http://michaelmerkli.ch michaelmerkli.ch   0, 22
## 
## 
## $user$entities$description
## $user$entities$description$urls
## list()
## 
## 
## 
## $user$protected
## [1] FALSE
## 
## $user$followers_count
## [1] 109
## 
## $user$friends_count
## [1] 106
## 
## $user$listed_count
## [1] 9
## 
## $user$created_at
## [1] "Mon Dec 05 07:56:01 +0000 2011"
## 
## $user$favourites_count
## [1] 52
## 
## $user$geo_enabled
## [1] FALSE
## 
## $user$verified
## [1] FALSE
## 
## $user$statuses_count
## [1] 470
## 
## $user$contributors_enabled
## [1] FALSE
## 
## $user$is_translator
## [1] FALSE
## 
## $user$is_translation_enabled
## [1] FALSE
## 
## $user$profile_background_color
## [1] "C0DEED"
## 
## $user$profile_background_image_url
## [1] "http://abs.twimg.com/images/themes/theme1/bg.png"
## 
## $user$profile_background_image_url_https
## [1] "https://abs.twimg.com/images/themes/theme1/bg.png"
## 
## $user$profile_background_tile
## [1] FALSE
## 
## $user$profile_image_url
## [1] "http://pbs.twimg.com/profile_images/794838274418372608/BNC6xlaU_normal.jpg"
## 
## $user$profile_image_url_https
## [1] "https://pbs.twimg.com/profile_images/794838274418372608/BNC6xlaU_normal.jpg"
## 
## $user$profile_banner_url
## [1] "https://pbs.twimg.com/profile_banners/428852601/1506321396"
## 
## $user$profile_link_color
## [1] "1DA1F2"
## 
## $user$profile_sidebar_border_color
## [1] "C0DEED"
## 
## $user$profile_sidebar_fill_color
## [1] "DDEEF6"
## 
## $user$profile_text_color
## [1] "333333"
## 
## $user$profile_use_background_image
## [1] TRUE
## 
## $user$has_extended_profile
## [1] FALSE
## 
## $user$default_profile
## [1] TRUE
## 
## $user$default_profile_image
## [1] FALSE
## 
## $user$following
## [1] FALSE
## 
## $user$follow_request_sent
## [1] FALSE
## 
## $user$notifications
## [1] FALSE
## 
## $user$translator_type
## [1] "none"
## 
## $user$withheld_in_countries
## list()
## 
## 
## $retweeted_status
## $retweeted_status$display_text_range
## named list()
## 
## $retweeted_status$entities
## $retweeted_status$entities$hashtags
## named list()
## 
## $retweeted_status$entities$symbols
## named list()
## 
## $retweeted_status$entities$user_mentions
## named list()
## 
## $retweeted_status$entities$urls
## named list()
## 
## $retweeted_status$entities$media
## named list()
## 
## 
## $retweeted_status$extended_entities
## $retweeted_status$extended_entities$media
## named list()
## 
## 
## $retweeted_status$user
## $retweeted_status$user$entities
## $retweeted_status$user$entities$description
## $retweeted_status$user$entities$description$urls
## named list()
## 
## 
## $retweeted_status$user$entities$url
## $retweeted_status$user$entities$url$urls
## named list()
## 
## 
## 
## $retweeted_status$user$withheld_in_countries
## named list()
## 
## 
## 
## $is_quote_status
## [1] TRUE
## 
## $retweet_count
## [1] 0
## 
## $favorite_count
## [1] 0
## 
## $favorited
## [1] FALSE
## 
## $retweeted
## [1] FALSE
## 
## $possibly_sensitive
## [1] FALSE
## 
## $lang
## [1] "de"
## 
## $quoted_status_id
## [1] 9.08622e+17
## 
## $quoted_status_id_str
## [1] "908622041674387457"
## 
## $quoted_status_permalink
## $quoted_status_permalink$url
## [1] "https://t.co/GKL8PU1dyU"
## 
## $quoted_status_permalink$expanded
## [1] "https://twitter.com/OrunP/status/908622041674387457"
## 
## $quoted_status_permalink$display
## [1] "twitter.com/OrunP/status/9…"
## 
## 
## $quoted_status
## $quoted_status$display_text_range
## named list()
## 
## $quoted_status$entities
## $quoted_status$entities$hashtags
## named list()
## 
## $quoted_status$entities$symbols
## named list()
## 
## $quoted_status$entities$user_mentions
## named list()
## 
## $quoted_status$entities$urls
## named list()
## 
## $quoted_status$entities$media
## named list()
## 
## 
## $quoted_status$user
## $quoted_status$user$entities
## $quoted_status$user$entities$url
## $quoted_status$user$entities$url$urls
## named list()
## 
## 
## $quoted_status$user$entities$description
## $quoted_status$user$entities$description$urls
## named list()
## 
## 
## 
## $quoted_status$user$withheld_in_countries
## named list()
## 
## 
## $quoted_status$extended_entities
## $quoted_status$extended_entities$media
## named list()
## 
## 
## $quoted_status$quoted_status_permalink
## named list()

Iterate over all the lines you read from the file, interpreting each one as a JSON object with the data of a tweet. For each tweet that is a retweet, save the screen name of the user who tweeted it and the screen name of the user who made the tweet being retweeted. Save these two in a data frame with two columns.

# Collect one (retweeting user, retweeted user) pair of screen names per retweet.
# Both vectors are preallocated to the number of lines (the upper bound on the
# number of retweets) and trimmed afterwards, instead of growing them one
# element at a time.
n_lines <- length(lines)
userName <- character(n_lines)
RTuserName <- character(n_lines)
n_retweets <- 0L
# A dedicated loop variable is used so the sampled `line` is not overwritten.
for (json_line in lines) {
  tweet <- fromJSON(json_line)
  # A tweet can carry a `retweeted_status` with only empty fields (see the
  # sample tweet printed earlier), so the presence of its `id_str` is what
  # identifies a retweet.
  if (!is.null(tweet$retweeted_status$id_str)) {
    n_retweets <- n_retweets + 1L
    userName[n_retweets] <- tweet$user$screen_name
    RTuserName[n_retweets] <- tweet$retweeted_status$user$screen_name
  }
}
userName <- userName[seq_len(n_retweets)]
RTuserName <- RTuserName[seq_len(n_retweets)]
# Keep the names as character (not factor) columns, consistent with how
# poldf is read, so that they can be compared and joined directly.
tweetsdf <- data.frame(
  userName = tolower(userName),
  RTuserName = tolower(RTuserName),
  stringsAsFactors = FALSE
)

As a last step, filter the data frame to remove cases in which a politician was retweeting themselves. How many retweets did you have in the dataset before and after this filter?

# Number of retweets before removing self-retweets.
nrow(tweetsdf)
## [1] 19047
# Keep only the rows where the retweeting and the retweeted account differ.
tweetsdf <- filter(tweetsdf, userName != RTuserName)
# Number of retweets that remain.
nrow(tweetsdf)
## [1] 18959

Your turn

Which user has the highest degree? Which one is retweeted the most? Do you notice any difference in the type of accounts on the top of these two metrics?

# Your code here

2. Build social network of retweets

Using inner_join, merge the tweets data frame with the politicians data frame such that each row also contains the information of the politician who wrote the tweet. After this, each tweet should be in one row including also the id of the user that posted it and the political party they belong to.

# Attach the party and id of the retweeting politician to every retweet.
# inner_join() keeps only the retweets whose author appears in poldf.
# The joined columns are renamed by name rather than by position, so the
# result does not depend on the column order of the CSV file.
# NOTE(review): assumes poldf contributes the columns `party` and `id`
# (as used when building the vertices) - confirm against the CSV header.
mergedf1 <- tweetsdf %>%
  inner_join(poldf, by = c("userName" = "screenName")) %>%
  rename(userParty = party, userid = id)

As above, use inner_join to merge the result of the previous chunk with the politicians data frame, but this time match by the screen name of the politician being retweeted. After this, the resulting data frame should contain the id and the party of both the politician retweeting and the politician being retweeted.

# Attach the party and id of the retweeted politician as well.
# inner_join() keeps only the retweets whose original author appears in poldf.
# The newly joined columns are renamed by name rather than by position, so
# the result does not depend on the column order of the CSV file.
# NOTE(review): assumes poldf contributes the columns `party` and `id`
# (as used when building the vertices) - confirm against the CSV header.
mergedf2 <- mergedf1 %>%
  inner_join(poldf, by = c("RTuserName" = "screenName")) %>%
  rename(RTparty = party, RTuserid = id)

Build the vertices and edges data frames for the network. The vertices data frame only needs to contain the id of each politician, their screen name (as a column called “name”), and the party they belong to. The edges data frame needs the id of the user being retweeted as “from” and the id of the user retweeting as “to”. This way, edges follow the direction of information flow. Use group_by to aggregate the multiple instances of these pairs such that the weight of each edge is the number of times a user retweeted another.

# Vertices: one row per politician with id, screen name (as `name`) and party.
vertices <- poldf %>%
  select(id, name = screenName, party)
# Edges: from the retweeted politician to the retweeting politician, with
# `weight` counting how many times that pair occurs.
# `.groups = "drop"` returns an ungrouped data frame, so no leftover grouping
# by `from` is carried into later steps (and summarize() emits no message).
edges <- mergedf2 %>%
  select(from = RTuserid, to = userid) %>%
  group_by(from, to) %>%
  summarize(weight = n(), .groups = "drop")

Now do the corresponding call to tbl_graph to build the graph as an undirected graph, using the column id of nodes as identifier (node_key).

# Assemble the retweet network from the vertices and edges data frames as an
# undirected graph, then print its summary.
# NOTE(review): `from`/`to` are integers here, so they presumably refer to row
# positions in `vertices` (which coincide with `id`) - confirm how tbl_graph
# applies node_key in this case.
graph <- tbl_graph(
  nodes = vertices,
  edges = edges,
  directed = FALSE,
  node_key = "id"
)
graph
## # A tbl_graph: 388 nodes and 739 edges
## #
## # An undirected multigraph with 143 components
## #
## # Node Data: 388 × 3 (active)
##       id name            party
##    <int> <chr>           <chr>
##  1     1 andreaskirstein AL   
##  2     2 bergerwthur     AL   
##  3     3 maenij          AL   
##  4     4 walterangst     AL   
##  5     5 asba_j          BDP  
##  6     6 beining_bdp     BDP  
##  7     7 bernhardguhl    BDP  
##  8     8 boschettisteen  BDP  
##  9     9 charlespiguet   BDP  
## 10    10 enea_martinelli BDP  
## # ℹ 378 more rows
## #
## # Edge Data: 739 × 3
##    from    to weight
##   <int> <int>  <int>
## 1     1     4      4
## 2     5    13      3
## 3     5    14      5
## # ℹ 736 more rows

Show a simple visualization of the graph with the FR layout algorithm. Does it look like a social network?

# Plot the network with the FR ("fr") layout: edges as straight links,
# nodes as points, on the plain graph theme.
ggraph(graph, layout = "fr") +
  geom_edge_link() +
  geom_node_point() +
  theme_graph()

Your turn

Rebuild the network above but instead of making links for retweets, do it for mentions of other users in tweets. Explore the JSON structure because a tweet can contain more than one mention. Visualize the network. Just from eyeballing the visualization, do you notice some differences? How can you turn them into a number?

# Your code here