Difference between revisions of "Facebook and R"

From PeformIQ Upgrade
Jump to navigation Jump to search
Line 50: Line 50:
<pre>
<pre>
## convert Facebook date format to R date format
## convert Facebook date format to R date format
format.facebook.date <- function(datestring) {
format.facebook.date <- function(datestring) {
     date <- as.POSIXct(datestring, format = "%Y-%m-%dT%H:%M:%S+0000", tz = "GMT")
     date <- as.POSIXct(datestring, format = "%Y-%m-%dT%H:%M:%S+0000", tz = "GMT")
}
}
## aggregate metric counts over month
## aggregate metric counts over month
aggregate.metric <- function(metric) {
aggregate.metric <- function(metric) {
     m <- aggregate(page[[paste0(metric, "_count")]], list(month = page$month),  
     m <- aggregate(page[[paste0(metric, "_count")]], list(month = page$month),  
Line 61: Line 64:
     return(m)
     return(m)
}
}
# create data frame with average metric counts per month
# create data frame with average metric counts per month
page$datetime <- format.facebook.date(page$created_time)
page$datetime <- format.facebook.date(page$created_time)
page$month <- format(page$datetime, "%Y-%m")
page$month <- format(page$datetime, "%Y-%m")
df.list <- lapply(c("likes", "comments", "shares"), aggregate.metric)
df.list <- lapply(c("likes", "comments", "shares"), aggregate.metric)
df <- do.call(rbind, df.list)
df <- do.call(rbind, df.list)
# visualize evolution in metric
# visualize evolution in metric
library(ggplot2)
library(ggplot2)
library(scales)
library(scales)
ggplot(df, aes(x = month, y = x, group = metric)) + geom_line(aes(color = metric)) +  
ggplot(df, aes(x = month, y = x, group = metric)) + geom_line(aes(color = metric)) +  
     scale_x_date(breaks = "years", labels = date_format("%Y")) + scale_y_log10("Average count per post",  
     scale_x_date(breaks = "years", labels = date_format("%Y")) + scale_y_log10("Average count per post",  

Revision as of 15:28, 9 August 2016

Also see - Getting Started with R

Some of the R commands...

# Install Rfacebook. The CRAN release and the GitHub development version are
# alternatives; running both simply replaces the first install with the second.
install.packages("Rfacebook")  # from CRAN
library(devtools)
# install_github() expects the repository as "user/repo". The older form with a
# separate username argument is no longer accepted (the second positional
# argument is now the git ref).
install_github("pablobarbera/Rfacebook", subdir = "Rfacebook")  # from GitHub
library(Rfacebook)
# token generated here: https://developers.facebook.com/tools/explorer 
token <- "XXXXXXXXXXXXXX"  # placeholder: paste your own access token here
me <- getUsers("xxxx", token, private_info = TRUE)  # "xxxx" is a placeholder user ID
me$name # my name
my_friends <- getFriends(token, simplify = TRUE)
head(my_friends$id, n = 1) # first user ID in the returned list (not necessarily the lowest)
my_friends_info <- getUsers(my_friends$id, token, private_info = TRUE)
table(my_friends_info$gender)  # gender
# locale codes presumably look like "en_US": characters 1-2 and 4-5 are tabulated
table(substr(my_friends_info$locale, 1, 2))  # language
table(substr(my_friends_info$locale, 4, 5))  # country
table(my_friends_info$relationship_status)["It's complicated"]  # relationship status
mat <- getNetwork(token, format = "adj.matrix")
dim(mat)

See - http://blog.revolutionanalytics.com/2013/11/how-to-analyze-you-facebook-friends-network-with-r.html

# Search public posts mentioning "upworthy" within a date window and show the
# row with the highest like count.
# Fixed: the `until` date previously read "10 august2016 10:00" (missing space).
posts <- searchFacebook(string = "upworthy", token, n = 500,
    since = "20 january 2016 00:00", until = "10 august 2016 10:00")
posts[which.max(posts$likes_count), ]
## Convert Facebook date format to R date format.
##
## Parses timestamps written as "YYYY-MM-DDTHH:MM:SS+0000" (the literal "+0000"
## suffix is part of the format string) into POSIXct values in the GMT time
## zone. Strings that do not match the pattern come back as NA.
##
## datestring: character vector of timestamps.
## Returns: a POSIXct vector with one element per input string.

format.facebook.date <- function(datestring) {
    ## Return the parsed value directly. Ending the function on an assignment
    ## would hand the result back invisibly, so a bare call at the console
    ## would print nothing.
    as.POSIXct(datestring, format = "%Y-%m-%dT%H:%M:%S+0000", tz = "GMT")
}

## Aggregate metric counts over month.
##
## Averages the "<metric>_count" column within each distinct value of the
## `month` column and returns one row per month.
##
## metric: metric name without the "_count" suffix, e.g. "likes".
## data:   data frame holding the "<metric>_count" and `month` columns. Defaults
##         to the `page` object visible from where this function is defined, so
##         existing one-argument calls keep working unchanged.
## Returns: a data frame with columns `month` (a Date on the 15th of the month),
##          `x` (the mean count) and `metric` (the metric name).

aggregate.metric <- function(metric, data = page) {
    count_col <- paste0(metric, "_count")
    ## Fail with a readable message instead of an obscure aggregate() error.
    if (is.null(data[[count_col]])) {
        stop("column '", count_col, "' not found in data", call. = FALSE)
    }
    m <- aggregate(data[[count_col]], list(month = data$month), mean)
    ## `month` is expected to hold "YYYY-MM" strings; a day is appended so that
    ## as.Date() can parse them (mid-month is used as the plotting position).
    m$month <- as.Date(paste0(m$month, "-15"))
    m$metric <- metric
    m
}

# create data frame with average metric counts per month

# Parse each post's creation timestamp, then derive its "YYYY-MM" month key.
page[["datetime"]] <- format.facebook.date(page$created_time)
page[["month"]] <- format(page$datetime, "%Y-%m")

# One aggregated data frame per metric ...
df.list <- lapply(
    X = c("likes", "comments", "shares"),
    FUN = aggregate.metric
)

# ... stacked row-wise into a single long data frame.
df <- do.call(what = rbind, args = df.list)

# visualize evolution in metric

library(ggplot2)
library(scales)

# One line per metric: monthly averages over time on a log-scaled y axis,
# with yearly tick labels and no x-axis title.
ggplot(data = df, mapping = aes(x = month, y = x, group = metric)) +
    geom_line(aes(color = metric)) +
    scale_x_date(breaks = "years", labels = date_format("%Y")) +
    scale_y_log10(
        "Average count per post",
        breaks = c(10, 100, 1000, 10000, 50000)
    ) +
    theme_bw() +
    theme(axis.title.x = element_blank())
# Take the ID in the first row of `page` and fetch that post, requesting its
# likes but not its comments.
# NOTE(review): "most recent" assumes `page` is ordered newest first -- confirm.
post_id <- head(page$id, n = 1)  ## ID of most recent post
post <- getPost(post_id, token, n = 1000, likes = TRUE, comments = FALSE)

# Look up the profiles of the users listed in the post's likes.
users <- getUsers(post$likes$from_id, token)

# Tabulate attributes of those users. Characters 4-5 of the locale are
# presumably the country part of codes such as "en_US" -- confirm.
table(users$gender)  # gender
table(substr(users$locale, 4, 5))  # country