library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
library(ggplot2)
# Import the tab-delimited export, keeping every field as raw text: quote
# processing is disabled and strings are not turned into factors.
# NOTE(review): the printed names()/str() output in this file shows a
# one-cell title used as the header and the real labels ("Friend", "Age",
# "Photos", ...) sitting in data row 1, so every column is character.
# Presumably the file starts with a title line -- confirm before relying on
# column names or types.
data_csv <- read.delim(
  file = "Facebook Friends.csv",
  quote = "",
  stringsAsFactors = FALSE
)
colnames(data_csv)
## [1] "X.Facebook.Social.Network.Database..n...715.observations..m...21.variables.."
## [2] "X"
## [3] "X.1"
## [4] "X.2"
## [5] "X.3"
## [6] "X.4"
## [7] "X.5"
## [8] "X.6"
## [9] "X.7"
## [10] "X.8"
## [11] "X.9"
## [12] "X.10"
## [13] "X.11"
## [14] "X.12"
## [15] "X.13"
## [16] "X.14"
## [17] "X.15"
## [18] "X.16"
## [19] "X.17"
## [20] "X.18"
## [21] "X.19"
## [22] "X.20"
# Compact structural overview: dimensions, column types and leading values.
utils::str(object = data_csv)
## 'data.frame': 716 obs. of 22 variables:
## $ X.Facebook.Social.Network.Database..n...715.observations..m...21.variables..: chr "Friend " "1" "2" "3" ...
## $ X : chr "Age" "57" "42" "42" ...
## $ X.1 : chr "Photos" "7" "531" "1396" ...
## $ X.2 : chr "# of Tags" "27" "241" "423" ...
## $ X.3 : chr "Albums" "5" "19" "40" ...
## $ X.4 : chr "Gender " "1" "0" "0" ...
## $ X.5 : chr "Emp" "1" "0" "1" ...
## $ X.6 : chr "Profile" "1" "0" "1" ...
## $ X.7 : chr "Cover" "0" "0" "0" ...
## $ X.8 : chr "Orientation" "1" "1" "1" ...
## $ X.9 : chr "Relationship" "1" "1" "1" ...
## $ X.10 : chr "Posts" "1" "5" "186" ...
## $ X.11 : chr "Replies" "0" "1" "31" ...
## $ X.12 : chr "MidWest" "0" "0" "1" ...
## $ X.13 : chr "Seast" "0" "0" "0" ...
## $ X.14 : chr "West" "1" "1" "0" ...
## $ X.15 : chr "Children" "3" "3" "2" ...
## $ X.16 : chr "Likes" "21" "9" "43" ...
## $ X.17 : chr "Edu" "0" "0" "1" ...
## $ X.18 : chr "Events" "0" "1" "1" ...
## $ X.19 : chr "USA" "1" "1" "1" ...
## $ X.20 : chr "Friends" "68" "234" "727" ...
# Open the spreadsheet-style viewer only in an interactive session; View()
# needs a GUI and can fail when the script is run via Rscript or knitted.
if (interactive()) {
  View(data_csv)
}
# Per-column summary. Every column is character at this point, so this only
# reports length, class and mode.
summary(data_csv)
## X.Facebook.Social.Network.Database..n...715.observations..m...21.variables..
## Length:716
## Class :character
## Mode :character
## X X.1 X.2 X.3
## Length:716 Length:716 Length:716 Length:716
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## X.4 X.5 X.6 X.7
## Length:716 Length:716 Length:716 Length:716
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## X.8 X.9 X.10 X.11
## Length:716 Length:716 Length:716 Length:716
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## X.12 X.13 X.14 X.15
## Length:716 Length:716 Length:716 Length:716
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## X.16 X.17 X.18 X.19
## Length:716 Length:716 Length:716 Length:716
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## X.20
## Length:716
## Class :character
## Mode :character
# First six rows; row 1 holds the real column labels (see output below).
utils::head(x = data_csv, n = 6L)
## X.Facebook.Social.Network.Database..n...715.observations..m...21.variables..
## 1 Friend
## 2 1
## 3 2
## 4 3
## 5 4
## 6 5
## X X.1 X.2 X.3 X.4 X.5 X.6 X.7 X.8
## 1 Age Photos # of Tags Albums Gender Emp Profile Cover Orientation
## 2 57 7 27 5 1 1 1 0 1
## 3 42 531 241 19 0 0 0 0 1
## 4 42 1396 423 40 0 1 1 0 1
## 5 55 394 139 48 0 1 1 1 1
## 6 30 916 231 78 1 1 1 1 1
## X.9 X.10 X.11 X.12 X.13 X.14 X.15 X.16 X.17 X.18 X.19
## 1 Relationship Posts Replies MidWest Seast West Children Likes Edu Events USA
## 2 1 1 0 0 0 1 3 21 0 0 1
## 3 1 5 1 0 0 1 3 9 0 1 1
## 4 1 186 31 1 0 0 2 43 1 1 1
## 5 1 6 4 1 0 0 2 187 0 4 1
## 6 1 412 752 1 0 0 1 34 0 2 1
## X.20
## 1 Friends
## 2 68
## 3 234
## 4 727
## 5 437
## 6 372
# Last six rows of the imported table.
utils::tail(x = data_csv, n = 6L)
## X.Facebook.Social.Network.Database..n...715.observations..m...21.variables..
## 711 710
## 712 711
## 713 712
## 714 713
## 715 714
## 716 715
## X X.1 X.2 X.3 X.4 X.5 X.6 X.7 X.8 X.9 X.10 X.11 X.12 X.13 X.14 X.15 X.16
## 711 25 111 8 2 0 1 1 1 1 0 21 27 0 0 0 0 21
## 712 25 499 4 0 0 1 1 1 1 1 16 19 0 0 1 0 49
## 713 22 232 101 4 1 0 1 0 1 0 698 552 1 0 0 0 223
## 714 22 327 329 29 1 1 1 0 1 1 543 399 1 0 0 0 520
## 715 24 219 0 8 1 1 1 1 1 0 2 7 1 0 0 1 66
## 716 19 465 0 22 0 1 1 0 1 1 3 6 1 0 0 0 990
## X.17 X.18 X.19 X.20
## 711 0 3 0 698
## 712 0 11 1 "1,689"
## 713 0 4 1 327
## 714 0 6 1 706
## 715 1 1 1 500
## 716 1 3 1 585
# Keep only three columns; per the label row printed below these are
# Gender (X.4), Posts (X.10) and Friends (X.20).
subset1 <- dplyr::select(
  data_csv,
  dplyr::all_of(c("X.4", "X.10", "X.20"))
)
utils::head(x = subset1, n = 6L)
## X.4 X.10 X.20
## 1 Gender Posts Friends
## 2 1 1 68
## 3 0 5 234
## 4 0 186 727
## 5 0 6 437
## 6 1 412 372
# Read the tab-delimited text export; the file is declared as UTF-16LE.
data_txt <- read.delim(
  "Facebook Friends.txt",
  sep = "\t",
  header = TRUE,
  fileEncoding = "UTF-16LE"
)
# View() needs a GUI, so only open the viewer when running interactively.
if (interactive()) {
  View(data_txt)
}
# Read the first worksheet of the Excel workbook.
# NOTE(review): the output below shows the sheet title used as the first
# column name, the other columns auto-named `...2`..`...22`, an all-NA row
# and then the real labels as data -- consider `skip =` once confirmed.
data_xlsx <- readxl::read_excel("Facebook Friends.xlsx", sheet = 1)
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...22`
# First ten rows of the Excel import.
utils::head(x = data_xlsx, n = 10L)
## # A tibble: 10 × 22
## Facebook Social Netwo…¹ ...2 ...3 ...4 ...5 ...6 ...7 ...8 ...9 ...10
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 2 Friend Age Phot… # of… Albu… Gend… Emp Prof… Cover Orie…
## 3 1 57 7 27 5 1 1 1 0 1
## 4 2 42 531 241 19 0 0 0 0 1
## 5 3 42 1396 423 40 0 1 1 0 1
## 6 4 55 394 139 48 0 1 1 1 1
## 7 5 30 916 231 78 1 1 1 1 1
## 8 6 25 241 345 6 1 1 1 1 1
## 9 7 28 138 206 15 1 1 0 1 1
## 10 8 32 167 581 10 1 1 1 0 1
## # ℹ abbreviated name:
## # ¹​`Facebook Social Network Database (n = 715 observations, m = 21 variables)`
## # ℹ 12 more variables: ...11 <chr>, ...12 <chr>, ...13 <chr>, ...14 <chr>,
## # ...15 <chr>, ...16 <chr>, ...17 <chr>, ...18 <chr>, ...19 <chr>,
## # ...20 <chr>, ...21 <chr>, ...22 <chr>
# View() needs a GUI, so only open the viewer when running interactively.
if (interactive()) {
  View(data_xlsx)
}
# Pie chart of posts (X.10) split by age (X) for rows whose age is 1-40.
# X is stored as text; %in% compares it against the integers as strings, so
# the label row ("Age") is excluded as well.
subset_data5 <- data_csv[data_csv$X %in% 1:40, ]
subset_data5[["X.10"]] <- as.numeric(subset_data5[["X.10"]])

# A single stacked bar (constant x) wrapped into polar coordinates.
p5 <- ggplot(
  data = subset_data5,
  mapping = aes(x = "", y = X.10, fill = factor(X))
)
p5 <- p5 + geom_bar(stat = "identity")
p5 <- p5 + labs(title = "Posts By Age 13 to 40", x = NULL, y = NULL)
p5 <- p5 + scale_fill_discrete(name = "Age")
p5 <- p5 + coord_polar(theta = "y", start = 0)
p5 <- p5 + theme_void()
print(p5)

# Histogram of posts per friend (X.10), bars filled by age (X), for rows
# whose age is 1-40. X is text, so the match is against "1".."40".
subset_data7 <- data_csv[is.element(data_csv$X, 1:40), ]
subset_data7[["X.10"]] <- as.numeric(subset_data7[["X.10"]])

p7 <- ggplot(data = subset_data7, mapping = aes(x = X.10, fill = factor(X)))
# Bins are 10 posts wide, outlined in black and slightly transparent.
p7 <- p7 + geom_histogram(binwidth = 10, color = "black", alpha = 0.8)
p7 <- p7 + labs(title = "Posts By Age 13 to 40", x = "Posts", y = "Frequency")
p7 <- p7 + scale_fill_discrete(name = "Age")
p7 <- p7 + theme_minimal()
print(p7)

# Stacked bar chart of photo counts (X.1) by age (X) for the whole file.
# Both columns are imported as text and row 1 of data_csv holds the labels
# ("Age", "Photos"), so plotting them directly makes ggplot treat photo
# counts as categories and draws the label row as a bar. Convert to numbers
# and drop rows that are not numeric first.
photos_by_age <- data.frame(
  # Coercion warnings are expected here (the label row is not a number).
  age = suppressWarnings(as.numeric(data_csv$X)),
  # Strip literal quotes and thousands separators (the Friends column shows
  # a value printed as "1,689"; Photos may be formatted the same way).
  photos = suppressWarnings(as.numeric(gsub("[\",]", "", data_csv$X.1)))
)
photos_by_age <- photos_by_age[complete.cases(photos_by_age), ]

# factor(age) keeps one bar per age, ordered numerically rather than as text.
p <- ggplot(photos_by_age, aes(x = factor(age), y = photos)) +
  geom_col(color = "gold") +
  labs(title = "Photos By Age", x = "Age", y = "Photos")
print(p)

# Stacked bar chart of photo counts (X.1) by age (X) for rows aged 1-20.
# NOTE(review): the title says 13-20 but the filter keeps 1-20 -- confirm.
subset_data <- subset(data_csv, X %in% 1:20)
# X.1 is imported as text; without this conversion ggplot treats the photo
# counts as categories instead of bar heights. Quotes and thousands
# separators are stripped first (the Friends column shows a value printed
# as "1,689"; Photos may be formatted the same way).
subset_data$X.1 <- as.numeric(gsub("[\",]", "", subset_data$X.1))

# factor(as.numeric(X)) orders the ages numerically rather than as text.
p4 <- ggplot(subset_data, aes(x = factor(as.numeric(X)), y = X.1)) +
  geom_col() +
  labs(title = "Photos By Age 13-20", x = "Age", y = "Photos")
print(p4)

# Stacked bar chart of photo counts (X.1) by age (X) for rows aged 1-30.
subset_data2 <- subset(data_csv, X %in% 1:30)
# X.1 is imported as text; without this conversion ggplot treats the photo
# counts as categories instead of bar heights. Quotes and thousands
# separators are stripped first (the Friends column shows a value printed
# as "1,689"; Photos may be formatted the same way).
subset_data2$X.1 <- as.numeric(gsub("[\",]", "", subset_data2$X.1))

# factor(as.numeric(X)) orders the ages numerically rather than as text.
p2 <- ggplot(subset_data2, aes(x = factor(as.numeric(X)), y = X.1)) +
  geom_col(color = "blue") +
  labs(title = "Photos By Age (1-30)", x = "Age", y = "Photos")
print(p2)

# Stacked bar chart of posts (X.10) by age (X) for rows aged 1-40, one
# segment per friend.
subset_data3 <- subset(data_csv, X %in% 1:40)
# Posts are imported as text; strip literal quotes and thousands separators
# before converting so values such as "1,689" do not become NA.
subset_data3$X.10 <- as.numeric(gsub("[\",]", "", subset_data3$X.10))

# The bars stack per age, so the tallest bar is the largest per-age TOTAL,
# not the largest single row. Deriving the y limit from the row maximum made
# the scale censor every segment stacked above it (the former "Removed 375
# rows containing missing values" warning), so use the per-age totals.
posts_per_age <- tapply(
  subset_data3$X.10,
  subset_data3$X,
  sum,
  na.rm = TRUE
)
y_upper <- max(posts_per_age) * 1.1

# factor(as.numeric(X)) orders the ages numerically rather than as text.
p3 <- ggplot(subset_data3, aes(x = factor(as.numeric(X)), y = X.10)) +
  geom_col(color = "red") +
  labs(title = "Posts By Age 13 to 40", x = "Age", y = "Posts") +
  scale_y_continuous(limits = c(0, y_upper))
print(p3)
