Plot on statistics of data in r - r

I have a list of lists, where each list corresponds to a certain year, starting from 2005. I want a plot that looks like the one below, where the distribution is shown and the x-axis shows the years (one per list). In addition, it shows the mean, 1 sd and the histogram. I assume it would be done using ggplot, but I wasn't able to figure it out. Note that the picture was taken from a simple Google search and my data distribution is not going to look like this. This picture is only for concept reference.
Here's a subset of my list, with 3 lists within, for years 2005-2007.
dput(eig_inter_list[c(1,2,3)])
list(structure(c(0.314642768055768, 0.325549276750512, 0.32407558151974,
0.503037906331374, 0.177702652110427, 0.649874624573978, 0.22304809425227,
0.19744998358554, 0.322951763103893, 0.417206897701431, 0.378171649699382,
0.25116873932351, 0.485979263395877, 0.411851682123569, 1, 0.478943307151144,
0.710983939873711, 0.205890770051514, 0.335737092407451, 0.495130707288105,
0.395609776015166, 0.367388748844004, 0.236987996918897, 0.17727518450146,
0.542503233906977, 0.22322071021728, 0.305512801458551, 0.703463566780157,
0.0565731620746889, 0.281865665377428, 0.34675705775566, 0.240102820697777,
0.847955796670962, 0.467145199252136, 0.408110899113892, 0.121168347150886,
0.166279979571487, 0.225178903127632, 0.836003339903779, 0.298624839828843,
0.156832369183448, 0.331724029229045, 0.28607359410209, 0.438251331553188,
0.531970745173967, 0.473559658632999, 0.339927590064139, 0.329554185654697,
0.23010077174135, 0.444399306655095, 0.265064970418036, 0.54141477127972,
0.181034728719507, 0.217971681487819, 0.702604336646787, 0.260277784525433,
0.9313793258404), .Dim = c(57L, 1L)), structure(c(0.758053727134031,
0.389169917077854, 0.553155900977713, 0.680957693695371, 0.688478238016961,
0.5406614343729, 0.654085903776063, 0.249245017029272, 0.527924916117291,
0.607686315557293, 0.328402935290058, 0.472653172631753, 0.503447923078263,
0.540956133924371, 0.367957509649887, 0.537869789262281, 0.177185397396901,
0.49323257867289, 0.380042131111929, 0.76085241271694, 0.540174821432495,
0.283063101888133, 0.319997654939547, 0.651366853994039, 0.706918785162937,
0.578280317866076, 0.871853266130338, 0.874076858150783, 1, 0.611669207162514,
0.28036164071929, 0.292694661837052, 0.610126478002428, 0.380386438921713,
0.669982812090512, 0.122065848180652, 0.974400986291088, 0.945135694392546,
0.462456530300744, 0.134501132230783, 0.322784038090134, 0.317780702999941,
0.662847059747895, 0.523285813371882, 0.668825630786058, 0.79637143229052,
0.379229953581731, 0.282559418794954, 0.745322058370275, 0.697765403065504,
0.282171232377215, 0.672824204974444, 0.812732702343435, 0.747236048953355,
0.810360825374334, 0.147736041262474, 0.638551018944304), .Dim = c(57L,
1L)), structure(c(0.580650487050986, 0.226493031789664, 0.216153469107205,
0.679582322890881, 0.585765696317977, 0.277602052620714, 0.301697850942933,
0.539494048925467, 0.22052504674914, 0.279450501649262, 0.292210734005604,
0.406706750636561, 0.253178327897084, 0.26902431345799, 0.72080329402394,
0.538483400997092, 0.132513153071638, 0.169267787671607, 0.355070307460915,
0.420987061713029, 0.355905556319682, 0.423354094728718, 0.225064747308464,
0.368813823547299, 0.290838727568061, 0.191959913771819, 0.412085260195494,
0.220140351021593, 0.251950925564949, 0.360732509927935, 0.130353223587661,
0.355856853738614, 0.399558273021718, 0.351244298106977, 0.280432050072677,
0.51121978973842, 0.300682631494935, 0.39923136729231, 0.217749058297645,
0.347719424129099, 0.28916475155952, 0.531747942212439, 0.384583756749702,
0.19911455191363, 0.574044809883757, 0.206919558647874, 1, 0.327370421244658,
0.561049901898945, 0.177575568867026, 0.311627824832627, 0.214432911950344,
0.405893310593152, 0.328365875498285, 0.403268555194588, 0.21904514354561,
0.243848932866513), .Dim = c(57L, 1L)))

Update:
Adding @Dan Adams's suggestion (see comments):
# Half-violin + dot plot per list element, overlaid with the per-group mean
# (point) and mean +/- 1 standard deviation (vertical range), as asked for in
# the question. A bare stat_summary() would fall back to mean_se(), i.e. the
# standard error rather than the standard deviation.
map_df(df_list, ~as.data.frame(.x), .id = "id") %>%
  ggplot(aes(x = id, y = V1, fill = id)) +
  geom_violindot(stackratio = 0.7) +
  stat_summary(
    fun = mean,
    fun.min = function(v) mean(v) - sd(v),
    fun.max = function(v) mean(v) + sd(v)
  ) +
  theme_modern()
First answer:
What you need is a Half-violin Half-dot plot: https://easystats.github.io/see/reference/geom_violindot.html
You can use see package with geom_violindot() function after preparing your list of lists, like here:
library(ggplot2)
library(purrr)
library(see)
# Label each element of df_list; the label becomes the grouping id below
names(df_list) <- paste0("df", 1:3)
# Stack the elements into one data frame (the element name goes into the
# "id" column), then draw one half-violin/half-dot per id
map_df(df_list, as.data.frame, .id = "id") %>%
  ggplot(aes(x = id, y = V1, fill = id)) +
  geom_violindot() +
  theme_modern()

Related

ggplot2 - Barchart or Histogram in R - plotting more than one variable

So sorry I'm quite new to R and have been trying to do this by myself but have been struggling.
I'm trying to do some sort of barplot or histogram of the tag 'Amateur' over the years 2007 to 2013 to show how it's changed over time.
The data set was downloaded from: https://sexualitics.github.io/ specifically looking at the hamster.csv
Here is some of the initial preprocessing of the data below.
# Inspect the raw data: upload_date still has to become a Date column, and a
# Year column has to be derived from it
head(xhamster)
# Parse the day/month/year strings into Date values
xhamster$upload_date <- as.Date(xhamster$upload_date, format = "%d/%m/%Y")
# Extract the year and store it as an integer column
xhamster$Year <- as.integer(year(ymd(xhamster$upload_date)))
# Check the changes were made correctly
head(xhamster)
The filter for the years:
# One data frame per upload year, 2007-2013. A plain filter() on the integer
# Year column replaces the superseded filter_at()/any_vars() construct and
# compares against integers instead of the strings "2007", "2008", ...
# Rows with a missing Year are dropped, exactly as before.
Yr2007 <- xhamster %>% filter(Year == 2007L)
Yr2008 <- xhamster %>% filter(Year == 2008L)
Yr2009 <- xhamster %>% filter(Year == 2009L)
Yr2010 <- xhamster %>% filter(Year == 2010L)
Yr2011 <- xhamster %>% filter(Year == 2011L)
Yr2012 <- xhamster %>% filter(Year == 2012L)
Yr2013 <- xhamster %>% filter(Year == 2013L)
For example, I want to create a plot for the tag 'Amateur' in the data. Here is some of the code I have already done:
# Row positions in the full data whose channels mention "Amateur"
Amateur <- grep("Amateur", xhamster$channels)
# Number of matching rows within each yearly subset
Amateur_2007 <- length(grep("Amateur", Yr2007$channels))
Amateur_2008 <- length(grep("Amateur", Yr2008$channels))
Amateur_2009 <- length(grep("Amateur", Yr2009$channels))
Amateur_2010 <- length(grep("Amateur", Yr2010$channels))
Amateur_2011 <- length(grep("Amateur", Yr2011$channels))
Amateur_2012 <- length(grep("Amateur", Yr2012$channels))
Amateur_2013 <- length(grep("Amateur", Yr2013$channels))
Plot:
# Collect the yearly counts into a one-row matrix (one column per year) and
# draw them as bars
Amateur <- cbind(
  Amateur_2007, Amateur_2008, Amateur_2009, Amateur_2010,
  Amateur_2011, Amateur_2012, Amateur_2013
)
barplot(Amateur, beside = TRUE, col = c("red", "orange"), ylim = c(0, 90000))
# Main title and axis labels in a single call
title(
  main = "Usage of 'Amateur' as a tag from 2007 to 2013",
  xlab = "Amateur",
  ylab = "Frequency"
)
Plot showing amateur tag over the years
However this isn't exactly a great plot. I'm looking for a way to plot using ggplot ideally and to have the names of each bar to be the year rather than 'Amateur_2010' etc. How do I do this?
An even better bonus if I can add 'nb_views' for each year with this tag usage or something like that.
There are lots of ways to approach this, here is how I would tackle it:
library(tidyverse)
library(lubridate)
library(vroom)
# Read the raw export (looks for xhamster.csv in the working directory)
xhamster <- vroom("xhamster.csv")
# Parse upload_date with the day/month/year format, then take the year
# straight from the resulting Date (a second pass through ymd() is not needed)
xhamster$upload_date <- as.Date(xhamster$upload_date, format = "%d/%m/%Y")
xhamster$Year <- year(xhamster$upload_date)

# Bar chart: number of rows tagged "Amateur" per year, 2007-2013
xhamster %>%
  filter(Year %in% 2007:2013) %>%
  filter(grepl("Amateur", channels)) %>%
  # geom_bar() counts the rows per x value itself, so no y aesthetic is
  # needed (the old `..count..` notation is deprecated in current ggplot2)
  ggplot(aes(x = Year)) +
  geom_bar() +
  scale_x_continuous(breaks = 2007:2013, labels = 2007:2013) +
  ylab(label = "Count") +
  xlab(label = "Amateur") +
  labs(
    title = "Usage of 'Amateur' as a tag from 2007 to 2013",
    caption = "Data obtained from https://sexualitics.github.io/ under a CC BY-NC-SA 3.0 license"
  ) +
  theme_minimal(base_size = 14)
As Jared said, there are lots of ways, but I want to solve it with your way, so that you can internalize the solution better.
I just changed your cbind in the plot:
# Naming each cbind() argument makes the year the column name, so the bars are labelled 2007 ... 2013 instead of Amateur_2007 etc.
Amateur <- cbind("2007" = Amateur_2007,"2008" = Amateur_2008,"2009" = Amateur_2009, "2010" =Amateur_2010, "2011" = Amateur_2011, "2012" = Amateur_2012, "2013" = Amateur_2013)
As you can see, you can give names to your columns into cbind function like that :)

How can I combine 93 reps into 3 groups according to value?

I have 24 data sets of 93 observations each. There are only two variables, a factor (size) and its response (percent). The factor value ranges from 0-2000. I would like to combine these observations into three groups based on factor values (0-2, 2-50, and 50-2000) and see the total combined response value for each. I have tried using the group_by and summarize functions, but I am fairly new to R and I am in over my head.
In addition, is it possible to automate this so that one string of code can do this for all 24 of my data sets? They are saved as different text files in the same folder. I don't know the limitations of r, so this might not be possible. If necessary, one code that I could run 24 times would still get the job done.
Here's an example of one of the data sets -
>dput(head(data))
structure(list(run.size.percent = structure(c(2L, 13L, 24L, 35L,
46L, 57L), .Label = c(",2000,", "1,0.375,0.013", "10,0.868,0.11",
"11,0.953,0.12", "12,1.047,0.12", "13,1.149,0.13", "14,1.261,0.14",
"15,1.385,0.14", "16,1.520,0.15", "17,1.668,0.15", "18,1.832,0.16",
"19,2.011,0.17", "2,0.412,0.023", "20,2.207,0.17", "21,2.423,0.18",
"22,2.660,0.19", "23,2.920,0.20", "24,3.205,0.21", "25,3.519,0.22",
"26,3.863,0.24", "27,4.240,0.25", "28,4.655,0.26", "29,5.110,0.28",
"3,0.452,0.034", "30,5.610,0.30", "31,6.158,0.31", "32,6.760,0.33",
"33,7.421,0.35", "34,8.147,0.37", "35,8.943,0.39", "36,9.817,0.42",
"37,10.78,0.45", "38,11.83,0.47", "39,12.99,0.50", "4,0.496,0.049",
"40,14.26,0.53", "41,15.65,0.56", "42,17.18,0.58", "43,18.86,0.59",
"44,20.70,0.59", "45,22.73,0.58", "46,24.95,0.55", "47,27.39,0.52",
"48,30.07,0.49", "49,33.01,0.46", "5,0.545,0.061", "50,36.24,0.45",
"51,39.78,0.45", "52,43.67,0.45", "53,47.94,0.44", "54,52.62,0.42",
"55,57.77,0.38", "56,63.41,0.35", "57,69.61,0.32", "58,76.42,0.31",
"59,83.89,0.33", "6,0.598,0.072", "60,92.09,0.36", "61,101.1,0.42",
"62,111.0,0.49", "63,121.8,0.59", "64,133.7,0.74", "65,146.8,0.94",
"66,161.2,1.19", "67,176.9,1.49", "68,194.2,1.82", "69,213.2,2.18",
"7,0.656,0.083", "70,234.1,2.55", "71,256.9,2.94", "72,282.1,3.34",
"73,309.6,3.78", "74,339.9,4.25", "75,373.1,4.73", "76,409.6,5.20",
"77,449.7,5.60", "78,493.6,5.87", "79,541.9,5.93", "8,0.721,0.093",
"80,594.9,5.77", "81,653.0,5.37", "82,716.8,4.77", "83,786.9,4.03",
"84,863.9,3.21", "85,948.3,2.36", "86,1041,1.55", "87,1143,0.81",
"88,1255,0.30", "89,1377,0.056", "9,0.791,0.10", "90,1512,0.0044",
"91,1660,0", "92,1822,0"), class = "factor")), row.names = c(NA,
6L), class = "data.frame")
Thanks very much for any help! Please let me know if there is anything I need to clarify.
1) Summary Table
You were on the right track with the group_by/summarise idea! There are just a few steps to do first.
# load tidyverse packages
library(tidyverse)
# Build the example dataset by hand instead of re-using the dput()/structure()
# output from the question: each string holds three comma-separated fields
# (run, size, percent).
# NOTE(review): the first entry is "0,2000,0" here, whereas the question's
# factor levels list it as ",2000," -- confirm which one matches the real file.
data <- tibble(x= c("0,2000,0", "1,0.375,0.013", "10,0.868,0.11",
"11,0.953,0.12", "12,1.047,0.12", "13,1.149,0.13", "14,1.261,0.14",
"15,1.385,0.14", "16,1.520,0.15", "17,1.668,0.15", "18,1.832,0.16",
"19,2.011,0.17", "2,0.412,0.023", "20,2.207,0.17", "21,2.423,0.18",
"22,2.660,0.19", "23,2.920,0.20", "24,3.205,0.21", "25,3.519,0.22",
"26,3.863,0.24", "27,4.240,0.25", "28,4.655,0.26", "29,5.110,0.28",
"3,0.452,0.034", "30,5.610,0.30", "31,6.158,0.31", "32,6.760,0.33",
"33,7.421,0.35", "34,8.147,0.37", "35,8.943,0.39", "36,9.817,0.42",
"37,10.78,0.45", "38,11.83,0.47", "39,12.99,0.50", "4,0.496,0.049",
"40,14.26,0.53", "41,15.65,0.56", "42,17.18,0.58", "43,18.86,0.59",
"44,20.70,0.59", "45,22.73,0.58", "46,24.95,0.55", "47,27.39,0.52",
"48,30.07,0.49", "49,33.01,0.46", "5,0.545,0.061", "50,36.24,0.45",
"51,39.78,0.45", "52,43.67,0.45", "53,47.94,0.44", "54,52.62,0.42",
"55,57.77,0.38", "56,63.41,0.35", "57,69.61,0.32", "58,76.42,0.31",
"59,83.89,0.33", "6,0.598,0.072", "60,92.09,0.36", "61,101.1,0.42",
"62,111.0,0.49", "63,121.8,0.59", "64,133.7,0.74", "65,146.8,0.94",
"66,161.2,1.19", "67,176.9,1.49", "68,194.2,1.82", "69,213.2,2.18",
"7,0.656,0.083", "70,234.1,2.55", "71,256.9,2.94", "72,282.1,3.34",
"73,309.6,3.78", "74,339.9,4.25", "75,373.1,4.73", "76,409.6,5.20",
"77,449.7,5.60", "78,493.6,5.87", "79,541.9,5.93", "8,0.721,0.093",
"80,594.9,5.77", "81,653.0,5.37", "82,716.8,4.77", "83,786.9,4.03",
"84,863.9,3.21", "85,948.3,2.36", "86,1041,1.55", "87,1143,0.81",
"88,1255,0.30", "89,1377,0.056", "9,0.791,0.10", "90,1512,0.0044",
"91,1660,0", "92,1822,0")) %>%
# split each string at the commas into three separate columns
separate(x,
into = c("run", "size", "percent"),
sep = ",") %>%
# only keep the columns used below - size and percent (run is dropped)
select(size, percent) %>%
# separate() yields character columns; convert both to numeric
mutate_all(as.numeric)
# Bin the sizes into the three ranges asked for in the question --
# [0,2), [2,50), [50,2000] -- and total the percentages within each bin.
# right = FALSE makes the bins left-closed; include.lowest = TRUE closes the
# last bin on the right so that a size of exactly 2000 is still counted.
data_summary <- data %>%
  mutate(size_bin = cut(size,
                        breaks = c(0, 2, 50, 2000),
                        include.lowest = TRUE,
                        right = FALSE)) %>%
  group_by(size_bin) %>%
  summarise(percent_sum = sum(percent))
# take a look at the result
data_summary
2) Repeat process over several files
Yes, you can definitely set this up to run over many files in a folder!
Do you want all of the files to feed into one dataset? If so, here's the code you'd use:
# Read every file in the folder, summarise each one by size bin, and stack the
# per-file summaries into a single data frame.
# NOTE(review): assumes each file parses into `size` and `percent` columns --
# confirm against the real files.
data_all <- list.files("folder_name/") %>%
  # read_csv() takes the file location through `file`; it has no `path`
  # argument, so the original call failed before reading anything
  map_df(~read_csv(file = paste0("folder_name/", .x)) %>%
    # only keep useful fields - size and percent
    select(size, percent) %>%
    # change field types to numeric
    mutate_all(as.numeric) %>%
    # group by size: categories [0,2), [2,50), [50,2000] as in the question
    mutate(size_bin = cut(size,
                          breaks = c(0, 2, 50, 2000),
                          include.lowest = TRUE,
                          right = FALSE)) %>%
    group_by(size_bin) %>%
    summarise(percent_sum = sum(percent))
  )
If you want to keep the datasets separate, the code would be different. (I'm not sure how to code that right now, but I'll look into it if that's something you're interested in!)

Multiple Histograms On 1 page (without making long data)

I want to make a histogram for each column. Each Column has three values (Phase_1_Mean, Phase_2_Mean and Phase_3_Mean)
The output should be:
12 histograms (because we have 12 rows), and per histogram the 3 values showed in a bar (Y axis = value, X axis = Phase_1_Mean, Phase_2_Mean and Phase_3_Mean).
Stuck: When I search the internet, almost everyone is making a "long" data frame. That is not helpful with this example (because then we will generate a column "value"), but I want to keep the three "rows" separated.
At the bottom you can find my data. Appreciated!
I tried this (How do I generate a histogram for each column of my table?), but here is the "long table" problem, after that I tried Multiple Plots on 1 page in R, that solved how we can plot multiple graphs on 1 page.
dput(Plots1)
structure(list(`0-0.5` = c(26.952381, 5.455598, 28.32947), `0.5-1` =
c(29.798635,
25.972696, 32.87372), `1-1.5` = c(32.922764, 41.95935, 41.73577
), `1.5-2` = c(31.844156, 69.883117, 52.25974), `2-2.5` = c(52.931034,
128.672414, 55.65517), `2.5-3` = c(40.7, 110.1, 63.1), `3-3.5` =
c(73.466667,
199.533333, 70.93333), `3.5-4` = c(38.428571, 258.571429, 95),
`4-4.5` = c(47.6, 166.5, 233.4), `4.5- 5` = c(60.846154,
371.730769, 74.61538), `5-5.5` = c(7.333333, 499.833333,
51), `5.5-6` = c(51.6, 325.4, 82.4), `6-6.5` = c(69, 411.5,
134)), class = "data.frame", .Names = c("0-0.5", "0.5-1",
"1-1.5", "1.5-2", "2-2.5", "2.5-3", "3-3.5", "3.5-4", "4-4.5",
"4.5- 5", "5-5.5", "5.5-6", "6-6.5"), row.names = c("Phase_1_Mean",
"Phase_2_Mean", "Phase_3_Mean"))
Something which is showed in this example (which didn't worked for me, because it is Python) https://www.google.com/search?rlz=1C1GCEA_enNL765NL765&biw=1366&bih=626&tbm=isch&sa=1&ei=Yqc8XOjMLZDUwQLp9KuYCA&q=multiple+histograms+r&oq=multiple+histograms+r&gs_l=img.3..0i19.4028.7585..7742...1.0..1.412.3355.0j19j1j0j1......0....1..gws-wiz-img.......0j0i67j0i30j0i5i30i19j0i8i30i19j0i5i30j0i8i30j0i30i19.j-1kDXNKZhI#imgrc=L0Lvbn1rplYaEM:
I think you have to reshape to long to make this work, but I don't see why this is a problem. I think this code achieves what you want. Note that there are 13 plots because you have 13 (not 12) columns in the dataframe you posted.
# Load libraries
library(reshape2)
library(ggplot2)
# Keep the phase names (currently the row names) as a regular column so they
# survive the reshape
Plots1$ID <- rownames(Plots1)
# Long format: one row per (phase, original column) pair. Naming the id column
# explicitly avoids relying on melt() guessing it from the column types.
Plots2 <- melt(Plots1, id.vars = "ID")
# One panel per original column, with the three phase means drawn as bars
ggplot(Plots2, aes(x = ID, y = value)) +
  geom_col() +
  facet_wrap(~variable)
Below is the resulting plot. I've kept it basic, but of course you can make it pretty by adding further layers.

Using character values as axis scale

I am working on graphing weekly order volumes in ggplot. I only get daily data and aggregate it like this:
# Label every order with its ISO week string (e.g. "2016-W01").
# NOTE(review): ymd() parses year-first input, while the dates are described as MM-DD-YYYY below -- confirm the parser matches the data.
subRC$week <- ISOweek(ymd(subRC$L01.Order.Date))
# Total cases sold per week; the result has the columns Group.1 (week) and x (sum) shown in the printout below
aggRC <- aggregate(subRC$Cases.Sold, by= list(subRC$week), sum)
L01.Order.Date is in MM-DD-YYYY format.
This works perfectly for me and my data looks like this (generated random values here but that doesn't change anything):
Group.1 x
2016-W01 15444
2016-W02 134900
2016-W03 2639
2016-W04 13055
2016-W05 18012
2016-W06 138764
2016-W07 73204
2016-W08 111646
2016-W09 33872
2016-W10 35456
2016-W11 106070
2016-W12 37843
2016-W13 66861
2016-W14 46273
2016-W15 19049
2016-W16 62065
2016-W17 52882
2016-W18 67134
2016-W19 60766
2016-W20 89763
2016-W21 80680
2016-W22 101619
2016-W23 120757
2016-W24 91560
2016-W25 123721
2016-W26 23647
2016-W27 67131
2016-W28 44775
2016-W29 123575
2016-W30 130845
2016-W31 114168
2016-W32 84923
2016-W33 123661
2016-W34 66065
2016-W35 80636
2016-W36 146880
2016-W37 50515
2016-W38 75468
2016-W39 145391
2016-W40 5586
2016-W41 16922
2016-W42 67943
2016-W43 140725
2016-W44 96454
2016-W45 47013
2016-W46 47276
2016-W47 54241
2016-W48 46889
2016-W49 116884
2016-W50 70194
2016-W51 141270
2016-W52 60754
2017-W01 5214
2017-W02 65803
2017-W03 48864
2017-W04 41300
2017-W05 65923
2017-W06 67856
2017-W07 104272
2017-W08 138575
2017-W09 97664
2017-W10 62303
2017-W11 78402
2017-W12 78170
2017-W13 27001
2017-W14 38086
2017-W15 87113
Now, I want to generate a plot where I want the axis scales to show the first, middle and end date. For the example here it would be: 2016-W01,2016-W34 and 2017-W15. The dates change during my analysis, so I want it to adjust to the dataset I am using. Here is what I have tried so far:
# Show only three ticks on the week axis: the first, middle and last week
# present in the data. On a discrete scale the ticks are selected with
# `breaks`; `labels` only renames existing ticks and has to match their number.
# ISO week strings such as "2016-W01" sort chronologically as plain text.
weeks <- sort(unique(as.character(aggRC$Group.1)))
axis_weeks <- weeks[c(1, ceiling(length(weeks) / 2), length(weeks))]
ggplot() +
  geom_line(data = aggRC, aes(Group.1, x, group = 1, color = "Retail Chubs")) +
  scale_x_discrete(breaks = axis_weeks)
But that doesn't even do anything, I guess because the values in Group.1 are characters and not numeric.
I have also tried to use scale_x_date, but that doesn't work since my date values are characters.
Maybe I need to use a different way to aggregate the orders? Let me know if you need me to add anything to my question. Would love any input on this and already appreciate your help. Already looked at similar questions but nothing really helped me with this.
Even with characters, you can select by position
This is a common problem and see some questions with many upvotes:
Select first and last row from grouped data and Filter the middle row of each group
Now combine both into one selection:
# library() stops with an error when a package is missing; require() would
# only return FALSE and let the script fail later
library(ggplot2)
library(dplyr)
# After sorting by week, keep three rows: the first, the middle one
# (ceiling(n / 2)) and the last.
# example_df is the asker's data frame, renamed and pre-filtered.
p_dat <- example_df %>%
  arrange(Group.1) %>%
  filter(row_number() %in% c(1, ceiling(n() / 2), n()))
p_dat
Group.1 x
1 2016-W01 15444
2 2016-W34 66065
3 2017-W15 87113
# Plot the three selected weeks as points (week on x, the x column on y)
ggplot(p_dat, aes(x = Group.1, y = x)) +
  geom_point()
However, I would probably still advise keeping dates as dates and aggregating in a different manner, because this might make you more flexible for other questions.
Of course, you should be able to filter by group with this approach too.

transform "mFilter" object (list of Time-Series) to plot with ggplot2

I'm working with the hpfilter from the mFilter package and I can't seem to find a simple way to convert the list of Time-Series objects by hpfilter to a format I can use with ggplot2. I realize I can take it all apart and put it back together, but I imagine there's some simple way I have overlooked? I tried the code suggested in the SO discussion R list to data frame. However I couldn't find a way to convert the list of Time-Series objects to a data.frame in any simple way. The final goal is to reproduce the default plot produced by the mFilter object (see below)
Here's some example code
# install.packages(c("mFilter"), dependencies = TRUE)
library(mFilter)
data(unemp)
unemp.hp <- hpfilter(unemp, type=c("lambda"), freq = 1606)
# str(unemp.hp)
class(unemp.hp)
# [1] "mFilter"
plot(unemp.hp)
Hit <Return> to see next plot:
Also, why am I asked to " Hit <Return>" to see the plot?
The plot function calls plot.mFilter which has parameter ask=interactive() and it is set as TRUE for interactive sessions,
you could disable this by ask=FALSE in call for plot
plot(unemp.hp,ask=FALSE)
Data:
library(mFilter)
library(ggplot2)
library(gridExtra)
# library(zoo)
# Load the example series and run the filter on it
data(unemp)
unemp.hp <- hpfilter(unemp, type = "lambda", freq = 1606)
# str(unemp.hp)
class(unemp.hp)
# [1] "mFilter"
# ask = FALSE turns off the "Hit <Return>" prompt between plots
plot(unemp.hp, ask = FALSE)
To check for slots of object unemp.hp
names(unemp.hp)
# [1] "cycle" "trend" "fmatrix" "title" "xname" "call" "type" "lambda" "method"
#[10] "x"
The relevant objects are x (the main unemp series) , trend and cycle. All three objects are of class ts, we first convert them to
data.frame using custom function and plot using ggplot and gridExtra (for grid.arrange)
# Gather the three ts components of the filter result under readable names
objectList <- setNames(
  list(unemp.hp$x, unemp.hp$trend, unemp.hp$cycle),
  c("unemp", "trend", "cycle")
)
# Print the class of each component
sapply(objectList, class)
#unemp trend cycle
# "ts" "ts" "ts"
Conversion from ts to data.frame:
# Convert one element of the global `objectList` into a data frame.
#   x: position of the series within objectList
# Returns a data frame with a `date` column (built from the series' time
# index) and a value column named after the list element.
fn_ts_to_DF <- function(x) {
  series <- objectList[[x]]
  out <- data.frame(
    date = zoo::as.Date(time(series)),
    tseries = as.matrix(series)
  )
  # rename the value column to the name of the list element
  colnames(out)[2] <- names(objectList)[x]
  out
}
# One data frame per series, in the order of objectList
DFList <- setNames(
  lapply(seq_along(objectList), fn_ts_to_DF),
  c("unemp", "trend", "cycle")
)
# Observed series joined with its trend on the shared date column
seriesTrend <- merge(DFList$unemp, DFList$trend, by = "date")
# The cyclical component is kept on its own for the second panel
cycleSeries <- DFList$cycle
Plots:
# Upper panel: observed series and trend as two coloured lines. melt() lives
# in reshape2, which this answer never attaches, so it is called with its
# namespace prefix.
gSeries <- ggplot(
  reshape2::melt(seriesTrend, "date"),
  aes(x = date, y = value, color = variable)
) +
  geom_line() +
  ggtitle('Hodrick-Prescot Filter for unemp') +
  theme(
    legend.title = element_blank(),
    legend.justification = c(0.1, 0.8),
    legend.position = c(0, 1),
    legend.direction = "horizontal",
    legend.background = element_rect(fill = "transparent", size = .5, linetype = "dotted")
  )
# Lower panel: cyclical component
gCycle <- ggplot(cycleSeries, aes(x = date, y = cycle)) +
  geom_line(color = "#619CFF") +
  ggtitle("Cyclical component (deviations from trend)")
# Stack both panels vertically
gComb <- grid.arrange(gSeries, gCycle, nrow = 2)
I tried to use the prior answer, but it didn't work for me.
I was getting the trend and cycle from a GDP quarterly series.
This data was a time series, so I did this, and worked for me:
# Collect the original series and its two filter components. The helper is
# deliberately not called `list`, which would mask base::list.
# NOTE(review): gdp_ln must provide $x, $trend and $cycle here (i.e. be the
# filter result rather than the raw series) -- confirm.
gdp_parts <- list(gdp = gdp_ln$x, trend = gdp_ln$trend, cycle = gdp_ln$cycle)
# Strip the ts attributes and bind the three series as data frame columns
gdp <- data.frame(lapply(gdp_parts, as.vector))
Data:
> dput(gdp_ln)
structure(c(16.0275785360442, 16.0477176062761, 16.0718936895007,
16.0899963371452, 16.0875707712141, 16.0981391378223, 16.0988601288276,
16.1110815092797, 16.1244321329861, 16.1384685077996, 16.1451472350838,
16.148178781735, 16.161163569502, 16.1418894206861, 16.1634877625667,
16.1965372621761, 16.2216815829736, 16.2387677536829, 16.249412380526,
16.2690521777631, 16.2812185880068, 16.2951024427095, 16.2964024092233,
16.3127733881018, 16.3233290487177, 16.3369922768377, 16.3486515031696,
16.3489275708763, 16.3451264371757, 16.3524856433069, 16.3666338513045,
16.3801691039135, 16.3959993202765, 16.4135937981601, 16.4321203154987,
16.4488104165345, 16.4344524213544, 16.4302554348621, 16.4240722287677,
16.425087582257, 16.4350803035092, 16.4507216431126, 16.4670532627455,
16.4985227751756, 16.5094864456079, 16.5352746165004, 16.5504689966469,
16.5594976247513, 16.5754312535087, 16.592641573353, 16.6003340665324,
16.6063100774853, 16.6163655606058, 16.6370227688187, 16.6564363783854,
16.6577160570216, 16.6543595214556, 16.6773721241902, 16.6911082706925,
16.6935398489076, 16.6956102943815, 16.6798673418354, 16.6772670544553,
16.6678707780266, 16.6606889172344, 16.6678398460835, 16.6668473810049,
16.676020524389, 16.6775934319312, 16.6882821147755, 16.6957985899994,
16.7032334217472, 16.6926036544774, 16.7027214366522, 16.7103625977254,
16.7105344224572, 16.7042504851486, 16.7063913529457, 16.7100598555556,
16.6960591147037, 16.686477079594, 16.5740423808036, 16.6181175035946
), .Tsp = c(2000, 2020.5, 4), class = "ts")

Resources