Let's say I have the following data:
# Sample series for AAPL and LMT; `date` stores days since 1970-01-01.
stocks <- structure(
  list(
    date = structure(
      c(15120, 15126, 15156, 15187, 15218, 15250, 15279, 15309, 15342, 15371),
      class = "Date"
    ),
    AAPL = c(
      0, -0.0349594915528398, 0.163285209696362, -0.0144692603838991,
      -0.00912094189637977, 0.0615229895783601, -0.0557834027614259,
      0.0596546102691159, 0.127111450820476, 0.188310389721697
    ),
    LMT = c(
      0, 0.0394093623514219, -0.064715298915223, -0.0103142125320749,
      -0.0208923278478336, 0.0448787708206146, 0.0430164493053814,
      0.035188599184363, 0.0175524826908838, 0.0861273642597269
    )
  ),
  row.names = c(NA, 10L),
  class = "data.frame"
)
Which looks something like this:
date AAPL LMT
1 2011-05-26 0.000000000 0.00000000
2 2011-06-01 -0.034959492 0.03940936
3 2011-07-01 0.163285210 -0.06471530
4 2011-08-01 -0.014469260 -0.01031421
5 2011-09-01 -0.009120942 -0.02089233
6 2011-10-03 0.061522990 0.04487877
7 2011-11-01 -0.055783403 0.04301645
8 2011-12-01 0.059654610 0.03518860
9 2012-01-03 0.127111451 0.01755248
10 2012-02-01 0.188310390 0.08612736
Then I melt it:
# Reshape to long format: one row per date and series, with the series name
# in `variable` and its number in `value`.
library(reshape2)
stocks <- melt(data = stocks, id.vars = "date")
And then plot it as the cumulative series:
library(ggplot2)
# NOTE: cumsum(value) is evaluated over the whole melted `value` column, so the
# running total is not restarted for each `variable`.
ggplot(stocks, aes(date, cumsum(value), color = variable)) + geom_line()
As you see, the starting points of the series for some reason have different y values (and thus, the graphs do start from different points). The question would be the following: is there any way to make both AAPL and LMT series start from the same (0,0) point?
I would calculate the cumsum value first using dplyr or plyr:
library(dplyr)
# Accumulate within each series first, so every line starts from its own
# first value, then hand the result to ggplot.
stocks %>%
  group_by(variable) %>%
  mutate(cumsum = cumsum(value)) %>%
  ggplot(aes(x = date, y = cumsum, color = variable)) +
  geom_line()
Related
I've been trying to plot data that has been mutated from nominal levels into quarterly growth rates.
i.e the original dataset was
Date GDP Level
2010Q1 457
2010Q2 487
2010Q3 538
2010Q4 589
2011Q1 627
2011Q2 672.2
2011Q3 716.4
2011Q4 760.6
2012Q1 804.8
2012Q2 849
2012Q3 893.2
2012Q4 937.4
Which was in an excel file which I have imported using
# Import the spreadsheet ("xx" is presumably a placeholder for the real path).
dataset <- read_excel("xx")
Then, I have done the below in order to mutate it to quarter-on-quarter growth ("QoQ Growth"):
# Column names containing a space are not syntactic in R, so they have to be
# wrapped in backticks; written bare, this call does not parse.
dataset %>%
  mutate(`QoQ Growth` = `GDP Level` / lag(`GDP Level`, n = 1) - 1)
I would now like to plot this % growth across time, but I'm not sure what the geom_line code is for a mutated variable. Any help would be truly appreciated! I'm quite new to R and really trying to learn, thanks!
Something like this?
library(tidyverse)
# Quarter-on-quarter growth: each level divided by the previous one, minus 1.
# `group = 1` joins all points into a single line across the discrete x axis.
df %>%
  mutate(QoQGrowth = GDPLevel / lag(GDPLevel, n = 1) - 1) %>%
  ggplot(aes(x = factor(Date), y = QoQGrowth, group = 1)) +
  geom_line()
Output
Data
# Quarterly GDP levels; `Date` is kept as plain text such as "2010Q1".
df <- data.frame(
  Date = c(
    "2010Q1", "2010Q2", "2010Q3", "2010Q4",
    "2011Q1", "2011Q2", "2011Q3", "2011Q4",
    "2012Q1", "2012Q2", "2012Q3", "2012Q4"
  ),
  GDPLevel = c(
    457, 487, 538, 589,
    627, 672.2, 716.4, 760.6,
    804.8, 849, 893.2, 937.4
  ),
  stringsAsFactors = FALSE
)
Package zoo defines an S3 class "yearqtr" and has a function to handle quarterly dates, as.yearqtr. Combined with ggplot2's scale_x_date, the formatting of quarterly axis labels becomes easier.
suppressPackageStartupMessages({
  library(dplyr)
  library(zoo)
})
library(ggplot2)

# check.names = FALSE keeps the header 'GDP Level' with its space.
dataset <- read.table(text = "
Date 'GDP Level'
2010Q1 457
2010Q2 487
2010Q3 538
2010Q4 589
2011Q1 627
2011Q2 672.2
2011Q3 716.4
2011Q4 760.6
2012Q1 804.8
2012Q2 849
2012Q3 893.2
2012Q4 937.4
", header = TRUE, check.names = FALSE)

# Parse the quarter labels to yearqtr, convert them to Date for scale_x_date(),
# and compute growth as the ratio to the previous level minus 1.
dataset %>%
  mutate(
    Date = as.Date(as.yearqtr(Date, format = "%Y Q%q")),
    `QoQ Growth` = `GDP Level` / lag(`GDP Level`, n = 1) - 1
  ) %>%
  ggplot(aes(x = Date, y = `QoQ Growth`)) +
  geom_line() +
  scale_x_date(date_breaks = "3 months", labels = as.yearqtr) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
#> Warning: Removed 1 row(s) containing missing values (geom_path).
Created on 2022-03-08 by the reprex package (v2.0.1)
Convert dataset to a zoo object z, use diff.zoo to get the growth, QoQ Growth, and then use autoplot.zoo with scale_x_yearqtr.
library(zoo)
library(ggplot2)
# Build a zoo series from `dataset`, converting its index with as.yearqtr.
z <- read.zoo(dataset, FUN = as.yearqtr)
# arith = FALSE asks diff() for ratios between consecutive values rather than
# subtractions, so subtracting 1 gives the quarter-on-quarter growth.
`QoQ Growth` <- diff(z, arith = FALSE) - 1
autoplot(`QoQ Growth`) +
scale_x_yearqtr(format = "%YQ%q", n = length(`QoQ Growth`)) +
xlab("")
I have a data frame that looks like this:
Year Women Men
1 2013 145169 889190
2 2014 119064 849778
3 2015 210107 1079592
4 2016 221217 1427639
5 2017 205000 1692592
6 2018 273721 1703456
7 2019 434407 2010493
I want to make a geom_bar plot where x is the year and every year has two bars, one for the number of Women and one for Men. I have found a solution where the table has to look different, but I'm wondering if there is an option to work with this one. Thank you for any help :)
You can use the following code
library(tidyverse)
# Stack the two count columns into Name/Value pairs so `fill` can tell them
# apart. Naming the columns to pivot (instead of excluding Year and Sl) also
# works on a data frame that has no Sl column.
df %>%
  pivot_longer(cols = c(Women, Men), names_to = "Name", values_to = "Value") %>%
  ggplot(aes(x = Year, y = Value, fill = Name)) +
  geom_col(position = "dodge")
Data
# Yearly counts for Women and Men; `Sl` is a running row number.
df <- data.frame(
  Sl = 1:7,
  Year = 2013:2019,
  Women = c(145169L, 119064L, 210107L, 221217L, 205000L, 273721L, 434407L),
  Men = c(889190L, 849778L, 1079592L, 1427639L, 1692592L, 1703456L, 2010493L)
)
I'm trying to graph multiple dataframe columns in R.
(like this-> Graphing multiple variables in R)
bid ask date
1 20.12 20.14 2014-10-31
2 20.09 20.12 2014-11-03
3 20.03 20.06 2014-11-04
4 19.86 19.89 2014-11-05
This is my data.
And I can make one line graph like this.
`data%>% select(bid,ask,date) %>% hchart(type='line', hcaes(x='date', y='bid'))`
I want to add ask line graph in this graph.
One way is to reshape (gather) the values to plot and then add a group aesthetic to the hchart function:
library(dplyr)
library(tidyr)
library(highcharter)
# Stack bid and ask into key/value pairs; `group` then gives one line per key.
# pivot_longer() is the current tidyr replacement for the superseded gather().
data %>%
  select(bid, ask, date) %>%
  pivot_longer(cols = c(bid, ask), names_to = "key", values_to = "value") %>%
  hchart(type = 'line', hcaes(x = 'date', y = 'value', group = 'key'))
ps. Don't forget to load all the necessary libraries
You can use the following code
library(reshape2)
library(highcharter)
# Long format: `date` stays as the identifier, bid/ask go to variable/value.
# `id.vars` is spelled out in full rather than relying on partial argument
# matching of `id`.
df_m <- melt(df, id.vars = "date")
hchart(df_m, "line", hcaes(x = date, y = value, group = variable))
Here is the data
# Four quotes; `date` is a factor of dd/mm/yyyy labels, not a Date.
df <- data.frame(
  bid = c(20.12, 20.09, 20.03, 19.86),
  ask = c(20.14, 20.12, 20.06, 19.89),
  date = factor(
    c("31/10/2014", "03/11/2014", "04/11/2014", "05/11/2014"),
    levels = c("03/11/2014", "04/11/2014", "05/11/2014", "31/10/2014")
  )
)
Sample of dataset:
# Long-format sample: for each NAME and ACH_DATE there are three MEASURE rows
# (the rate itself plus its _LL and _UL companions).
sample <- structure(
  list(
    NAME = rep(c("WEST YORKSHIRE", "NOTTINGHAMSHIRE"), each = 6),
    ACH_DATE = structure(
      rep(rep(c(17410, 17440), each = 3), times = 2),
      class = "Date"
    ),
    MEASURE = rep(
      c("DIAG_RATE_65_PLUS", "DIAG_RATE_65_PLUS_LL", "DIAG_RATE_65_PLUS_UL"),
      times = 4
    ),
    VALUE = c(
      73.6, 66.2, 79.8, 73.7, 66.3, 80,
      77, 69.1, 83.6, 77.5, 69.6, 84.2
    )
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -12L)
)
I'm trying to visualise the error bars for the points seen here:
# Keep only the point-value rows and draw them as centred dots, grouped by date.
sample %>%
  filter(MEASURE == "DIAG_RATE_65_PLUS") %>%
  ggplot(aes(x = ACH_DATE, y = VALUE, group = ACH_DATE)) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0.2)
As you can see in the df the lower and upper limits are contained in a variable MEASURE with my point values of interest in a long format.
What I'm stuck on is how I can filter the df further, to use the lower and upper limit values in the ymin and ymax arguments.
I've tried something like:
# Attempt that does not work: ymin and ymax are each given the result of
# filter() %>% select(VALUE), which is a one-column tibble rather than a vector.
sample %>% filter(MEASURE == "DIAG_RATE_65_PLUS") %>% ggplot(aes(x=ACH_DATE, y=VALUE, group=ACH_DATE)) +
geom_dotplot(binaxis = "y", stackdir = "center", dotsize=0.2) +
geom_errorbar(aes(x = ACH_DATE,
ymin = sample %>% filter(MEASURE == "DIAG_RATE_65_PLUS_LL") %>% select(VALUE),
ymax = sample %>% filter(MEASURE == "DIAG_RATE_65_PLUS_UL") %>% select(VALUE)),
data = sample %>% filter(MEASURE != "DIAG_RATE_65_PLUS"),
colour="red")
Which throws the error: Error: Columns `ymin`, `ymax` must be 1d atomic vectors or lists. I've tried wrapping my input to the ymin and ymax arguments with as.vector, but that doesn't seem to help.
ggplot, like other tidyverse libraries, works with non-standard evaluation. It's expecting the bare names of data frame columns in arguments such as ymin. What you supplied is instead a data frame with only 1 column: dplyr::select returns a data frame/tibble with the given columns, hence the error about needing to supply a vector.
# select() keeps the tibble wrapper: the result printed below is a 4 x 1
# tibble, not a plain numeric vector.
sample %>% filter(MEASURE == "DIAG_RATE_65_PLUS_LL") %>% select(VALUE)
#> # A tibble: 4 x 1
#> VALUE
#> <dbl>
#> 1 66.2
#> 2 66.3
#> 3 69.1
#> 4 69.6
If you really wanted to use this method of having all your types of measures in one column and filtering for different types, dplyr::pull takes a single column name and returns the data in that column as a vector.
However, there are multiple concerns you're trying to handle in this data frame that you probably ought to separate. You have observation values (means, medians, or whatever), you have upper confidence interval limits, and you have lower confidence interval limits. While the answer to ggplot issues is often long-shaping data, this is a case where these are three different concerns that have different places in your plot—therefore, you're better off making them individual columns. You can do this with tidyr::spread.
library(dplyr)
library(ggplot2)
# Turn each distinct MEASURE into its own column filled from VALUE, leaving
# one row per NAME and ACH_DATE (see the printed result below).
sample %>%
tidyr::spread(key = MEASURE, value = VALUE)
#> # A tibble: 4 x 5
#> NAME ACH_DATE DIAG_RATE_65_PL… DIAG_RATE_65_PLU… DIAG_RATE_65_PLU…
#> <chr> <date> <dbl> <dbl> <dbl>
#> 1 NOTTING… 2017-09-01 77 69.1 83.6
#> 2 NOTTING… 2017-10-01 77.5 69.6 84.2
#> 3 WEST YO… 2017-09-01 73.6 66.2 79.8
#> 4 WEST YO… 2017-10-01 73.7 66.3 80
And then use those separate columns that have separate purposes for the corresponding parts of your geoms.
# Widen first so the point value and its two limits sit in separate columns,
# then map each column to the aesthetic it belongs to.
sample %>%
  tidyr::spread(MEASURE, VALUE) %>%
  ggplot(aes(ACH_DATE, DIAG_RATE_65_PLUS, group = ACH_DATE)) +
  geom_dotplot(binaxis = "y") +
  geom_errorbar(aes(ymin = DIAG_RATE_65_PLUS_LL, ymax = DIAG_RATE_65_PLUS_UL))
#> `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
Created on 2018-10-01 by the reprex package (v0.2.1)
This is a beginner question. I have spent most of the day trying to work out how to facet my data, but all of the examples of faceting that I have come across seem unsuited to my dataset.
Here are the first five rows from my data:
Date Germany.Yield Italy.Yield Greece.Yield Italy_v_Germany.Spread Greece_v_Germany.Spread
2020-04-19 -0.472 1.820 2.287 2.292 2.759
2020-04-12 -0.472 1.790 2.112 2.262 2.584
2020-04-05 -0.345 1.599 1.829 1.944 2.174
2020-03-29 -0.441 1.542 1.972 1.983 2.413
2020-03-22 -0.475 1.334 1.585 1.809 2.060
I simply want to create two line charts. On both charts the x-axis will be the date. On the first chart, the y-axis should be Italy_v_Germany.Spread and on the second, the y-axis should be Greece_v_Germany.Spread.
The first chart looks like this:
So I want the two charts to appear alongside each other, like this:
The one on the left should be Italy_v_Germany.Spread, and the one on the right should be Greece_v_Germany.Spread.
I really have no idea where to start with this. Hoping that someone can point me in the right direction.
In the interest of making the example reproducible, I will share a link to the CSV files which I'm using: https://1drv.ms/u/s!AvGKDeEV3LOsmmlHkzO6YVQTRiOX?e=mukBVy. Unfortunately these files convert into Excel format when shared via this link, so you may have to export the files to CSVs so that the code works.
Here is the code that I have so far:
library(ggplot2)
library(scales)
library(extrafont)
library(dplyr)
library(tidyr)
work_dir <- "D:\\OneDrive\\Documents\\Economic Data\\Historical Yields\\Eurozone"

# Load one country's weekly 10-year yield CSV from `dir`, drop columns 3 to 6,
# and name the first two columns "Date" and "<country>.Yield".
# The full path is built with file.path() so the session's working directory
# is never changed with setwd().
read_yields <- function(country, dir = work_dir) {
  csv_name <- paste0(country, " 10-Year Yield Weekly (2007-2020).csv")
  yields <- read.csv(file = file.path(dir, csv_name), stringsAsFactors = FALSE)
  yields <- yields[, -(3:6)]
  colnames(yields)[1:2] <- c("Date", paste0(country, ".Yield"))
  yields
}

# Germany, Italy, Greece
#---------------------------------------
germany_yields <- read_yields("Germany")
italy_yields <- read_yields("Italy")
greece_yields <- read_yields("Greece")
#---------------------------------------
# Join data
#---------------------------------------
combined <- merge(
  merge(germany_yields, italy_yields, by = "Date", sort = FALSE),
  greece_yields,
  by = "Date", sort = FALSE
)
combined <- na.omit(combined)
# Dates are read as text and parsed with the "%B %d, %Y" format.
combined$Date <- as.Date(combined$Date, format = "%B %d, %Y")
combined["Italy_v_Germany.Spread"] <- combined$Italy.Yield - combined$Germany.Yield
combined["Greece_v_Germany.Spread"] <- combined$Greece.Yield - combined$Germany.Yield
#--------------------------------------------------------------------
# Earliest and latest date, independent of the row order left by merge().
fl_dates <- range(combined$Date)
ggplot(data = combined, aes(x = Date, y = Italy_v_Germany.Spread)) +
  geom_line() +
  scale_x_date(
    limits = fl_dates,
    breaks = seq(as.Date("2008-01-01"), as.Date("2020-01-01"), by = "2 years"),
    expand = c(0, 0),
    date_labels = "%Y"
  )
You need to get your data into a long format, for example, by using pivot_longer. Then it should work.
library(dplyr)
library(tidyr)
library(ggplot2)
# The five sample rows from the question, one per date. Dates are entered as
# text and converted to Date in the pipeline below.
data <- tribble(
  ~Date, ~Germany.Yield, ~Italy.Yield, ~Greece.Yield, ~Italy_v_Germany.Spread, ~Greece_v_Germany.Spread,
  "2020-04-19", -0.472, 1.820, 2.287, 2.292, 2.759,
  "2020-04-12", -0.472, 1.790, 2.112, 2.262, 2.584,
  "2020-04-05", -0.345, 1.599, 1.829, 1.944, 2.174,
  "2020-03-29", -0.441, 1.542, 1.972, 1.983, 2.413,
  "2020-03-22", -0.475, 1.334, 1.585, 1.809, 2.060
)

# Stack the two spread columns into (country, Spread_v_Germany) pairs so that
# facet_wrap() can draw one panel per spread series.
data %>%
  mutate(Date = as.Date(Date)) %>%
  pivot_longer(
    cols = ends_with("Spread"),
    names_to = "country",
    values_to = "Spread_v_Germany",
    values_drop_na = TRUE
  ) %>%
  ggplot(aes(x = Date, y = Spread_v_Germany, group = 1)) +
  geom_line() +
  facet_wrap(. ~ country)