I have two data sets from an experiment on a person during different ambient temperatures. P1 represents a patient's physiological response data and P1IAQ represents the environmental monitoring data during the experiment. P1 recorded data 32 times per second but P1IAQ recorded data every 10 seconds.
head(P1IAQ)
Time RH Temp CO2
1 12:04:07 44.2 19.89 664
2 12:04:17 44.2 19.89 664
3 12:04:27 44.2 19.89 665
4 12:04:37 44.2 19.89 665
5 12:04:47 44.2 19.89 666
6 12:04:57 44.2 19.89 668
head(P1)
Time SkinTemp HeartRate RespirationRate
1 00:00:00 27.781 70 10
2 00:00:00 27.780 70 10
3 00:00:00 27.779 70 10
4 00:00:00 27.779 70 10
5 00:00:00 27.778 70 10
6 00:00:00 27.777 70 10
The problem I have is that the time stamp on P1 is wrong. How can I plot them together on the same graph to see if the SkinTemp has a time-lag after the environmental temp is decreased?
EDIT: dput for P1IAQ
I've added the first twenty values for the environmental data. I think the best thing would be to subtract 12:04:07 from all values to make the starting time 00:00:00. I've tried looking at lubridate.
library(lubridate)
P1IAQ$Time<-hms(P1IAQ$Time)
This datetime post looks interesting, but it's for plotting dates rather than actually altering them.
Plotting data against time in R
dput(P1IAQ)
structure(list(Time = structure(1:19, .Label = c("12:04:07",
"12:04:17", "12:04:27", "12:04:37", "12:04:47", "12:04:57", "12:05:07",
"12:05:17", "12:05:27", "12:05:37", "12:05:47", "12:05:57", "12:06:07",
"12:06:17", "12:06:27", "12:06:37", "12:06:47", "12:06:57", "12:07:07"
), class = "factor"), RH = c(44.2, 44.2, 44.2, 44.2, 44.2, 44.2,
44.2, 44.2, 44.1, 44.1, 44.2, 44.2, 44.2, 44.3, 44.2, 44.2, 44.2,
44.3, 44.3), Temp = c(19.89, 19.89, 19.89, 19.89, 19.89, 19.89,
19.89, 19.89, 19.89, 19.89, 19.94, 19.89, 19.94, 19.94, 19.94,
19.94, 19.94, 19.94, 19.94), CO2 = c(664L, 664L, 665L, 665L,
666L, 668L, 668L, 669L, 667L, 670L, 670L, 672L, 675L, 677L, 682L,
684L, 685L, 686L, 687L)), .Names = c("Time", "RH", "Temp", "CO2"
), class = "data.frame", row.names = c(NA, -19L))
EDIT: I've synchronised the times using lubridate:
P1IAQ$Time<-period_to_seconds(hms(as.character(P1IAQ$Time))-hms("12:04:07"))
P1$Time<-period_to_seconds(hms(as.character(P1$Time)))
But now plotting them together is tricky. I've tried ggplot2 but I can't get two vertical axes. Any thoughts?
ggplot() +
geom_line(data = P1IAQ, aes(x = Time, y = Temp, color = "red")) +
geom_line(data = P1, aes(x = Time, y = Temp, color = "blue")) +
xlab('Time (s)') +
ylab('Temperature ºC')
If you only need the times in each data frame to be on a common scale, you can convert both of them to numeric seconds elapsed since the start of the experiment and not worry about date or time classes. Then you can join the two data frames based on the common time scale.
I used your P1IAQ data sample and created fake P1 data to go with it. Time in my P1 is probably not in the same format as your actual data. If you post a sample of your P1, I can adjust the example below to fit your actual data.
library(dplyr)
library(reshape2)
library(hms)
library(zoo)
library(ggplot2)
theme_set(theme_light())
# Fake P1 data frame
set.seed(10)
n=32*60*3 + 1
P1 = data.frame(Time=as.POSIXct(seq(0,180,length.out=n), origin=as.Date("2016-05-01"), tz="GMT"),
SkinTemp = round(cumsum(rnorm(n, 0, 0.01)) + 27.78, 2),
RespirationRate=round(rnorm(n, 10, 0.5)))
Convert P1$Time and P1IAQ$Time to numeric values equal to the number of seconds elapsed since the start of the experiment. (Note that P1IAQ$Time in the data you posted is a factor, so I converted to character before further processing.):
# Seconds since midnight for each P1 timestamp. as_hms() replaces the
# deprecated as.hms() from the hms package.
P1$nTime <- as.numeric(as_hms(P1$Time))
# P1IAQ$Time is a factor, so convert to character before parsing it as a
# time of day.
P1IAQ$nTime <- as.numeric(as_hms(as.character(P1IAQ$Time)))
# Shift the environmental clock so its earliest reading sits at 0 seconds.
P1IAQ$nTime <- P1IAQ$nTime - min(P1IAQ$nTime)
Join P1 and P1IAQ by nTime:
# Full join keeps every row of both data frames, matched on elapsed seconds;
# the two Time columns are disambiguated by the suffixes.
P1j <- full_join(P1, P1IAQ, by="nTime", suffix=c("_P1","_P1IAQ")) %>%
  # Make sure joined data frame is sorted by nTime
  arrange(nTime) %>%
  # Fill missing values with Last One Carried Forward. na.rm=FALSE keeps any
  # leading NAs in place so every filled column keeps its original length
  # (the default na.rm=TRUE would drop them and break mutate_at()).
  mutate_at(vars(Time_P1IAQ, RH, Temp, CO2), na.locf, na.rm=FALSE)
Plot after converting data from wide to long format:
ggplot(P1j %>% select(Time_P1IAQ, nTime, Skin=SkinTemp, Ambient=Temp) %>%
# Convert from wide to long format for plotting
melt(id.var=c("Time_P1IAQ", "nTime")),
aes(nTime, value, group=Time_P1IAQ)) +
geom_line() +
facet_grid(variable ~ ., scales="free_y") +
scale_y_continuous(expand=c(0.5,0)) +
labs(x="Elapsed Time (sec)", y=expression(Temperature~"("*degree*C*")"))
Another option is to plot temperature changes relative to the start of the experiment. That way, you can have both lines on the same panel without having to deal with them being in different locations:
ggplot(P1j %>% select(Time_P1IAQ, nTime, Skin=SkinTemp, Ambient=Temp) %>%
# Convert from wide to long format for plotting
melt(id.var=c("Time_P1IAQ", "nTime")) %>%
# Convert temperatures to difference from starting values
group_by(variable) %>%
mutate(value = value - value[nTime==min(nTime)]),
aes(nTime, value, colour=variable)) +
geom_line() +
labs(x="Elapsed Time (sec)", y=expression(Temperature~Change~"("*degree*C*")"),
colour="")
I don't have your data, but I will prepare something similar. In this case the length of P1 is different from that of P1IAQ:
library(ggplot2)
# Two small stand-in data frames with different numbers of rows
P1 <- data.frame(Time = 1:10, Temp = 51:60)
P1IAQ <- data.frame(Time = 1:8, Temp = 1:8)
# Stack both series into one long data frame, tagging each row with the
# name of the data frame it came from
df <- data.frame(
  Time = c(P1$Time, P1IAQ$Time),
  values = c(P1$Temp, P1IAQ$Temp),
  type = rep(c("P1", "P1IAQ"), times = c(nrow(P1), nrow(P1IAQ)))
)
ggplot(data=df, aes(x=Time, y=values, color=type)) +
geom_line() +
facet_grid(type ~ ., scales="free") +
xlab('Time (s)') +
ylab('Temperature ºC')
Related
I'm trying to plot data that has been mutated into quarterly growth rates from nominal levels.
i.e the original dataset was
Date GDP Level
2010Q1 457
2010Q2 487
2010Q3 538
2010Q4 589
2011Q1 627
2011Q2 672.2
2011Q3 716.4
2011Q4 760.6
2012Q1 804.8
2012Q2 849
2012Q3 893.2
2012Q4 937.4
Which was in an excel file which I have imported using
dataset <- read_excel("xx")
Then, I have done the below in order to mutate it to quarter on quarter growth ("QoQ Growth"):
dataset %>%
mutate(QoQ Growth= (GDP Level) / lag(GDP Level, n=1) - 1)
I would now like to plot this % growth across time; however, I'm not too sure what the geom_line code is for a mutated variable. Any help would be really truly appreciated! I'm quite new to R and really trying to learn, thanks!
Something like this?
library(tidyverse)
df %>%
mutate(QoQGrowth = (GDPLevel) / lag(GDPLevel, n=1) - 1) %>%
ggplot(aes(factor(Date), QoQGrowth, group=1)) +
geom_line()
Output
Data
df <- structure(list(Date = c("2010Q1", "2010Q2", "2010Q3", "2010Q4",
"2011Q1", "2011Q2", "2011Q3", "2011Q4", "2012Q1", "2012Q2", "2012Q3",
"2012Q4"), GDPLevel = c(457, 487, 538, 589, 627, 672.2, 716.4,
760.6, 804.8, 849, 893.2, 937.4)), class = "data.frame", row.names = c(NA,
-12L))
Package zoo defines an S3 class "yearqtr" and has a function to handle quarterly dates, as.yearqtr. Combined with ggplot2's scale_x_date, the formatting of quarterly axis labels becomes easier.
dataset <- read.table(text = "
Date 'GDP Level'
2010Q1 457
2010Q2 487
2010Q3 538
2010Q4 589
2011Q1 627
2011Q2 672.2
2011Q3 716.4
2011Q4 760.6
2012Q1 804.8
2012Q2 849
2012Q3 893.2
2012Q4 937.4
", header = TRUE, check.names = FALSE)
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(zoo))
library(ggplot2)
dataset %>%
mutate(Date = as.yearqtr(Date, format= "%Y Q%q"),
Date = as.Date(Date)) %>%
mutate(`QoQ Growth` = `GDP Level` / lag(`GDP Level`, n = 1) - 1) %>%
ggplot(aes(Date, `QoQ Growth`)) +
geom_line() +
scale_x_date(date_breaks = "3 months", labels = as.yearqtr) +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
#> Warning: Removed 1 row(s) containing missing values (geom_path).
Created on 2022-03-08 by the reprex package (v2.0.1)
Convert dataset to a zoo object z, use diff.zoo to get the growth, QoQ Growth, and then use autoplot.zoo with scale_x_yearqtr.
library(zoo)
library(ggplot2)
z <- read.zoo(dataset, FUN = as.yearqtr)
`QoQ Growth` <- diff(z, arith = FALSE) - 1
autoplot(`QoQ Growth`) +
scale_x_yearqtr(format = "%YQ%q", n = length(`QoQ Growth`)) +
xlab("")
Sample of dataset:
sample <- structure(list(NAME = c("WEST YORKSHIRE", "WEST YORKSHIRE", "WEST YORKSHIRE",
"WEST YORKSHIRE", "WEST YORKSHIRE", "WEST YORKSHIRE", "NOTTINGHAMSHIRE",
"NOTTINGHAMSHIRE", "NOTTINGHAMSHIRE", "NOTTINGHAMSHIRE", "NOTTINGHAMSHIRE",
"NOTTINGHAMSHIRE"), ACH_DATE = structure(c(17410, 17410, 17410,
17440, 17440, 17440, 17410, 17410, 17410, 17440, 17440, 17440
), class = "Date"), MEASURE = c("DIAG_RATE_65_PLUS", "DIAG_RATE_65_PLUS_LL",
"DIAG_RATE_65_PLUS_UL", "DIAG_RATE_65_PLUS", "DIAG_RATE_65_PLUS_LL",
"DIAG_RATE_65_PLUS_UL", "DIAG_RATE_65_PLUS", "DIAG_RATE_65_PLUS_LL",
"DIAG_RATE_65_PLUS_UL", "DIAG_RATE_65_PLUS", "DIAG_RATE_65_PLUS_LL",
"DIAG_RATE_65_PLUS_UL"), VALUE = c(73.6, 66.2, 79.8, 73.7, 66.3,
80, 77, 69.1, 83.6, 77.5, 69.6, 84.2)), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -12L))
I'm trying to visualise the error bars for the points seen here:
sample %>% filter(MEASURE == "DIAG_RATE_65_PLUS") %>% ggplot(aes(x=ACH_DATE, y=VALUE, group=ACH_DATE)) +
geom_dotplot(binaxis = "y", stackdir = "center", dotsize=0.2)
As you can see in the df the lower and upper limits are contained in a variable MEASURE with my point values of interest in a long format.
What I'm stuck on is how I can filter the df further, to use the lower and upper limit values in the ymin and ymax arguments.
I've tried something like:
sample %>% filter(MEASURE == "DIAG_RATE_65_PLUS") %>% ggplot(aes(x=ACH_DATE, y=VALUE, group=ACH_DATE)) +
geom_dotplot(binaxis = "y", stackdir = "center", dotsize=0.2) +
geom_errorbar(aes(x = ACH_DATE,
ymin = sample %>% filter(MEASURE == "DIAG_RATE_65_PLUS_LL") %>% select(VALUE),
ymax = sample %>% filter(MEASURE == "DIAG_RATE_65_PLUS_UL") %>% select(VALUE)),
data = sample %>% filter(MEASURE != "DIAG_RATE_65_PLUS"),
colour="red")
Which throws the error: Error: Columns `ymin`, `ymax` must be 1d atomic vectors or lists. I've tried wrapping my input to the ymin and ymax arguments with as.vector, but that doesn't seem to help.
ggplot, like other tidyverse libraries, works with non-standard evaluation. It's expecting the bare names of data frame columns in arguments such as ymin. What you supplied is instead a data frame with only 1 column: dplyr::select returns a data frame/tibble with the given columns, hence the error about needing to supply a vector.
sample %>% filter(MEASURE == "DIAG_RATE_65_PLUS_LL") %>% select(VALUE)
#> # A tibble: 4 x 1
#> VALUE
#> <dbl>
#> 1 66.2
#> 2 66.3
#> 3 69.1
#> 4 69.6
If you really wanted to use this method of having all your types of measures in one column and filtering for different types, dplyr::pull takes a single column name and returns the data in that column as a vector.
However, there are multiple concerns you're trying to handle in this data frame that you probably ought to separate. You have observation values (means, medians, or whatever), you have upper confidence interval limits, and you have lower confidence interval limits. While the answer to ggplot issues is often long-shaping data, this is a case where these are three different concerns that have different places in your plot—therefore, you're better off making them individual columns. You can do this with tidyr::spread.
library(dplyr)
library(ggplot2)
sample %>%
tidyr::spread(key = MEASURE, value = VALUE)
#> # A tibble: 4 x 5
#> NAME ACH_DATE DIAG_RATE_65_PL… DIAG_RATE_65_PLU… DIAG_RATE_65_PLU…
#> <chr> <date> <dbl> <dbl> <dbl>
#> 1 NOTTING… 2017-09-01 77 69.1 83.6
#> 2 NOTTING… 2017-10-01 77.5 69.6 84.2
#> 3 WEST YO… 2017-09-01 73.6 66.2 79.8
#> 4 WEST YO… 2017-10-01 73.7 66.3 80
And then use those separate columns that have separate purposes for the corresponding parts of your geoms.
sample %>%
tidyr::spread(key = MEASURE, value = VALUE) %>%
ggplot(aes(x = ACH_DATE, y = DIAG_RATE_65_PLUS, group = ACH_DATE)) +
geom_dotplot(binaxis = "y") +
geom_errorbar(aes(ymin = DIAG_RATE_65_PLUS_LL, ymax = DIAG_RATE_65_PLUS_UL))
#> `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
Created on 2018-10-01 by the reprex package (v0.2.1)
I have a column in seconds that I need to plot as "%H%M%S".
I've tried using lubridate pkg, but the column results in:
# Convert the numeric seconds column to a lubridate Period, in place
loadtime_df$avgPageLoadTime <- seconds_to_period(loadtime_df$avgPageLoadTime)
Formal class 'Period' [package "lubridate"]
that I can plot but doesn't show any format.
loadtime_df <- structure(list(date = structure(c(17766, 17767, 17768, 17769,
17770, 17771), class = "Date"), pagePath = c("/webapp/wcs/stores/servlet/CategoryDisplay?urlRequestType=Base&catalogId=3074457345616676668&categoryId=3074457345616676994&pageView=grid&urlLangId=-24&beginIndex=0&langId=-24&top_category=3074457345616676981&parent_category_rn=3074457345616720192&storeId=10151",
"/webapp/wcs/stores/servlet/CategoryDisplay?urlRequestType=Base&catalogId=3074457345616676668&categoryId=3074457345616676994&pageView=grid&urlLangId=-24&beginIndex=0&langId=-24&top_category=3074457345616676981&parent_category_rn=3074457345616720192&storeId=10151",
"/webapp/wcs/stores/servlet/CategoryDisplay?urlRequestType=Base&catalogId=3074457345616676668&categoryId=3074457345616676994&pageView=grid&urlLangId=-24&beginIndex=0&langId=-24&top_category=3074457345616676981&parent_category_rn=3074457345616720192&storeId=10151",
"/webapp/wcs/stores/servlet/CategoryDisplay?urlRequestType=Base&catalogId=3074457345616676668&categoryId=3074457345616676994&pageView=grid&urlLangId=-24&beginIndex=0&langId=-24&top_category=3074457345616676981&parent_category_rn=3074457345616720192&storeId=10151",
"/webapp/wcs/stores/servlet/CategoryDisplay?urlRequestType=Base&catalogId=3074457345616676668&categoryId=3074457345616676994&pageView=grid&urlLangId=-24&beginIndex=0&langId=-24&top_category=3074457345616676981&parent_category_rn=3074457345616720192&storeId=10151",
"/webapp/wcs/stores/servlet/CategoryDisplay?urlRequestType=Base&catalogId=3074457345616676668&categoryId=3074457345616676994&pageView=grid&urlLangId=-24&beginIndex=0&langId=-24&top_category=3074457345616676981&parent_category_rn=3074457345616720192&storeId=10151"
), pageviews = c(245L, 225L, 194L, 214L, 214L, 213L), pageLoadTime = c(18965L,
185834L, 31115L, 114561L, 88807L, 0L), avgPageLoadTime = c(6,
27, 16, 138, 144, 0), bouncerate = c(5.63380281690141, 3.48837209302326,
5.40540540540541, 7.01754385964912, 0, 5), mes = c("agosto",
"agosto", "agosto", "agosto", "agosto", "agosto")), .Names = c("date",
"pagePath", "pageviews", "pageLoadTime", "avgPageLoadTime", "bouncerate",
"mes"), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
))
This is what I need to plot:
ggplot(loadtime_df, aes(date,avgPageLoadTime)) +
geom_point() +
geom_smooth()
But with the Y axis with breaks: "00:01:00", "00:02:00", "00:03:00", "00:04:00", "00:05:00".
You are going to have to provide strings for ggplot to assign as labels.
If you read ?scale_y_continuous(labels=...), you'll see that labels= takes either waiver(), character, or a function. If you want to specify the specific locations and representations, then you'll want to specify both breaks= and labels=. However, typically you want ggplot2 to determine where to put the axis labels, so we'll provide a function that takes a value and returns a string.
I'm guessing there's a helper function somewhere to do this, but here's a base-R version. (The origin of the function is unimportant, since we can replace our function with another with likely the same outcome.)
This formatting function cheats a little by temporarily converting the seconds of avgPageLoadTime to POSIXct and then to a string. Doing it this way means it honors options("digits.secs"), if set.
# Format a numeric vector of seconds as "HH:MM:SS" strings.
#
# x           numeric seconds; treated as an offset from 1970-01-01 00:00:00
#             and printed in UTC, so only the time-of-day part is shown.
# digits.secs optional number of fractional-second digits. When supplied it
#             temporarily overrides options("digits.secs") for this call;
#             when NULL the session's current setting is used.
#
# Returns a character vector the same length as x.
fmt_hms <- function(x, digits.secs=NULL) {
  override_precision <- !is.null(digits.secs)
  if (override_precision) {
    # options() returns the previous values, which are restored on exit
    saved_opts <- options(digits.secs = digits.secs)
    on.exit(options(saved_opts), add = TRUE)
  }
  instants <- as.POSIXct(x, origin = "1970-01-01 00:00:00")
  format(instants, format = "%H:%M:%OS", tz = "UTC")
}
In order to demonstrate this, I'll change one of the values of your data:
loadtime_df$avgPageLoadTime[3] <- loadtime_df$avgPageLoadTime[3] + 0.123456
fmt_hms(loadtime_df$avgPageLoadTime)
# [1] "00:00:06" "00:00:27" "00:00:16" "00:02:18" "00:02:24" "00:00:00"
fmt_hms(loadtime_df$avgPageLoadTime, digits.secs=3)
# [1] "00:00:06.000" "00:00:27.000" "00:00:16.123" "00:02:18.000" "00:02:24.000"
# [6] "00:00:00.000"
So we can just provide this function:
library(ggplot2)
ggplot(loadtime_df, aes(date,avgPageLoadTime)) +
geom_point() +
geom_smooth() +
scale_y_continuous(labels=fmt_hms)
I think you need to convert the date values in seconds to a %H%m%s format and then try plotting. I think you need one of the below approaches -
library(ggplot2)
library(lubridate)
# convert seconds to periods
td <- seconds_to_period(loadtime_df$avgPageLoadTime)
# then apply the required format: zero-padded HH:MM:SS built from the
# period's hour, minute and second components
avgPageLoadTime_vector <- sprintf('%02d:%02d:%02d', hour(td), minute(td),
                                  second(td))
# plotting using %H%m%s we can use them as y-axis ticks
# this will give you the same plot as above but Y-axis is fuzzy
ggplot(loadtime_df, aes(date,avgPageLoadTime)) +
geom_point() +
geom_smooth() +
scale_y_continuous(breaks = loadtime_df$avgPageLoadTime,
labels = avgPageLoadTime_vector)
# if you just want to plot with points and not use geom_smooth
# convert the column avgPageLoadTime into %H%m%s date-time format
loadtime_df$avgPageLoadTime <- avgPageLoadTime_vector
# this gives you the right Y-axis values but no smoothing
ggplot(loadtime_df, aes(date,avgPageLoadTime)) +
geom_point()
]2
Hi all: I am struggling to color points by date in ggplot2. There are two outcomes that would work for me here. 1) colour the points by the variable recent_elections and just add straight lines denoting the date of the most recent election for each point. The current code does that. 2) preferably, but harder, just add the lines, coloured differently for each election, showing a legend that printed the date of the most recent federal election.
My current data and attempt is below.
library(dplyr)
library(tidyr)
library(ggplot2)
members <- structure(list(date = structure(c(6209, 6574, 7305, 14984, 15339,
15341, 17169, 17174), class = "Date"), members = c(180835, 193225,
200010, 86545, 95000, 128351, 41000, 124000), population = c(26449000,
26798000, 27512000, 33476688, 33476688, 33476688, 35151728, 35151728
), votes_previous_election = c(2359915, 2685263, 2685263, 4508474,
4508474, 4508474, 3470350, 3470350), vote_percent = c(18.8, 20.4,
20.4, 30.6, 30.6, 30.6, 19.7, 19.7), seats_previous_election = c(32,
43, 43, 103, 103, 103, 44, 44), recent_election = structure(c(5360,
6899, 6899, 15096, 15096, 15096, 16727, 16727), class = "Date")), .Names =
c("date",
"members", "population", "votes_previous_election", "vote_percent",
"seats_previous_election", "recent_election"), class = "data.frame",
row.names = c(NA,
-8L))
members %>%
select(population, votes_previous_election, seats_previous_election, members,
date, recent_election) %>%
mutate(., members_per_capita=members/population,
members_votes=members/votes_previous_election,
members_seats=members/seats_previous_election) %>%
gather(Variable, Value, c(members_per_capita,members_votes,
members_seats))%>%
ggplot(., aes(x=date, y=Value,
group=recent_election))+
geom_point(aes(fill=recent_election))+
facet_wrap(~Variable, scales='free')+
geom_vline(data=members, aes(xintercept=as.numeric(recent_election), col='red'), show.legend=F)
# Derive three membership ratios, reshape them to long format, and plot one
# facet per ratio with a vertical line at each most-recent election date.
members %>%
  select(population, votes_previous_election, seats_previous_election, members,
         date, recent_election) %>%
  mutate(members_per_capita = members / population,
         members_votes = members / votes_previous_election,
         members_seats = members / seats_previous_election) %>%
  gather(Variable, Value, c(members_per_capita, members_votes,
                            members_seats)) %>%
  ggplot(aes(x = date, y = Value, group = recent_election)) +
  geom_point() +
  # factor() makes the election date a discrete colour, giving one legend
  # entry per election
  geom_vline(data = members,
             aes(xintercept = as.numeric(recent_election),
                 col = factor(recent_election)),
             show.legend = TRUE) +
  facet_wrap(~Variable, scales = 'free') +
  # Lower x limit is widened so the earliest election line is not clipped;
  # NA leaves the upper limit automatic
  scale_color_discrete(name = "Recent Election") +
  xlim(as.Date("1984-01-01"), NA)
I changed the col="red" in geom_vline to col=factor(recent_election) so that the vertical lines are colored by recent_election. The factor() makes sure that recent_election is treated as discrete instead of continuous. scale_color_discrete sets the legend title. Note that the election date "1984-09-04" falls outside the x range of your points, so I added an xlim(as.Date("1984-01-01"), NA) to also include that election date. NA sets the upper limit automatically.
Let's say I have the following data:
stocks <- structure(list(date = structure(c(15120, 15126, 15156, 15187,
15218, 15250, 15279, 15309, 15342, 15371), class = "Date"), AAPL = c(0,
-0.0349594915528398, 0.163285209696362, -0.0144692603838991,
-0.00912094189637977, 0.0615229895783601, -0.0557834027614259,
0.0596546102691159, 0.127111450820476, 0.188310389721697), LMT = c(0,
0.0394093623514219, -0.064715298915223, -0.0103142125320749,
-0.0208923278478336, 0.0448787708206146, 0.0430164493053814,
0.035188599184363, 0.0175524826908838, 0.0861273642597269)), .Names = c("date",
"AAPL", "LMT"), row.names = c(NA, 10L), class = "data.frame")
Which looks something like that:
date AAPL LMT
1 2011-05-26 0.000000000 0.00000000
2 2011-06-01 -0.034959492 0.03940936
3 2011-07-01 0.163285210 -0.06471530
4 2011-08-01 -0.014469260 -0.01031421
5 2011-09-01 -0.009120942 -0.02089233
6 2011-10-03 0.061522990 0.04487877
7 2011-11-01 -0.055783403 0.04301645
8 2011-12-01 0.059654610 0.03518860
9 2012-01-03 0.127111451 0.01755248
10 2012-02-01 0.188310390 0.08612736
Then I melt it:
library(reshape2)
stocks <- melt(stocks, id.vars = "date")
And then plot it as the cumulative series:
library(ggplot2)
ggplot(stocks, aes(date, cumsum(value), color = variable)) + geom_line()
As you see, the starting points of the series for some reason have different y values (and thus, the graphs do start from different points). The question would be the following: is there any way to make both AAPL and LMT series start from the same (0,0) point?
I would calculate the cumsum value first using dplyr or plyr:
library(dplyr)
stocks %>%
group_by(variable) %>%
mutate(cumsum = cumsum(value)) %>%
ggplot(., aes(x = date, color = variable)) +
geom_line(aes(y = cumsum))