Data:
df1 <- structure(list(Index = 1:11, Duration = structure(c(1487577655,
1487577670, 1487577675, 1487577680, 1487577685, 1487577680, 1487577700,
1487577705, 1487577695, 1487577700, 1487577680), class = c("POSIXct",
"POSIXt"), tzone = "")), .Names = c("Index", "Duration"), class = "data.frame", row.names = 3:13)
Now I construct the graph as follows:
g1 <- ggplot(df1, aes(x = Index, y = Duration, color = Duration))+
geom_point()+
geom_line()+
scale_y_datetime(labels = date_format("%M:%S"))
As it is now, the color scale is set to the default "Black" to "Blue" gradient.
The problem is, I get an error trying to assign a custom gradient to the data.
For a non-POSIXct object:
scale_color_gradient("Duration", low = "#D80427", high = "#07a0ff", space = "Lab")
works, but I get the following error with the POSIXct object df1$Duration as the explanatory variable:
Error in Ops.POSIXt((x - from[1]), diff(from)) : '/' not defined
for "POSIXt" objects
Is there a different gradient function I need to use when graphing a POSIXct object?
You may use trans = time_trans():
library(ggplot2)
library(scales)
g1 +
scale_color_gradient("Duration", low = "#D80427", high = "#07a0ff",
trans = time_trans())
If you wish another format of the labels in the legend, add e.g. labels = format(pretty(df1$Duration), "%M:%S").
We can convert date to number for colour:
library(ggplot2)
library(scales)
ggplot(df1, aes(x = Index, y = Duration, color = as.numeric(Duration))) +
geom_point() +
geom_line() +
scale_y_datetime(labels = date_format("%M:%S")) +
scale_color_gradient("Duration", low = "#D80427", high = "#07A0FF",
labels = c("00", "10", "20", "30", "40"))
As suggested by #Henrik, to avoid hardcoding the labels use below:
# avoid hardcoding labels using pretty()
ggplot(df1, aes(x = Index, y = Duration, color = as.numeric(Duration))) +
geom_point() +
geom_line() +
scale_y_datetime(labels = date_format("%M:%S")) +
scale_color_gradient("Duration", low = "#D80427", high = "#07A0FF",
breaks = pretty(as.numeric(df1$Duration)),
labels = format(pretty(df1$Duration), "%M:%S"))
Related
I have this data frame where I want to create multiple plots at the same time in a loop, but when I run the code it gives me an error. Can anyone please tell me what I am doing wrong!
Data:
structure(list(Date = structure(c(289094400, 297043200, 304992000,
312854400, 320716800, 328665600), tzone = "UTC", class = c("POSIXct",
"POSIXt")), NORTH = c(4.06976744186047, 5.51675977653633, 7.2799470549305,
4.75015422578655, 4.59363957597172, 3.15315315315317), YORKSANDTHEHUMBER = c(4.0121120363361,
5.45851528384282, 9.52380952380951, 6.04914933837431, 3.03030303030299,
5.42099192618225), NORTHWEST = c(6.57894736842105, 6.95256660168939,
6.50060753341436, 5.5904164289789, 4.59211237169096, 4.70041322314051
), EASTMIDS = c(4.98489425981872, 8.20143884892085, 6.91489361702127,
5.22388059701494, 5.61465721040189, 4.64465584778958), WESTMIDS = c(4.65838509316771,
4.74777448071216, 8.66855524079319, 6.56934306569344, 3.22896281800389,
3.17535545023698), EASTANGLIA = c(6.74525212835624, 8.58895705521476,
8.47457627118643, 10.7291666666667, 4.8447789275635, 4.84522207267835
), OUTERSEAST = c(6.7110371602884, 7.53638253638255, 9.47317544707589,
8.56512141280351, 3.82269215128102, 2.11515863689776), OUTERMET = c(4.54545454545458,
6.58505698607005, 7.36633663366336, 7.08225746956843, 4.3747847054771,
1.68316831683168), LONDON = c(8.11719500480309, 10.3065304309196,
6.32299637535239, 7.65151515151515, 1.30190007037299, 2.1535255296978
), SOUTHWEST = c(6.17577197149644, 7.71812080536912, 7.63239875389407,
9.45489628557649, 2.46804759806079, 2.19354838709679), WALES = c(6.09418282548476,
8.35509138381203, 7.40963855421687, 7.01065619742007, 1.15303983228513,
3.47150259067357), SCOTLAND = c(5.15222482435597, 4.12026726057908,
5.40106951871658, 8.67579908675796, -0.280112044817908, 2.94943820224719
), NIRELAND = c(4.54545454545454, 4.94752623688156, 4.42857142857145,
2.96397628818967, 6.06731620903454, 0.0835073068893502), UK = c(5.76890543055322,
7.20302836425676, 7.39543442582184, 7.22885986848197, 3.23472252213347,
2.95766398929048)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
Code:
for (i in 2:ncol(data2)) { # Printing ggplot within for-loop
print(ggplot(data2, aes(x = Date, y = data2[, i])) + # Basic ggplot2 plot of x & y's
geom_line() +
labs(title = "Uk States",
y = "",
x = "") +
theme_bw() +
geom_hline(yintercept = 0))
Sys.sleep(1)
}
Error:
Don't know how to automatically pick scale for object of type tbl_df/tbl/data.frame. Defaulting to continuous.
Error in is.finite(x) : default method not implemented for type 'list'
I would suggest to loop over the column names instead of value. You may then use .data to use as y-index.
library(tidyverse)
for(i in names(data2)[-1]) { # Printing ggplot within for-loop
# Basic ggplot2 plot of x & y's
print(ggplot(data2, aes(x = Date, y = .data[[i]])) +
geom_line()+ labs(title = "Uk States",
y = "",
x = "")+
theme_bw()+
geom_hline(yintercept = 0))
Sys.sleep(1)
}
You may also try facet_wrap to combine multiple plots together.
data2 %>%
pivot_longer(cols = -Date) %>%
ggplot(aes(Date, value)) +
geom_line() + facet_wrap(~name) +
labs(title = "Uk States", x = "", y = "") +
theme_bw() +
geom_hline(yintercept = 0)
Another way of generating ggplot in a loop is to use lapply, where we loop for colnames and use aes_string as the aesthetic mapping.
Here the results are saved to the list ggplot_list, where you can extract individual plot by indexing (e.g. plot for NORTH is stored in ggplot_list[[1]])
Note that I've changed labs(title = i) so that the plot title would be your column names.
library(ggplot2)
ggplot_list <- lapply(colnames(data2[-1]), \(i) {
ggplot(data2, aes_string("Date", x)) +
geom_line() +
labs(title = i, y = "", x = "") +
theme_bw() +
geom_hline(yintercept = 0)
})
I have this simple data:
structure(list(ID = 1:2, timing = structure(c(1654641111.14,
1654640774.523), tzone = "CET", class = c("POSIXct", "POSIXt"))), class = "data.frame", row.names = c(NA,
-2L))
ID timing
1 1 2022-06-08 00:31:51.140
2 2 2022-06-08 00:26:14.523
When I plot using ggplot2, I get this:
ggplot(df_test,
aes(x = ID,
y = timing)) +
geom_point()
But when I set limits, the graph is empty:
lims <- as.POSIXct(strptime(c("35:00", "25:00"),
format = "%M:%OS",
tz = "CET"))
ggplot(df_test,
aes(x = ID,
y = timing)) +
geom_point() +
scale_y_datetime(limits = lims)
Warning message:
Removed 2 rows containing missing values (geom_point).
I tried to find any solution, but I can't figure out why the limits remove all my values. Does anyone have a solution?
This is simply due to your limits being round the wrong way (they should be earliest, latest whereas you have latest, earliest).
So you can do:
ggplot(df_test,
aes(x = ID,
y = timing)) +
geom_point() +
scale_y_datetime(limits = rev(lims))
I am trying to make a graph with "time markers". These time markers are vertical lines for certain dates. Time data are POSIXct format. I would like to use the awesome interactive interface of Plotly and use my ggplot objects in it.
The problem is that these "time markers" doesn't show in after using ggplotly(). I ave already tried with plotly::add_segments() but it does not work.
Here are two reproductible examples :
1. With non-POSIXct data it works fine
# dummy dataset
df2 = data.frame(id = 1:10, measure = runif(10, 0, 20))
events2 = data.frame(number = c(2,3,8))
# ggplot graph
p2 = ggplot() + geom_line(data = df2, aes(x = id, y = measure)) +
geom_vline(data = events2, aes(xintercept = events2$number), color = "red")
p2
# plotly graph that displays the geom_vline properly
ggplotly(p2)
2. With POSIXct data is doesn't display the correct result
# dummy dataset
df = data.frame(date = seq(as.POSIXct("2017-07-01", tz = "UTC", format = "%Y-%m-%d"),
as.POSIXct("2018-04-15", tz = "UTC", format = "%Y-%m-%d"),
"1 month"),
measure = runif(10, 0, 20))
events = data.frame(date_envents = as.POSIXct(c("2017-10-12", "2017-11-12", "2018-03-15"), tz = "UTC", format = "%Y-%m-%d"))
# ggplot graph
p = ggplot() + geom_line(data = df, aes(x = date, y = measure)) +
geom_vline(data = events, aes(xintercept = events$date), color = "red")
p
# plotly graph that does not display the geom_vline properly
ggplotly(p)
I have seen some workaround (like this one : Add vertical line to ggplotly plot) but it is "complicated". Is there a more simple way to solve this problem ?
I am using Windows 10 with R version 3.5.0, RStudio and the following packages :
library(tidyverse) and library(plotly)
A simple workaround is to set the xintecept of the geom_vline to numeric.
sample data
df = data.frame(date = seq(as.POSIXct("2017-07-01", tz = "UTC", format = "%Y-%m-%d"),
as.POSIXct("2018-04-15", tz = "UTC", format = "%Y-%m-%d"),
"1 month"),
measure = runif(10, 0, 20))
events = data.frame(date_envents = as.POSIXct(c("2017-10-12", "2017-11-12", "2018-03-15"), tz = "UTC", format = "%Y-%m-%d"))
code
p = ggplot() + geom_line(data = df, aes(x = date, y = measure)) +
geom_vline(data = events, aes(xintercept = as.numeric(events$date)), color = "red")
result
ggplotly(p)
I struggle to add a line on top with the total of this stacked barchart:
demandDriversdf = structure(list(year = structure(c(1356998400, 1388534400, 1420070400,
1451606400, 1483228800), tzone = "GMT", tclass = c("POSIXct",
"POSIXt"), class = c("POSIXct", "POSIXt")), one= c(12.4882571461364,
13.0984912135388, 12.1908055157534, 8.35335266490711, 4.38593754938248
), two= c(8.73113484771066, -4.34931681021004, -3.04955505552055,
-1.69136803847247, 3.06500464975644), three= c(0.0669199673877559,
-0.194488564805058, 0.721483847234409, 2.85829802643513, 6.14894193920574
), four= c(6.98748008979101, 3.7122726468811, -15.0029846301367,
-20.3768539034347, 9.38948700033012)), .Names = c("year", "one",
"two", "three", "four"), row.names = c("2013-01-01", "2014-01-01",
"2015-01-01", "2016-01-01", "2017-01-01"), class = "data.frame")
demandDriversdf2 = reshape2::melt(demandDriversdf, id.vars=c("year"), value.name="driver")
rowS = rowSums(demandDriversdf[,setdiff(colnames(demandDriversdf),"year")])
demandDriversdf2 = rbind(demandDriversdf2, data.frame(year = names(rowS), variable="Total", driver = rowS))
demandDriversdf2$year=substr(demandDriversdf2$year,1,4)
demandDriversdf2_1 <- subset(demandDriversdf2,driver >= 0 & variable!="Total")
demandDriversdf2_2 <- subset(demandDriversdf2,driver < 0 & variable!="Total")
gdemandDrivers = ggplot2::ggplot() +
ggplot2::geom_bar(data = demandDriversdf2_1, aes(x=year, y=driver, fill=variable),stat = "identity") +
ggplot2::geom_bar(data = demandDriversdf2_2, aes(x=year, y=driver, fill=variable),stat = "identity") +
ggplot2::geom_line(data = subset(demandDriversdf2, variable=="Total"), aes(x=year, y=driver)) +
ggplot2::scale_fill_brewer(palette = 2, type = "qual")
I get this strange warning
geom_path: Each group consists of only one observation. Do you need to
adjust the group aesthetic?
and ideally would like the line to be in black with also black points.
Just add group = 1 to aes() in geom_line():
library(ggplot2)
ggplot() +
geom_bar(data = demandDriversdf2_1, aes(x=year, y=driver, fill=variable),stat = "identity") +
geom_bar(data = demandDriversdf2_2, aes(x=year, y=driver, fill=variable),stat = "identity") +
geom_line(data = subset(demandDriversdf2, variable=="Total"),
aes(x=year, y=driver, group = 1)) +
scale_fill_brewer(palette = 2, type = "qual")
The reason for this:
For line graphs, the data points must be grouped so that it knows which points to connect. In this case, it is simple -- all points should be connected, so group=1. When more variables are used and multiple lines are drawn, the grouping for lines is usually done by variable.
Reference: Cookbook for R, Chapter: Graphs Bar_and_line_graphs_(ggplot2), Line graphs.
I am new to R and ggplot2.I have searched a lot regarding this but I could not find the solution.
Sample observation1 observation2 observation3 percentage
sample1_A 163453473 131232689 61984186 30.6236955883
Sample1_B 170151351 137202212 59242536 26.8866816109
sample2_A 194102849 162112484 89158170 40.4183031852
sample2_B 170642240 141888123 79925652 41.7493687378
sample3_A 192858504 161227348 90532447 41.8068248626
sample3_B 177174787 147412720 81523935 40.5463120438
sample4_A 199232380 174656081 118115358 55.6409038531
sample4_B 211128931 186848929 123552556 54.7201927527
sample5_A 186039420 152618196 87012356 40.9656544833
sample5_B 145855252 118225865 66265976 39.5744515254
sample6_A 211165202 186625116 112710053 48.5457722338
sample6_B 220522502 193191927 114882014 47.238670909
I am planning to plot a bar plot with ggplot2. I want to plot the first three columns as a bar plot "dodge" and label the observation3 bar with the percentage. I could plot the bars as below but I could not use geom_text() to add the label.
data1 <- read.table("readStats.txt", header=T)
data1.long <- melt(data1)
ggplot(data1.long[1:36,], aes(data1.long$Sample[1:36],y=data1.long$value[1:36], fill=data1.long$variable[1:36])) + geom_bar(stat="identity", width=0.5, position="dodge")
Transform data1 to long form with the observation columns as the measure variables and the Sample and percentage columns as the id variables. Compute the maximum value, mx, to be used to place the percentages. Then perform the plot. Note that geom_bar uses data1.long but geom_text uses data1. We have colored the text giving the percentages the same color as the observation3 bars. (See this post for how to specify default colors.) Both inherit aes(x = Sample) but use different y and other aesthetics. We clean up the X axis labels by removing all lower case letters and underscores from the data1$Sample (optional).
library(ggplot2)
library(reshape2)
data1.long <- melt(data1, measure = 2:4) # cols 2:4 are observation1, ..., observation3
mx <- max(data1.long$value) # maximum observation value
ggplot(data1.long, aes(x = Sample, y = value)) +
geom_bar(aes(fill = variable), stat = "identity", width = 0.5, position = "dodge") +
geom_text(aes(y = mx, label = paste0(round(percentage), "%")), data = data1,
col = "#619CFF", vjust = -0.5) +
scale_x_discrete(labels = gsub("[a-z_]", "", data1$Sample))
(click on chart to enlarge)
Note: We used this data. Note that one occurrence of Sample was changed to sample with a lower case s:
Lines <- "Sample observation1 observation2 observation3 percentage
sample1_A 163453473 131232689 61984186 30.6236955883
sample1_B 170151351 137202212 59242536 26.8866816109
sample2_A 194102849 162112484 89158170 40.4183031852
sample2_B 170642240 141888123 79925652 41.7493687378
sample3_A 192858504 161227348 90532447 41.8068248626
sample3_B 177174787 147412720 81523935 40.5463120438
sample4_A 199232380 174656081 118115358 55.6409038531
sample4_B 211128931 186848929 123552556 54.7201927527
sample5_A 186039420 152618196 87012356 40.9656544833
sample5_B 145855252 118225865 66265976 39.5744515254
sample6_A 211165202 186625116 112710053 48.5457722338
sample6_B 220522502 193191927 114882014 47.238670909"
data1 <- read.table(text = Lines, header = TRUE)
UPDATE: minor improvements
It might be that G. Grothendieck's answer is a better solution, but here's my suggestion (code below)
# install.packages("ggplot2", dependencies = TRUE)
require(ggplot2)
df <- structure(list(Sample = structure(1:12, .Label = c("sample1_A",
"Sample1_B", "sample2_A", "sample2_B", "sample3_A", "sample3_B",
"sample4_A", "sample4_B", "sample5_A", "sample5_B", "sample6_A",
"sample6_B"), class = "factor"), observation1 = c(163453473L,
170151351L, 194102849L, 170642240L, 192858504L, 177174787L, 199232380L,
211128931L, 186039420L, 145855252L, 211165202L, 220522502L),
observation2 = c(131232689L, 137202212L, 162112484L, 141888123L,
161227348L, 147412720L, 174656081L, 186848929L, 152618196L,
118225865L, 186625116L, 193191927L), observation3 = c(61984186L,
59242536L, 89158170L, 79925652L, 90532447L, 81523935L, 118115358L,
123552556L, 87012356L, 66265976L, 112710053L, 114882014L),
percentage = c(30.6236955883, 26.8866816109, 40.4183031852,
41.7493687378, 41.8068248626, 40.5463120438, 55.6409038531,
54.7201927527, 40.9656544833, 39.5744515254, 48.5457722338,
47.238670909)), .Names = c("Sample", "observation1", "observation2",
"observation3", "percentage"), class = "data.frame", row.names = c(NA,
-12L))
# install.packages("reshape2", dependencies = TRUE)
require(reshape2)
data1.long <- melt(df, id=c("Sample"), measure.var = c("observation1", "observation2", "observation3"))
data1.long$percentage <- paste(round(data1.long$percentage, 2), "%", sep="")
data1.long[data1.long$variable == "observation1" | data1.long$variable == "observation2" ,2] <- ""
ggplot(data1.long, aes(x = Sample, y = value, fill=variable)) +
geom_bar(, stat="identity", width=0.5, position="dodge") +
geom_text(aes(label = percentage), vjust=2.10, size=2, hjust=-.06, angle = 90)