r ggplot not recognizing date format - r

I have the following chart.
p1 <- ggplot(data = mydat, aes(x = time))+
geom_line(aes(y = sumabsdiff, colour = 'sumabsdiff'))+
geom_line(aes(y = windsize, col='windsize'))+
scale_x_time(breaks = scales::date_breaks('1 sec'))+ #('15 secs'))+
scale_color_manual(values=c('sumabsdiff' = 'black',
"windsize" = "red"))+
theme(legend.position = "top")
As you can see, the date is all messed up even though time seems perfectly fine to me.
> mydat$time
[1] "2022-09-19 12:44:47 UTC" "2022-09-19 12:44:48 UTC" "2022-09-19 12:44:49 UTC" "2022-09-19 12:44:50 UTC"
[5] "2022-09-19 12:44:50 UTC" "2022-09-19 12:44:50 UTC".
Any idea why?
Data:
mydf <- structure(list(time = structure(c(1663591487.801, 1663591488.614,
1663591489.626, 1663591490.097, 1663591490.202, 1663591490.717
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), bid = c(11735.68,
11735.18, 11734.93, 11734.43, 11734.3, 11734.43), ask = c(11737.58,
11737.08, 11736.83, 11736.33, 11736.2, 11736.33), flags = c(6,
6, 6, 6, 6, 6), typical = c(11736.63, 11736.13, 11735.88, 11735.38,
11735.25, 11735.38), row = 266:271, prevrow_short = c(258L, 258L,
260L, 261L, 262L, 265L), windsize = c(9, 10, 9, 9, 9, 7), diff = c(-0.119999999998981,
-0.5, -0.25, -0.5, -0.130000000001019, 0.130000000001019), absdiff = c(0.119999999998981,
0.5, 0.25, 0.5, 0.130000000001019, 0.130000000001019), sumabsdiff = c(3.60999999999694,
4.10999999999694, 3.72999999999593, 3.85999999999694, 3.61999999999898,
2.13000000000102), positive = c(FALSE, FALSE, FALSE, FALSE, FALSE,
TRUE), meanpos = c(0.444444444444444, 0.4, 0.333333333333333,
0.222222222222222, 0.222222222222222, 0.285714285714286), posdiff = c(0,
0, 0, 0, 0, 0.130000000001019), negdiff = c(0.119999999998981,
0.5, 0.25, 0.5, 0.130000000001019, 0), sumposdiff_short = c(1.36999999999898,
1.36999999999898, 1.23999999999796, 0.869999999998981, 0.869999999998981,
0.630000000001019), sumnegdiff_short = c(2.23999999999796, 2.73999999999796,
2.48999999999796, 2.98999999999796, 2.75, 1.5), power_short = c(0.37950138504159,
0.333333333333333, 0.332439678283999, 0.225388601036184, 0.240331491712493,
0.295774647887661), market_open = c(FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE), timediff = c(0.219000101089478, 0.812999963760376,
1.01199984550476, 0.470999956130981, 0.105000019073486, 0.515000104904175
), avgspeed = c(2.71247733209185, 2.42072135038066, 2.44698207323042,
2.3255814641193, 2.8019926211971, 2.14658085742225), relative_positive_diff = c(0.37950138504159,
0.333333333333333, 0.332439678283999, 0.225388601036184, 0.240331491712493,
0.295774647887661), timesec = structure(c(1663591487, 1663591488,
1663591489, 1663591490, 1663591490, 1663591490), class = c("POSIXct",
"POSIXt"), tzone = "UTC")), pandas.index = <environment>, row.names = 266:271, class = "data.frame")
By the way, the time actually includes milliseconds, perhaps that is the cause

?scale_x_time:
These are the default scales for the three date/time class. These
will usually be added automatically. To override manually, use
scale_*_date for dates (class 'Date'), scale_*_datetime for
datetimes (class 'POSIXct'), and scale_*_time for times (class
'hms').
Your time variable is class POSIXt, not hms, so you should be using scale_x_datetime instead.
ggplot(data = mydf, aes(x = time))+
geom_line(aes(y = sumabsdiff, colour = 'sumabsdiff'))+
geom_line(aes(y = windsize, col='windsize'))+
scale_x_datetime(breaks = "1 secs") +
scale_color_manual(values=c('sumabsdiff' = 'black',
"windsize" = "red"))+
theme(legend.position = "top")
You can format the axis labels with date_labels= and %-codes (listed in ?strptime):
ggplot(data = mydf, aes(x = time))+
geom_line(aes(y = sumabsdiff, colour = 'sumabsdiff'))+
geom_line(aes(y = windsize, col='windsize'))+
# scale_x_datetime(breaks = scales::date_breaks('1 sec'))+ #('15 secs'))+
scale_x_datetime(breaks = "1 sec", date_labels = "%H:%M:%S") +
scale_color_manual(values=c('sumabsdiff' = 'black',
"windsize" = "red"))+
theme(legend.position = "top")

Related

Trying to replicate a visualisation in R

Relatively inexperienced R user. I am trying to create something similar to the visualisation below with data for another country.
I've gone as far as creating the basic structure with data plotted in a vertical annual timeline with months running along the x axis but I have no idea how to edit the individual data points. I would appreciate any idea on how to move forward or even a completely different approach.
Here is my code using ggplot2:
p <- ggplot(forestfiresv, aes(y=year, x=dtstart))
p+geom_point() +
scale_x_datetime(lim=as.POSIXct(c("2021-01-01 00:01","2021-12-31 00:00", origin=lubridate::origin), "%m/%d %H:%M",tz="UTC"),expand = c(0,0), date_breaks="2 months", labels = date_format("%b"))+
theme_bw()
A data sample:
structure(list(year = c("2000", "2000", "2000", "2000", "2000",
"2000", "2000", "2000", "2000", "2000"), `Start date` = structure(c(11174, 11167, 11166, 11191,
11222, 11144, 11151, 11192, 11244, 11187), class = "Date"), `Start time` = c("02:15",
"16:05", "10:47", "15:41", "23:30", "15:29", "14:00", "13:53",
"17:39", "11:09"), `End date` = structure(c(11174,
11178, 11166, 11192, 11223, 11146, 11152, 11197, 11244, 11191
), class = "Date"), `End time` = c("14:00", "07:00", "19:00",
"22:00", "02:00", "12:00", "00:10", "13:30", "19:07", "11:30"
), Δάση = c(200, 1400, 400, 0, 0, 0, 600, 2000, 0, 260), `Forest` = c(800,
0, 0, 100, 100, 700, 0, 0, 0, 0), `Agricultural land` = c(0, 0, 0, 200, 0, 0, 200, 500, 0, 0), totalareaburnt = c(1000, 1400, 400, 500, 500, 700, 800, 2500, 350, 360), dtstart = structure(c(1628129700, 1627574700, 1627469220, 1629646860, 1632353400, 1625585340, 1626184800, 1629726780, 1634233140, 1629284940), class = c("POSIXct", "POSIXt"), tzone = "UTC"), dtend = structure(c(1628172000, 1628492400, 1627498800, 1629756000, 1632362400, 1625745600, 1626221400, 1630157400, 1634238420, 1629631800), class = c("POSIXct", "POSIXt"), tzone = "UTC")), .internal.selfref = <pointer: (nil)>, row.names = c(NA, 10L), class = c("data.table", "data.frame"))
This is the best I've obtained so far, but I bet it could be better. I've increased your example data frame because there was only one year of observation and I've injected some randomness to make the plot look better.
library(ggplot2)
ddf <- rbind(df,df,df,df,df,df,df,df,df,df)
ddf$year <- rep(2000:2009,each=10)
ddf$totalareaburnt <- sample(200:2500,100,replace = T)
ddf$dtstart <- ddf$dtstart+sample(86400*1:90,100,replace = T)
#duration in days
ddf$duration <- as.numeric(df$dtend-df$dtstart)/24
ddf$year <- as.integer(ddf$year)
ggplot(ddf,
aes(y = year,
x = dtstart)) +
geom_point(aes(size = totalareaburnt,
col = duration),
shape = 17,
alpha = 0.7) +
scale_x_datetime(
lim = as.POSIXct(
c("2021-01-01 00:01", "2021-12-31 00:00", origin = lubridate::origin),
"%m/%d %H:%M",
tz = "UTC"
),
expand = c(0, 0),
date_breaks = "1 months",
labels = scales::date_format("%b")
) +
theme_minimal() +
theme(
legend.position = "top",
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
axis.line = element_line(),
axis.ticks = element_line()
) +
scale_y_continuous(trans = "reverse", breaks = unique(ddf$year))+
scale_colour_gradientn(name= "Duartion (day)",colours = c( "yellow", "orange","darkred"))+
scale_size_continuous(name="Area burned (ha)")

Using segment labels in ggplot with ggrepel with smooth segments

This is my dataframe:
df<-structure(list(year = c(1984, 1984), team = c("Australia", "Brazil"
), continent = c("Oceania", "Americas"), medal = structure(c(3L,
3L), .Label = c("Bronze", "Silver", "Gold"), class = "factor"),
n = c(84L, 12L)), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"))
And this is my ggplot (my question is related to the annotations regard Brazil label):
ggplot(data = df)+
geom_point(aes(x = year, y = n)) +
geom_text_repel(aes(x = year, y = n, label = team),
size = 3, color = 'black',
seed = 10,
nudge_x = -.029,
nudge_y = 35,
segment.size = .65,
segment.curvature = -1,
segment.angle = 178.975,
segment.ncp = 1)+
coord_flip()
So, I have a segment divided by two parts. On both parts I have 'small braks'. How can I avoid them?
I already tried to use segment.ncp, change nudge_xor nudge_ynut its not working.
Any help?
Not really sure what is going on here. This is the best I could generate by experimenting with variations to the input values for segment... arguments.
There is some guidance at: https://ggrepel.slowkow.com/articles/examples.html which has an example with shorter leader lines, maybe that's an approach you could use.
df<-structure(list(year = c(1984, 1984), team = c("Australia", "Brazil"
), continent = c("Oceania", "Americas"), medal = structure(c(3L,
3L), .Label = c("Bronze", "Silver", "Gold"), class = "factor"),
n = c(84L, 12L)), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"))
library(ggplot2)
library(ggrepel)
ggplot(data = df)+
geom_point(aes(x = year, y = n)) +
geom_text_repel(aes(x = year, y = n, label = team),
size = 3, color = 'black',
seed = 1,
nudge_x = -0.029,
nudge_y = 35,
segment.size = 0.5,
segment.curvature = -0.0000002,
segment.angle = 1,
segment.ncp = 1000)+
coord_flip()
Created on 2021-08-26 by the reprex package (v2.0.0)

How to specify a certain csv in the errorbar line

I am trying to make a plot with three different csvs. In 2 of them, the columns are the same i.e. Year, GMSL and GMSLerror.
In the Frederikse file the columns are Year, GMSL, GMSLerrorlow and GMSLerrorup. How can I tell R to plot the Frederikse error using the columns GMSLerrorlow and GMSLerrorup? I tried the following but it did not work. Thanks.
p1<-files <- c("Frederikse.csv", "ChurchandWhite.csv","Hay.csv")
map_dfr(files, ~ read_csv(.x) %>%
mutate(Author = .x)) %>%
ggplot(aes(x = Time, y = GMSL, color = Author,fill=Author)) +
geom_line(size=0.6)+
theme_bw(12)+
theme(panel.grid.major = element_blank())+
theme(panel.grid.minor = element_blank())+
labs(x = "Year", y = "GMSL (mm)",color="Author")+
geom_errorbar(aes(ymin=GMSL-GMSLerror, ymax =GMSL+GMSLerror,alpha=Author))+
geom_errorbar("Frederikse.csv",(aes(ymin=GMSL-GMSLerrorlow, ymax =GMSL+GMSLerrorup,alpha=Author)))
scale_alpha_manual(values = c(0.3, 0.3, 0.8))+
scale_colour_manual(values=c("#BAB3F0","#1D3E72","#201641"))
p1
structure(list(Year = 1900:1905, GMSLerrorlow = c(-203.5572666,
-201.0185091, -212.0740442, -202.6975639, -200.1670151, -192.1312551
), GMSL = c(-173.2614421, -168.8016753, -180.389967, -170.2678322,
-168.7200709, -160.9814287), GMSLerrorup = c(-141.002807, -135.8976091,
-148.213824, -138.9305182, -137.4501224, -130.3514508)), row.names = c(NA,
6L), class = "data.frame")
structure(list(Time = 1900:1905, GMSL = c(-131.15, -130.5, -129.77,
-128.85, -128.1, -127.56), GMSLerror = c(25.32, 25.17, 25.01,
24.86, 24.7, 24.55)), row.names = c(NA, 6L), class = "data.frame")
structure(list(Time = c(1880.0417, 1880.125, 1880.2083, 1880.2917,
1880.375, 1880.4583), GMSL = c(-183, -171.1, -164.3, -158.2,
-158.7, -159.6), GMSLerror = c(24.2, 24.2, 24.2, 24.2, 24.2,
24.2)), row.names = c(NA, 6L), class = "data.frame")````
You can do this with mutate to make GMSLerrorlow column for all datasets
p1<-files <- c("Frederikse.csv", "ChurchandWhite.csv","Hay.csv")
set_names(files) %>% # give names - can use str_remove to drop `.csv` from names
map_dfr( ~ read_csv(.x), .id = "Author") %>% #use .id argument
mutate(
GMSLerrorlow = if_else(Author != "Frederikse.csv", GMSLerror, GMSLerrorlow),
GMSLerrorup = if_else(Author != "Frederikse.csv", GMSLerror, GMSLerrorup)
) %>%
ggplot(aes(x = Time, y = GMSL, color = Author,fill=Author)) +
geom_line(size=0.6)+
theme_bw(12)+
theme(panel.grid.major = element_blank())+
theme(panel.grid.minor = element_blank())+
labs(x = "Year", y = "GMSL (mm)",color="Author")+
geom_errorbar(aes(ymin=GMSL-GMSLerrorlow, ymax =GMSL+GMSLerrorup,alpha=Author))+
scale_alpha_manual(values = c(0.3, 0.3, 0.8))+
scale_colour_manual(values=c("#BAB3F0","#1D3E72","#201641"))

Not able to print forestplot in high resolution format in R

I need to create a forestplot of high resolution. I used the forestplot() function from library(forestplot) to create my plot, and then attempted to use the tiff() function to create a high resolution image for publication. However, my image turned blank.
It works if I export directly from R but not as high resolution as it was supposed to.
library(forestplot)
df <- structure(list(
mean = c(NA, 0.22, 0.20, 0.27),
lower = c(NA, 0.05, 0.04, 0.01),
upper = c(NA, 0.95, 1.08, 9.12)),
.Names = c("mean", "lower", "upper"),
row.names = c(NA, -4L),
class = "data.frame")
tabletext <- cbind(
c("", "Pooled", "Group 1", "Group 2"),
c("N", "4334", "3354", "980"),
c("HR (95% CI)", "0.22 (0.05, 0.95)", "0.20 (0.04, 1.08)", "0.27 (0.01, 9.12)"),
c("p-value", "0.042", "0.061", "0.467")
)
ggfp <- forestplot(tabletext,
df,
new_page = TRUE,
is.summary = c(TRUE, rep(FALSE, 3)),
clip = c(0, 2),
colgap = unit(5, "mm"),
line.margin = unit(2, "mm"),
lineheight = unit(1, "in"),
txt_gp = fpTxtGp(label = gpar(cex = 1),
ticks = gpar(cex = 1)),
align = c("l", "c", "c", "c"),
boxsize = 0.2,
xticks = seq(0, 2.0, 0.5),
zero = 1,
col = fpColors(box = "royalblue",
line = "darkblue"),
mar = unit(c(-1, 0.5, -2, 0.5), "in"))
tiff("forestplot.tiff", units = "in", width = 9, height = 7, res = 300)
ggfp
dev.off()
The file was created but it was a blank page
This works for me (output file is 17MB):
library(forestplot)
setwd("/path/to/directory/for/plot")
df <- structure(list(
mean = c(NA, 0.22, 0.20, 0.27),
lower = c(NA, 0.05, 0.04, 0.01),
upper = c(NA, 0.95, 1.08, 9.12)),
.Names = c("mean", "lower", "upper"),
row.names = c(NA, -4L),
class = "data.frame")
tabletext <- cbind(
c("", "Pooled", "Group 1", "Group 2"),
c("N", "4334", "3354", "980"),
c("HR (95% CI)", "0.22 (0.05, 0.95)", "0.20 (0.04, 1.08)", "0.27 (0.01, 9.12)"),
c("p-value", "0.042", "0.061", "0.467")
)
tiff("forestplot.tiff", units = "in", width = 9, height = 7, res = 300)
forestplot(tabletext,
df,
new_page = TRUE,
is.summary = c(TRUE, rep(FALSE, 3)),
clip = c(0, 2),
colgap = unit(5, "mm"),
line.margin = unit(2, "mm"),
lineheight = unit(1, "in"),
txt_gp = fpTxtGp(label = gpar(cex = 1),
ticks = gpar(cex = 1)),
align = c("l", "c", "c", "c"),
boxsize = 0.2,
xticks = seq(0, 2.0, 0.5),
zero = 1,
col = fpColors(box = "royalblue",
line = "darkblue"),
mar = unit(c(-1, 0.5, -2, 0.5), "in"))
dev.off()

Add p-values from own formula to ggplot2

I would like to add different p-values from an specific formula in a plot. I need different p-values from each of the subjects. Here is the code I used, which did not work:
formula <- lme(scale(Inactive.freq)~ scale(Time.point), random=~ 1|Subject, data=Freq_df, method='ML')
gggplot(Freq_df, aes(x=Time.point, y=Inactive.freq, group=Subject,colour=Subject)) +
geom_line(size=2)+
theme_minimal()+
geom_point()+
stat_smooth(method=lm, se = FALSE,linetype ="dashed")+
geom_smooth(method = "lm", formula = formula)+
stat_poly_eq(aes(label = paste(stat(eq.label),
stat(adj.rr.label), sep = "~~~~")), formula = formula, parse = TRUE) +
stat_fit_glance(label.x.npc = "right", label.y.npc = "bottom", geom = "text",
aes(label = paste("P-value = ", signif(..p.value.., digits = 3), sep = "")))
I would appreciate any help. Thank you!
UPDATE
My data:
structure(list(Subject = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label =
c("Caesar",
"DL", "Kyosti", "Paul", "Richards", "Taylor"), class = "factor"),
Time.point = c(1, 3, 4, 5, 6, 7), Pacing.freq = c(0.644444444444444,
0.562962962962963, 0.411111111111111, 0.122222222222222,
0, 0), Affiliative.freq = c(0.0703125, 0.138576779026217,
0.00760456273764259, 0.00617283950617284, 0.0634920634920635,
0.0629370629370629), Inactive.freq = c(0, 0, 0.174904942965779,
0.518518518518518, 0.290322580645161, 0.172661870503597),
Not.alert.alone.freq = c(0, 0, 0.174904942965779, 0.518518518518518,
0.279569892473118, 0.165467625899281), Not.alert.with.cagemate.freq = c(0,
0, 0, 0, 0.0108695652173913, 0.00719424460431655), Alert.with.cagemate.freq = c(0.06640625,
0.0262172284644195, 0, 0, 0, 0.00719424460431655), Non_visible = c(15L,
3L, 7L, 18L, 84L, 131L), Visible = c(255L, 267L, 263L, 162L,
186L, 139L)), row.names = c(NA, 6L), class = "data.frame")
This can be done using another layer with the "stat_fit_glance" method provided with the package ggpmisc (which you are already using, I believe...). It's a great package with lot more capabilities for annotating ggplot2.
The solution would be:
The modified data
Freq_df <- structure(list(Subject = as.factor(c(rep("Caesar", 3), rep("DL", 3))),
Time.point = c(1, 3, 4, 5, 6, 7),
Pacing.freq = c(0.644444444444444, 0.562962962962963,
0.411111111111111, 0.122222222222222, 0, 0),
Affiliative.freq = c(0.0703125, 0.138576779026217, 0.00760456273764259,
0.00617283950617284, 0.0634920634920635, 0.0629370629370629),
Inactive.freq = c(0, 0, 0.174904942965779, 0.518518518518518,
0.290322580645161, 0.172661870503597),
Not.alert.alone.freq = c(0, 0, 0.174904942965779, 0.518518518518518,
0.279569892473118, 0.165467625899281),
Not.alert.with.cagemate.freq = c(0, 0, 0, 0,
0.0108695652173913, 0.00719424460431655),
Alert.with.cagemate.freq = c(0.06640625, 0.0262172284644195, 0, 0, 0,
0.00719424460431655),
Non_visible = c(15L, 3L, 7L, 18L, 84L, 131L),
Visible = c(255L, 267L, 263L, 162L, 186L, 139L)),
row.names = c(NA, 6L), class = "data.frame")
The data needed to be changed, as a line cannot be fitted unless at least two data points are there, whereas you provided one data point per subject. So I limited it to two subjects with three points per subject. But you get the idea :)
The plotting code
ggplot(Freq_df, aes(x = Time.point, y = Pacing.freq)) + ylim(-0.5, 1.5) +
geom_line(size=2, alpha = 0.5) + geom_point(aes(group = "Subject"), size = 3) +
geom_smooth(method = "lm", formula = formula) + facet_wrap('Subject') +
stat_poly_eq(aes(label = paste(stat(eq.label), stat(adj.rr.label),
sep = "~~~~")), formula = formula, parse = TRUE) +
stat_fit_glance(label.x.npc = "right", label.y.npc = "bottom", geom = "text",
aes(label = paste("P-value = ", signif(..p.value.., digits = 15),
sep = "")))
EDIT 1:
#another way to use `stat_fit_glance` (not shown in the graph here)
stat_fit_glance(label.x = "right", label.y = "bottom",
aes(label = sprintf('r^2~"="~%.3f~~italic(p)~"="~%.2f',
stat(r.squared), stat(p.value))), parse = T)
`Facet-wrap' will do the trick if you need seperate p-values (seperate line-fitting) per group (and also not too many groups I believe... there must be a limit to number of facets allowed, which I don't know!).
OUTPUT
Play with the options to get desired output, e.g. if you use label.x.npc = "left" & label.y.npc = "bottom", then the regression equation & the p value labels might overlap.

Resources