Too many data points on moisture data - r

When I create my graph with geom line I get this graph
Here is my code
# One geom_line() layer per sensor column; the constant colour strings become
# the legend labels.
ggplot(data = Moisture_kurokawa, mapping = aes(x = Date)) +
  geom_line(mapping = aes(y = W5, colour = "W5")) +
  geom_line(mapping = aes(y = W7, colour = "W7")) +
  geom_line(mapping = aes(y = W9, colour = "W9")) +
  geom_line(mapping = aes(y = W11, colour = "W11"))
Any help on how to smooth it or see all the data points?
My data file link.

You should take some time to read through some ggplot2 tutorials so you can let it do some of the work for you, especially if you do a bit of data rearranging.
You also need to have a proper date+time object for the resolution you want.
library(tidyverse)
# Read the raw sensor export.
Moisture_kurokawa <- read_csv("~/Data/Moisture kurokawa.csv")

# Build a single date-time column from Date + Time, drop the two source
# columns, then reshape to long form: one row per (timestamp, sensor) pair,
# with the sensor column name stored in `W` and the reading in `value`.
# The result is assigned with a leading `<-` so the target name is visible at
# the start of the pipeline rather than hidden behind a trailing `->`.
# gather() is superseded by tidyr::pivot_longer(); it is kept here because it
# stacks the output sensor by sensor, which is the row order printed below.
moisture_long <- Moisture_kurokawa %>%
  mutate(timestamp = lubridate::mdy_hms(sprintf("%s %s", Date, Time))) %>%
  select(-Date, -Time) %>%
  gather(W, value, -timestamp)

moisture_long
## # A tibble: 17,645 x 3
## timestamp W value
## <dttm> <chr> <dbl>
## 1 2017-06-24 00:00:00 W5 0.333
## 2 2017-06-24 00:30:00 W5 0.333
## 3 2017-06-24 01:00:00 W5 0.334
## 4 2017-06-24 01:30:00 W5 0.334
## 5 2017-06-24 02:00:00 W5 0.334
## 6 2017-06-24 02:30:00 W5 0.334
## 7 2017-06-24 03:00:00 W5 0.335
## 8 2017-06-24 03:30:00 W5 0.335
## 9 2017-06-24 04:00:00 W5 0.335
## 10 2017-06-24 04:30:00 W5 0.335
## # ... with 17,635 more rows
# All sensors on one panel, one coloured line per sensor.
ggplot(
  data = moisture_long,
  mapping = aes(x = timestamp, y = value, group = W, color = W)
) +
  geom_line()
With your data in a better shape, you can even do:
# Same lines as before, but each sensor gets its own panel.
ggplot(
  data = moisture_long,
  mapping = aes(x = timestamp, y = value, group = W, color = W)
) +
  geom_line() +
  facet_wrap(facets = ~W)

# Read the raw export. TRUE is spelled out because `T` is an ordinary variable
# that can be reassigned.
Moisture_kurokawa <- read.table("Moisture kurokawa.csv", header = TRUE, sep = ",")

# Create a datetime object with as.POSIXct.
# paste() (rather than paste0()) puts a space between the date and the time so
# the string has the same layout as the format below.
Moisture_kurokawa$DateTime <- as.POSIXct(
  paste(Moisture_kurokawa$Date, Moisture_kurokawa$Time),
  format = "%m/%d/%Y %H:%M"
)

library(ggplot2)

# One line per sensor column against the full date-time axis; the constant
# colour strings become the legend labels.
ggplot(Moisture_kurokawa, aes(x = DateTime)) +
  geom_line(aes(y = W5, colour = "W5")) +
  geom_line(aes(y = W7, colour = "W7")) +
  geom_line(aes(y = W9, colour = "W9")) +
  geom_line(aes(y = W11, colour = "W11"))

Related

How to use geom_rect to highlight specified facets

I have a facet plot that I need to place a rectangle in or highlight 3 specific facets. Facets 5, 6, and 10. See Below:
I found some code referring to "geom_rect" that seems like it may work but it won't show up, also doesn't give me any error message. Here is the code:
# Scatter of target strength against depth on a reversed depth axis, one facet
# per Week, with a grey rectangle layer and horizontal lines from week_avg_15E.
# NOTE(review): the rectangle's data frame only contains Week = 5, and
# ymin = 1 lies outside the y limits c(16, 2) set on the scale -- confirm
# whether either of these explains the rectangle not showing up.
weekly_TS_PDF <- ggplot(
  TS_stack,
  aes(x = TS_log, y = TS_depth, color = sentiment)
) +
  scale_y_reverse(limits = c(16, 2), breaks = seq(16, 2)) +
  geom_rect(
    data = data.frame(Week = 5),
    mapping = aes(xmin = -65, xmax = -55, ymin = 1, ymax = 16),
    alpha = .3,
    fill = "grey",
    inherit.aes = FALSE
  ) +
  geom_point() +
  facet_grid(. ~ Week) +
  geom_hline(
    data = week_avg_15E,
    mapping = aes(yintercept = x),
    linetype = "solid"
  ) +
  ylab("Target Depth (m)") +
  xlab("Mean Target Strength (dB)") +
  ggtitle("Mean TS by Depth by Week (12 hour resolution)") +
  guides(color = guide_legend("Year"))
Reprex data:
X TS_depth Group.1 x TS_log Date_time AMPM Week sentiment
1 1 9.593093 2020-12-01 18:00:00 5.390264e-07 -62.68390 2020-12-01 18:00:00 PM 5 Year 1
2 2 9.550032 2020-12-02 06:00:00 4.022841e-07 -63.95467 2020-12-02 06:00:00 AM 6 Year 1
3 3 9.677069 2020-12-02 18:00:00 6.277191e-07 -62.02235 2020-12-02 18:00:00 PM 7 Year 1
4 4 9.679256 2020-12-03 06:00:00 3.501608e-07 -64.55732 2020-12-03 06:00:00 AM 8 Year 1
5 5 9.606380 2020-12-03 18:00:00 6.698625e-07 -61.74014 2020-12-03 18:00:00 PM 9 Year 1
6 6 9.548408 2020-12-04 06:00:00 4.464622e-07 -63.50215 2020-12-04 06:00:00 AM 10 Year 1
I just need to highlight or put a rectangle in facets 5,6, and 10. Any help is appreciated.

Creating continuous line graph in ggplot with NA values and adding secondary y axis

I would like to create a continuous time-series line graph. However, I have NA values in my data so the typical output is discontinuous. I tried using the na.omit argument but an error appears
Error in charToDate(x) : character string is not in a standard
unambiguous format"
Here is my script:
# Load the data, convert the Date column to class Date, then plot A over time
# using only the rows that contain no NA in any column.
test <- read.csv("testdata.csv", stringsAsFactors = FALSE)
test$Date <- as.Date(test$Date)

ggplot(na.omit(test), aes(x = Date, y = A)) +
  geom_line(na.rm = TRUE) +
  xlab("") +
  ylab("A") +
  # Monthly breaks labelled with the abbreviated month name.
  scale_x_date(breaks = date_breaks("1 month"), labels = date_format("%b")) +
  scale_y_continuous(expand = c(0, 0), limits = c(28, 31)) +
  geom_point(shape = 1) +
  theme_bw()
Aside from that, I would also like to create a second y-axis in the same plot. I used sec.axis argument. The data for this axis also has NA values. However, since the first part of the script is having problems, I can not confirm if my code works. Here is the additional code:
# Fragment meant to be appended to an existing ggplot with `+`; it is not a
# complete expression on its own.
# B is divided by 20 before plotting, and the secondary axis multiplies the
# primary-axis values by 20, i.e. the inverse of that scaling.
# NOTE(review): geom_point(shape=0) sets no y aesthetic here, so it presumably
# inherits the plot-level y rather than B/20 -- confirm this is intended.
geom_line(aes(y = B/20, colour ="B")) +
scale_y_continuous(expand=c(0,0), sec.axis = sec_axis(~.*20, bquote(B)))+
geom_point(shape=0)
Here is a portion of my data
Date
A
B
2020-09-23
28.2
NA
2020-09-30
NA
0.192
2020-10-01
28.4
NA
2020-10-07
28.6
NA
2020-10-14
28.8
NA
2020-10-21
28
NA
2020-10-28
NA
0.136
2020-11-01
28.5
NA
2020-11-04
27.6
NA
2020-11-11
27.9
NA
2020-11-18
27.9
NA
2020-11-25
NA
0.184
2020-12-01
28.1
NA
2020-12-02
28.4
NA
2020-12-09
29
NA
I'm not sure this is what you want, because column B in that portion of the data has a lot of NAs.
Comment: if na.omit is applied to the whole portion of data, nothing is left, so I cannot proceed with na.omit on the full data frame.
# Parse Date once; every data frame below is derived from this one.
test2 <- test %>%
  mutate(Date = as.Date(Date))

# In the sample shown, A and B are never observed on the same date, so
# na.omit() on the whole frame would drop every row. Drop the NAs per series
# instead, which also lets each geom_line() connect its points without gaps.
test3 <- test2 %>%
  select(Date, A) %>%
  na.omit()

test_b <- test2 %>%
  select(Date, B) %>%
  na.omit()

ggplot(test3, aes(x = Date, y = A)) +
  # Series A: line plus open circles.
  geom_line() +
  geom_point(shape = 1) +
  # Series B: line plus open squares. Both layers use the same B / 20 scaling
  # so the points sit on the line and agree with the secondary axis, which
  # multiplies by 20 to undo it.
  geom_line(data = test_b, aes(y = B / 20, colour = "B")) +
  geom_point(data = test_b, aes(y = B / 20, colour = "B"), shape = 0) +
  xlab("") +
  ylab("A") +
  # date_breaks / date_labels are arguments of scale_x_date() itself, so the
  # helper functions from the scales package are not needed.
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  # Only one y scale is specified: ggplot2 keeps just the last scale added for
  # an aesthetic, so an earlier scale_y_continuous(limits = c(28, 31)) would be
  # replaced by this one anyway.
  scale_y_continuous(
    expand = c(0, 0),
    sec.axis = sec_axis(~ . * 20, name = "B")
  ) +
  theme_bw()

ggplot `geom_segment()` fails to recognize `group_by()` specification

library(tidyverse)
library(lubridate)
library(stringr)
# 365 daily random values starting 2017-07-01, annotated with the monthly mean
# and, per calendar year, the first/last month and the mean of the monthly
# means. The result is left grouped by Year.
df <- tibble(
  Date = as.Date(0:364, origin = "2017-07-01"),
  Value = rnorm(365)
) %>%
  mutate(
    Year = str_sub(Date, 1, 4),
    MoFloor = floor_date(Date, unit = "month")
  ) %>%
  # Monthly mean, repeated on every row of that month.
  group_by(Year, MoFloor) %>%
  mutate(MoAvgValue = mean(Value)) %>%
  # Regrouping by Year alone replaces the (Year, MoFloor) grouping.
  group_by(Year) %>%
  mutate(
    MinMoFloor = min(MoFloor),
    MaxMoFloor = max(MoFloor),
    YearAvgValue = mean(MoAvgValue)
  )
#> # A tibble: 365 x 8
#> # Groups: Year [2]
#> Date Value Year MoFloor
#> <date> <dbl> <chr> <date>
#> 1 2017-07-01 -1.83 2017 2017-07-01
#> 2 2017-07-02 -2.13 2017 2017-07-01
#> 3 2017-07-03 1.49 2017 2017-07-01
#> 4 2017-07-04 0.0753 2017 2017-07-01
#> 5 2017-07-05 -0.437 2017 2017-07-01
#> 6 2017-07-06 -0.327 2017 2017-07-01
#> 7 2017-07-07 -1.28 2017 2017-07-01
#> 8 2017-07-08 0.280 2017 2017-07-01
#> 9 2017-07-09 1.24 2017 2017-07-01
#> 10 2017-07-10 0.0921 2017 2017-07-01
#> # ... with 355 more rows, and 4 more
#> # variables: MoAvgValue <dbl>,
#> # MinMoFloor <date>,
#> # MaxMoFloor <date>,
#> # YearAvgValue <dbl>
Let's first plot the data frame above.
# Monthly averages as points, one facet per calendar year, with free x scales
# and free panel widths.
# (`scale` reaches facet_grid()'s `scales` argument through partial matching.)
ggplot(
  data = df,
  mapping = aes(x = MoFloor, y = MoAvgValue, group = Year)
) +
  facet_grid(~Year, scale = "free_x", space = "free_x") +
  geom_point()
In my call to the facet_grid() function I added the arguments scale = "free_x" and space = "free_x" to get rid of empty white space on the plots.
When I go ahead and add geom_segment()s based on group_by()d data, the scale = "free_x" and space = "free_x" arguments are negated. The empty white space reappears!
# Same faceted point plot plus a horizontal segment at each row's yearly
# average. The segment layer uses the plot's own data (df); its x and xend are
# min()/max() calls written inside aes().
ggplot(
  data = df,
  mapping = aes(x = MoFloor, y = MoAvgValue, group = Year)
) +
  facet_grid(~Year, scale = "free_x", space = "free_x") +
  geom_point() +
  geom_segment(
    mapping = aes(
      x = min(MinMoFloor),
      y = YearAvgValue,
      xend = max(MaxMoFloor),
      yend = YearAvgValue
    )
  )
My df data frame is grouped by Year. Why doesn't the geom_segment() function recognize this when I enter (for example) the x = min(MinMoFloor) argument? geom_segment() is pulling the min(MinMoFloor) from the global column, instead of the grouped column. How do I get geom_segment() to evaluate the MinMoFloor column as grouped data?

R shift scale at x axis date with a non-continuous sequence of time

I have time series data from two consecutive years (2017-2018), from January to December of each year. I then need to plot the data from Sept-17 to April-18.
I could do it with very hand-made code; however, I realize it could be done in a much more straightforward way with the packages available today for managing dates on plots (packages "scales", "lubridate", etc.).
Can someone help me to simplify my work for doing the second plot?
I will really appreciate it.
suppressWarnings(suppressMessages(library("tidyverse", quietly = T)))
# Two full calendar years of daily gamma-distributed values, plus helper
# columns for year, month number and day of year.
dat <- tibble(
  date = seq(as.Date("2017-01-01"), as.Date("2018-12-31"), by = "day"),
  var = rgamma(length(date), shape = 2, scale = 2)
) %>%
  mutate(
    year = lubridate::year(date),
    month = lubridate::month(date),
    julian = lubridate::yday(date)
  )

dat
#> # A tibble: 730 x 5
#> date var year month julian
#> <date> <dbl> <dbl> <dbl> <dbl>
#> 1 2017-01-01 12.9 2017 1 1
#> 2 2017-01-02 6.69 2017 1 2
#> 3 2017-01-03 6.11 2017 1 3
#> 4 2017-01-04 1.68 2017 1 4
#> 5 2017-01-05 1.22 2017 1 5
#> 6 2017-01-06 10.2 2017 1 6
#> 7 2017-01-07 5.13 2017 1 7
#> 8 2017-01-08 4.61 2017 1 8
#> 9 2017-01-09 3.79 2017 1 9
#> 10 2017-01-10 1.11 2017 1 10
#> # … with 720 more rows
# Whole series against day of year: colour by month, line type by year.
ggplot(dat) +
  geom_line(
    mapping = aes(
      x = julian,
      y = var,
      color = factor(month),
      linetype = factor(year)
    )
  )
# Keep Sep-Dec 2017 and Jan-Mar 2018, then re-index the day of year so that
# 1 Sep becomes 1 and the 2018 days follow the 2017 ones on a single axis.
# NOTE(review): 31 Dec maps to 365 - 243 = 122 while 1 Jan maps to
# 1 + 123 = 124, so index 123 is never used -- confirm whether + 122 was meant.
dat %>%
  filter(
    (year == 2017 & month %in% 9:12) |
      (year == 2018 & month %in% 1:3)
  ) %>%
  mutate(julian_AWS = ifelse(julian >= 244, julian - 243, julian + 123)) %>%
  ggplot() +
  geom_line(
    aes(julian_AWS, var, color = factor(month), linetype = factor(year))
  ) +
  # Hand-placed ticks every 30 index units (1, 31, ..., 181), one per month
  # label.
  scale_x_continuous(
    breaks = seq(1, 181, by = 30),
    labels = c("Sep", "Oct", "Nov", "Dec", "Jan", "Feb", "Mar")
  ) +
  theme(axis.text.x = element_text(hjust = -1))
Created on 2019-05-05 by the reprex package (v0.2.1)
I don't think you need to delve into the julian date formats. See if this gets you what you need:
# Filter on the date column itself and let scale_x_date() put one abbreviated
# month label at every month break, so no day-of-year arithmetic is needed.
dat %>%
  filter(
    date >= as.Date("2017-09-01"),
    date < as.Date("2018-04-01")
  ) %>%
  ggplot() +
  geom_line(
    mapping = aes(
      x = date,
      y = var,
      color = factor(month),
      linetype = factor(year)
    )
  ) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  theme(axis.text.x = element_text(hjust = -1))
For more info on date label formats, see ?strftime

Using xblocks for interpolated values across time series in r

I'm having trouble figuring out how to get xblocks() to work. First, here's a small example from a much larger dataset:
# Small shuffled example: nine sampling dates (three distinct days) and nine
# oxygen readings stored as strings, two of which are missing. Each column is
# shuffled independently.
data <- data.frame(
  Date = sample(
    rep(c("1993-07-05", "1993-08-30", "1993-09-04"), times = c(3, 4, 2))
  ),
  Oxygen = sample(
    c("0.9", "0.4", "4.2", "5.6", "7.3", NA, "9.5", NA, "0.3")
  )
)
I then averaged values for each month using xts:
# Build an xts series: every column except the first is data, and the first
# column supplies the time index. Then apply mean() over each month.
xtsAveragedata <- xts(x = Averagedata[-1], order.by = Averagedata[[1]])
xtsAverageMonthlyData <- apply.monthly(xtsAveragedata, FUN = mean)
Now, I linearly interpolated my data:
# Replace the NA entries of the monthly series with interpolated values.
Interpolateddata <- na.approx(object = xtsAverageMonthlyData)
I want to create a figure in which I use xblocks() or something similar to show the regions in my data where I used interpolation, something like this, which I found online:
How do I go about doing this for all values, i.e. automating it for my entire dataset? There are no examples in the reference guide that I could translate into something like this.
Thank you for your help. It is greatly appreciated.
So this doesn't use xts or zoo, but maybe this walkthrough will be helpful. I am using a slightly larger (and daily) dataset, but it should be reproducible:
library(tidyverse)
library(lubridate)
# Fixed seed so the random values and the choice of missing rows repeat.
set.seed(4)

# 100 consecutive days of uniformly distributed oxygen readings in [0, 10].
df <- tibble(
  Date = seq.Date(ymd("1993-07-01"), by = "1 day", length.out = 100),
  Oxygen = runif(100, min = 0, max = 10)
)

# Randomly assign 20 records to NA
df$Oxygen[sample(seq_len(nrow(df)), 20)] <- NA

# Flag the gaps and fill each one with the mean of the observed values from
# the same month number. The result stays grouped by the `month(Date)` column.
df_for_plot <- df %>%
  arrange(Date) %>%
  group_by(month(Date)) %>%
  mutate(
    is_na = is.na(Oxygen),
    month_avg = mean(Oxygen, na.rm = TRUE),
    oxygen_to_plot = if_else(condition = is_na, true = month_avg, false = Oxygen)
  )

df_for_plot
#> # A tibble: 100 x 6
#> # Groups: month(Date) [4]
#> Date Oxygen `month(Date)` is_na month_avg oxygen_to_plot
#> <date> <dbl> <dbl> <lgl> <dbl> <dbl>
#> 1 1993-07-01 5.86 7 FALSE 5.87 5.86
#> 2 1993-07-02 0.0895 7 FALSE 5.87 0.0895
#> 3 1993-07-03 2.94 7 FALSE 5.87 2.94
#> 4 1993-07-04 2.77 7 FALSE 5.87 2.77
#> 5 1993-07-05 8.14 7 FALSE 5.87 8.14
#> 6 1993-07-06 NA 7 TRUE 5.87 5.87
#> 7 1993-07-07 7.24 7 FALSE 5.87 7.24
#> 8 1993-07-08 9.06 7 FALSE 5.87 9.06
#> 9 1993-07-09 9.49 7 FALSE 5.87 9.49
#> 10 1993-07-10 0.731 7 FALSE 5.87 0.731
#> # ... with 90 more rows
# Draw the filled series as a line, then shade every gap-filled day.
# The rectangle layer only gets the rows where is_na is TRUE. With daily data,
# each rectangle runs from its Date to Date + 1 and spans the full panel height.
ggplot(data = df_for_plot, mapping = aes(x = Date, y = oxygen_to_plot)) +
  geom_line() +
  geom_rect(
    data = filter(df_for_plot, is_na),
    mapping = aes(xmin = Date, xmax = Date + 1, ymin = -Inf, ymax = Inf),
    fill = "skyblue",
    alpha = 0.5
  )

Resources