Rolling mean function similar to pandas in zoo - r

I want to create a rolling mean of some geochemical data. Currently, I have data for every 1 mm from 0 cm to 45.7 cm, and I want to average every 10 mm to create 1 cm averages.
This is my code at the moment, but it is still not giving me 1 cm averages. Can someone tell me where I am going wrong? Thanks
# Read the measurements from CSV; the first line of the file is the header.
wapPbTa<-read.csv("wappbta.csv",header=TRUE)
# Drop rows 251 through 458 and keep the rest.
wapPbTa<-wapPbTa[-c(251:458), ]
library(ggplot2)
library(tidypaleo)
library(zoo)
# Rolling-window size, counted in rows of the data frame.
width <- 10
# Rolling mean of the PbTa column over `width` consecutive rows. fill = NA pads
# the positions that have no complete window, so RM is as long as the input.
# RM is a standalone vector here, not a column of wapPbTa.
RM<-rollmean(x = wapPbTa$PbTa, k = width,fill=NA)
## Averaged data: rolling mean on the x axis, Depth on a reversed y axis
ggplot(wapPbTa, aes(x =RM , y = Depth))+
labs(y = "Depth (cm)")+
geom_lineh(size=1)+
geom_point(size=2)+
theme_classic()+
scale_y_reverse()
## Unaveraged data: same plot layout, using the raw PbTa column
ggplot(wapPbTa, aes(x =PbTa , y = Depth))+
labs(y = "Depth (cm)")+
geom_lineh(size=1)+
geom_point(size=2)+
theme_classic()+
scale_y_reverse()
# Sample of the data: a 20-row data frame with the columns Depth and PbTa.
structure(list(Depth = c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7,
0.8, 0.9, 1, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9), PbTa = c(0.163857678,
0.161569533, 0.086305592, 0, 0.006086142, 0, 0, 0.044096031,
0.050739958, 0.088385995, 0.104100946, 0.133012821, 0, 0.127524872,
0.046368715, 0.02514558, 0.109383676, 0.081979695, 0.0766503,
0.064679583)), row.names = c(NA, 20L), class = "data.frame")

This type of problem generally has to do with reshaping the data. The data should be in long format, but it is currently in wide format. See this post on how to reshape the data from wide to long format.
library(ggplot2)
library(dplyr)
library(tidyr)
library(tidypaleo)
library(zoo)

# Rolling-window size, counted in rows
width <- 10

# Right-aligned rolling mean of PbTa, stored as a new column; positions
# without a complete window are filled with NA.
wapPbTa[["RM"]] <- rollmean(
  wapPbTa[["PbTa"]],
  k = width,
  fill = NA,
  align = "right"
)

# Stack every column except Depth into name/value pairs so that the raw
# series and its rolling mean are drawn in one plot, told apart by colour.
wapPbTa %>%
  pivot_longer(cols = -Depth) %>%
  ggplot(aes(x = value, y = Depth, colour = name)) +
  geom_lineh(size = 1) +
  geom_point(size = 2) +
  labs(y = "Depth (cm)") +
  scale_colour_manual(
    breaks = c("PbTa", "RM"),
    values = c("black", "blue")
  ) +
  scale_y_reverse() +
  theme_classic()

Related

Plot the confidence band with ggplot2

I have a dataset that is similar to this :
# Group A: one row per date with its mean and confidence-interval columns
x <- data.frame(
  date = c(20190902, 20190903, 20190904),
  Group = rep("A", times = 3),
  mean = c(2.5, 3.4, 4.6),
  ci_upper = c(1.2, 0.5, 0.3),
  ci_lower = c(0.5, 0.4, 0.25)
)

# Group B: same layout as group A
y <- data.frame(
  date = c(20190902, 20190903, 20190904),
  Group = rep("B", times = 3),
  mean = c(0.4, 3.8, 6.2),
  ci_upper = c(1.9, 0.9, 0.5),
  ci_lower = c(0.5, 0.8, 0.8)
)

# Stack both groups into a single data frame
df <- rbind(x, y)
I would like to plot the confidence band across the timeframe, with 2 different groups (A and B).
Currently I'm using this method, but it didn't work:
# Attempt from the question: a line for the mean plus a ribbon spanning
# ci_lower to ci_upper.
p <- ggplot(df) +
# NOTE(review): `type` is not a column of df as built above (its columns are
# date, Group, mean, ci_upper and ci_lower).
geom_line(aes(y = mean, x = date, group = type ))+
# NOTE(review): `week` is not a column of df either, and fill = "grey70" sits
# inside aes(), so it is handled as a mapped value, not as a literal colour.
geom_ribbon(aes(ymin = ci_lower, ymax = ci_upper, x = week, fill = "grey70"), alpha = 0.3)+
scale_colour_manual("", values = "blue")+
scale_fill_manual("", values = "grey12")
I wasn't sure how I can approach this.
You are almost there. Only some small corrections of the aes() are needed.
But first I would slightly modify the input, just to make the result look prettier (currently ci_upper/ci_lower are not always greater/less than the corresponding mean value):
# Fix the RNG seed so the randomly drawn offsets are reproducible
set.seed(123)
# Move each bound away from the mean by a random permutation of 1..nrow(x),
# so the lower bound lies below and the upper bound above the mean.
df[["ci_lower"]] <- df[["mean"]] - sample(nrow(x))
df[["ci_upper"]] <- df[["mean"]] + sample(nrow(x))
The main thing that should be changed in your ggplot() call is the definition of the aesthetics used for plotting. Please note that default aesthetic values should be set only once.
# Default aesthetics are declared once in ggplot(); both layers inherit them.
# The numeric yyyymmdd dates are converted to Date for the x axis.
p <- ggplot(
  df,
  aes(
    x = as.Date(as.character(date), format = "%Y%m%d"),
    y = mean,
    group = Group,
    colour = Group,
    fill = Group
  )
) +
  geom_line() +
  # Semi-transparent band between the lower and upper bounds
  geom_ribbon(aes(ymin = ci_lower, ymax = ci_upper), alpha = 0.3) +
  scale_colour_manual("", values = c("red", "blue")) +
  scale_fill_manual("", values = c("red", "blue"))
The result is as follows:
Actually, the last two lines of code are not even necessary, as the default ggplot colour scheme (which you have used to show the desired result) also looks very nice.

How to avoid expand = c(0,0) to crop off axis ticks labels

I am constructing a Cleveland dot-plot displaying values between 0.0 and 1.0.
I am close to having the plot that I want, but a tiny detail is bothering me. Because I use expand = c(0,0) the last 0 in 1.00 on the x-axis is cut off.
I have tried changing all kinds of settings but without luck.
This question with a similar title to this post does, unfortunately, not help.
Can you help me keep the borders of the plot without cutting off the last zero in 1.00 on x-axis?
reprex:
library(tidyverse)

# Example data: one tastiness score per fruit
df <- tribble(
  ~Tastyness, ~Fruit,
  0.6,        "Bananas",
  0.7,        "Apples",
  0.9,        "Oranges",
  0.95,       "Mango",
  0.98,       "Peach"
)

# Dot plot with the x axis fixed to [0, 1] and no padding around the limits
ggplot(df, aes(x = Tastyness, y = Fruit)) +
  geom_point(size = 4) +
  scale_x_continuous(
    breaks = c(0, 0.5, 0.75, 0.9, 1),
    limits = c(0, 1),
    expand = c(0, 0)
  ) +
  theme_bw()
A solution that works by adjusting the margins around the plot:
# Same dot plot, with explicit plot margins set through theme()
ggplot(df, aes(x = Tastyness, y = Fruit)) +
  geom_point(size = 4) +
  scale_x_continuous(
    breaks = c(0, 0.5, 0.75, 0.9, 1),
    limits = c(0, 1),
    expand = c(0, 0)
  ) +
  theme_bw() +
  # Margins in cm; the second value (0.5) is larger than the other three
  theme(plot.margin = unit(c(0.2, 0.5, 0.2, 0.2), "cm"))
Maybe change the labels, so that there are no decimals on 0 and 1:
# Same dot plot, with hand-written tick labels so that the end points are
# written as 0 and 1 without decimals
ggplot(df, aes(x = Tastyness, y = Fruit)) +
  geom_point(size = 4) +
  scale_x_continuous(
    breaks = c(0, 0.5, 0.75, 0.9, 1),
    labels = c(0, 0.5, 0.75, 0.9, 1),
    limits = c(0, 1),
    expand = c(0, 0)
  ) +
  theme_bw()
Or shift labels horizontally:
# Same dot plot, with the x tick labels justified via hjust = 1
ggplot(df, aes(x = Tastyness, y = Fruit)) +
  geom_point(size = 4) +
  scale_x_continuous(
    breaks = c(0, 0.5, 0.75, 0.9, 1),
    limits = c(0, 1),
    expand = c(0, 0)
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(hjust = 1))

Individual axes labels in facet_wrap without scales="free"

My data looks like this:
# Example data: three Tax/Depth observations per year, all columns except
# `values` stored as factors
df <- data.frame(
  Year = as.factor(rep(c(2015, 2016, 2017), each = 3)),
  Tax = as.factor(rep(c("A", "B", "C"), times = 3)),
  Depth = as.factor(c(10, 30, 50, 20, 30, 50, 10, 30, 40)),
  values = c(0.5, 0.25, 0.25, 0.1, 0.4, 0.5, 0.2, 0.6, 0.2)
)
I want to plot it with gaps for missing data and individual axis labels.
library(ggplot2)
The scale argument of facet_wrap gives individual axes, but it does not behave as desired, because the missing data is not reflected:
# Stacked bars of `values` per Depth, one panel per Year, with each panel
# getting its own axes. The argument is spelled out as `scales`; the shorter
# `scale` only worked through R's partial argument matching.
ggplot(df, aes(Depth, values, fill = Tax)) +
  geom_bar(stat = "identity") +
  facet_wrap(~Year, scales = "free") +
  coord_flip()
Without scales:
# Stacked bars of `values` per Depth, one panel per Year, using facet_wrap()'s
# default scales
ggplot(df, aes(x = Depth, y = values, fill = Tax)) +
  geom_bar(stat = "identity") +
  facet_wrap(~Year) +
  coord_flip()
The missing data is represented (which I want!), but it lacks axis labels (which I need).
Is there anything I can do?
It looks like this can be done using the lemon package:
library(tidyverse)
library(lemon)

# Example data: three Tax/Depth observations per year
df <- data.frame(
  Year = as.factor(c(rep(2015, 3), rep(2016, 3), rep(2017, 3))),
  Tax = as.factor(c(rep(c("A", "B", "C"), 3))),
  Depth = as.factor(c(10, 30, 50, 20, 30, 50, 10, 30, 40)),
  values = c(0.5, 0.25, 0.25, 0.1, 0.4, 0.5, 0.2, 0.6, 0.2)
)

# facet_rep_wrap() from lemon is used in place of facet_wrap();
# repeat.tick.labels = TRUE asks for the tick labels on every panel.
# TRUE is written out because T is an ordinary variable that can be reassigned.
ggplot(df, aes(Depth, values, fill = Tax)) +
  geom_bar(stat = "identity") +
  facet_rep_wrap(~Year, repeat.tick.labels = TRUE) +
  coord_flip()

Repel geom label and text in ggplot. And ordering geom points based on size

I have 2 data frames such as these:
# Parties: position on two dimensions plus a size value
df1 <- data.frame(
  party = c("Blue Party", "Red Party"),
  dim1  = c(0.03, -0.04),
  dim2  = c(-0.05, 0.02),
  sz    = c(34, 42)
)

# Variables: coordinates on the same two dimensions
df2 <- data.frame(
  var  = c("Economic", "Gov trust", "Inst trust", "Nationalism", "Religiosity"),
  dim1 = c(0.1, -0.5, 0.0, 0.6, 0.4),
  dim2 = c(0.1, 0.6, 0.0, 0.0, 0.3)
)
I want to plot the parties from df1 as points defined by size and include arrows based on df2 on the same graph. I've used ggplot to do this:
# Parties as coloured points with repelled labels, plus one black arrow and
# one repelled text per row of df2.
ggplot(df1, aes(x = dim1, y = dim2, color = party)) +
  # Point sizes are taken directly from df1$sz (set, not mapped)
  geom_point(size = df1[["sz"]]) +
  scale_size_area() +
  scale_x_continuous(limits = c(-1.5, 1.5)) +
  scale_y_continuous(limits = c(-1.5, 1.5)) +
  geom_label_repel(
    aes(label = party),
    box.padding = 1,
    point.padding = 1.5,
    force = 1
  ) +
  # Segments run between (dim1, dim2) and the origin; the arrow head is
  # placed at the first end, i.e. at (dim1, dim2)
  geom_segment(
    aes(x = dim1, y = dim2, xend = 0, yend = 0),
    data = df2,
    arrow = arrow(length = unit(0.20, "cm"), ends = "first", type = "closed"),
    color = "black"
  ) +
  geom_text_repel(
    aes(x = dim1, y = dim2, label = var),
    data = df2,
    color = "black",
    size = 3,
    force = 1
  )
Resulting in this:
The functions geom_label_repel and geom_text_repel prevent the party labels and the texts from overlapping, but how can I repel the labels and texts from each other?
My second problem is that I want to order the points, with the smallest in the front and the largest at the back. How could this be done?
Appreciate the help!

Adjust size of pie charts

I've made these pie charts:
library(ggplot2)

# Two slices (log.var TRUE/FALSE) for each of the four zones
df <- expand.grid(log.var = c(TRUE, FALSE), zone = 1:4)
df$proportion <- c(0.3, 0.7, 0.4, 0.6, 0.2, 0.8, 0.5, 0.5)
# Random size per row, eight distinct draws from 1..20
df$size <- sample(1:20, 8)

# One stacked bar per zone, bent into a pie by the polar transform on y
ggplot(df, aes(x = factor(1), y = proportion, fill = log.var)) +
  geom_bar(stat = "identity") +
  coord_polar(theta = "y") +
  facet_grid(. ~ zone)
Is there any way of adjusting the size of each pie chart according to the sum of size in each zone?
#lukeA's suggestion is sensible but doesn't quite work:
library("ggplot2")
theme_set(theme_bw())
library("dplyr") # for mutate()

set.seed(101)

# Two slices per zone; totsize is the per-zone sum of size, repeated on
# every row of that zone
df <- expand.grid(log.var = c(TRUE, FALSE), zone = 1:4) %>%
  mutate(
    proportion = c(0.3, 0.7, 0.4, 0.6, 0.2, 0.8, 0.5, 0.5),
    size = sample(1:20, 8),
    totsize = ave(size, zone, FUN = sum)
  )

g0 <- ggplot(df, aes(x = factor(1), y = proportion, fill = log.var))

# Bar width is mapped to the zone's total size
g0 +
  geom_bar(aes(width = totsize), stat = "identity") +
  facet_grid(. ~ zone) +
  coord_polar(theta = "y")
The problem here is that the bars are drawn (in rectangular coordinates) in the middle of the x-axis; we'd like them to be drawn with their x-coordinates running from 0 to the full width, but I'm not sure how to do that. The alternative would be to do a bunch of the cumulative proportion/stacking computations by hand (or at least outside ggplot2), then use geom_rect() ...
Here's how:
# Within each zone, compute where every slice starts (cp1) and ends (cp2) on
# the cumulative-proportion axis.
df <- df %>%
  group_by(zone) %>%
  mutate(
    cp1 = c(0, head(cumsum(proportion), -1)),
    cp2 = cumsum(proportion)
  ) %>%
  # Drop the grouping again so later operations on df are not silently
  # carried out per zone.
  ungroup()

# Each slice is a rectangle from x = 0 to the zone's total size, spanning
# cp1..cp2 on y; coord_polar() then bends y into the angle.
ggplot(df) +
  geom_rect(
    aes(xmin = 0, xmax = totsize, ymin = cp1, ymax = cp2, fill = log.var)
  ) +
  facet_grid(. ~ zone) +
  coord_polar(theta = "y")

Resources