I'm doing a rpois simulation and I want to create a function to automate variable change (lambda).
My function should be able to change the lambda value. For example, here I want to change three variables n1 (175), n2 (11) and n3 (14)
and the number of poison random depending on the number of row of the concatenate input data frame like in my example.
library(tidyverse)
library(ggrepel)
library(broom)
set.seed(12358)
pois_1 <- tidy(summary(as.factor(rpois(n = 1000000, lambda = (1/336981)*175*11*14)))/1000000)
pois_2 <- tidy(summary(as.factor(rpois(n = 1000000, lambda = (1/336981)*500*11*14)))/1000000)
pois_3 <- tidy(summary(as.factor(rpois(n = 1000000, lambda = (1/336981)*900*11*14)))/1000000)
df_info <- data.frame(pois_1[1:5, ], pois_2[1:5, 2], pois_3[1:5, 2])
names(df_info) <- c("occurence", "175",
"500", "900")
df_info %>%
gather(fl, proba, "175":"900") -> df_info
ggplot(data = df_info, aes(x = fl,
y = proba,
group = occurence)) +
geom_point(size = 2) +
geom_label_repel(aes(label = ifelse(proba > 0.02, as.character(round(proba, 2)), "")),
box.padding = 0.35,
point.padding = 0.5,
segment.color = 'grey50') +
geom_line(aes(linetype = occurence, color = occurence), size = 1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
plot.title = element_text(hjust = 0.5, face = "bold"),
axis.title.x = element_text(hjust = 0.5, face = "bold"),
axis.title.y = element_text(hjust = 0.5, face = "bold"))
Here I wanted to create a function like that with a for loop but it seems to be quite complicated:
Edit : done with only a vector but I want to do it for n1> 1
vizFun <- function(n1, n2, n3){
df_info <- cbind(n1, n2, n3)
names(df_info) <- c("n1", "n2", "n3")
if (nrow(df_info) == 1){
for (i in seq_along(nrow(df_info))){
lambda <- (1/336981)*df_info[i,"n1"]*df_info[i, "n2"]*df_info[i, "n3"]
pois <- tidy(summary(as.factor(rpois(n = 1000000, lambda = lambda)))/1000000)
df_info <- data.frame(pois[, ])
names(df_info) <- c("occurence", "175")
df_info %>%
gather(fl, proba, "175") -> df_info
}
ggplot(data = df_info, aes(x = fl,
y = proba,
group = occurence)) +
geom_point(size = 2) +
geom_label_repel(aes(label = ifelse(proba > 0.02, as.character(round(proba, 2)), "")),
box.padding = 0.35,
point.padding = 0.5,
segment.color = 'grey50') +
geom_line(aes(linetype = occurence, color = occurence), size = 1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
plot.title = element_text(hjust = 0.5, face = "bold"),
axis.title.x = element_text(hjust = 0.5, face = "bold"),
axis.title.y = element_text(hjust = 0.5, face = "bold"))
}
}
vizFun(500, 11, 14)
Try this:
yourfunction<-function(x=c(),seed=12358){
set.seed(seed)
require(tidyverse)
require(ggrepel)
require(broom)
listdata<-list()
for (i in 1:length(x)) {
listdata[[i]]<- assign(paste("pois_",i),tidy(summary(as.factor(rpois(n = 1000000, lambda = (1/336981)*x[i]*11*14)))/1000000)[1:5,]) }
df_info<-cbind.data.frame(occurence=as.character(rep(0:(5-1)), length(x)),fl=as.character(rep(x,each=5)) ,proba=dplyr::bind_rows(listdata)[,2])
names(df_info)<-c("occurence" , "fl" ,"proba")
ggplot(data = df_info, aes(x = fl,
y = proba,
group = occurence)) +
geom_point(size = 2) +
geom_label_repel(aes(label = ifelse(proba > 0.02, as.character(round(proba, 2)), "")),
box.padding = 0.35,
point.padding = 0.5,
segment.color = 'grey50') +
geom_line(aes(linetype = occurence, color = occurence), size = 1) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
plot.title = element_text(hjust = 0.5, face = "bold"),
axis.title.x = element_text(hjust = 0.5, face = "bold"),
axis.title.y = element_text(hjust = 0.5, face = "bold"))
}
yourfunction(x=c(175,500,700,900))
Related
I verified that when using geom_line the resulting graph is not formed by solid lines, the lines look like a ladder.
Would anyone know how to get solid lines?
Here is the code used.
data base
0;0
0.000700;1.050
0.001750;1.100
0.003800;1.150
0.029110;1.200
0.130900;1.240
0.341428;1.303
`library(tidyverse)
library(reprex)
SAT <- read.delim("Curva_Tipica.txt", header = FALSE, encoding = "UTF-8")
SAT <- str_split(SAT[[1]],";")
length_SAT <- length(SAT) + 1
curva_VxI <- tibble(
.rows = length_SAT,
I = 0,
V = 0,
)
for (linha in 2:length_SAT) {
curva_VxI$I[linha] <- as.double(SAT[[linha-1]][1])
curva_VxI$V[linha] <- as.double(SAT[[linha-1]][2])
}
Xac <- (curva_VxI$V[length_SAT] - curva_VxI$V[length_SAT-1])/(curva_VxI$I[length_SAT] - curva_VxI$I[length_SAT-1])
Vj <- curva_VxI$V[length_SAT] - Xac*curva_VxI$I[length_SAT]
Xac_grafico <- tibble(
x = c(0, 1),
y = c(0, Xac)
)
Vj_grafico <- tibble(
x = c(0),
y = c(Vj)
)
theme_set(theme_bw())
ggplot() +
geom_point(
data = curva_VxI,
aes(x = I, y = V),
color = "orange",
size = 2
) +
geom_line(
data = curva_VxI,
aes(x = I, y = V),
color = "orange",
linewidth = 1
) +
geom_point(
data = Xac_grafico,
aes(x = x, y = y),
color = "blue",
) +
geom_line(
data = Xac_grafico,
aes(x = x, y = y),
color = "blue",
linewidth = 1,
linetype = "solid",
) +
labs(
y = "V (pu)",
x = "I (pu)"
) +
coord_cartesian(
xlim = c(0, 1),
ylim = c(0, 1.5),
expand = FALSE,
clip = "off"
) +
scale_y_continuous(sec.axis = sec_axis(~.*1, "Xac (pu)")) +
theme(
axis.title.x = element_text(margin = margin(t = 10), size = 12, face = "bold"),
axis.title.y = element_text(margin = margin(r = 10), size = 12, face = "bold"),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10),
legend.position = "top",
# plot.background = element_rect(fill = "gray60"),
plot.margin = margin(t = 1, r = 1, b = 0.5, l = 0.5, unit = "cm"),
)`
code output
expected result
result obtained
I tried to produce 12 boxplots per ggplots stat_summary() functions, as you can see below in the reproducible example. I used stat_summary() instead of geom_boxplot(), because I want to whiskers to end at the 1st and 99th percentile of the data or to be individualized so to speak. I coded two functions, one for the whiskers and one for the outliers and used them as arguments in stat_summary(). This is the result:
I see two problems with this plot:
Not all outliers are coloured in red.
Outliers cut the whiskers, which is not supposed to happen by definition of my functions.
The help file has not been helping me in solving this issue. Comments are welcome.
The code:
library(stats)
library(ggplot2)
library(dplyr)
# Example Data
{
set.seed(123)
indexnumber_of_entity = rep(c(1:30),
each = 12)
month = rep(c(1:12),
each = 1,
times = 30)
variable_of_interest = runif(n = 360,
min = 0,
max = 100)
Data = as.data.frame(cbind(indexnumber_of_entity,
month,
variable_of_interest)) %>% mutate_at(.vars = c(1,2,3),
as.numeric)
Data_Above_99th_Percentile = filter(Data,
variable_of_interest > stats::quantile(Data$variable_of_interest,
0.99))
Data_Below_1st_Percentile = filter(Data,
variable_of_interest < stats::quantile(Data$variable_of_interest,
0.01))
}
# Functions that enable individualizing boxplots
{
Individualized_Boxplot_Quantiles <- function(x){
d <- data.frame(ymin = stats::quantile(x,0.01),
lower = stats::quantile(x,0.25),
middle = stats::quantile(x,0.5),
upper = stats::quantile(x,0.75),
ymax = stats::quantile(x,0.99),
row.names = NULL)
d[1, ]
}
Definition_of_Outliers = function(x)
{
subset(x,
stats::quantile(x,0.99) < x | stats::quantile(x,0.01) > x)
}
}
# Producing the ggplot
ggplot(data = Data) +
aes(x = month,
y = variable_of_interest,
group = month) +
stat_summary(fun.data = Individualized_Boxplot_Quantiles,
geom="boxplot",
lwd = 0.5) +
stat_summary(fun.y = Definition_of_Outliers,
geom="point",
size = 1) +
labs(title = "Distributions of Variable of Interest based on months",
x = "Month",
y = "Variable of Interest") +
theme(plot.title = element_text(size = 20,
hjust = 0.5,
face = "bold"),
axis.ticks.x = element_blank(),
axis.text.x = element_text(size = 12,
face = "bold"),
axis.text.y = element_text(size = 12,
face = "bold"),
axis.title.x = element_text(size = 16,
face = "bold",
vjust = -3),
axis.title.y = element_text(size = 16,
face = "bold",
vjust = 3)) +
scale_x_continuous(breaks = c(seq(from = 1,
to = 12,
by = 1))) +
scale_y_continuous(breaks = c(seq(from = 0,
to = 100,
by = 10))) +
geom_point(data = Data_Above_99th_Percentile,
colour = "red",
size = 1) +
geom_point(data = Data_Below_1st_Percentile,
colour = "red",
size = 1)
You can simplify the functions a little bit like this:
boxplot_quantiles <- function(x) {
y <- as.data.frame(t(stats::quantile(x, c(0.01, 0.25, 0.5, 0.75, 0.99))))
setNames(y, c('ymin', 'lower', 'middle', 'upper', 'ymax'))
}
outliers <- function(x) {
subset(x, stats::quantile(x,0.99) < x | stats::quantile(x,0.01) > x)
}
You can rely on the summary functions, since the Data_above_99th_Percentile and Data_Below_1st_Percentile were not groupwise calculations in your own code.
ggplot(data = Data, aes(x = month, y = variable_of_interest, group = month)) +
stat_summary(fun = outliers, geom = "point", col = 'red', size = 1) +
stat_summary(fun.data = boxplot_quantiles, geom = "boxplot", lwd = 0.5) +
scale_x_continuous('Month', breaks = 1:12) +
scale_y_continuous('Variable of Interest' , breaks = 0:10 * 10) +
labs(title = "Distributions of Variable of Interest based on months") +
theme(text = element_text(face = 'bold', size = 12),
plot.title = element_text(size = 20, hjust = 0.5),
axis.ticks.x = element_blank(),
axis.title.x = element_text(size = 16, margin = margin(20, 0, 0, 0)),
axis.title.y = element_text(size = 16, vjust = 3))
Edit
As long as you perform groupwise operations on the filtered data frames, your alternative method of drawing the outliers will work too. Note that I have added these in colored layers above the existing plot so that the red points are overplotted with blue and green dots:
Data_Above_99th_Percentile <- Data %>%
group_by(month) %>%
filter(variable_of_interest > quantile(variable_of_interest,0.99))
Data_Below_1st_Percentile <- Data %>%
group_by(month) %>%
filter(variable_of_interest < quantile(variable_of_interest, 0.01))
ggplot(data = Data, aes(x = month, y = variable_of_interest, group = month)) +
stat_summary(fun = outliers, geom = "point", col = 'red', size = 1) +
stat_summary(fun.data = boxplot_quantiles, geom = "boxplot", lwd = 0.5) +
scale_x_continuous('Month', breaks = 1:12) +
scale_y_continuous('Variable of Interest' , breaks = 0:10 * 10) +
labs(title = "Distributions of Variable of Interest based on months") +
theme(text = element_text(face = 'bold', size = 12),
plot.title = element_text(size = 20, hjust = 0.5),
axis.ticks.x = element_blank(),
axis.title.x = element_text(size = 16, margin = margin(20, 0, 0, 0)),
axis.title.y = element_text(size = 16, vjust = 3)) +
geom_point(data = Data_Below_1st_Percentile, color = 'green') +
geom_point(data = Data_Above_99th_Percentile, color = 'blue')
Let
df <- data.frame("Method" = rep(c("Method1", "Method2", "Method3", "Method4", "Method5"), each = 3, times = 1),
"Type" = rep(c("A", "B", "C"), 5),
"Value" = c(runif(5, 0, 1), runif(5, 0.2, 1.2), runif(5, 0.4, 1.4)))
I created a boxplot
get_box_stats <- function(y, upper_limit = max(df$Value) * 1.42) {
return(data.frame(
y = upper_limit,
label = paste(
length(y), "\n",
round(quantile(y, 0.25), 2), "\n",
round(median(y), 2), "\n",
round(quantile(y, 0.75), 2), "\n"
)
))
}
ggplot(df, aes(factor(Type), Value)) +
labs(fill = "Method") +
stat_summary(size = 4.6, fun.data = get_box_stats, geom = "text", position = position_dodge(.9),
hjust = 0.5, vjust = 1, aes(group = factor(Type)))+
geom_boxplot(coef = 0, aes(fill = factor(Type))) + theme_classic()+
theme(legend.position = "top", axis.text.x = element_text(size = 15),
axis.text.y = element_text(size = 15),
axis.title.x = element_text(size = 15),
axis.title.y = element_text(size = 15),
legend.title=element_text(size = 15),
legend.text=element_text(size = 15)) +
geom_dotplot(aes(fill = factor(Type)), dotsize = 0.8, binaxis = 'y', stackdir = 'center',
position = position_dodge(0.75))+
xlab("Method")
This results in a boxplot
QUESTION: As you can see, for stats are not perfectly centered, i.e for Method B -- values 1 and 5. Is there a way to fix this?
The problem lies in your use of paste in your summary function. By default, paste adds a space character between each element you want to paste together. Your summary string therefore has a space before and after every line break, but not before the first line. Since a space takes up some room, the aligment is off. Instead of adding in all those newline characters, specify that you want to use just a newline character as a separator using the sep argument:
get_box_stats <- function(y, upper_limit = max(df$Value) * 1.42) {
return(data.frame(
y = upper_limit,
label = paste(
length(y),
round(quantile(y, 0.25), 2),
round(median(y), 2),
round(quantile(y, 0.75), 2), sep = "\n"
)
))
}
ggplot(df, aes(factor(Type), Value)) +
labs(fill = "Method") +
stat_summary(size = 4.6, fun.data = get_box_stats, geom = "text",
hjust = 0.5, vjust = 1, aes(group = factor(Type)))+
geom_boxplot(coef = 0, aes(fill = factor(Type))) + theme_classic()+
theme(legend.position = "top", axis.text.x = element_text(size = 15),
axis.text.y = element_text(size = 15),
axis.title.x = element_text(size = 15),
axis.title.y = element_text(size = 15),
legend.title=element_text(size = 15),
legend.text=element_text(size = 15)) +
geom_dotplot(aes(fill = factor(Type)), dotsize = 0.8, binaxis = 'y',
stackdir = 'center',
position = position_dodge(0.75))+
xlab("Method")
I have a dataframe like this
id <- c(5738180,51845,167774,517814,1344920)
amount <- c(3.76765976,0.85195407,1.96821355,0.01464609,0.57378284)
outlier <- c("TRUE","FALSE","FALSE","FALSE","FALSE")
df.sample <- data.frame(id,amount,outlier)
I am trying to plot the points and add an id label to any point that is above the limit. In this case (id=5738180)
I am plotting like this
library(tidyverse)
library(ggplot2)
library(ggrepel) # help avoid overlapping text labels
library(gridExtra) # adds custom table inside ggplot
library(scales) # breaks and labels for axes and legends
library(ggthemes) # Adding desired ggplot themes
df.sample %>%
ggplot(aes(x = as.numeric(row.names(df.sample)),
y = amount, label = as.character(id))) +
geom_point(alpha = 0.6, position = position_jitter(w = 0.05, h = 0.0),
aes(colour = (amount < 3)), size = 2) +
geom_hline(aes(yintercept = 3, linetype = "Limit"),
color = 'black', size = 1) +
geom_text(aes(y = 3,x = amount[4],
label = paste("Limit = ", round(3, 3)),
hjust = 0, vjust = 1.5)) +
geom_text_repel(data = subset(df.sample, outlier == 'TRUE'),
nudge_y = 0.75,
size = 4,
box.padding = 1.5,
point.padding = 0.5,
force = 100,
segment.size = 0.2,
segment.color = "grey50",
direction = "x") +
geom_label_repel(data = subset(df.sample, outlier == 'TRUE'),
nudge_y = 0.75,
size = 4,
box.padding = 0.5,
point.padding = 0.5,
force = 100,
segment.size = 0.2,
segment.color = "grey50",
direction = "x")
labs(title = "Outlier Detection",
y = "amount",
x = "") +
theme_few() +
theme(legend.position = "none",
axis.text = element_text(size = 10, face = "bold"),
axis.title = element_text(size = 10, face = "bold"),
plot.title = element_text(colour = "blue", hjust = 0.5,
size = 15, face = "bold"),
strip.text = element_text(size = 10, face = "bold")) +
scale_colour_manual(values = c("TRUE" = "green","FALSE" = "red"))
I am running into an error "Error: Aesthetics must be either length 1 or the same as the data (1): x"
Can someone point me in the right direction?
The issue is with geom_text_repel() and geom_label_repel(). You subset the data, which now only includes 1 row, but the aes() are inheriting from the original data which have 5 rows, hence the error. To fix this, subset the data outside of the ggplot() call, and change the aesthetics for it. You are also missing a + after geom_label_repel() and the result below modifies the nudge_y to nudge_x and removes the geom_text_repel().
outliers <- subset(df.sample, outlier == TRUE)
ggplot(data = df.sample,
aes(x = as.numeric(row.names(df.sample)),
y = amount,
label = as.character(id))) +
geom_point(alpha = 0.6,
position = position_jitter(w = 0.05, h = 0.0),
aes(colour = (amount < 3)),
size = 2) +
geom_hline(aes(yintercept = 3,
linetype = "Limit"),
color = 'black',
size = 1) +
geom_text(aes(y = 3,x = amount[4],
label = paste("Limit = ",
round(3, 3)),
hjust = 0,
vjust = 1.5)) +
geom_label_repel(data = outliers,
aes(x = as.numeric(rownames(outliers)),
y = amount,
label = amount),
nudge_x = 0.75,
size = 4,
box.padding = 0.5,
point.padding = 0.5,
force = 100,
segment.size = 0.2,
segment.color = "grey50",
direction = "x",
inherit.aes = F) +
labs(title = "Outlier Detection",
y = "amount",
x = "") +
theme_few() +
theme(legend.position = "none",
axis.text = element_text(size = 10, face = "bold"),
axis.title = element_text(size = 10, face = "bold"),
plot.title = element_text(colour = "blue", hjust = 0.5,
size = 15, face = "bold"),
strip.text = element_text(size = 10, face = "bold")) +
scale_colour_manual(values = c("TRUE" = "green","FALSE" = "red"))
I am trying to create a polygon on one of my graphs I create in ggplot2 in order to denote a "compositional field". I understand that I should be using the geom_polygon function, but I was unable to successfully input this function. The code I have for my graph is below (less geom_polyon). I included a photo of what I am trying to do on my graph. Essentially, I want to create a polyogon between (x = 8, y = 0.8) and (x = 100, y = 2). Any help is greatly appreciate, thank you.
Image
[1]: https://i.stack.imgur.com/MR8pj.png
Graph script
ggplot(data = ZonesPd, aes(x = Pd_ppm, y = Cu.Pd)) +
geom_point(aes(color = Mineralized.Zone, size = Mineralized.Zone), alpha = 0.5) +
scale_x_log10(limit = c(1e-3, 1e3), expand = c(0, 0)) +
scale_y_log10(limit = c(1e2, 1e5), expand = c(0, 0)) +
scale_size_manual(values = c(0.5, 0.5, 3)) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.ticks.length = unit(-0.24, "cm"),
axis.text.x = element_text(size = 15, color = "black", margin = margin(0.5, 0.5, 0.5, 0.5,
"cm")),
axis.text.y = element_text(size = 15, color = "black", margin = margin(0.5, 0.5, 0.5, 0.5,
"cm")),
axis.title = element_text(size = 15),
aspect.ratio = 0.75,
legend.position = c(0.8, 0.15),
legend.text = element_text(size = 15),
legend.title = element_blank())
As suggested by r2evans in the comments, geom_rect is the way to go here:
library(ggplot2)
ZonesPd <- data.frame(Pd_ppm <- 10^rnorm(100), Cu.Pd <- 10^rnorm(100),
Mineralized.Zone=sample(gl(5,20)))
ggplot() +
geom_point(data = ZonesPd, aes(x = Pd_ppm, y = Cu.Pd, color = Mineralized.Zone, size = Mineralized.Zone), alpha = 0.5) +
scale_x_log10() + scale_y_log10() +
geom_rect(aes(xmin=8, xmax=100, ymin=0.8, ymax=8))
If you are happy with your plot and just want to add a rectangle with corners at (x = 8, y = 0.8) and (x = 100, y = 2), try adding:
+ geom_polygon(data = data.frame(x = c(8, 8, 100, 100), y = c(.8, 2, 2, .8)),
aes(x = x, y = y, alpha = 0.5))