How to insert specific IDs in the scatterplot in R - r

Here an example of my data:
mydf <- tibble(
ID = c(1, 2, 9,4,5,15),
M1 = c(60,50,40,20,30,45),
M2 = c(90,80, 30, 33,70,40)
)
I want to insert only IDs in the plot in which M1<M2.
Here are the codes that I have used:
library(ggrepel)
res1<- mydf %>%
filter(M1< M2)
p <- ggplot(data = mydf, aes(x = M1, y = M2,label = ID))+
geom_text_repel() +
geom_point(data=res1,
aes(x=M1,y= M2),
color='red',
size=3)
I want to remove IDs 9,15 as M1>M2.

Does this give you the desired plot?
mydf <- mydf %>%
mutate(M1lower = M1 < M2)
res1<- mydf %>%
filter(M1lower == T)
ggplot(data = mydf, aes(x = M1, y =M2,label = ID, color = M1lower))+
geom_point(size=3) +
geom_text_repel(data=res1,
aes(x=M1,y= M2))

Related

How to adjust the the y axis like this in ggplot2?

Here is the codes and the present outplot
df <- data.frame(state = c('0','1'),
male = c(26287942,9134784),
female = c(16234000,4406645))
#output
> df
state male female
1 0 26287942 16234000
2 1 9134784 4406645
library(ggplot2)
library(tidyr)
df_long <- pivot_longer(df, cols = c("female","male"))
names(df_long) <- c('state','sex','observations')
ggplot(data = df_long) +
geom_col(aes(x = sex, y =observations, fill = state)) +
theme(legend.position = c(0.1,0.9),
legend.background = element_rect(fill='lightgrey') )
I want to adjust the plots like this. (I marked what I want to change.)
Simplify the scientific records in y-axis.
Count the ratio (the number of state 1)/(the number of state 0 + state 1) and plot like this.
It may be a little complicated, and I don't know which functions to use. If possible, can anyone tell me some related functions or examples?
You can set options(scipen = 99) to disable scientific notation on y-axis. We can create a separate dataset for label data.
library(tidyverse)
options(scipen = 99)
long_data <- df %>%
pivot_longer(cols = c(male, female),
names_to = "sex",
values_to = "observations")
label_data <- long_data %>%
group_by(sex) %>%
summarise(perc = observations[match(1, state)]/sum(observations),
total = sum(observations), .groups = "drop")
ggplot(long_data) +
geom_col(aes(x = sex, y = observations, fill = state)) +
geom_text(data = label_data,
aes(label = round(perc, 2), x = sex, y = total),
vjust = -0.5) +
theme(legend.position = c(0.1,0.9),
legend.background = element_rect(fill='lightgrey'))
By searching the Internet for about two days, I have finished the work!
sex <- c('M','F')
y0 <- c(26287942,16234000)
y1 <- c(9134784, 4406645)
y0 <- y0*10^{-7}
y1 <- y1*10^{-7}
ratio <- y1/(y0+y1)
ratio <- round(ratio,2)
m <- t(matrix(c(y0,y1),ncol=2))
colnames(m) <- c(as.character(sex))
df <- as.data.frame(m)
df <- cbind(c('0','1'),df)
colnames(df)[1] <- 'observations'
df
df_long <- pivot_longer(df, cols = as.character(sex))
names(df_long) <- c('state','sex','observations')
df_r <- as.data.frame(df_long)
df_r <- data.frame(df_r,ratio=rep(ratio,2))
ggplot(data = df_r) +
geom_col(aes(x =sex, y = observations, fill = state))+
theme(legend.position = c(0.1,0.9),
legend.background = element_rect(fill=NULL) )+
geom_line(aes(x=sex,y=ratio*10),group=1)+
geom_point(aes(x=sex,y=ratio*10))+
geom_text(aes(x=sex,y=ratio*10+0.35),label=rep(ratio,2))+
scale_y_continuous(name =expression(paste('observations(','\u00D7', 10^7,')')),
sec.axis = sec_axis(~./10,name='ratio'))
The output:

Is it possible to use geom_table() along facet_grid()?

Recently I discovered the function geom_table(), from ggpmisc package, which allows you to put a table inside a plot. But I don't know how to put different tables into a grid plot.
I have this df and plot:
library(lubridate)
library(ggplot2)
library(ggpmisc)
Date <- c("2010-01-28", "2010-02-28", "2010-03-28",
"2010-04-28", "2010-05-28", "2010-06-28",
"2010-07-28", "2010-08-28", "2010-09-28",
"2010-10-28")
Date <- as_date(Date)
Country <- rep("Japan", 10)
A <- runif(10, min=30, max=90)
B <- runif(10, min = 1, max = 15)
df <- data.frame(Date, Country, A, B)
df %>% pivot_longer(-c(Date, Country)) %>%
ggplot(aes(x=Date,y=value,group=1,color=Country))+
geom_line(size = 0.9) +
facet_grid(name~Country, scales = "free", switch = "y")
I also have these two tables, tableA and tableB:
Time <- c("Today", "Yesterday", "One week ago")
Value_A <- 10:12
Value_B <- 1:3
tableA <- data.frame(Time, Value_A)
tableB <- data.frame(Time, Value_B)
How I put tableA in the top graph and tableB in the bottom graph?
I appreciate it if someone can help :)
You need to create a little data frame that hosts your tableA and tableB in a list column:
d <- tibble(x = c(0.95, 0.95), y = c(0.95, 0.95),
name = c("A", "B"), tb = list(tableA, tableB))
df %>% pivot_longer(-c(Date, Country)) %>%
ggplot(aes(x=Date,y=value,group=1,color=Country))+
geom_line(size = 0.9) +
geom_table_npc(data = d, aes(npcx = x, npcy = y, label = tb)) +
facet_grid(name~Country, scales = "free", switch = "y")

Create a three panel plot with one panel spanning 2 columns using ggplot2

I have following data:
df <- data.frame("Stat" = c("Var1","Var1","Var1","Var1","Var1","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var2","Var3","Var3","Var3","Var3","Var3","Var3","Var3","Var3","Var3","Var3"),
"Value" = c(0,1,2,3,4,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,1,2,3,4,5,6,7,8,9,10),
"n" = c(33,120,223,63,20,17,28,33,22, 35,41,53,44,55,59,39,33, 46,30,29,23,21,14,6,18,7,29,50,80,86,91,83,35,34, 20))
What I wanted to do is to plot the above data as bar plot in one canvas but in three rows (1 columns x 3 rows) and each panel should contain plot for only one variable (Stat) eg. Var1 in first panel, Var2 in second and Var3 in the third panel, using the following code:
library(multipanelfigure)
fig1 <- multi_panel_figure(columns = 2, rows = 2, panel_label_type = "none")
# fit the plots on the panels
fig1 %<>%
fill_panel(Var1Plot, column = 1, row = 1) %<>%
fill_panel(Var2Plot, column = 2, row = 1) %<>%
fill_panel(Var3Plot, column = 1:2, row = 2)
fig1
Issue is how to get the Var1Plot, Var2Plot and Var3Plot so that these can be placed in respective panels above. I used the below code, but not able to get the results into above panels:
library(tidyverse)
df %>% ggplot(aes(x = Value, y = n)) +
geom_bar(stat='identity') + facet_wrap(~ Stat)
Expected plot should look something like this :
Here's an approach with cowplot.
library(cowplot)
figure.list <- map(unique(df$Stat), ~
ggplot(data = subset(df, df$Stat == .x), aes(x = Value, y = n)) +
geom_bar(stat='identity') +
ggtitle(.x))
top <- plot_grid(figure.list[[1]], figure.list[[2]], ncol = 2)
bottom <- plot_grid(figure.list[[3]], ncol = 1)
plot_grid(top, bottom,
ncol=1, rel_heights=c(1,1))
If you really want some to be coord_flip-ed, you could make the list manually:
figure.list <- list()
figure.list[[1]] <- ggplot(data = subset(df, df$Stat == "Var1"), aes(x = Value, y = n)) +
geom_bar(stat='identity') + coord_flip()
figure.list[[2]] <- ggplot(data = subset(df, df$Stat == "Var2"), aes(x = Value, y = n)) +
geom_bar(stat='identity') + coord_flip()
figure.list[[3]] <- ggplot(data = subset(df, df$Stat == "Var3"), aes(x = Value, y = n)) +
geom_bar(stat='identity')
top <- plot_grid(figure.list[[1]], figure.list[[2]], ncol = 2)
bottom <- plot_grid(figure.list[[3]], ncol = 1)
plot_grid(top, bottom,
ncol=1, rel_heights=c(1,1))

adding different curves to plots in R

I know I can add the same curve to each plot pane as a layer using lattice and latticeExtra R packages (see blow).
But suppose we wanted to add different curves to each plot pane.
For example, in the below example, I wonder how to add v1 only to the top plot, v2 to the bottom-left plot, and v3 to the bottom-right plot?
library(lattice)
library(latticeExtra)
set.seed(24)
v1 <- density(rnorm(1e3, 3.5))
v2 <- density(rnorm(1e3, 3))
v3 <- density(rnorm(1e3, 2.75))
foo <- xyplot((1:32*.01)~wt|gear , data = mtcars)
foo + layer(panel.polygon(v1, col = 2, alpha = 0.3))
Here is one option
foo +
layer(if(panel.number() == 3) {
panel.polygon(v1, col = 2, alpha = 0.3)
} else if(panel.number() == 1) {
panel.polygon(v2, col = 2, alpha = 0.3)
} else{
panel.polygon(v3, col = 2, alpha = 0.3)
})
data
library(lattice)
library(latticeExtra)
set.seed(24)
v1 <- density(rnorm(1e3, 3.5))
v2 <- density(rnorm(1e3, 3))
v3 <- density(rnorm(1e3, 2.75))
foo <- xyplot((1:32*.01)~wt|gear , data = mtcars)
You can do it with ggplot2 and facet_grid:
library(ggplot2)
library(dplyr)
# user data
v1 <- (rnorm(1e3, 3.5))
v2 <- (rnorm(1e3, 3))
v3 <- (rnorm(1e3, 2.75))
# Make df1 from user data and associate with add gear value in `mtcars`
df1 <- data.frame(wt = c(v1, v2, v3),
gear = as.factor(c(rep(3, 1000),
rep(4, 1000),
rep(5, 1000))))
# select data from mtcars and add user defined values (`val`)
df2 <- mtcars %>%
mutate(val = 1:32 * 0.01) %>%
remove_rownames() %>%
mutate(gear = as.factor(gear)) %>%
select(c(val, gear, wt))
ggplot(df2, aes(x = wt, y = val, #set up mapping with df2
fill = gear)) +
geom_density(data = df1, #make density plots of df1
aes(x = wt,
fill = gear),
inherit.aes = FALSE) + #next add points from df1
geom_point(data = df2, aes(x = wt, y = val), inherit.aes = FALSE) +
facet_grid(cols = vars(gear))
You can make the fill color more transparent by adding an alpha value to geom_density.

ggplot2 histogram binwidth [duplicate]

This question already has answers here:
Different breaks per facet in ggplot2 histogram
(4 answers)
Closed 8 years ago.
I would like to create multiple histograms within one plot (using facet_wrap).
This could be an example code:
df <- data.frame(p1 = rnorm(100,5,2), p2 = rnorm(100,80,20), group = rep(LETTERS[1:4],25))
library(ggplot2)
library(reshape)
plotData <- melt(df, id.vars = "group", measure.vars = c("p1","p2") )
m <- ggplot(plotData, aes(x = value, color = group, fill = group, group = group))
m <- m + geom_bar(position=position_dodge())
m <- m + facet_wrap( ~ variable,scales = "free_x")
print(m)
Now, I would like to modify the plot that it creates per parameter ("p1,"p2") let's say 10 bins.
Up to now, I could not find a way to do this as binwidth/breaks calculation should be dependent on a subset of data.
Is it possible at all?
I want to share my solution (taken from the answered question linked above) extended by the possibility to overlay the histograms with density curves scaled to histogram counts:
df <- data.frame(p1 = rnorm(1000,5,2), p2 = rnorm(1000,80,20), group = rep(LETTERS[1:4],25))
library(ggplot2)
library(reshape)
library(plyr)
plotData <- melt(df, id.vars = "group", measure.vars = c("p1","p2") )
nBins <- 10
groupedData <- dlply(plotData, .(variable))
groupedBinWidth <- llply(groupedData, .fun = function(data, nBins) {
r <- range(data$value, na.rm = TRUE, finite = TRUE)
widthOfBins = (r[2] - r[1])/nBins
if (is.na(widthOfBins) || is.infinite(widthOfBins) || (widthOfBins <= 0)) widthOfBins <- NULL
widthOfBins
}, nBins = nBins)
densData <- dlply(plotData, .(variable, group), .fun = function(subData){
param <- subData$variable[1]
group <- subData$group[1]
d <- density(subData$value)
bw <- groupedBinWidth[[param]]
data.frame(x = d$x, y = d$y * nrow(subData) * bw , group = group, variable = param)
})
hls <- mapply(function(x, b) geom_bar(aes(x = value), position = position_dodge(), data = x, binwidth = b),
groupedData, groupedBinWidth)
dLay <- mapply(function(data) geom_density(data = data, aes(x = x, y = y), stat = "identity", fill = NA, size = 1),
densData)
m <- ggplot(plotData, aes(x = value, color = group, fill = group, group = group))
m <- m + hls
m <- m + dLay
m <- m + facet_wrap( ~ variable,scales = "free")
print(m)
Try this - really ugly code, but works if I understand you correctly. You might want to play with geom_density and maybe remove fill to make it more readable.
nbin<- 5
m <- ggplot(plotData, aes(x = value, color = group, fill = group, group = group))
m <- m + geom_histogram(data = subset(plotData, variable == "p1"), binwidth=diff(range(subset(plotData, variable == "p1")$value))/nbin)
m <- m + geom_histogram(data = subset(plotData, variable == "p2"), binwidth=diff(range(subset(plotData, variable == "p2")$value))/nbin)
m <- m + facet_wrap( ~ variable,scales = "free_x")
print(m)

Resources