I have been trying to produce a scatter plot with two levels of alpha applied to dots that are above or below a score threshold. To do so, I am storing the alpha value for each point in a vector, item_alpha, within the data frame and supplying this vector as the argument for alpha in my call to geom_point:
library( ggplot2 );
library( scales );
# Read the whitespace-delimited table; header = TRUE takes the column names
# from the first line of the file.
one.data <- read.table("test.data", header = TRUE)

# Filled points: colour, size and alpha are all mapped from data columns
# inside aes(), so each of them is handled by its ggplot2 scale.
p1 <- ggplot(data = one.data)
p1 <- p1 + geom_point(aes(plot_X, plot_Y, colour = log10_p_value,
                          size = plot_size, alpha = item_alpha))
p1 <- p1 + scale_colour_gradientn(
  colours = c("red", "yellow", "green", "blue"),
  limits = c(min(one.data$log10_p_value), max(one.data$log10_p_value))
)

# Second point layer: an outline ring (shape 21, transparent fill) drawn in
# 60%-opaque black around every point.
p1 <- p1 + geom_point(aes(plot_X, plot_Y, size = plot_size), shape = 21,
                      fill = "transparent", colour = I(alpha("black", 0.6)))
p1 <- p1 + scale_size(range = c(5, 30)) + theme_bw()

# Pad both axes by one tenth of the data range on each side. The lower
# bounds subtract the padding (the `-` operator was missing, which made
# these two lines a syntax error).
one.x_range <- max(one.data$plot_X) - min(one.data$plot_X)
one.y_range <- max(one.data$plot_Y) - min(one.data$plot_Y)
p1 <- p1 + xlim(min(one.data$plot_X) - one.x_range / 10,
                max(one.data$plot_X) + one.x_range / 10)
p1 <- p1 + ylim(min(one.data$plot_Y) - one.y_range / 10,
                max(one.data$plot_Y) + one.y_range / 10)
p1
However, it seems alpha is only being set properly for the eight points with the smaller value, while the remaining points remain opaque. I've consulted the ggplot documentation, played with the examples and tried some other variations which have mostly produced various errors and I'm really hoping someone will have some insight on this! Thanks in advance!
Contents of test.data:
"plot_X" "plot_Y" "plot_size" "log10_p_value" "item_alpha"
5.326 3.194 4.411 -27.3093 0.6
-2.148 7.469 3.434 -12.3487 0.6
-6.14 -2.796 3.062 -22.8069 0.6
3.648 6.091 3.597 -15.5032 0.6
0.356 -6.925 3.95 -10.4754 0.6
5.532 -0.135 3.246 -19.2883 0.6
3.794 -2.279 3.557 -16.4438 0.6
-3.784 1.42 2.914 -17.9687 0.6
-7.645 -1.571 3.163 -12.4498 0.6
-1.526 -4.756 3.509 -10.8972 0.6
-6.461 2.293 2.962 -13.4306 0.6
-5.806 0.983 4.38 -24.5422 0.6
-3.592 0.769 2.971 -17.8119 0.6
0.127 3.572 3.603 -11.4277 0.6
-0.566 0.706 3.77 -13.0952 0.3
2.25 -2.604 0.845 -11.7949 0.3
-7.845 -0.927 3.21 -12.6408 0.3
1.084 -6.691 3.654 -10.7319 0.3
-3.546 6.46 2.994 -11.6777 0.3
-5.478 -0.645 4.256 -17.7344 0.3
-6.251 -0.418 4.273 -19.29 0.3
-3.855 5.969 3.236 -10.9057 0.3
0.345 0.971 3.383 -11.5973 0.6
0.989 0.345 2.959 -10.8252 0.6
You're using a distinctly base plotting approach with ggplot2, which is obviously not the right way to go. Here are two options:
# Inline copy of the question's test.data so the example runs without the
# file; header = TRUE takes the column names from the first line of the text.
dat <- read.table(text = "plot_X plot_Y plot_size log10_p_value item_alpha
5.326 3.194 4.411 -27.3093 0.6
-2.148 7.469 3.434 -12.3487 0.6
-6.14 -2.796 3.062 -22.8069 0.6
3.648 6.091 3.597 -15.5032 0.6
0.356 -6.925 3.95 -10.4754 0.6
5.532 -0.135 3.246 -19.2883 0.6
3.794 -2.279 3.557 -16.4438 0.6
-3.784 1.42 2.914 -17.9687 0.6
-7.645 -1.571 3.163 -12.4498 0.6
-1.526 -4.756 3.509 -10.8972 0.6
-6.461 2.293 2.962 -13.4306 0.6
-5.806 0.983 4.38 -24.5422 0.6
-3.592 0.769 2.971 -17.8119 0.6
0.127 3.572 3.603 -11.4277 0.6
-0.566 0.706 3.77 -13.0952 0.3
2.25 -2.604 0.845 -11.7949 0.3
-7.845 -0.927 3.21 -12.6408 0.3
1.084 -6.691 3.654 -10.7319 0.3
-3.546 6.46 2.994 -11.6777 0.3
-5.478 -0.645 4.256 -17.7344 0.3
-6.251 -0.418 4.273 -19.29 0.3
-3.855 5.969 3.236 -10.9057 0.3
0.345 0.971 3.383 -11.5973 0.6
0.989 0.345 2.959 -10.8252 0.6",header = TRUE)
# Label each point by its alpha level so alpha can be mapped as a discrete
# aesthetic ('High' for 0.6, 'Low' for everything else).
dat$alpha_grp <- ifelse(dat$item_alpha == 0.6, 'High', 'Low')

#If you want a legend; although you can suppress the legend
# here if you want.
# The values are named on purpose: an unnamed vector is matched to the groups
# in order (alphabetical here), which would give 'High' 0.3 and 'Low' 0.6.
ggplot(data = dat, aes(x = plot_X, y = plot_Y)) +
  geom_point(aes(alpha = alpha_grp)) +
  scale_alpha_manual(values = c(High = 0.6, Low = 0.3))

#If you don't care about a legend
# One layer per group, each with a fixed alpha set outside aes().
ggplot() +
  geom_point(data = dat[dat$alpha_grp == 'High', ],
             aes(x = plot_X, y = plot_Y), alpha = 0.6) +
  geom_point(data = dat[dat$alpha_grp == 'Low', ],
             aes(x = plot_X, y = plot_Y), alpha = 0.3)
Related
I have a lm model results containing R2 and pvalue, and I plotted them in a bar plot. I have then facetted them using two discrete variables.
I want to put a * on top of the bars to flag statistical significance (pval <= 0.05), as shown in the bottom-left-most panel of the image below.
I have not found an insightful tutorial on how to do this.
Any way to do this, please?
Here is some code I used
> head(res_all_s2)
WI aggre_per Season yield_level slope Intercept r.squared
1 R IDW2 Dec Season2 Region II -7.06 6091 0.41
2 R IDW2 Dec Season2 Region I -7.29 6280 0.40
3 GDD AS OND Season2 Region II 14.23 -18270 0.34
4 GDD AS Nov Season2 Region II 36.84 -14760 0.33
5 SPI1 IDW2 Dec Season2 Region II -405.10 5358 0.31
6 SPI1 IDW2 Dec Season2 Region I -421.70 5523 0.32
adj.r.squared fstatistic.value pval pearson
1 0.36 9.58 0.01 -0.64
2 0.36 9.49 0.01 -0.64
3 0.29 7.09 0.02 0.58
4 0.28 6.97 0.02 0.58
5 0.26 6.40 0.02 -0.56
6 0.27 6.51 0.02 -0.56
> # significance (pval <= 0.05)
> signif_reg <- res_all_s2 %>% filter(pval <= 0.05)
> head(signif_reg)
WI aggre_per Season yield_level slope Intercept r.squared
1 R IDW2 Dec Season2 Region II -7.06 6091 0.41
2 R IDW2 Dec Season2 Region I -7.29 6280 0.40
3 GDD AS OND Season2 Region II 14.23 -18270 0.34
4 GDD AS Nov Season2 Region II 36.84 -14760 0.33
5 SPI1 IDW2 Dec Season2 Region II -405.10 5358 0.31
6 SPI1 IDW2 Dec Season2 Region I -421.70 5523 0.32
adj.r.squared fstatistic.value pval pearson
1 0.36 9.58 0.01 -0.64
2 0.36 9.49 0.01 -0.64
3 0.29 7.09 0.02 0.58
4 0.28 6.97 0.02 0.58
5 0.26 6.40 0.02 -0.56
6 0.27 6.51 0.02 -0.56
>
> # Plot R2
>
> r <- res_all_s2 %>% ggplot(aes(x=aggre_per,
+ y=r.squared )) +
+ geom_bar(stat="identity", width=0.8) +
+ facet_grid(yield_level ~ WI,
+ scales = "free_y",
+ switch = "y") +
+ scale_y_continuous(limits = c(0, 1)) +
+ xlab("Aggregation period") +
+ ylab(expression(paste("R-squared"))) +
+ theme_bw() +
+ theme(axis.title = element_text(size = 12), # all titles
+ axis.text = element_text(colour = "black"),
+ axis.text.x = element_text(angle = 90, vjust = 0.5,
+ hjust = 1, color = "black"),
+ strip.text.y.left = element_text(angle = 0),
+ panel.border = element_rect(color = "black",
+ size = .5))
> r
And, here is the link to my res_all_s2 dataset https://1drv.ms/u/s!Ajl_vaNPXhANgckJeqDKA0fzfFEbhg?e=VfoFaB
Technically, you can always add an appropriate geom with its independent dataset (that would be your data filtered to exclude pval > .05):
## keep only the significant models (pval <= 0.05)
df_filtered <- res_all_s2 %>% filter(pval <= 0.05)
## ggplot(...) +
geom_point(data = df_filtered, pch = 8)
## pch = point character, no. 8 = asterisk
or
## ... +
## draw an asterisk for every row of the filtered data;
## nudge_y shifts the label vertically by 0.05
geom_text(
  aes(label = "*"),
  data = df_filtered,
  nudge_y = 0.05
)
or color only significant columns:
## ... +
## `+` binds tighter than `<=`, so the comparison needs its own parentheses:
## index 1 ('grey') when pval > .05, index 2 ('red') when pval <= .05.
## scale_fill_identity() makes ggplot2 use those colour names literally
## instead of treating them as categories to be assigned default colours.
geom_col(aes(fill = c('grey', 'red')[1 + (pval <= .05)])) +
  scale_fill_identity()
So, yes, technically that's feasible. But before throwing the results of 13 x 7 x 5 = 455 linear models at your audience, please consider the issues of p-hacking, the benefits of multivariate analysis and the viewers' resources ;-)
I have a data set of UK earthquakes that I want to plot by location on a map. (Hopefully I then want to change the size to be representative of the magnitude). I have made a map of the uk using ggmap, but I am struggling to then add the points to a map.
I however keep getting 2 errors, and cannot plot my points on the map. The errors are either
- Error: Aesthetics must be either length 1 or the same as the data (990): x, y
or
- Error in FUN(X[[i]], ...) : object 'group' not found
depending on how I try to plot the points.
this is what I have so far:
# Combine long2, lat2 and mag1 into one data frame (one column each) and
# print it.
table <- data.frame(long2, lat2, mag1)
table
long2 lat2 mag1
1 -2.62 52.84 1.9
2 1.94 57.03 4.2
3 -0.24 51.16 0.6
4 -2.34 53.34 0.8
5 -3.16 55.73 2.0
6 -0.24 51.16 1.0
7 -4.11 53.03 1.5
8 -0.24 51.16 0.2
9 -0.24 51.16 1.1
10 -5.70 57.08 1.6
11 -2.40 53.00 1.4
12 -1.19 53.35 1.2
13 -1.02 53.84 1.7
14 -4.24 52.62 0.8
15 -3.23 54.24 0.3
16 -2.06 52.62 1.0
17 1.63 54.96 1.7
18 -5.24 56.05 0.7
19 -5.86 55.84 1.3
20 -3.22 54.23 0.3
21 -0.24 51.16 -1.4
22 -0.24 51.16 -0.7
23 -4.01 55.92 0.3
24 -5.18 50.08 2.3
25 -1.95 54.44 1.0
library(ggplot2)
library(maps)
# Polygon outline data for the UK taken from the "world" map database.
w <- map_data("world", region = "uk")
# data and aes() (including group = group) are given to ggplot() itself, so
# they act as plot-wide defaults for every layer added afterwards.
uk <- ggplot(data = w, aes(x = long, y = lat, group=group)) + geom_polygon(fill = "seagreen2", colour="white") + coord_map()
# Point layer drawn from a second data frame. colour = "red" and size = 2 sit
# inside aes(), so they are treated as mapped values, not fixed settings.
# NOTE(review): the inherited group = group mapping has no matching column in
# `table`; presumably this is the source of the "object 'group' not found" error.
uk + geom_point(data=table, aes(x=long2, y=lat2, colour="red", size=2), position="jitter", alpha=I(0.5))
Is it the way I have built my map, or how I am plotting my points? And how do I fix it?
I've made three changes to your code, and one or more of them solved the problems you were having. I'm not sure exactly which—feel free to experiment!
I named your data pdat (point data) instead of table. table is the name of a built-in R function, and it's best to avoid using it as a variable name.
I have placed both data= expressions inside the geom function that needs that data (instead of placing the data= and aes() inside the initial ggplot() call.) When I use two or more data.frames in a single plot, I do this defensively and find that it avoids many problems.
I have moved colour="red" and size=2 outside of the aes() function. aes() is used to create an association between a column in your data.frame and a visual attribute of the plot. Anything that's not a name of a column doesn't belong inside aes().
# Load data.
# The header names three columns while every data row has four fields, so
# read.table() uses the leading field (1..25) as row names, not as a column.
pdat <- read.table(header=TRUE,
text="long2 lat2 mag1
1 -2.62 52.84 1.9
2 1.94 57.03 4.2
3 -0.24 51.16 0.6
4 -2.34 53.34 0.8
5 -3.16 55.73 2.0
6 -0.24 51.16 1.0
7 -4.11 53.03 1.5
8 -0.24 51.16 0.2
9 -0.24 51.16 1.1
10 -5.70 57.08 1.6
11 -2.40 53.00 1.4
12 -1.19 53.35 1.2
13 -1.02 53.84 1.7
14 -4.24 52.62 0.8
15 -3.23 54.24 0.3
16 -2.06 52.62 1.0
17 1.63 54.96 1.7
18 -5.24 56.05 0.7
19 -5.86 55.84 1.3
20 -3.22 54.23 0.3
21 -0.24 51.16 -1.4
22 -0.24 51.16 -0.7
23 -4.01 55.92 0.3
24 -5.18 50.08 2.3
25 -1.95 54.44 1.0")
library(ggplot2)
library(maps)
# Outline polygons for the UK from the "world" map database.
w <- map_data("world", region = "uk")

# Start from an empty plot so that each layer carries its own data and its
# own aesthetic mapping; fixed appearance settings stay outside aes().
uk <- ggplot() +
  geom_polygon(
    mapping = aes(x = long, y = lat, group = group),
    data = w,
    colour = "white",
    fill = "seagreen2"
  ) +
  geom_point(
    mapping = aes(x = long2, y = lat2),
    data = pdat,
    position = "jitter",
    colour = "red",
    size = 2,
    alpha = 0.5
  ) +
  coord_map()

# Write the finished map to disk (6 x 4 at 150 dpi).
ggsave("map.png", plot = uk, width = 6, height = 4, dpi = 150)
I have the following exemplary data:
88 0 -3.944 669.8 6.33 637.55 setosa
60 0 -3.477 651.81 6.19 618.55 setosa
4.4 0.001 -2.944 570.7 6.28 544.49 setosa
5000 0.003 -2.585 420.52 5.27 404.39 setosa
116 0.004 -2.365 761.97 6.18 714.59 setosa
300 0.008 -2.079 731.9 5.59 690.57 setosa
70 0.011 -1.942 761.97 6.36 714.59 setosa
121 0.014 -1.852 775.95 5.71 730.59 versicolor
55 0.02 -1.699 681.88 5.64 638.54 versicolor
92.1 0.028 -1.549 653.87 6.25 610.53 versicolor
75 0.041 -1.384 653.83 5.39 614.52 versicolor
20 0.065 -1.187 711.95 6.8 662.56 versicolor
10000 0.075 -1.125 394.48 4.95 380.37 virginica
130 0.085 -1.073 779 7.23 732.63 virginica
400 0.097 -1.012 662.79 5.61 628.52 virginica
99 0.111 -0.954 864.54 9.88 814.14 virginica
400 0.135 -0.87 869.49 7.06 816.06 virginica
I have the following code:
library(devtools)
library(ggbiplot)

# Read the headerless table: columns 1-6 are the numeric measurements,
# column 7 is the group label.
iris <- read.table('file', header = FALSE)
log.ir <- iris[, 1:6]
ir.species <- iris[, 7]

# PCA on the centred and scaled measurements.
ir.pca <- prcomp(log.ir, center = TRUE, scale. = TRUE)

# Biplot grouped by label with ellipses, an untitled colour legend and a
# horizontal legend placed above the plot.
g <- ggbiplot(
  ir.pca,
  obs.scale = 1,
  var.scale = 1,
  groups = ir.species,
  ellipse = TRUE,
  circle = FALSE,
  varname.size = 0
) +
  scale_color_discrete(name = '') +
  theme(legend.direction = 'horizontal', legend.position = 'top')
print(g)
My question is "How can I modify it so the arrows are not drawn?"
So I had to install ggbiplot via devtools and manually update package::digest before I could get your example code to reproduce, but var.axes will do the trick:
# Same biplot as in the question, with var.axes = FALSE added to switch off
# the variable arrows. FALSE is spelled out because F is an ordinary variable
# that can be reassigned.
g <- ggbiplot(ir.pca, obs.scale = 1, var.scale = 1, groups = ir.species,
              ellipse = TRUE, circle = FALSE, varname.size = 0,
              var.axes = FALSE)
g <- g + scale_color_discrete(name = '')
g <- g + theme(legend.direction = 'horizontal', legend.position = 'top')
I have a small table of summary data with the odds ratio and the upper and lower confidence limits for four categories, with six levels within each category. I'd like to produce a chart using ggplot2 that looks similar to the usual one created when you specify an lm and its SE, but I'd like R to just use the pre-specified values I have in my table. I've managed to create the line graph with error bars, but these overlap and make it unclear. The data look like this:
interval OR Drug lower upper
14 0.004 a 0.002 0.205
30 0.022 a 0.001 0.101
60 0.13 a 0.061 0.23
90 0.22 a 0.14 0.34
180 0.25 a 0.17 0.35
365 0.31 a 0.23 0.41
14 0.84 b 0.59 1.19
30 0.85 b 0.66 1.084
60 0.94 b 0.75 1.17
90 0.83 b 0.68 1.01
180 1.28 b 1.09 1.51
365 1.58 b 1.38 1.82
14 1.9 c 0.9 4.27
30 2.91 c 1.47 6.29
60 2.57 c 1.52 4.55
90 2.05 c 1.31 3.27
180 2.422 c 1.596 3.769
365 2.83 c 1.93 4.26
14 0.29 d 0.04 1.18
30 0.09 d 0.01 0.29
60 0.39 d 0.17 0.82
90 0.39 d 0.2 0.7
180 0.37 d 0.22 0.59
365 0.34 d 0.21 0.53
I have tried this:
# Error-bar limits come straight from the pre-computed lower/upper columns.
limits <- aes(ymax = upper, ymin = lower)
dodge <- position_dodge(width = 0.9)
# x uses `interval`, the name of the time column in the table shown above
# (the original referred to a `days` column that the table does not have).
ggplot(data, aes(y = OR, x = interval, colour = Drug)) +
  geom_line(stat = "identity") +
  geom_errorbar(limits, position = dodge)
and searched for a suitable answer to create a pretty plot, but I'm flummoxed!
Any help greatly appreciated!
You need the following lines:
# Points joined by a line for each drug.
p <- ggplot(data = data, aes(x = interval, y = OR, colour = Drug)) +
  geom_point() +
  geom_line()
# Confidence band from the pre-computed limits. The columns are referenced by
# bare name inside aes() so they are looked up in the layer's data; writing
# data$lower / data$upper would bypass that lookup.
p <- p + geom_ribbon(aes(ymin = lower, ymax = upper), linetype = 2, alpha = 0.1)
Here is a base R approach using polygon(), since @jmb requested a solution in the comments. Note that I have to define two sets of x-values and associated y-values for the polygon to plot. It works by plotting the outer perimeter of the polygon. I define plot type = 'n' and use points() separately to get the points on top of the polygon. My personal preference is the ggplot solutions above when possible, since polygon() is pretty clunky.
library(tidyverse)
# Built-in example dataset.
data('mtcars')

# Per-cylinder summary: group size, mean mpg, and mean -/+ one standard error.
mean.mpg <- summarise(
  group_by(mtcars, cyl),
  N = n(),
  avg.mpg = mean(mpg),
  SE.low = avg.mpg - sd(mpg) / sqrt(N),
  SE.high = avg.mpg + sd(mpg) / sqrt(N)
)

# Empty frame first (type = 'n'); the band and the points are added on top.
plot(avg.mpg ~ cyl, data = mean.mpg, type = 'n', ylim = c(10, 30))

# The band is traced as a closed outline: along the lower limits in x order,
# then back along the upper limits in reverse x order.
with(
  mean.mpg,
  polygon(
    x = c(cyl, rev(cyl)),
    y = c(SE.low, rev(SE.high)),
    density = 200,
    col = 'grey90'
  )
)
points(avg.mpg ~ cyl, data = mean.mpg, col = 'firebrick', pch = 19)
I have the following R code, which draws a multi-line graph where each line corresponds to a category of data. In the code, the categories are given by the variable nk:
My dataset looks like this :
k precision recall
0.25 0.02 1.011
0.25 0.04 1.011
0.5 0.15 0.941
0.5 0.17 0.931
0.5 0.18 0.921
0.5 0.19 0.911
1.0 0.36 0.831
1.0 0.39 0.811
1.0 0.41 0.801
The problem is that it only visualizes the lines for k = 1.0 and not the lines for k = 0.5 and 0.25
My question is: how can I use an nk variable that is not
an integer in order to visualize the lines for k = 0.5 or 0.25?
# Draw one precision/recall line per value of k, with a legend.
dtf$k <- as.numeric(dtf$k)
# nk is the largest k value (1.0 for the data shown), not the number of
# distinct k values.
nk <- max(dtf$k)
xrange <- range(dtf$precision)
yrange <- range(dtf$recall)
# Empty frame spanning the data ranges; the lines are added in the loop below.
plot(xrange, yrange,
type="n",
xlab="Precision",
ylab="Recall"
)
# One colour, line type and plotting symbol per index 1..nk.
colors <- rainbow(nk)
linetype <- c(1:nk)
plotchar <- seq(18, 18+nk, 1)
# i runs over 1..nk, and only rows with k exactly equal to i are selected.
# With nk = 1 just the k == 1 rows match, so the 0.25 and 0.5 groups are
# never drawn.
for (i in 1:nk) {
Ki <- subset(dtf, k==i)
lines(Ki$precision, Ki$recall,
type="b",
lwd=2,
lty=linetype[i],
col=colors[i],
pch=plotchar[i]
)
}
title("Methods varying K", "Precision Recall")
# Legend entries are labelled 1..nk and placed at the top-left data corner
# (minimum precision, maximum recall).
legend(xrange[1], yrange[2],
1:nk,
cex=1.0,
col=colors,
inset=c(-0.2,0),
pch=plotchar,
lty=linetype,
title="k"
)
Data
# Rebuild the question's data inline; the first line of the text is the header.
dtf <- read.table(header = TRUE, text = 'k precision recall
0.25 0.02 1.011
0.25 0.04 1.011
0.5 0.15 0.941
0.5 0.17 0.931
0.5 0.18 0.921
0.5 0.19 0.911
1.0 0.36 0.831
1.0 0.39 0.811
1.0 0.41 0.801')
# Turn k into a factor so that each distinct value becomes its own level.
dtf$k <- factor(dtf$k)
ggplot2 solution
# library() stops with an error if ggplot2 is missing; require() would only
# return FALSE and let the plot call fail later.
library(ggplot2)
# k is a factor, so mapping it to colour draws one line per level.
ggplot(dtf, aes(x = precision, y = recall, col = k)) +
  geom_line()
base solution
# Empty frame sized to the data; one line per level of k is added below.
plot(recall ~ precision, data = dtf, type = 'n')
# One colour per level; three are listed, matching the three k values.
cols <- c('red', 'blue', 'green')
# The data frame is `dtf`. `df` is a base R function (the F density), so
# df$k fails instead of returning the k column.
levs <- levels(dtf$k)
for (i in seq_along(levs)) {
  # Rows belonging to the i-th level of k.
  take <- dtf[dtf$k == levs[i], ]
  lines(take$precision, take$recall, col = cols[i])
}