Symmetric y-axis limits for barchart in ggplot2 - r

I would like to make the y-axis of a bar chart symmetric, so that it's easier to see if positive or negative changes are bigger. Since otherwise this is a bit distorted. I do have working code although it's a bit clumsy and I thought it would be great if I could directly do this in the first ggplot() call. So as to say that ylim directly is symmetrical.
set.seed(123)
my.plot <- ggplot( data = data.table(x = 1:10,
y = rnorm(10,0, 2)), aes(x=x, y=y)) +
geom_bar(stat="identity")
rangepull <- layer_scales(my.plot)$y
newrange <- max(abs(rangepull$range$range))
my.plot +
ylim(newrange*-1, newrange)

What about this :
library(ggplot2)
library(data.table)
set.seed(123)
my.data = data.table(x = 1:10, y = rnorm(10,0, 2))
my.plot <- ggplot(data = my.data)+aes(x=x, y=y) +
geom_bar(stat="identity")+ylim((0-abs(max(my.data$y))),(0+max(abs(my.data$y))))
my.plot

You may want to consider using ceiling:
set.seed(123)
library(ggplot2)
library(data.table)
dT <- data.table(x = 1:10, y = rnorm(10,0, 2))
my.plot <- ggplot(dT, aes(x=x, y=y)) +
geom_bar(stat="identity") +
ylim(-ceiling(max(abs(dT$y))), ceiling(max(abs(dT$y))))
This will give you:
> my.plot

Related

Smooth 2D surface

I am wondering about the possibility to smooth the plot or make it somehow better, since now the pixels are too big.
library(ggplot2)
library(reshape2)
# plot2d = melt(c)
plot2d = melt(matrix(rnorm(20), 5)) # fake data
names(plot2d) <- c("x", "y", "z")
v <- ggplot(plot2d, aes(x, y, z = z))
v + geom_tile(aes(fill = z)) +
scale_alpha_continuous(limits=c(start.point, end.point)) +
scale_fill_gradient2('TYYYT',low="green", mid = "white", high="red")
library(ggplot2)
library(reshape2)
set.seed(101)
## set dimnames so that melt() picks them up
m <- matrix(rnorm(20),5,dimnames=list(x=1:5,y=1:4))
plot2d_1 <- melt(m,value.name="z")
gg0 <- ggplot(plot2d_1, aes(x,y,z=z,fill=z))
The easiest way to smooth this plot is to use geom_raster() with interpolate=TRUE (see ?geom_tile for other advantages).
gg0 + geom_raster(interpolate=TRUE)
You can also do (bilinear) interpolation by hand, using the fields package (there are lots of options: e.g. library(sos); findFn("{bilinear interpolation}").
library(fields)
m2 <- interp.surface.grid(list(x=1:5,y=1:4,z=m),
grid.list=list(x=seq(1,5,length=101),
y=seq(1,4,length=101)))
dimnames(m2$z) <- list(x=m2$x,y=m2$y)
Now melt it and replot:
plot2d_2 <- melt(m2,value.name="z")
gg0 %+% plot2d_2 + geom_tile()
Hmm, the interpolation seems to have changed the z-scale - you should be careful with that ...

Setting z-limits in ggplot2 `geom_hex()`

Is there a way to manually set z-limits in ggplot2 2d bin functions such as geom_hexbin()?
For example;
library(ggplot2)
dat <- data.frame(
x = rnorm(1000),
y = rnorm(1000)
)
ggplot(dat, aes(x, y)) +
geom_hex()
Is there a way to manually set the z-limits? The intended effect is to manually control the points at which the colour scales begin and end.
Thank you!
For extra points, I would also like to change the colours of the colour scale.
You want scale_fill_gradient:
library(ggplot2)
dat <- data.frame(
x = rnorm(1000),
y = rnorm(1000)
)
ggplot(dat, aes(x, y)) +
geom_hex() +
scale_fill_gradient(limits = c(1, 2))

How to plot three point lines using ggplot2 instead of the default plot in R

I have three matrix and I want to plot the graph using ggplot2. I have the data below.
library(cluster)
require(ggplot2)
require(scales)
require(reshape2)
data(ruspini)
x <- as.matrix(ruspini[-1])
w <- matrix(W[4,])
df <- melt(data.frame(max_Wmk, min_Wmk, w, my_time = 1:10), id.var = 'my_time')
ggplot(df, aes(colour = variable, x = my_time, y = value)) +
geom_point(size = 3) +
geom_line() +
scale_y_continuous(labels = comma) +
theme_minimal()
I want to add the three plots into one plot using a beautiful ggplot2.
Moreover, I want to make the points with different values have different colors.
I'm not quite sure what you're after, here's a guess
Your data...
max <- c(175523.9, 33026.97, 21823.36, 12607.78, 9577.648, 9474.148, 4553.296, 3876.221, 2646.405, 2295.504)
min <- c(175523.9, 33026.97, 13098.45, 5246.146, 3251.847, 2282.869, 1695.64, 1204.969, 852.1595, 653.7845)
w <- c(175523.947, 33026.971, 21823.364, 5246.146, 3354.839, 2767.610, 2748.689, 1593.822, 1101.469, 1850.013)
Slight modification to your base plot code to make it work...
plot(1:10,max,type='b',xlab='Number',ylab='groups',col=3)
points(1:10,min,type='b', col=2)
points(1:10,w,type='b',col=1)
Is this what you meant?
If you want to reproduce this with ggplot2, you might do something like this...
# ggplot likes a long table, rather than a wide one, so reshape the data, and add the 'time' variable explicitly (ie. my_time = 1:10)
require(reshape2)
df <- melt(data.frame(max, min, w, my_time = 1:10), id.var = 'my_time')
# now plot, with some minor customisations...
require(ggplot2); require(scales)
ggplot(df, aes(colour = variable, x = my_time, y = value)) +
geom_point(size = 3) +
geom_line() +
scale_y_continuous(labels = comma) +
theme_minimal()
UPDATE after the question was edited and the example data changed, here's an edit to suit the new example data:
Here's your example data (there's scope for simplification and speed gains here, but that's another question):
library(cluster)
require(ggplot2)
require(scales)
require(reshape2)
data(ruspini)
x <- as.matrix(ruspini[-1])
wss <- NULL
W=matrix(data=NA,ncol=10,nrow=100)
for(j in 1:100){
k=10
for(i in 1: k){
wss[i]=kmeans(x,i)$tot.withinss
}
W[j,]=as.matrix(wss)
}
max_Wmk <- matrix(data=NA, nrow=1,ncol=10)
for(i in 1:10){
max_Wmk[,i]=max(W[,i],na.rm=TRUE)
}
min_Wmk <- matrix(data=NA, nrow=1,ncol=10)
for(i in 1:10){
min_Wmk[,i]=min(W[,i],na.rm=TRUE)
}
w <- matrix(W[4,])
Here's what you need to do to make the three objects into vectors so you can make the data frame as expected:
max_Wmk <- as.numeric(max_Wmk)
min_Wmk <- as.numeric(min_Wmk)
w <- as.numeric(w)
Now reshape and plot as before...
df <- melt(data.frame(max_Wmk, min_Wmk, w, my_time = 1:10), id.var = 'my_time')
ggplot(df, aes(colour = variable, x = my_time, y = value)) +
geom_point(size = 3) +
geom_line() +
scale_y_continuous(labels = comma) +
theme_minimal()
And here's the result:

Put whisker ends on boxplot

I would like to put perpendicular lines at the ends of the whiskers like the boxplot function automatically gives.
As hinted but not implemented by #Roland, you can use stat_boxplot to implement this. The trick calling _boxplot twice and is to set the geom to errorbar for one of the calls.
Note that as R uses a pen and paper approach it is advisable to implement the error bars first the draw the traditional boxplot over the top.
Using #Roland's dummy data df
ggplot(df, aes(x=cond, y = value)) +
stat_boxplot(geom ='errorbar') +
geom_boxplot() # shorthand for stat_boxplot(geom='boxplot')
The help for stat_boxplot (?stat_boxplot) detail the various values computed and saved in a data.frame
To resize the whiskers lines we can use the argument width = 0.5 inside the function: stat_boxplot
set.seed(42)
df <- data.frame(cond = factor(rep(c("A", "B"), each = 500)),
value = c(rnorm(500, mean = 1, sd = 0.2),
rnorm(500, mean = 1.5, sd = 0.1)))
library(ggplot2)
ggplot(df, aes(x = cond, y = value)) +
stat_boxplot(geom = "errorbar", width = 0.5) +
geom_boxplot()
It might be possible to use stat_boxplot to calculate the whisker ends, but I am not enough of a ggplot2 wizard, so I use the base function for that.
set.seed(42)
df <- data.frame(cond = factor( rep(c("A","B"), each=500) ),
value = c(rnorm(500,mean=1,sd=0.2),rnorm(500, mean=1.5,sd=0.1)))
whisk <- function(df,cond_col=1,val_col=2) {
require(reshape2)
condname <- names(df)[cond_col]
names(df)[cond_col] <- "cond"
names(df)[val_col] <- "value"
b <- boxplot(value~cond,data=df,plot=FALSE)
df2 <- cbind(as.data.frame(b$stats),c("min","lq","m","uq","max"))
names(df2) <- c(levels(df$cond),"pos")
df2 <- melt(df2,id="pos",variable.name="cond")
df2 <- dcast(df2,cond~pos)
names(df2)[1] <- condname
df2
}
library(ggplot2)
plot1 <- ggplot(df, aes(x=cond))
plot1 <- plot1 + geom_errorbar(aes(ymin=min,ymax=max),data=whisk(df),width = 0.5)
plot1 <- plot1 + geom_boxplot(aes(y=value))
plot1

ggplot2 Scatter Plot Labels

I'm trying to use ggplot2 to create and label a scatterplot. The variables that I am plotting are both scaled such that the horizontal and the vertical axis are plotted in units of standard deviation (1,2,3,4,...ect from the mean). What I would like to be able to do is label ONLY those elements that are beyond a certain limit of standard deviations from the mean. Ideally, this labeling would be based off of another column of data.
Is there a way to do this?
I've looked through the online manual, but I haven't been able to find anything about defining labels for plotted data.
Help is appreciated!
Thanks!
BEB
Use subsetting:
library(ggplot2)
x <- data.frame(a=1:10, b=rnorm(10))
x$lab <- letters[1:10]
ggplot(data=x, aes(a, b, label=lab)) +
geom_point() +
geom_text(data = subset(x, abs(b) > 0.2), vjust=0)
The labeling can be done in the following way:
library("ggplot2")
x <- data.frame(a=1:10, b=rnorm(10))
x$lab <- rep("", 10) # create empty labels
x$lab[c(1,3,4,5)] <- LETTERS[1:4] # some labels
ggplot(data=x, aes(x=a, y=b, label=lab)) + geom_point() + geom_text(vjust=0)
Subsetting outside of the ggplot function:
library(ggplot2)
set.seed(1)
x <- data.frame(a = 1:10, b = rnorm(10))
x$lab <- letters[1:10]
x$lab[!(abs(x$b) > 0.5)] <- NA
ggplot(data = x, aes(a, b, label = lab)) +
geom_point() +
geom_text(vjust = 0)
Using qplot:
qplot(a, b, data = x, label = lab, geom = c('point','text'))

Resources