I have got a df with over 400 datapoints and want to colour those accourding to the RColorBrewer package with the palette = "Blues".
I plotted my data and even expanded the color input maximum of the palette to the length of my data points to avoid getting "Error messages" (see below), but the colors in the plot aren't changing (only a black line).
Error: Aesthetics must be either length 1 or the
same as the data (427): fill
I've created a dummy df to make my problem reproducible:
library(ggplot2)
library(RColorBrewer)
color = colorRampPalette(rev(brewer.pal(n = 9, name = "Blues")))(300)
df = (curve(3*x^2 + x, from=1, to=10, n=300, xlab="xvalue", ylab="yvalue",
col="blue", lwd=2, main="Plot of (3x^2 + x)"))
dfx = matrix(data = df$x, ncol = 1)
dfy = matrix(data = df$y, ncol = 1)
dfa = cbind(dfx,dfy)
DF = ggplot(dfa, aes(x = dfx, y = dfy)) +
geom_point(fill = color)
I expect the curve to change into a light blue at the start (1,4) with an increase in darkness till the end (dark blue at the end (10,310)).
Thanks in advance!
color_seq = colorRampPalette(brewer.pal(n = 9, name = "Blues"))(300)
df = (curve(3*x^2 + x, from=1, to=10, n=300, xlab="xvalue", ylab="yvalue",
col="blue", lwd=2, main="Plot of (3x^2 + x)"))
dfx = matrix(data = df$x, ncol = 1)
dfy = matrix(data = df$y, ncol = 1)
dfa = as.data.frame(cbind(dfx , dfy, color_seq), stringsAsFactors = FALSE)
dfa$V1 <- as.numeric(dfa$V1) ## convert both to numeric so the scale is continous
dfa$V2 <- as.numeric(dfa$V2)
ggplot(dfa, aes(x = V1, y = V2, color = color_seq)) +
geom_point() +
scale_color_identity()
exDF <- data.frame(dataX = seq(1, 10, .1),
dataY = sapply(seq(1, 10, .1), function(x) 3*x^2 + x))
exDFcolors <- colorRampPalette(brewer.pal(9, "Blues"))(nrow(exDF))
ggplot(exDF, aes(dataX, dataY)) +
geom_line(size = 2, color = exDFcolors)
Related
I'm trying to re-create a plot like this in ggplot:.
This graph takes the residuals from a regression output, and plots them in order (with the X-axis being a rank of residuals).
My best attempt at this was something like the following:
library(ggplot2)
library(modelr)
d <- d %>% add_residuals(mod1, var = "resid")
d$resid_rank <- rank(d$resid)
ggplot(data = d, aes(x = resid_rank, y = resid)) +
geom_bar(stat="identity") +
theme_bw()
However, this yields a completely blank graph. I tried something like this:
ggplot(data = d, aes(x = resid_rank, y = resid)) +
geom_segment(yend = 0, aes(xend=resid)) +
theme_bw()
But this yields the segments that go in the wrong direction. What is the right way to do this, and to color those lines by a third factor?
FAKE DATASET:
library(estimatr)
library(fabricatr)
#simulation
dat <- fabricate(
N = 10000,
y = runif(N, 0, 10),
x = runif(N, 0, 100)
)
#add an outlier
dat <- rbind(dat, c(300, 5))
dat <- rbind(dat, c(500, 3))
dat$y_log <- log(dat$y)
dat$x_log <- log(dat$x)
dat$y_log_s <- scale(log(dat$y))
dat$x_log_s <- scale(log(dat$x))
mod1 <- lm(y_log ~ x_log, data = dat))
I used the build in dataset from the help page on lm() to create this example. I also just directly used resid() to get the residuals. It's unclear where / why the colored bars would be different, but basically you'd need to add a column to your data.frame that specificies why they are red or blue, then pass that to fill.
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 3.4.4
#example from lm
ctl <- c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14)
trt <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69)
group <- gl(2, 10, 20, labels = c("Ctl","Trt"))
weight <- c(ctl, trt)
lm.D9 <- lm(weight ~ group)
resids <- data.frame(resid = resid(lm.D9))
#why are some bars red and some blue? No clue - so I'll pick randomly
resids$group <- sample(c("group 1", "group 2"), nrow(resids), replace = TRUE)
#rank
resids$rank <- rank(-1 * resids$resid)
ggplot(resids, aes(rank, resid, fill = group)) +
geom_bar(stat = "identity", width = 1) +
geom_hline(yintercept = c(-1,1), colour = "darkgray", linetype = 2) +
geom_hline(yintercept = c(-2,2), colour = "lightgray", linetype = 1) +
theme_bw() +
theme(panel.grid = element_blank()) +
scale_fill_manual(values = c("group 1" = "red", "group 2" = "blue"))
Created on 2019-01-24 by the reprex package (v0.2.1)
I just encountered such graph attached where two colors of geom_point are used (I believe it is made by ggplot2). Similarly, I would like to have dots of one color to range from size 1 to 5, and have another color for a series of dots for the range 10 to 50. I have however no clue on how to add two different ranges of point in one graph.
At the basic step I have:
a <- c(1,2,3,4,5)
b <- c(10,20,30,40,50)
Species <- factor(c("Species1","Species2","Species3","Species4","Species5"))
bubba <- data.frame(Sample1=a,Sample2=b,Species=Species)
bubba$Species=factor(bubba$Species, levels=bubba$Species)
xm=melt(bubba,id.vars = "Species", variable.name="Samples", value.name = "Size")
str(xm)
ggplot(xm,aes(x= Samples,y= fct_rev(Species)))+geom_point(aes(size=Size))+scale_size(range = range(xm$Size))+theme_bw()
Any would have clues where I should look into ? Thanks!
I've got an approach that gets 90% of the way there, but I'm not sure how to finish the deed. To get a single legend for size, I used a transformation to convert input size to display size. That makes the legend appearance conform to the display. What I don't have figured out yet is how to apply a similar transformation to the fill so that both can be integrated into the same legend.
Here's the transformation, which in this case shrinks everything 10 or more:
library(scales)
shrink_10s_trans = trans_new("shrink_10s",
transform = function(y){
yt = if_else(y >= 10, y*0.1, y)
return(yt)
},
inverse = function(yt){
return(yt) # Not 1-to-1 function, picking one possibility
}
)
Then we can use this transformation on the size to selectively shink only the dots that are 10 or larger. This works out nicely for the legend, aside from integrating the fill encoding with the size encoding.
ggplot(xm,aes(x= Samples,y= fct_rev(Species), fill = Size < 10))+
geom_point(aes(size=Size), shape = 21)+
scale_size_area(trans = shrink_10s_trans, max_size = 10,
breaks = c(1,2,3,10,20,30,40),
labels = c(1,2,3,10,20,30,40)) +
scale_fill_manual(values = c(rgb(136,93,100, maxColorValue = 255),
rgb(236,160,172, maxColorValue = 255))) +
theme_bw()
a <- c(1, 2, 3, 4, 5)
b <- c(10, 20, 30, 40, 50)
Species <- factor(c("Species1", "Species2", "Species3", "Species4", "Species5"))
bubba <- data.frame(Sample1 = a, Sample2 = b, Species = Species)
bubba$Species <- factor(bubba$Species, levels = bubba$Species)
xm <- reshape2::melt(bubba, id.vars = "Species", variable.name = "Samples", value.name = "Size")
ggplot(xm, aes(x = Samples, y = fct_rev(Species))) +
geom_point(aes(size = Size, color = Size)) +
scale_color_continuous(breaks = c(1,2,3,10,20,30), guide = guide_legend()) +
scale_size(range = range(xm$Size), breaks = c(1,2,3,10,20,30)) +
theme_bw()
Here's a cludge. I haven't got time to figure out the legend at the moment. Note that 1 and 10 are the same size, but a different colour, as are 3 and 40.
# Create data frame
a <- c(1, 2, 3, 4, 5)
b <- c(10, 20, 30, 40, 50)
Species <- factor(c("Species1", "Species2", "Species3", "Species4", "Species5"))
bubba <- data.frame(Sample1 = a, Sample2 = b, Species = Species)
# Restructure data
xm <- reshape2::melt(bubba, id.vars = "Species", variable.name = "Samples", value.name = "Size")
# Calculate bubble size
bubble_size <- function(val){
ifelse(val > 3, (1/15) * val + (1/3), val)
}
# Calculate bubble colour
bubble_colour <- function(val){
ifelse(val > 3, "A", "B")
}
# Calculate bubble size and colour
xm %<>%
mutate(bub_size = bubble_size(Size),
bub_col = bubble_colour(Size))
# Plot data
ggplot(xm, aes(x = Samples, y = fct_rev(Species))) +
geom_point(aes(size = bub_size, fill = bub_col), shape = 21, colour = "black") +
theme(panel.grid.major = element_line(colour = alpha("gray", 0.5), linetype = "dashed"),
text = element_text(family = "serif"),
legend.position = "none") +
scale_size(range = c(1, 20)) +
scale_fill_manual(values = c("brown", "pink")) +
ylab("Species")
I think you are looking for bubble plots in R
https://www.r-graph-gallery.com/bubble-chart/
That said, you probably want to build the right and left the side of the graphic separately and then combine.
So, I've a t-dist plot created in R using curve and adding on the polygons onto that. It gives me a basic looking plot.
What I need is a more good looking plot where
X-axis starts from -6
Y-axis starts from 0
Background of the plot(except under the curve) is filled with some color which I need
I think I need to use the ggplot2 package for this, so answers based on ggplot2 usage is what I need. Or any answer that would return me that output is appreciated.
Here is my code
curve(dt(x, df = 7), from = -6, to = 6)
x <- seq(-1.96, -6, len = 100)
y <- dt(x, 7)
x1 <- seq(1.96, 6, len = 100)
y1 <- dt(x1, 7)
polygon(c(x1[1], x1, x1[100]), c(dt(-6, 7), y1, dt(6, 7)),
col = "#b14025", border = "black")
polygon(c(x[1], x, x[100]), c(dt(-6, 7), y, dt(6, 7)),
col = "#b14025", border = "black")
First Image is the current Output
Second Image is what I think it should look like
Here is one way to obtain a similar result using the ggplot2 package:
library(ggplot2)
dt_tails <- function(x){
y <- dt(x,7)
y[abs(x) < 1.96] <- NA
return(y)
}
dt_7 <- function(x) dt(x,7)
p <- ggplot(data.frame(x=c(-6,6)),aes(x=x)) +
stat_function(fun=dt_7, geom="area", fill="white", colour="black")
p <- p + stat_function(fun=dt_tails, geom="area", fill='#b14025')
p <- p + theme(panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
panel.background=element_rect(fill="#eae9c8") )
plot(p)
Since you expected a ggplot answer, just add + theme(panel.background = element_rect(fill = "yellow")) to your plot or what ever color you like.
I finally managed to do it with the base plotting functions only.
For Shading the area outside curve: I just added one more polygon tracing the area outside the curve.
For fixing the graph to start at the required X and Y, I used another parameter of plot function xaxs & yaxs from this Link
Here is my attached code
curve(dt(x, df = 7), from = -6, to = 6,xaxs="i",yaxs="i",ylim=c(0,0.4))
t = seq(-6,6,len = 100)
yt = dt(t,7)
x <- seq(-1.96, -6, len = 100)
y <- dt(x, 7)
x1 <- seq(1.96, 6, len = 100)
y1 <- dt(x1, 7)
polygon(x = c(-6,-6,t,6,6),
y = c(0.4,0,yt,0,0.4),
col = "#eae9c8",
border = "black")
polygon(x = c(x1[1], x1, x1[100]),
y = c(dt(-6, 7), y1, dt(6, 7)),
col = "#b14025",
border = "black")
polygon(x = c(x[1], x, x[100]),
y = c(dt(-6, 7), y, dt(6, 7)),
col = "#b14025",
border = "black")
Here is the attached output
I have this dataframe:
set.seed(1)
x <- c(rnorm(50, mean = 1), rnorm(50, mean = 3))
y <- c(rep("site1", 50), rep("site2", 50))
xy <- data.frame(x, y)
And I have made this density plot:
library(ggplot2)
ggplot(xy, aes(x, color = y)) + geom_density()
For site1 I need to shade the area under the curve that > 1% of the data. For site2 I need to shade the area under the curve that < 75% of the data.
I'm expecting the plot to look something like this (photoshopped). Having been through stack overflow, I'm aware that others have asked how to shade part of the area under a curve, but I cannot figure out how to shade the area under a curve by group.
Here is one way (and, as #joran says, this is an extension of the response here):
# same data, just renaming columns for clarity later on
# also, use data tables
library(data.table)
set.seed(1)
value <- c(rnorm(50, mean = 1), rnorm(50, mean = 3))
site <- c(rep("site1", 50), rep("site2", 50))
dt <- data.table(site,value)
# generate kdf
gg <- dt[,list(x=density(value)$x, y=density(value)$y),by="site"]
# calculate quantiles
q1 <- quantile(dt[site=="site1",value],0.01)
q2 <- quantile(dt[site=="site2",value],0.75)
# generate the plot
ggplot(dt) + stat_density(aes(x=value,color=site),geom="line",position="dodge")+
geom_ribbon(data=subset(gg,site=="site1" & x>q1),
aes(x=x,ymax=y),ymin=0,fill="red", alpha=0.5)+
geom_ribbon(data=subset(gg,site=="site2" & x<q2),
aes(x=x,ymax=y),ymin=0,fill="blue", alpha=0.5)
Produces this:
The problem with #jlhoward's solution is that you need to manually add goem_ribbon for each group you have. I wrote my own ggplot stat wrapper following this vignette. The benefit of this is that it automatically works with group_by and facet and you don't need to manually add geoms for each group.
StatAreaUnderDensity <- ggproto(
"StatAreaUnderDensity", Stat,
required_aes = "x",
compute_group = function(data, scales, xlim = NULL, n = 50) {
fun <- approxfun(density(data$x))
StatFunction$compute_group(data, scales, fun = fun, xlim = xlim, n = n)
}
)
stat_aud <- function(mapping = NULL, data = NULL, geom = "area",
position = "identity", na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE, n = 50, xlim=NULL,
...) {
layer(
stat = StatAreaUnderDensity, data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(xlim = xlim, n = n, ...))
}
Now you can use stat_aud function just like other ggplot geoms.
set.seed(1)
x <- c(rnorm(500, mean = 1), rnorm(500, mean = 3))
y <- c(rep("group 1", 500), rep("group 2", 500))
t_critical = 1.5
tibble(x=x, y=y)%>%ggplot(aes(x=x,color=y))+
geom_density()+
geom_vline(xintercept = t_critical)+
stat_aud(geom="area",
aes(fill=y),
xlim = c(0, t_critical),
alpha = .2)
tibble(x=x, y=y)%>%ggplot(aes(x=x))+
geom_density()+
geom_vline(xintercept = t_critical)+
stat_aud(geom="area",
fill = "orange",
xlim = c(0, t_critical),
alpha = .2)+
facet_grid(~y)
You need to use fill. color controls the outline of the density plot, which is necessary if you want non-black outlines.
ggplot(xy, aes(x, color=y, fill = y, alpha=0.4)) + geom_density()
To get something like that. Then you can remove the alpha part of the legend by using
ggplot(xy, aes(x, color = y, fill = y, alpha=0.4)) + geom_density()+ guides(alpha='none')
I am trying hard to figure out to add color gridient label to my plot ( link to previous question). Sorry for keep asking but this is maximum I could push me forward.
#data 1:
lab1 <- 1:10
group <- rep(1:3, each = length (lab1))
label <- rep(lab1, 3)
avar <- rep(c(0, 1, 4, 5, 6, 8, 10, 11, 12, 13), 3)
myd <- data.frame (group, label, avar)
# data 2
fillcol <- rep(rnorm(length(lab1)-1, 0.5, 0.2), 3)
group1 <- rep(1:3, each = length(fillcol)/3)
# this variable will be used to fill color in bars
filld <- data.frame(group1, fillcol)
colbarplot <- function(group) {
myd1 <- myd[myd$group == group,]
filld1 <- filld[filld$group1 == group,]
blues <- colorRampPalette(c("yellow", "blue"))
barplot(as.matrix(diff(myd1$avar)), horiz=T,
col=blues(10)[10* filld1$fillcol],
axes=F, xlab="Mark")
axis(1, labels=myd$label, at=myd$avar)
axis(3, labels=myd$avar, at=myd$avar)
}
par(mfrow = c(4, 1))
par(mar = c(2.5, 1, 2.5, 1))
sapply(unique(myd$group),function(x) colbarplot(x))
Now I am struggling to add legend, sorry for this new user.
blues <- colorRampPalette(c("yellow", "blue"))
colors <- blues(10)
count <- length(colors)
m <- matrix(1:count, count, 1)
m1 <- m
image(m, col=colors, ylab="", axes=FALSE)
I produced color scale that is not what I am expecting, I am trying plot a smaller legend, less in width and height, along with original scale use in color coding.
Here are some unsuccessful trials for labeling:
colab <- c(round (min(filld$fillcol), 2), round(max(filld$fillcol), 2))
colpos <- c(0.33 * max(mapdat$position),0.66 * max(mapdat$position))
axis(1, labels=colab, at=colpos)
Getting a decent legend is much easier with ggplot2
library(plyr)
myd$group <- factor(myd$group)
gData <- ddply(myd, .(group), function(x){
data.frame(delta = diff(x$avar), label = paste(head(x$label, -1), tail(x$label, -1), sep = "-"))
})
gData$FillCol <- rnorm(nrow(gData))
ggplot(gData, aes(x = group, y = delta, fill = FillCol, label = label)) + geom_bar(stat = "identity") + coord_flip() + scale_fill_gradient(low = "blue", high = "yellow") + geom_text(position = "stack")