Why does my ggplot have the wrong y-axis scale? - r

I'm using a concatenated version of frankbi's Price of weed data (https://github.com/frankbi/price-of-weed for the original, https://github.com/Travis-Barton/Github_code for the cleaned version). I turn the histograms of HighQ/MedQ/Lowq into density lines. Then I try to plot those density lines on top of one another in a ggplot environment, but my axes are coming out all wrong.
This is what I should have:
This is what I end up getting:
Why is ggplot scaling my x-axis? Below is my code
dat <- marijuana.street.price.clean
hist(dat$HighQ)
hist(dat$MedQ)
hist(dat$LowQ)
plot(1, ylim = c(0, .02), xlim = c(0, 800))
lines(density(dat$HighQ), lwd = 3, col = 'green', lty = 2)
lines(density(dat$MedQ), lwd = 3, col = 'yellow', lty = 2)
lines(density(dat$LowQ, na.rm = T), lwd = 3, col = 'red', lty = 2)
dat2 <- data.frame(indexH = density(dat$LowQ, na.rm = T)$x,
propH = density(dat$HighQ)$y,
indexM = density(dat$MedQ)$x,
propM = density(dat$MedQ)$y,
indexL = density(dat$LowQ, na.rm = T)$x,
propL = density(dat$LowQ, na.rm = T)$y
)
ggplot(dat2) +
geom_line(aes(y = propH, x = indexH, colour = "High")) +
geom_line(aes(y = propL, x= indexL, colour = "Low")) +
geom_line(aes(y = propM, x = indexM, colour = "Medium"))

Related

Specify model order in multiplot() function

I am using the multiplot() function in R with the coefplot() function to create a plot with coefficients from several models.
multiplot(mod1, mod2, mod3, mod4, mod5, mod6, mod7, mod8, mod9,
intercept = F, zeroType = 1,
zeroColor = 'black',
title = 'The Effect of Masculinity on Sexism Items',
ylab = 'Masculinity Index',
plot.shapes = T,
xlab = 'Sexism',
sort = 'natural',
legend.reverse = F,
names = c('Control', 'Offend', 'Prob', 'Protect', 'Rescue',
'R/W', 'Equal', 'Hire', 'Harass'),
newNames = c(masc_ind = 'Masculinity'))+
scale_color_manual(values = c('green', 'green', 'green', 'blue', 'blue',
'blue', 'red', 'red', 'red'))+
scale_shape_manual(values = c(15,16,17,18,19,0,1,2,5))
Which produces the above plot:
I noticed that the order of the models does not follow the order they are entered into the code (ie: mod1, mod2, mod3). Instead, the models appear to be listed in reverse alphabetical order. I know there are some ways to override this in ggplot() for factor level variables in a single model, but I haven't been able to find much in the way of reordering models in ggplot(). Is there a way to override this so I can list the models in a preferred order?
The difficulty with ggplot wrappers like multiplot is that often what you gain in ease-of-use, you lost in flexibility. However, the data frame that multiplot produces for its plot data is a very nice little summary table. This can be used to plot directly in ggplot however you choose.
Obviously, we don't have your data, but I have created a data set with all the same names and structure as your own (see below), so the following code should work for you:
library(coefplot)
var_names <- c('Control', 'Offend', 'Prob', 'Protect', 'Rescue',
'R/W', 'Equal', 'Hire', 'Harass')
plot_data <- multiplot(mod1, mod2, mod3, mod4, mod5, mod6, mod7, mod8, mod9,
intercept = F, zeroType = 1,
zeroColor = 'black',
title = 'The Effect of Masculinity on Sexism Items',
ylab = 'Masculinity Index',
plot.shapes = T,
xlab = 'Sexism',
sort = 'natural',
legend.reverse = F,
names = var_names,
newNames = c(masc_ind = 'Masculinity'))$data
p <- ggplot(plot_data, aes(Value, factor(Model, rev(var_names)), color = Model)) +
geom_vline(xintercept = 0) +
geom_errorbarh(aes(xmin = LowOuter, xmax = HighOuter), height = 0) +
geom_errorbarh(aes(xmin = LowInner, xmax = HighInner), height = 0, size = 1) +
geom_point(aes(shape = var_names), size = 4) +
labs(x = "Sexism", y = "Masculinity Index") +
theme(legend.position = "bottom")
p + scale_color_manual(values = setNames(c('green', 'green', 'green', 'blue',
'blue', 'blue', 'red', 'red', 'red'),
var_names), name = "Variable") +
scale_shape_manual(values = setNames(c(15, 16, 17, 18, 19, 0, 1, 2, 5),
var_names), name = "Variable")
And of course you can style this however you choose:
p +
theme_minimal(base_size = 16) +
scale_color_manual(values = setNames(rep(c("red3", "orange2",
"steelblue4"), each = 3), var_names),
name = NULL) +
scale_shape_manual(values = setNames(c(15, 16, 17, 18, 19, 0, 1, 2, 5),
var_names), name = NULL) +
labs(x = "Sexism", y = "Masculinity Index") +
theme(legend.position = "bottom")
Data used
set.seed(1)
var_names <- c('Control', 'Offend', 'Prob', 'Protect', 'Rescue',
'RW', 'Equal', 'Hire', 'Harass', 'masc_ind')
df <- setNames(as.data.frame(matrix(rbinom(500, 1, 0.5), 50)), var_names)
list2env(setNames(lapply(head(var_names, -1), function(x) {
df2 <- data.frame(masculinity = df[[x]], masc_ind = df$masc_ind)
do.call("glm",
list(formula = masc_ind ~ masculinity,
data = quote(df2),
family = quote(binomial)))
}), paste0("mod", 1:9)), globalenv())

How to add gradient to a geom_line while using multiple colour scales and date_breaks in ggplot

I want to create a ggplot that includes a gradient colour fill for a geom_line, but also includes other things like manual colour and shape overwrites, and (discrete?) date breaks.
If I only plot a geom_line with a scale_colour_gradient everything works fine:
set.seed(1)
# dataframe
df <- data.frame (UP = c(5, 5, 3, 2),
Date = c(sort(sample(seq(as.Date("2018-06-01"), as.Date("2019-06-01"), by = "day"), 4))))
# dataframe for gradient
df2 <- data.frame (UP = seq(from = df[2, 1], to = df[3, 1], length.out = 100),
Date = seq.Date(from = df[2, 2], to = df[3, 2], length.out = 100))
ggplot() +
geom_line (data = df[1:2, ], aes (Date, UP), lty = "longdash", col = "blue", size = 1) +
geom_line (data = df[3:4, ], aes (Date, UP), lty = 1, col = "red", size = 1) +
geom_point (data = df[1:2, ], aes (Date, UP), col = "blue", shape = 4, stroke = 2, size = 3) +
geom_point (data = df[3:4, ], aes (Date, UP), col = "red", size = 3) +
geom_line (data = df2, aes (Date, UP, color = as.integer(UP)), size = 1) +
scale_colour_gradient(name = "UP",
low = "red", high = "blue")
From here, I want to add a sequences of statistics that have certain colours and shapes, and also break the x-axis by years using the scales package.
set.seed(1)
# dataframe for statistics
df3 <- data.frame (Stat = c(sample(2:4, 4, replace = TRUE)),
Date = df$Date,
Shape = c("DO", "DO", "RE", "MI"))
# plot including statistics tags
p <- ggplot() +
geom_line (data = df[1:2, ], aes (Date, UP), lty = "longdash", col = "blue", size = 1) +
geom_line (data = df[3:4, ], aes (Date, UP), lty = 1, col = "red", size = 1) +
geom_point (data = df[1:2, ], aes (Date, UP), col = "blue", shape = 4, stroke = 2, size = 3) +
geom_point (data = df[3:4, ], aes (Date, UP), col = "red", size = 3) +
geom_line (data = df3, aes (Date, Stat), lty = 2, size = 1) +
geom_point (data = df3[1:2, ], aes (Date, Stat, col = Shape, shape = Shape), size = 7) +
geom_point (data = df3[3, ], aes (Date, Stat, col = Shape, shape = Shape), size = 7) +
geom_point (data = df3[4, ], aes (Date, Stat, col = Shape, shape = Shape), size = 7) +
scale_colour_manual(name=bquote(bold ("TITLE")),
values= c("red3", "black", "springgreen4"),
labels = c("DO","RE", "MI")) +
scale_shape_manual(name=bquote(bold ("TITLE")),
values= c(15:17),
labels = c("DO","RE", "MI")) +
scale_x_date(date_breaks = "year",
labels = scales::date_format ("%Y"),
limits = as.Date(c(df[1,2], df[4,2])))
The problems start when I try to add the gradient geom_line to this latter plot, resulting in the following errors:
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Error: Discrete value supplied to continuous scale
p + geom_line (data = df2, aes (Date, UP, color = as.integer(UP)), size = 1) +
scale_colour_gradient(name = "UP",
low = "red", high = "blue")
Do you know how to tackle this problem? Any help is very much appreciated.

My layout doesn't allow me to show xlab and ylab

It looks like something simple I am missing but have no idea how to deal with this.
So I used a layout() function and I managed to get the layout as I wanted as below picture. Iris data was used in my coding.
Problem is, it does not show me the x label and y label on the output when I use plot() functions after this. And xaxis and yaxis for plot() looks overlapping. I am not sure how to deal with this problem.
There was no problem for x and y labelling before introducing plot.new() and par() to set up the main name of my diagram. (i.e. before I use the code from plot.new() to title(), xlab and ylab were shown)
I used 6 different plots in my original code, including, the plot.new() for title(), but I omitted the rest of them for convenience
Here is my code below,
x <- iris$Sepal.Length
y <- iris$Species
x_min <- min(iris$Sepal.Length)
x_max <- max(iris$Sepal.Length)
y_min <- min(iris$Sepal.Width)
y_max <- max(iris$Sepal.Width)
layout(matrix(c(1,1,1,1,1,1,
2,2,3,3,4,4,
5,5,5,6,6,6), nc=6, byrow = TRUE), heights=c(lcm(1),1,1,1,1))
layout.show(6)
par("mar"=c(1,1,1,1,1,1))
plot.new()
plot.window(xlim=c(0,1), ylim=c(0,1))
text(x=0.5,y=0.5,"scatter and density plots for Sepal and Length and Sepal Width" ,font=2, cex=1.5)
plot(...)
You can use the xlab and ylab arguments in title. However, the way you have constructed the plot means that when you reset par at the end, these are drawn off the page due ti their position relative to your custom axis. If you simply leave par alone, you get:
den1 = density(CDE1$V1)
den2 = density(CDE1$V2)
col1 = hsv(h = 0.65, s = 0.6, v = 0.8, alpha = 0.5)
col2 = hsv(h = 0.85, s = 0.6, v = 0.8, alpha = 0.5)
plot.new()
plot.window(xlim = c(25,65), ylim = c(0, 0.14))
axis(side = 1, pos = 0, at = seq(from = 25, to = 65, by = 5), col = "gray20",
lwd.ticks = 0.25, cex.axis = 1, col.axis = "gray20", lwd = 1.5)
axis(side = 2, pos = 25, at = seq(from = 0, to = 0.14, by = 0.02),
col = "gray20", las = 2, lwd.ticks = 0.5, cex.axis = 1,
col.axis = "gray20", lwd = 1.5)
polygon(den1$x, den1$y, col = col1, border ="black",lwd = 2)
polygon(den2$x, den2$y, col = col2, border ="black",lwd = 2)
text(52, 0.10, labels ="CDET", col =col1, cex = 1.25,font=2)
text(35, 0.03, labels ="SDFT", col =col2, cex = 1.25,font=2)
title(main = "Gestational Day 100/283",
xlab = "Fibril Diameter (nm)",
ylab = "density")
Of course, you could get a similar plot with less code and much easier adjustments using ggplot:
library(ggplot2)
ggplot(tidyr::pivot_longer(CDE1, 1:2), aes(value, fill = name)) +
geom_density() +
scale_fill_manual(values = c(col1, col2), labels = c("CDET", "SDFT")) +
scale_x_continuous(breaks = seq(25, 65, 5), limits = c(25, 65)) +
scale_y_continuous(breaks = seq(0, 0.14, 0.02), limits = c(0, 0.14)) +
theme_classic(base_size = 16) +
labs(title = "Gestational Day 100/283", x = "Fibril Diameter (nm)",
fill = NULL) +
theme(plot.title = element_text(hjust = 0.5))
Data used
Obviously, we don't have your data, so I had to create a reproducible approximation:
set.seed(123)
CDE1 <- data.frame(V1 = rnorm(20, 47.5, 4), V2 = rnorm(20, 44, 5))

How to remove the zero labels in Histogram plot in R?

Any tips to remove the zero labels in between the histogram bars?
hist(links$Survey_Duration, breaks = seq(0,50,5), main = "Survey Duration",
labels = TRUE, border = "black",
xlab = "Survey", ylim = c(0, 15), col = "gray", las = 1, xaxt='n')
axis(side=1, at=seq(0,50,5), labels=seq(0,50,5))
abline(v = mean(links$Survey_Duration), col = "royalblue", lwd = 1.5)
abline(v = median(links$Survey_Duration), col = "red", lwd = 1.5)
legend(x = "topright", c("Mean", "Median"), col = c("royalblue","red"),
lwd = c(1.5,1.5))
How about this?
# modify data so there's zero in one of the bins
mtcars$mpg <- ifelse(mtcars$mpg >= 25 & mtcars$mpg <= 30, NA, mtcars$mpg)
# save plot parameters
h <- hist(mtcars$mpg, plot = FALSE)
# produce plot
plot(h, ylim = c(0, 14))
# add labels manually, recoding zeros to nothing
text(h$mids, h$counts + 1, ifelse(h$counts == 0, "", h$counts))
A slightly different answer using the labeling in hist instead of adding text afterwards.
You do not provide your data, so I will use some data that is handy to illustrate.
The labels argument can specify the individual labels
H1 = hist(iris$Sepal.Length, breaks = 3:8, plot=FALSE)
BarLabels = H1$counts
BarLabels[BarLabels == 0] = ""
hist(iris$Sepal.Length, breaks = 3:8, labels = BarLabels)
Thanks #Daniel Anderson, it Ok now (Thumbs Up)
links$Survey_Duration <- ifelse(links$Survey_Duration > 15 &
links$Survey_Duration <= 25,
NA,
links$Survey_Duration)
h <- hist(links$Survey_Duration, breaks = seq(0,50,5), plot = FALSE)
plot(h, ylim = c(0, 14), main = "Survey Duration", xlab = "Time", col = "gray", las = 1)
text(h$mids, h$counts + 1, ifelse(h$counts == 0, "", h$counts))
axis(side=1, at=seq(0,50,5), labels=seq(0,50,5))
abline(v = mean(links$Survey_Duration), col = "royalblue", lwd = 1.5)
abline(v = median(links$Survey_Duration), col = "red", lwd = 1.5)
legend(x = "topright",
c("Mean", "Median"),
col = c("royalblue","red"),
lwd = c(1.5,1.5))

regresio line didn't display correctly

lab <- data.frame( Month = c("2016-01-01", "2016-02-01", "2016-03-01", "2016-04-01", "2016-05-01", "2016-06-01"), AccNumber = c(5683,5418,6001,6184,6001,6184), OCTAT = c(40.20,50.52,47.15,45.03,47.15,45.03), Default = c(30,30,30,30,30,30))
id <- rownames(lab)
lab <- cbind(id = id, lab)
max <- max(lab$AccNumber)
max2 <- max(lab$OCTAT)
p <- barplot(lab$AccNumber, names.arg = lab$Month, xlab = "Month", col = "blue" ,ylim = c(0, max + 2000))
par(new=TRUE)
plot(x = id, y = lab$OCTAT, type = "l", col = "red", axes = FALSE, ylim = c(0, 60), ann = FALSE)
plot(x = id, y = lab$OCTAT, type = "l", col = "green", axes = FALSE, ylim = c(0, 60), ann = FALSE, lwd = 2)
p <- barplot(lab$AccNumber, names.arg = lab$Month, xlab = "Month", col = "blue" ,ylim = c(0, max + 2000))
par(new=TRUE)
plot(x = id, y = lab$OCTAT, type = "l", col = "green", axes = FALSE, ylim = c(0, 60), ann = FALSE, lwd = 2)
axis(4, at=seq(0, max2 + 10, 10))
abline(h = 30, lwd = 2) # Default blue line (30)
abline(lm(lab$OCTAT ~ lab$id), col = "red", lty = 2) #Regression line trend
After running the code this is what I got below. I am totally new to R. How can I get a regression line that look like the second pic.
If you want the dashed red line to span from the left axis to the right axis, change the last line of code to:
abline(lm(lab$OCTAT ~ as.numeric(lab$id)), col = "red", lty = 2)
This is because lab$id is of class character.
If you want the red dashed line to span the same width as the green line, change the last line of code to:
lines(x = c(1,6), y = c(44.9953, 46.6983), col = 'red', lty = 2)
The values 44.9953 and 46.6983 are obtained from putting id = 1 and id = 6 into the model fitted from lm(lab$OCTAT ~ as.numeric(lab$id).
Hope this is what you were looking for.

Resources