Not able to merge two legends - r

enter image description here
I have plotted a stacked bar plot and a line graph in one chart, using two different data sets. I am getting a separate legend for each of them, and I have tried everything I could think of to merge them.
Please find the attached code.
alldata = data.frame(x, aircargo, autototal, govtreceipts,
iipconsumer,nongimports, railfreight)
linedata = data.frame(x,ceii)
melteddata = melt(alldata,id.vars="x")
plotS1 <- ggplot(melteddata)
plotS1 + geom_bar(aes(x=ordered_x,y=value,factor=variable,fill=variable,
order=-as.numeric(variable)), stat="identity") +
geom_line(data=linedata, aes(x=as.numeric(ordered_x),y=ceii, color = "CEII"), lwd=1.5) +
scale_color_manual( values = c("#000000")) +
scale_fill_manual(name = "Components", values = c("#0000FF", "#FFC0CB", "#00FFFF", "#00FF00", "#FF00FF", "#20B2AA", "#000000")) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + theme(plot.background = element_rect(fill = "#BFD5E3")) +
ggtitle("Monthly Contribution by Components (3 month MA)") +
theme( panel.border = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + labs( y = "", x = "") +
scale_y_continuous(labels = c("-0.30","-0.25","-0.2","-0.15","-0.10","-0.05", "0.00", "0.05", "0.10", "0.15", "0.20", "0.25", "0.30"), breaks = c(-0.30, -0.25, -0.20, -0.15, -0.10, -0.05, 0.00, 0.05, 0.10, 0.15, 0.20, 0.25, 0.30))
Dataset -
aircargo <- c(-0.027, 0.028, 0.044, 0.011, 0.041, 0.030, -0.028, 0.017, 0.001, 0.060, -0.040, 0.016, 0.006, -0.040, -0.003, 0)
autototal <- c(0.061, -0.004, 0.009, 0.024, -0.026, 0.025, -0.029, 0.000, -0.015, -0.016, 0.026, -0.062, 0.034, 0.002, -0.081, -0.005)
govtreceipts <- c(-0.001, 0.001, -0.005, 0.031, -0.023, 0.000, -0.009, 0.005, 0.002, -0.005, 0.004, 0.000, 0.004, -0.003, 0, 0)
iipconsumer <- c(0.043, -0.014, 0.041, -0.035, 0.001, 0.001, 0.040, 0.010, -0.006, 0.013, 0.001, -0.006, -0.002, -0.011, -0.033, 0)
nongimports <- c(0.018, -0.008, 0.015, -0.004, 0.019, -0.010, 0.008, 0.007, -0.021, 0.006, -0.002, -0.007, 0.009, -0.017, 0.005, 0)
railfreight <- c(0.014, -0.015, 0.031, 0.103, -0.041, 0.025, -0.044, 0.061, -0.050, 0.092, -0.045, 0.011, -0.007, 0.050, 0.100, -0.015)
x <- c("Jan-18", "Feb-18", "Mar-18", "Apr-18", "May-18", "Jun-18", "Jul-18", "Aug-18", "Sep-18", "Oct-18", "Nov-18", "Dec-18", "Jan-19", "Feb-19", "Mar-19", "Apr-19")
ceii <- c(0.108, -0.012, 0.134, 0.131, -0.030, 0.072, -0.062, 0.100, -0.089, 0.149, -0.070, -0.047, 0.043, -0.019, -0.012, -0.020)
Please help in combining the legend. Thanks in advance.

One option is to get the same levels for the two factors. This takes some up front work with the data.frames.
For example, here's one way to do this, adding a variable named variable to linedata and then matching the factor levels.
# Reshape the bar data to long format: one row per (x, component) pair.
melteddata <- reshape2::melt(alldata, id.vars = "x")

# Append "CEII" to the levels of `variable` so the bar components and the
# line series can share one set of factor levels.
melteddata$variable <- factor(
  melteddata$variable,
  levels = c(levels(melteddata$variable), "CEII")
)

# Tag the line data with a `variable` column that uses exactly the same
# levels as melteddata.
linedata <- data.frame(x, ceii, variable = "CEII")
linedata$variable <- factor(
  linedata$variable,
  levels = levels(melteddata$variable)
)
Then I made a vector for the colors outside of the plot so it can be used for both colors and fills. I made this a named vector since I find this best practice in case the order ever changes.
# Vector of colors
# Colours shared by the fill and colour scales. Each colour is named after a
# level of `variable`, so the mapping does not depend on the level order.
fillcol <- setNames(
  c(
    "#0000FF", "#FFC0CB", "#00FFFF", "#00FF00",
    "#FF00FF", "#20B2AA", "#000000"
  ),
  levels(melteddata$variable)
)
Then you get a combined legend if you use drop = FALSE in the scale layers.
To get filled boxes plus a line box for the line you need override.aes within guide_legend(). I removed the fill from the last box so the line shows.
Note I didn't have your ordered_x variable so this is likely not exactly the plot you were looking for.
# Stacked bars for the components plus one line for CEII. The fill and colour
# scales share the same name, values and (undropped) levels, which is what
# lets the two legends combine into one.
ggplot(melteddata) +
  geom_col(aes(x = x, y = value, fill = variable)) +
  geom_line(
    data = linedata,
    # group = 1 puts all points in a single group so they are joined by one
    # line even though x holds text labels.
    aes(x = x, y = ceii, color = variable, group = 1),
    lwd = 1.5
  ) +
  scale_color_manual(name = "Components", drop = FALSE, values = fillcol) +
  scale_fill_manual(name = "Components", drop = FALSE, values = fillcol) +
  # Keep the six component fills and blank the seventh key (CEII) so that
  # only its line is visible in the legend.
  guides(
    fill = guide_legend(override.aes = list(fill = c(fillcol[1:6], NA)))
  )

Related

Need Help Making an Ordihull

I have been collaborating on this code that creates an NMDS plot and I want to add shaded polygons of the points. However, the ordihull code keeps returning the following error. Why would the argument be of length zero?
Error in if (n < 4) return(colMeans(x[-n, , drop = FALSE])) : argument is of length zero
> m1 <- metaMDS(d1)
> m2 <- metaMDS(d2)
> m3 <- metaMDS(d3)
> mdat <- data.frame(m3$points)
> mdat$site <- substr(rownames(mdat), 1, 1) mdat$col <- ifelse(mdat$site == "D", "red",
ifelse(mdat$site == "H", "blue", "green"))
> plot(mdat[,1], mdat[,2], pch=16, col=mdat$col, display = "sites",
xlab="NMDS1", ylab="NMDS2", xlim=c(-0.2, 0.2),
ylim=c(-0.2, 0.2), main= "Phylum")
> ordihull(mdat[,1], mdat[,2], display="sites", label=T,
lwd=2, draw="polygon",col= c("blue", "red", "green"))
Here is the Dput:
> structure(list(p__Proteobacteria = c(44.807, 40.907, 36.558,36.811,
39.401, 40.114, 45.911, 43.133, 30.137, 27.734, 26.722,
31.261), p__Actinobacteria = c(26.819, 34.651, 40.904, 38.847,
39.446, 37.523, 29.881, 29.251, 31.783, 23.641, 34.918, 31.308
), p__Acidobacteria = c(8.48, 6.6, 5.934, 6.609, 5.89, 7.567,
5.795, 6.666, 10.616, 10.709, 8.988, 11.794), p__Bacteroidetes =
c(7.56, 8.189, 5.363, 6.223, 4.716, 3.613, 4.65, 5.2, 4.281, 2.785,
2.808, 3.271), p__Gemmatimonadetes = c(3.529, 2.108, 1.213, 1.193,
1.541, 1.439, 1.006, 1.171, 5.794, 4.107, 4.001, 2.747),
p__Chloroflexi = c(2.686, 2.987, 2.979, 3.049, 4.128, 4.564, 5.304,
4.624, 3.669, 2.775, 4.534, 4.94), p__Bacteria_unclassified =
c(2.38, 1.869, 1.579, 1.247, 2.3, 2.108, 1.36, 1.193, 3.126, 1.885,
2.987, 2.37), p__Firmicutes = c(0.998, 0.807, 2.76, 2.962, 0.866,
1.32, 1.651, 2.073, 1.099, 1.046, 1.3, 1.302), p__Verrucomicrobia =
c(0.676, 0.404, 0.32, 0.35, 0.293, 0.239, 0.188, 0.261, 0.521,
0.726, 0.52, 0.397), p__Nitrospirae = c(0.464, 0.244, 0.198, 0.208,
0.016, 0.032, 0.024, 0.042, 0.296, 0.103, 0.229, 0.211),
p__Candidatus_Saccharibacteria = c(0.421, 0.511, 0.456, 0.552,
0.523, 0.6, 0.842, 1.016, 0.672, 0.636, 0.465, 0.736),
p__Planctomycetes = c(0.392, 0.267, 0.354, 0.285, 0.275, 0.356,
0.285, 0.276, 0.33, 0.438, 0.552, 0.365), p__Fibrobacteres = c(0.14,
0.074, 0.007, 0.009, 0.072, 0.044, 0.136, 0.079, 0.117, 0.018,
0.167, 0.065), p__Candidatus_Latescibacteria = c(0.113, 0.059,
0.017, 0.005, 0.004, 0.017, 0.015, 0.009, 0, 0.011, 0.007, 0.018
), p__Latescibacteria = c(0.085, 0.04, 0.01, 0.004, 0.012, 0.015,
0.033, 0.015, 0.012, 0.016, 0.011, 0.018), p__Cyanobacteria =
c(0.079, 0.048, 1.071, 1.372, 0.32, 0.19, 2.629, 4.689, 7.133,
22.963, 11.417, 8.767), p__Thermodesulfobacteria = c(0.068, 0.057,
0.115, 0.103, 0.008, 0.01, 0.015, 0.007, 0.01, 0.003, 0.002, 0.013),
p__Elusimicrobia = c(0.059, 0.021, 0.012, 0.001, 0.004, 0.002,
0.015, 0.017, 0, 0.002, 0.005, 0.006), p__Chlorobi = c(0.052,
0.025, 0.002, 0.012, 0.029, 0.046, 0.033, 0.04, 0.05, 0.02,
0.046, 0.025), p__Armatimonadetes = c(0.046, 0.053, 0.051,
0.072, 0.076, 0.095, 0.048, 0.053, 0.197, 0.159, 0.128, 0.125
), p__Spirochaetes = c(0.035, 0.021, 0.002, 0.001, 0, 0.002,
0.024, 0.039, 0, 0, 0, 0), p__Parcubacteria = c(0.03, 0.013,
0, 0, 0.01, 0.015, 0.042, 0.037, 0.032, 0.059, 0.053, 0.011
), p__Chlamydiae = c(0.028, 0.017, 0.046, 0.05, 0.014, 0.007,
0.021, 0.022, 0.07, 0.074, 0.08, 0.152)), class = "data.frame",
row.names = c("D15B", "D610B", "D15F", "D610F", "HR15B", "HR610B",
"HR15F", "HR610F", "C15B", "C610B", "C15F", "C610F"))
Here are the codes:
> phylum.dat <- dput
> x <- data.frame(tax=names(phylum.dat), nsites=apply(phylum.dat, 2, function(x){length(which(x>0))}))
> d1 <- vegdist(phylum.dat, method = "jaccard", binary = TRUE)
> d2 <- vegdist(log1p(phylum.dat, method = "jaccard"))
> logit_phylum <- as.matrix(phylum.dat+1)/100
> d3 <- qlogis(logit_phylum)
> d3 <- d3+abs(min(d3))
> d3 <- vegdist(d3, method = "jaccard")
> m1 <- metaMDS(d1)
> m2 <- metaMDS(d2)
> m3 <- metaMDS(d3)
> e1 <- envfit(m3, phylum.dat)
> exy <- data.frame(tax=names(phylum.dat),
> x=e1$vectors$arrows[,1],
> y=e1$vectors$arrows[,2],
> pval=e1$vectors$pvals,
> r=e1$vectors$r)
> rownames(exy) <- NULL
> exy <- exy[order(-exy$r),]
> mdat <- data.frame(m3$points)
> mdat$site <- substr(rownames(mdat), 1, 1)
> mdat$col <- ifelse(mdat$site == "D", "red",
> ifelse(mdat$site == "H", "blue", "green"))
> mdat$rad <- sqrt((mdat$MDS1^2) + (mdat$MDS2^2))
> max(mdat$rad)
> exy$x2 <- 0.17 * exy$r * exy$x
> exy$y2 <- 0.17 * exy$r * exy$y
> exy$adj <- ifelse(exy$x < 0, 1, 0)
> plot(mdat[,1], mdat[,2], pch=16, col=mdat$col,
> xlab="NMDS1", ylab="NMDS2", xlim=c(-0.2, 0.2),
> ylim=c(-0.2, 0.2), main= "Phylum")
> ordihull(mdat[,1], mdat[,2], display="sites", label=T,
> lwd=2, draw="polygon",col= c("blue", "red", "green"))

How to turn the row values in a dataframe into NA when the values of one column are greater than another column in r?

My data looks like this:
> dput(head(CORt, 5))
structure(list(rDate = structure(c(1438019100, 1438019400, 1438019700,
1438020000, 1438020300), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
I630 = c(0.536, 0.506, 0.419, 0.456, 0.427), I800 = c(0.414,
0.388, 0.339, 0.351, 0.331), I532 = c(0.547, 0.534, 0.463,
0.488, 0.464), I570 = c(0.522, 0.508, 0.467, 0.468, 0.445
), WR630 = c(0.0127, 0.0573, 0.0083, 0.0057, 0.0053), WR800 = c(0.0144,
0.0506, 0.0249, 0.0163, 0.0159), WR532 = c(0.0139, 0.0394,
0.006, 0.005, 0.0049), WR570 = c(0.0176, 0.0379, 0.0094,
0.0054, 0.0049), NR630 = c(0.006, 0.034, 0.006, 0.004, 0.004
), NR800 = c(0.007, 0.04, 0.019, 0.02, 0.019), NR532 = c(0.007,
0.072, 0.01, 0.007, 0.007), NR570 = c(0.009, 0.077, 0.008,
0.007, 0.007), ER630 = c(0.0351, 0.0746, 0.0116, 0.0055,
0.0052), ER800 = c(0.0278, 0.0596, 0.03, 0.0324, 0.0303),
ER532 = c(0.04, 0.085, 0.013, 0.008, 0.008), ER570 = c(0.034,
0.083, 0.013, 0.009, 0.008)), row.names = c(NA, 5L), class = "data.frame")
In the CORt dataframe when the values of WR630 > I630 I want to turn all values of that row(s) into NA but I want to preserve the rDate column dates and the ER532 values of that row(s).
I have been using this code (example):
which(CORt$WR630>CORt$I630)
CORt[c(7632, 12530, 13684, 14260, 18295, 19735, 23770, 24634, 27529, 44055), setdiff(names(CORt), c("rDate", "ER532"))] <- NA
but this is not handy when I have 200 lines, for example. I'm looking for a code that will turn the row values when WR630 > I630 into NA directly.
Any help is much appreciated.
You can use the which command instead of typing out all the row numbers manually.
CORt[which(CORt$WR630>CORt$I630),setdiff(names(CORt), c("rDate", "ER532"))] <- NA
If you don't have any missing values in the data you can also skip which.
CORt[CORt$WR630>CORt$I630,setdiff(names(CORt), c("rDate", "ER532"))] <- NA
How does this work for you?
# Set every column except rDate and ER532 to NA in the rows where WR630 is
# greater than I630.
#
# The test is vectorised over all rows. Mapping over `nrow(data)` would visit
# only that single number (the last row), and an assignment made inside the
# mapped function changes a local copy, leaving `data` itself untouched.

# which() converts the logical test to row numbers and drops NA comparisons.
rows_to_blank <- which(data[["WR630"]] > data[["I630"]])

# Columns are excluded by position: 1 is rDate and 16 is ER532.
data[rows_to_blank, -c(1, 16)] <- NA
data
Please note that it uses index numbering instead of names to avoid setting rDate and ER532 to NA. In the data you provided I didn't find any cases where your condition held true, so I tested it with the condition reversed to be certain it works.

xlim geom_histogram Error: Aesthetics must be either length 1 or the same as the data

I am trying to plot a histogram with a custom colour palette. The problem arises when I set the xlim of the histogram.
Please see below the reproducible example:
# sample dataframe
test_dt <- structure(list(col_1 = c(0.057, -0.063, -0.319, 0.02, 0.079,
0.007, -0.105, -0.084, 0.019, 0.28, -0.064, -0.243, -0.116, 0.079,
0.07, -0.187, -0.725, 0.134, 0.062, -0.056, -0.074, 0.392, -0.014,
-0.062, 0.214, 0.371, 0.069, -0.03, 0.036, -0.175, 0.097, 0.358,
0.153, -0.092, -0.038, -0.051, 0.017, -0.108, 0.133, 0.105, 0.187,
-0.056, -0.316, 0.15, -0.142, 0.076, 0.242, -0.069, 0.155, 0.214,
0.162, -0.037, -0.109, 0.111, -0.077, -0.435, 0.003, 0.187, 0.134,
0.027, 0.107, 0.175, -0.355, -0.572, 0.038, -0.209, -0.263, -0.147,
-0.23, -0.174, 0.203, -0.118, 0.008, -0.268, -0.001, 0.227, -0.019,
0.08, 0.044, -0.065, -0.131, 0.093, 0.127, -0.131, 0.039, 0.045,
0.032, 0.343, 0.053, -0.033, 0.453, 0.07, -0.225, 0.094, 0.002,
-0.119, 0.014, -0.125, 0.003, -0.48)), row.names = c(NA, -100L
), class = "data.frame")
# colour palette
RBW <- colorRampPalette(c("darkred","white","darkblue"))
# plot histogram without xlim
ggplot(test_dt) +
geom_histogram(aes(x=col_1),
position = "identity",
bins = 60,
color = "grey10",
fill = RBW(60))
When I run the following lines is when I get the error:
Aesthetics must be either length 1 or the same as the data
# plot histogram with xlim
ggplot(test_dt) +
geom_histogram(aes(x=col_1),
position = "identity",
bins = 60,
color = "grey10",
fill = RBW(60)) +
xlim(-2,2)
Instead of xlim(), add + coord_cartesian(xlim = c(-2, 2)), which zooms the plot without changing the data that gets binned:
library(ggplot2)

# Histogram of col_1 with 60 bins and one palette colour per bin. `fill` is a
# constant vector of 60 colours, so it is set outside aes().
ggplot(test_dt) +
  geom_histogram(
    aes(x = col_1),
    position = "identity",
    bins = 60,
    color = "grey10",
    fill = RBW(60)
  ) +
  # Restrict the visible x-range on the coordinate system rather than on the
  # x scale.
  coord_cartesian(xlim = c(-2, 2))
Created on 2020-02-11 by the reprex package (v0.3.0)

Using geom_line and geom_point for different series in one facet

I'm using ggplot2 to create a plot like the following one:
The problem I'm encountering is combining points and lines in the upper facet. As in the image, I want means to be represented by lines and the data by points.
I can produce lines for both the means and the data using geom_line and putting the means and data in one group, and the probabilities in another, as seen here:
But I cannot combine lines and points in a single facet. Because of this, I didn't reshape the data provided below nor group them.
# Example data: eight values per vector, one for each year 1962-1969.
years <- c(1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969)

# Observed values for series a and b (to be drawn as points).
a.data <- c(0.105, 0.075, 0.107, 0.112, 0.116, 0.062, 0.044, 0.073)
b.data <- c(-0.039, -0.022, -0.070, -0.062, -0.067, -0.065, -0.023, -0.015)

# Means for series a and b (to be drawn as lines).
a.mean <- c(0.0893, 0.0893, 0.0893, 0.0893, 0.0893, 0.0763, 0.0757, 0.0757)
b.mean <- c(-0.040, -0.040, -0.040, -0.040, -0.040, -0.030, -0.029, -0.029)

# Probabilities (plotted separately from the means and data). The comma
# between 0.348 and 0.020 is required; without it the line does not parse.
prob <- c(0.0, 0.0, 0.0, 0.0, 0.348, 0.020, 0.002, 0.0)
Any assistance would be greatly appreciated.
While trying to solve your problem I found a way to make the plot, not with different facets but with two graphs in the same grid, using the gridExtra package.
As you can see, I had to store the data in different objects to draw each part of the plot.
resulting_plot
library(tidyverse)
library(gridExtra)
# Wide table: one row per year holding the observed series (a.data, b.data),
# their means (a.mean, b.mean) and the probabilities (prob).
data <- data.frame(
  years  = c(1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969),
  a.data = c(0.105, 0.075, 0.107, 0.112, 0.116, 0.062, 0.044, 0.073),
  b.data = c(-0.039, -0.022, -0.070, -0.062, -0.067, -0.065, -0.023, -0.015),
  a.mean = c(0.0893, 0.0893, 0.0893, 0.0893, 0.0893, 0.0763, 0.0757, 0.0757),
  b.mean = c(-0.040, -0.040, -0.040, -0.040, -0.040, -0.030, -0.029, -0.029),
  prob   = c(0.0, 0.0, 0.0, 0.0, 0.348, 0.020, 0.002, 0.0)
)

# Long format for the observations (drawn as points): the series name goes
# into `a_b`, the value into `data`.
data_points <- data %>%
  select(a.data, b.data, years) %>%
  gather(key = "a_b", value = "data", -years)

# Long format for the means (drawn as lines): the series name goes into
# `a_b`, the value into `mean`.
data_lines <- data %>%
  select(a.mean, b.mean, years) %>%
  gather(key = "a_b", value = "mean", -years)

# First plot: observations as points and means as lines, coloured by series,
# with the legend hidden.
p1 <- ggplot(data_points) +
  geom_point(aes(x = years, y = data, color = factor(a_b))) +
  geom_line(
    data = data_lines,
    aes(x = years, y = mean, color = factor(a_b))
  ) +
  theme(legend.position = "none")

# Second plot: the probabilities as a single line.
p2 <- ggplot(data) +
  geom_line(aes(x = years, y = prob))

# Place both plots in one grid.
grid.arrange(p1, p2)

Area under a density plot not equal to 1

I am trying to chart a probability density plot using ggplot. My problem is that the area under the curve is not equal to one. Advice appreciated.
Sample chart... the code that produced this chart follows... The Y axis looks like it is a count for small sized bins, rather than a probability for falling into that bin. The example code here, is one of the sources I drew on in the preparation of this chart.
Sample code... most of which is data... the key bit of code is at the bottom...
library(ggplot2)
library(reshape)
library(plyr)
library(scales)
Date <- as.Date(
c("1976-01-16", "1976-02-15", "1976-03-16", "1976-04-15", "1976-05-16",
"1976-06-15", "1976-07-16", "1976-08-16", "1976-09-15", "1976-10-16",
"1976-11-15", "1976-12-16", "1977-01-16", "1977-02-14", "1977-03-16",
"1977-04-15", "1977-05-16", "1977-06-15", "1977-07-16", "1977-08-16",
"1977-09-15", "1977-10-16", "1977-11-15", "1977-12-16", "1978-01-16",
"1978-02-14", "1978-03-16", "1978-04-15", "1978-05-16", "1978-06-15",
"1978-07-16", "1978-08-16", "1978-09-15", "1978-10-16", "1978-11-15",
"1978-12-16", "1979-01-16", "1979-02-14", "1979-03-16", "1979-04-15",
"1979-05-16", "1979-06-15", "1979-07-16", "1979-08-16", "1979-09-15",
"1979-10-16", "1979-11-15", "1979-12-16", "1980-01-16", "1980-02-15",
"1980-03-16", "1980-04-15", "1980-05-16", "1980-06-15", "1980-07-16",
"1980-08-16", "1980-09-15", "1980-10-16", "1980-11-15", "1980-12-16",
"1981-01-16", "1981-02-14", "1981-03-16", "1981-04-15", "1981-05-16",
"1981-06-15", "1981-07-16", "1981-08-16", "1981-09-15", "1981-10-16",
"1981-11-15", "1981-12-16", "1982-01-16", "1982-02-14", "1982-03-16",
"1982-04-15", "1982-05-16", "1982-06-15", "1982-07-16", "1982-08-16",
"1982-09-15", "1982-10-16", "1982-11-15", "1982-12-16", "1983-01-16",
"1983-02-14", "1983-03-16", "1983-04-15", "1983-05-16", "1983-06-15",
"1983-07-16", "1983-08-16", "1983-09-15", "1983-10-16", "1983-11-15",
"1983-12-16", "1984-01-16", "1984-02-15", "1984-03-16", "1984-04-15",
"1984-05-16", "1984-06-15", "1984-07-16", "1984-08-16", "1984-09-15",
"1984-10-16", "1984-11-15", "1984-12-16", "1985-01-16", "1985-02-14",
"1985-03-16", "1985-04-15", "1985-05-16", "1985-06-15", "1985-07-16",
"1985-08-16", "1985-09-15", "1985-10-16", "1985-11-15", "1985-12-16"))
GOLD <- c(
-0.104, 0.051, 0.011, -0.035, -0.008, -0.010, -0.065, -0.067, 0.041, 0.017,
0.126, 0.023, -0.011, 0.029, 0.087, 0.007, -0.016, -0.044, 0.048, -0.013,
0.030, 0.062, -0.029, 0.042, 0.078, 0.028, 0.031, -0.045, 0.005, 0.043,
0.028, 0.090, 0.030, 0.072, -0.094, 0.009, 0.093, 0.080, -0.014, -0.013,
0.077, 0.084, 0.058, 0.021, 0.184, 0.097, 0.002, 0.169, 0.474, -0.014,
-0.168, -0.067, -0.007, 0.169, 0.071, -0.025, 0.077, -0.022, -0.059, -0.044,
-0.063, -0.103, -0.003, -0.008, -0.031, -0.040, -0.113, 0.005, 0.081, -0.014,
-0.057, -0.009, -0.062, -0.026, -0.117, 0.061, -0.046, -0.058, 0.080, 0.076,
0.190, -0.031, -0.019, 0.074, 0.079, 0.022, -0.144, 0.030, 0.013, -0.057,
0.026, -0.017, -0.012, -0.042, -0.030, 0.015, -0.043, 0.041, 0.022, -0.032,
-0.011, 0.001, -0.083, 0.004, -0.019, -0.002, 0.003, -0.065, -0.063, 0.017,
-0.044, 0.134, -0.022, -0.014, -0.008, 0.033, -0.014, 0.017, -0.004, -0.023)
# One row per date, with the change in the gold price on the previous month.
df <- data.frame(Date = Date, GOLD = GOLD)

# Density plot of the monthly changes; `..density..` is the density value
# computed by stat_density().
p <- ggplot(data = df, aes(x = GOLD, y = ..density..)) +
  stat_density(fill = "grey50") +
  xlab("Percent change on previous month") +
  ylab("Density") +
  # opts() is defunct in current ggplot2 releases; ggtitle() sets the title.
  ggtitle("Change in Gold Price in the US")

# Arguments are named because ggsave()'s first positional argument is the
# file name, not the plot.
ggsave(filename = "plot.png", plot = p, width = 8, height = 4, dpi = 125)
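I don't think this is a problem with ggplot, but with your understanding of the y-axis in a density plot. The y-axis shows probability density, not probability: the area under the curve already integrates to 1, and because most of your x values lie within a range far narrower than 1, the density has to rise well above 1 for that to hold. The base plotting functions in R plot the same thing. You can set the call to y=..scaled.. to give you a relative density, but if you use stat_bin() you'll see the actual histogram and notice it's not the counts. If you want, you could normalize your data with something like this: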
GOLD_N <- (GOLD- mean(GOLD))/sd(GOLD)
df <- data.frame(Date=Date, GOLD=GOLD,GOLD_N=GOLD_N)
Then run your plot it will look something like this:
You should watch this video about how to interpret density functions (http://www.youtube.com/watch?v=Fvi9A_tEmXQ). Normalizing your data will give you a plot that is a bit more intuitive if you're used to looking at PDFs, and the area under it will integrate to 1. But don't misinterpret the y-axis: y IS NOT the probability of a randomly drawn value from the density being equal to x.

Resources