I would like to plot a barplot but I have dates on the x axis and I want those dates to be correctly spaced (as it is NON categorical)
set.seed(1)
m = matrix(abs(rnorm(6)),3,2)
rownames(m) = as.Date(c('2011-01-01','2011-01-03','2011-01-10'))
barplot(t(m),beside=T,col=c('red','blue'),las=2)
On this example I would like 14984 to be offset on the right.
I'd rather a graphics solution but ggplot2 is fine too
Would you mind to use ´ggplot´ instead?
library(ggplot2)
set.seed(1)
df <- data.frame(y=abs(rnorm(6)),
x=rep(as.Date(c('2011-01-01','2011-01-03','2011-01-10')),
times = 2),
g = factor(rep(c(1,2), each = 3)))
ggplot(aes(x=x, y=y, group = g, fill = g), data = df) +
geom_bar(stat = 'identity', position = 'dodge')
You can improve axis formatting with `scale_x_date´
library(scales)
ggplot(aes(x=x, y=y, group = g, fill = g), data = df) +
geom_bar(stat = 'identity', position = 'dodge') +
scale_x_date(breaks = '1 day') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
And customize it to your purpose
ggplot(aes(x=x, y=y, group = g, fill = g), data = df) +
geom_bar(stat = 'identity', position = 'dodge') +
scale_x_date(breaks = '1 day') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
scale_fill_manual('My\nclasses', values = c('1'='red', '2' = 'blue')) +
labs(list(title = 'Barplot\n', x = ('Date'), y = 'Values'))
With graphics, you probably have to prepare the data appropriately (with missing values for dates you don't consider) in order to do this. Then you can use barplot.
# matrix definition
set.seed(1)
m = matrix(abs(rnorm(6)),3,2)
rownames(m) = as.Date(c('2011-01-01','2011-01-03','2011-01-10'))
# get all dates in between
dts <- do.call(":", as.list(range(rownames(m))))
dts <- dts[!dts%in%rownames(m)]
mat <- matrix(NA, nrow=length(dts), ncol=2, dimnames=list(dts, NULL))
# combine with original matrix
m <- rbind(m, mat)
m <- m[order(rownames(m)), ]
which(!is.na(m[,1]))
# plot
barplot(t(m), beside=T, col=c('red','blue'),las=2, axes=FALSE, axisnames=FALSE)
axis(2)
axis(1, at=3*which(!is.na(m[,1]))-1, labels=rownames(m[!is.na(m[,1]),]))
Related
I have the following graph and code:
Graph
ggplot(long2, aes(x = DATA, y = value, fill = variable)) + geom_area(position="fill", alpha=0.75) +
scale_y_continuous(labels = scales::comma,n.breaks = 5,breaks = waiver()) +
scale_fill_viridis_d() +
scale_x_date(date_labels = "%b/%Y",date_breaks = "6 months") +
ggtitle("Proporcions de les visites, només 9T i 9C") +
xlab("Data") + ylab("% visites") +
theme_minimal() + theme(legend.position="bottom") + guides(fill=guide_legend(title=NULL)) +
annotate("rect", fill = "white", alpha = 0.3,
xmin = as.Date.character("2020-03-16"), xmax = as.Date.character("2020-06-22"),
ymin = 0, ymax = 1)
But it has some sawtooth, how am I supposed to smooth it out?
I believe your situation is roughly analogous to the following, wherein we have missing x-positions for one group, but not the other at the same position. This causes spikes if you set position = "fill".
library(ggplot2)
x <- seq_len(100)
df <- data.frame(
x = c(x[-c(25, 75)], x[-50]),
y = c(cos(x[-c(25, 75)]), sin(x[-50])) + 5,
group = rep(c("A", "B"), c(98, 99))
)
ggplot(df, aes(x, y, fill = group)) +
geom_area(position = "fill")
To smooth out these spikes, it has been suggested to linearly interpolate the data at the missing positions.
# Find all used x-positions
ux <- unique(df$x)
# Split data by group, interpolate data groupwise
df <- lapply(split(df, df$group), function(xy) {
approxed <- approx(xy$x, xy$y, xout = ux)
data.frame(x = ux, y = approxed$y, group = xy$group[1])
})
# Recombine data
df <- do.call(rbind, df)
# Now without spikes :)
ggplot(df, aes(x, y, fill = group)) +
geom_area(position = "fill")
Created on 2022-06-17 by the reprex package (v2.0.1)
P.S. I would also have expected a red spike at x=50, but for some reason this didn't happen.
This is really basic. Still hope I can get your help. I need to superimpose two density plots. The first is a generated normal density plot given mean and sd of AAPL. >
x <- seq(-20, 20, length.out = 5113)
normAAPL<-data.frame(x, f = dnorm(x,mean = meanAAPL, sd = sdAAPL)) %>%
ggplot(aes(x, f)) +
geom_line() +
stat_function(fun=dnorm, geom="line", col=2, lty=2)+
ylim(0,0.2)
> meanAAPL
[1] 0.101133
> sdAAPL
[1] 2.461525
The next is the actual distribution
dAAPL <-density(oldandnew$AAPL)
Where the 20 first AAPL data is
c(-8.810021, 1.45281, -9.051401, 4.628075, -1.774445, -5.25055,
-6.181806, 10.40407, 3.74302, 3.425328, 2.48944, 6.309463, -1.948374,
-4.652429, 5.493372, -1.852238, -0.1725783, -7.924, 2.074379,
-3.431709)
Do I need to combine the data in one data frame to plot them in the same ggplot?
Hope you can help me out.
df <- data.frame(x = seq(-20, 20, length.out = 5113),
f = dnorm(x))
df2 <- data.frame(x = c(-8.810021, 1.45281, -9.051401, 4.628075, -1.774445, -5.25055,
-6.181806, 10.40407, 3.74302, 3.425328, 2.48944, 6.309463, -1.948374,
-4.652429, 5.493372, -1.852238, -0.1725783, -7.924, 2.074379,
-3.431709))
ggplot() +
geom_line(data = df, aes(x, f, colour = "Normal")) +
geom_density(data = df2, aes(x, colour = "Actual")) +
ylim(0,0.2) +
scale_color_manual(name = "Distribution", values = c("Normal" = "Blue", "Actual" = "Red")) +
theme_minimal() + theme(legend.position = "top", aspect.ratio = 1)
Produces:
I have a time series with forecast and confidence interval data, I wanted to plot them simultaneously with a legend using ggplot2. I'm doing it by the code below:
set.seed(321)
library(ggplot2)
#create some dummy data similar to mine
sample <- rnorm(350)
forecast <- rnorm(24)
upper <- forecast+2*sd(forecast)
lower <- forecast-2*sd(forecast)
## wrap data into a data.frame
df1 = data.frame(time = seq(325,350,length=26), M = sample[325:350], isin = "observations")
df2 = data.frame(time = seq(351,374,length=24), M = forecast , isin = "my_forecast")
df3 = data.frame(time = seq(351,374,length=24), M = upper ,isin = "upper_bound")
df4 = data.frame(time = seq(351,374,length=24), M = lower, isin = "lower_bound")
df = rbind(df1, df2, df3, df4)
In a previous question #Matthew Plourde suggested me a nice answer:
ggplot(df1, aes(x = time, y = M)) + geom_line(colour='blue') +
geom_smooth(aes(x=time, y=M, ymax=upper, ymin=lower),
colour='red', data=df2, stat='identity')
Now, I wanted to include a legend with "observations" and "my_forecast". I tryed with
ggplot(df1, aes(x = time, y = M)) + geom_line(colour='blue') +
geom_smooth(aes(x=time, y=M, ymax=upper, ymin=lower),
colour='red', data=df2, stat='identity')+ scale_colour_manual(values=c(observations='blue', my_forecast='red'))
but it doesn't display a legend.
You need to move the colour parameters into aes to create a legend.
ggplot(df1, aes(x = time, y = M)) + geom_line(aes(colour = 'blue')) +
geom_smooth(aes(x = time, y = M, ymax = upper, ymin = lower, colour = 'red'),
data = df2, stat = 'identity') +
scale_colour_manual("", values = c("blue", "red"),
labels = c("observations", "my forecast"))
Say I created a heatmap using the function geom_raster() (from ggplot2).
What's a smart way to add a row at the bottom of the table showing (in my case) the 'Mean return' for each month on the period considered ?
It would be nice there is some space left between the 1985-2013 period and the row for the average, and maybe police color and 'cases' could be customized.
The core of my code is as follows (the object molten contains the my data, originally a matrix passed through the melt() function of reshape2.
hm <- ggplot(data = molten, aes(x = factor(Var2, levels = month.abb), y=Var1, fillll=value)) + geom_raster()
hm <- hm + scale_fill_gradient2(low=LtoM(100), mid=Mid, high=MtoH(100))
hm <- hm + labs(fill='% Return')
hm <- hm + geom_text(aes(label=paste(sprintf("%.1f %%", value))), size = 4)
hm <- hm + scale_y_continuous(breaks = 1985:2013)
hm <- hm + xlab(label = NULL) + ylab(label = NULL)
hm <- hm + theme_bw()
hm <- hm + theme(axis.text.x = element_text(size = 10, hjust = 0, vjust = 0.4, angle=90))
It's not very concise, but I think this should do what you need.
You didn't provide a data set, so I just made some up. Also, the LtoM and MtoH functions are not included in any R package I could find, so I did a quick Google search and found them here
The following code produces a plot hm2 with facets to make the "Mean Return" row at the bottom:
require(reshape2)
require(ggplot2)
# Random data
set.seed(100)
casted = data.frame(Var1 = rep(1985:2013, times=12), Var2 = rep(month.abb, each=29), return = rnorm(12*29, 0, 9))
molten = melt(casted, id.vars = c("Var1", "Var2"))
LtoM <-colorRampPalette(c('red', 'yellow' ))
Mid <- "snow3"
MtoH <-colorRampPalette(c('lightgreen', 'darkgreen'))
# Averages
monthly.avg = cbind(Var1 = rep("Mean", 12), dcast(molten, Var2 ~ ., mean))
colnames(monthly.avg)[3] = "Mean"
molten2 = merge(molten, melt(monthly.avg), all.x = TRUE, all.y = TRUE)
# New plot
hm2 =
ggplot(data = molten2, aes(x = factor(Var2, levels = month.abb), y=Var1, fill=value)) +
geom_raster() +
scale_fill_gradient2(low=LtoM(100), mid=Mid, high=MtoH(100)) +
labs(fill='% Return') +
geom_text(aes(label=paste(sprintf("%.1f %%", value))), size = 4) +
xlab(label = NULL) + ylab(label = NULL) +
theme_bw() +
theme(axis.text.x = element_text(size = 10, hjust = 0, vjust = 0.4, angle=90)) +
facet_grid(variable ~ ., scales = "free_y", space = "free_y") + # grid layout
theme(strip.background = element_rect(colour = 'NA', fill = 'NA'), strip.text.y = element_text(colour = 'white')) # remove facet labels
which gives the following plot:
How about this:
I created a grid to mock up your data
Main changes, are to precalculate the aggregate and "spacer" data rows, and add to molten,
then add scale_y_discrete so you can label the rows,
then make sure the format works for the grey spacer bar with no % label (comments in code)
Easier in future if you include the data (or a sample) in the question
require(ggplot2)
molten<-expand.grid(c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"),1985:2013,0)
colnames(molten)<-c("Var2","Var1","value")
molten$value=(runif(nrow(molten))*60)-30
#create means
means<-aggregate(molten[,c(1,3)], by=list(molten$Var2),FUN=mean, na.rm=TRUE)
colnames(means)<-c("Var2","Var1","value")
means$Var1<-"MEANS"
#create spacer bar
spacer<-means
spacer$Var1<-" "
spacer$value<-NA
#append them to the data
molten<-rbind(molten,spacer,means)
hm <- ggplot(data = molten, aes(x = Var2, y=Var1, fill=value)) +
geom_raster() +
# replaced your functions for ease of use
scale_fill_gradient2(low="red", mid="yellow", high="green",na.value="grey") +
labs(fill='% Return') +
# don't format the NA vals with %, return blank
geom_text(aes(label=ifelse((is.na(value)),"",paste(sprintf("%.1f %%", value)))), size = 4) +
# make the scale discrete to add labels and enforce order (use a blank space for the spacer)
scale_y_discrete(limits = c("MEANS"," ",1985:2013)) +
xlab(label = NULL) + ylab(label = NULL) +
theme_bw() +
theme(axis.text.x = element_text(size = 10, hjust = 0, vjust = 0.4, angle=90))
hm
I'm trying to annotate a bar chart with the percentage of observations falling into that bucket, within a facet. This question is very closely related to this question:
Show % instead of counts in charts of categorical variables but the introduction of faceting introduces a wrinkle. The answer to the related question is to use stat_bin w/ the text geom and then have the label be constructed as so:
stat_bin(geom="text", aes(x = bins,
y = ..count..,
label = paste(round(100*(..count../sum(..count..)),1), "%", sep="")
)
This works fine for an un-faceted plot. However, with facets, this sum(..count..) is summing over the entire collection of observations without regard for the facets. The plot below illustrates the issue---note that the percentages do not sum to 100% within a panel.
Here the actually code for the figure above:
g.invite.distro <- ggplot(data = df.exp) +
geom_bar(aes(x = invite_bins)) +
facet_wrap(~cat1, ncol=3) +
stat_bin(geom="text", aes(x = invite_bins,
y = ..count..,
label = paste(round(100*(..count../sum(..count..)),1), "%", sep="")
),
vjust = -1, size = 3) +
theme_bw() +
scale_y_continuous(limits = c(0, 3000))
UPDATE: As per request, here's a small example re-producing the issue:
df <- data.frame(x = c('a', 'a', 'b','b'), f = c('c', 'd','d','d'))
ggplot(data = df) + geom_bar(aes(x = x)) +
stat_bin(geom = "text", aes(
x = x,
y = ..count.., label = ..count../sum(..count..)), vjust = -1) +
facet_wrap(~f)
Update geom_bar requires stat = identity.
Sometimes it's easier to obtain summaries outside the call to ggplot.
df <- data.frame(x = c('a', 'a', 'b','b'), f = c('c', 'd','d','d'))
# Load packages
library(ggplot2)
library(plyr)
# Obtain summary. 'Freq' is the count, 'pct' is the percent within each 'f'
m = ddply(data.frame(table(df)), .(f), mutate, pct = round(Freq/sum(Freq) * 100, 1))
# Plot the data using the summary data frame
ggplot(data = m, aes(x = x, y = Freq)) +
geom_bar(stat = "identity", width = .7) +
geom_text(aes(label = paste(m$pct, "%", sep = "")), vjust = -1, size = 3) +
facet_wrap(~ f, ncol = 2) + theme_bw() +
scale_y_continuous(limits = c(0, 1.2*max(m$Freq)))