ggplot x axis for year - r

The following are my r code for the scatterplot.
library(tidyverse)
Pop <-c(24039274, 24854892, 25718048, 26624820, 27568436, 28543940, 29550662, 30590487, 31663896, 32771895)
Popu <- data.frame(Year = 2000:2009, lpop = log2(Pop))
ggplot(Popu, aes(Year, lpop)) +
geom_point(size=3, col = "steelblue") +
ylab("Log2 of Population")
My question is, why the x axis reflects 2000.0, 2002.5 and so on? How do I fix this one?
Thank you in advance.

Try this:
Popu <- data.frame(Year = factor(seq(2000,2009,1)), lpop = log2(Pop))
ggplot(Popu, aes(Year, lpop)) +
geom_point(size=3, col = "steelblue") +
ylab("Log2 of Population")

Related

Edit hover label text in ggplot to show percentage

I'm trying to update the hover labels in my plot to show "Percentage: XX%" (where XX is the value of the percentage of each bar).
Here is some reproducible code:
## Data from https://data.melbourne.vic.gov.au/People/Indicators-of-quality-of-life-and-city-services-by/e6er-4cb3
library(dplyr)
library(ggplot2)
library(plotly)
data <- read.csv("Indicators_of_quality_of_life_and_city_services_by_year.csv")
head(data)
#data$Indicator.Theme
#data$Type
data <- data[,c("Indicator.Theme", "Type")]
data
que_code <- data %>% mutate(newcat = Indicator.Theme)
response <- que_code$newcat
category <- factor(que_code$Type)
textfill= "Type"
plott <- ggplot(que_code, aes(x=response, fill=category)) +
geom_bar(position="dodge", width = 0.5, aes(y = (..count..)*100/sum(..count..), label="Percentage")) + labs(fill= textfill) + xlab("Response to survey questions")+ylab("Percentage")+
scale_fill_manual(values = c("#ae3cc6", "#9630a8", "#842791", "#6d1b73", "#780e55",
"#7e0643", "#820036", "#a11635", "#bb2835", "#d93d35",
"#e74735", "#fd5634", "#fe7c5b", "#ffa182"), drop=FALSE) + scale_x_discrete(drop=FALSE)+
theme(axis.text.x = element_text(size = 10, angle = 30))
plott <- ggplotly(plott, tooltip="y")
What my plot looks like
I would like to change the variable name in the hover label from (..count..)*100/sum(..count..) to "Percentage".
Any help would be greatly appreciated, I've been struggling with this for a while ahaha
One way would be to pre-calculate the values to plot instead of using (..count..)*100/sum(..count..). This would also need to change geom_bar to geom_col.
library(dplyr)
library(ggplot2)
library(plotly)
plott <- que_code %>%
count(Type, Indicator.Theme, name = 'Percentage') %>%
mutate(Percentage = prop.table(Percentage) * 100) %>%
ggplot(aes(x=Indicator.Theme, fill=Type)) +
geom_col(position="dodge", width = 0.5, aes(y = Percentage)) +
labs(fill= textfill) +
xlab("Response to survey questions")+
ylab("Percentage") +
scale_fill_manual(values = c("#ae3cc6", "#9630a8", "#842791", "#6d1b73", "#780e55",
"#7e0643", "#820036", "#a11635", "#bb2835", "#d93d35",
"#e74735", "#fd5634", "#fe7c5b", "#ffa182"), drop=FALSE) +
scale_x_discrete(drop=FALSE)+
theme(axis.text.x = element_text(size = 10, angle = 30))
plott <- ggplotly(plott, tooltip="y")
plott

Using geom_segment to connect a line to the y-axis

I want to use geom_segment to make a line intercept the y-axis, the problem I am experiencing becomes readily apparent. Starting off with sample data:
dat <- data.frame(Yaxis = c(100,200,300,400,500,600,700,800), Year = c(2012,2013,2014,2015,2016,2017,2018,2019))
p <- ggplot(data=dat, aes(x=Year)) + geom_line(aes(y=Yaxis))
and adding a geom_segment layer produces:
p + geom_segment(aes(x=0,xend=2012,y=75,yend=100))
my attempts to amend this haven't worked out so far:
p + geom_segment(aes(x=0,xend=2012,y=75,yend=100)) + scale_x_continuous(expand= c(0,0), breaks = c(2012,2013,2014,2015,2016,2017,2018,2019))
For reference, here is a crude drawing of what I intend the graph to look like:
Thanks to #Sathish 's answer, I was able to create the graph I intended using
ggplot(data=dat, aes(x=as.Date(paste(Year,'-01-01', sep=""), "%Y-%m-%d"), y=Yaxis)) +
geom_line() +
geom_segment(aes(
x=as.Date("2011-01-01", "%Y-%m-%d"),
xend=as.Date("2012-01-01", "%Y-%m-%d"),
y=75,
yend=100), color = "red") +
scale_x_date(expand = c(0,0))
Convert Year column to Date class.
library('ggplot2')
ggplot(data=dat, aes(x=as.Date(paste(Year, '-01-01', sep = ""), "%Y-%m-%d"), y=Yaxis)) +
geom_line() +
geom_segment(aes(x=as.Date("2012-01-01", "%Y-%m-%d"),xend=as.Date("2020-01-01", "%Y-%m-%d"),y=75,yend=100), color = "red") +
xlab(label = "Year")

How to highlight a column in ggplot2

I have the following graph and I want to highlight the columns (both) for watermelons as it has the highest juice_content and weight. I know how to change the color of the columns but I would like to WHOLE columns to be highlighted. Any idea on how to achieve this? There doesn't seems to be any similar online.
fruits <- c("apple","orange","watermelons")
juice_content <- c(10,1,1000)
weight <- c(5,2,2000)
df <- data.frame(fruits,juice_content,weight)
df <- gather(df,compare,measure,juice_content:weight, factor_key=TRUE)
plot <- ggplot(df, aes(fruits,measure, fill=compare)) + geom_bar(stat="identity", position=position_dodge()) + scale_y_log10()
An option is to use gghighlight
library(gghighlight)
ggplot(df, aes(fruits,measure, fill = compare)) +
geom_col(position = position_dodge()) +
scale_y_log10() +
gghighlight(fruits == "watermelons")
In response to your comment, how about working with different alpha values
ggplot(df, aes(fruits,measure)) +
geom_col(data = . %>% filter(fruits == "watermelons"),
mapping = aes(fill = compare),
position = position_dodge()) +
geom_col(data = . %>% filter(fruits != "watermelons"),
mapping = aes(fill = compare),
alpha = 0.2,
position = position_dodge()) +
scale_y_log10()
Or you can achieve the same with one geom_col and a conditional alpha (thanks #Tjebo)
ggplot(df, aes(fruits, measure)) +
geom_col(
mapping = aes(fill = compare, alpha = fruits == 'watermelons'),
position = position_dodge()) +
scale_alpha_manual(values = c(0.2, 1)) +
scale_y_log10()
You could use geom_area to highlight behind the bars. You have to force the x scale to discrete first which is why I've used geom_blank (see this answer geom_ribbon overlay when x-axis is discrete) noting that geom_ribbon and geom_area are effectively the same except geom_area always has 0 as ymin
#minor edit so that the level isn't hard coded
watermelon_level <- which(levels(df$fruits) == "watermelons")
AreaDF <- data.frame(fruits = c(watermelon_level-0.5,watermelon_level+0.5))
plot <- ggplot(df, aes(fruits)) +
geom_blank(aes(y=measure, fill=compare))+
geom_area(data = AreaDF, aes( y = max(df$measure)), fill= "yellow")+
geom_bar(aes(y=measure, fill=compare),stat="identity", position=position_dodge()) + scale_y_log10()
Edit to address comment
If you want to highlight multiple fruits then you could do something like this. You need a data.frame with where you want the geom_area x and y, including dropping it to 0 between. I'm sure there's slightly tidier methods of getting the data.frame but this one works
highlight_level <- which(levels(df$fruits) %in% c("apple", "watermelons"))
AreaDF <- data.frame(fruits = unlist(lapply(highlight_level, function(x) c(x -0.51,x -0.5,x+0.5,x+0.51))),
yval = rep(c(1,max(df$measure),max(df$measure),1), length(highlight_level)))
AreaDF <- AreaDF %>% mutate(
yval = ifelse(floor(fruits) %in% highlight_level & ceiling(fruits) %in% highlight_level, max(df$measure), yval)) %>%
arrange(fruits) %>% distinct()
plot <- ggplot(df, aes(fruits)) +
geom_blank(aes(y=measure, fill=compare))+
geom_area(data = AreaDF, aes(y = yval ), fill= "yellow")+
geom_bar(aes(y=measure, fill=compare),stat="identity", position=position_dodge()) + scale_y_log10()
plot

Time series and legend with ggplot2

Thi is my data:
x <- c("22-01-16","26-01-16","28-01-16","01-02-16","05-02-16","16-02-16","17-03-16","18-03-16","04-04-16","05-04-16","06-04-16","08-04-16")
y <- c(97.14,75,54.44,70.45,110.56,66.3,178.76,171.90,419.41,424,518.63,242.17)
z <- c("ADCP","ADCP","ADCP","ADCP","ADCP","ADCP","ADCP","ADCP","ADCP","ADCP","ADCP","ADCP")
So I make the dataframe
Datos <- data.frame(x)
Datos$Caudal <- y
Datos$Tipo <- z
Datos$Fecha <- as.Date(Datos$x, "%d-%m-%y")
and plot using ggplot2
Serie_Caudal <-
ggplot(Datos, aes(Fecha, Caudal)) +
geom_line(size=1, colour="red") +
geom_point(shape=23,size=1, colour="blue",fill = "blue") +
scale_x_date(date_breaks = "1 week",labels = date_format("%d/%b"))+
xlab("Fecha") + ylab(bquote('Caudal ('*m^3~s^-1*')')) +
ggtitle("Caudales Diarios (01-06/2016)")
Serie_Caudal
I try to plot a legend but i can´t the way, i try use Melt but my data change in a way i can´t plot. Also try scale_fill_manual but the legend don´t show up. I want to know if there is a way to put a legend manualy.
The legend must show a blue point and ADCP
This shows only a blue dot.
ggplot(aes(Fecha, Caudal, colour = "ADCP"), data = Datos) +
geom_point() +
geom_point(shape=23,size=1,color="blue",fill = "blue") +
scale_color_manual(values = c("ADCP"="blue"),name = "") +
geom_line(color="red", size=1) +
scale_x_date(date_breaks = "1 week",labels = date_format("%d/%b")) +
xlab("Fecha") + ylab(bquote('Caudal ('*m^3~s^-1*')')) +
ggtitle("Caudales Diarios (01-06/2016)")

R A simple univariate plot with ggplot

I would like to do a simple graph like this:
ff<-data.frame(Freq=c(rep(10000,10),rep(100,15),rep(10,50),rep(1,100)))
plot(log(ff$Freq),type="l")
is the only option to add a x variable?
require(ggplot2)
ff$Ord <- 1:nrow(ff)
ggplot(data=ff,aes(x=Ord,y=log(Freq))) + geom_line()
thanks in advance
Here's one approach with geom_step():
library(ggplot2)
library(scales)
ggplot(ff, aes(x = seq_along(Freq), y = log10(Freq))) + geom_step(size = 1) +
labs(x = "Index", y = "Freq") +
scale_y_continuous(labels = math_format(10^.x))

Resources