I have this simple example of a boxplot:
# Day numbers of each observation (day 1 is the reference date).
date.numeric <- c(98, 105, 110, 120, 75, 35, 200, 167, 365, 425, 400, 398)
# Age class per observation: five juveniles followed by seven adults.
age.class <- rep(c("juv", "ad"), times = c(5, 7))
mytable <- data.frame(date.numeric, age.class)
# Box plot of the raw day numbers, one box per age class.
ggplot(data = mytable, mapping = aes(x = age.class, y = date.numeric)) +
  geom_boxplot()
My variable date.numeric is depicted as numbers in the plot, in which date number 1 represents date 1/1/2015 (reference date). How can I change the y-axis to show dates in format "month-year" instead of the numeric format?
try as.Date()
library(ggplot2)
# Day numbers of each observation; day 1 is 1 January 2015.
date.numeric <- c(98, 105, 110, 120, 75, 35, 200, 167, 365, 425, 400, 398)
age.class <- c("juv", "juv", "juv", "juv", "juv", "ad", "ad", "ad", "ad", "ad", "ad", "ad")
mytable <- data.frame(date.numeric, age.class)
# as.Date() treats `origin` as day 0, so the origin has to be the day *before*
# the reference date for day 1 to land on 2015-01-01.
mytable$date <- as.Date(mytable$date.numeric, origin = "2014-12-31")
# Box plot per age class with the date axis labelled as "month-year"
# (for example "Apr-2015"), which is the format the question asks for.
ggplot(mytable, aes(x = age.class, y = date)) +
  geom_boxplot() +
  scale_y_date(date_labels = "%b-%Y")
Created on 2018-07-17 by the reprex package (v0.2.0.9000).
try creating a date offset variable and add that to your y-axis.
# Day number 1 is 2015-01-01, so the zero point of the offset is the day
# before the reference date; adding a day number then gives the right date.
date.start <- as.Date("2015-01-01") - 1
date.numeric <- c(98, 105, 110, 120, 75, 35, 200, 167, 365, 425, 400, 398)
age.class <- c("juv", "juv", "juv", "juv", "juv", "ad", "ad", "ad", "ad", "ad", "ad", "ad")
mytable <- data.frame(date.numeric, age.class)
# Adding the Date offset turns the day numbers into dates; the explicit label
# format makes the axis read "Apr 2015", "Jul 2015", and so on.
ggplot(mytable, aes(x = age.class, y = date.numeric + date.start)) +
  geom_boxplot() +
  scale_y_date(date_labels = "%b %Y")
The axis would then look like Apr 2015, etc.
Related
I have a problem with the order of the x-axis in my plot.
Download the data from the website of the Central Bank of Perú (GDP by quarter).
library(jsonlite)
library(rstudioapi)
library(ggplot2)
library(data.table)
# Series code and the API URL for that series over the requested date range.
PBI <- "PN02635BQ"
URL3 <- sprintf(
  "%s%s/json/2018-1-1/2021-7-31",
  "https://estadisticas.bcrp.gob.pe/estadisticas/series/api/",
  PBI
)
Use jsonlite to download the data:
# Fetch the series and keep its per-period records as a data.table.
l_json <- jsonlite::fromJSON(URL3)
dt_PBI <- data.table(l_json$periods)

# Show the class of every column.
sapply(dt_PBI, class)

# Convert `values` to numeric (rounded to 4 decimals), then rename both columns.
dt_PBI[, values := round(as.numeric(values), 4)]
setnames(dt_PBI, c("Quarter", "Millions"))
As you can see, the x-axis of the plot is not in chronological order.
# Scatter plot of the quarterly values.
ggplot(data = dt_PBI, mapping = aes(x = Quarter, y = Millions)) +
  geom_point()
The x-axis is not in order because your character column becomes a factor and is sorted by alphanumeric ordering. So in your case "T1.19" would sort before "T2.18". To fix this, use library forcats and sort the factor based on the year. I split this out into its own column ("year") to make it clear what was being accomplished here.
library(jsonlite)
library(rstudioapi)
library(ggplot2)
library(data.table)
library(forcats)
# Series code and the API URL for that series over the requested date range.
PBI <- "PN02635BQ"
URL3 <- paste0(
  "https://estadisticas.bcrp.gob.pe/estadisticas/series/api/",
  PBI, "/json/2018-1-1/2021-7-31"
)

# Download the series and keep its per-period records.
l_json <- jsonlite::fromJSON(URL3)
dt_PBI <- data.table(l_json$periods)
sapply(dt_PBI, class)
dt_PBI[, values := round(as.numeric(values), 4)]
colnames(dt_PBI) <- c("Quarter", "Millions")

# Labels look like "T1.18": quarter number, then a two-digit year.
# Order the factor levels on an explicit year-then-quarter key so the result
# does not depend on how ties within a year happen to be broken.
dt_PBI$year <- substr(dt_PBI$Quarter, 4, 5)
quarter_no <- substr(dt_PBI$Quarter, 2, 2)
dt_PBI$Quarter <- fct_reorder(
  dt_PBI$Quarter,
  as.integer(dt_PBI$year) * 10L + as.integer(quarter_no),
  .fun = min
)

ggplot(dt_PBI, aes(x = Quarter, y = Millions)) +
  geom_point()
I am trying to create a plot in R using Geom_Segment. I am stuck with an error that says I need to input yend but I am inputting it already... this is my code:
library(ggplot2)
library(data.table)
library(magrittr)
# Question code, kept as posted: tries to draw one segment per audit.
# NOTE(review): From and To are both overwritten with today's date here, so
# each segment would start and end on the same day.
dataset$From<-Sys.Date()
# NOTE(review): the result of format() is not assigned, so this line does not
# change dataset$From (same for dataset$To below).
format(dataset$From, format="%Y-%m-%dT%H:%M:%OS")
dataset$To<-Sys.Date()
format(dataset$To, format="%Y-%m-%dT%H:%M:%OS")
# NOTE(review): `datetime_start` is not created in this snippet - confirm it
# is a column of `dataset`. The geom_segment() layer supplies its own
# x/xend/y/yend mapping.
ggplot(dataset, aes(x=datetime_start, y=dataset$Audit_Title,
color=dataset$Employee_Name)) +
geom_segment(aes(x=dataset$From,xend=dataset$To,y=dataset$Audit_Title,yend=dataset$Audit_Title),size=20)+
scale_colour_discrete(guide=guide_legend(override.aes=list(size=15))) +
ggtitle("Audit by Employee Timeline") + xlab("") + ylab("") + theme_bw()
SAMPLE DATA:
Here is the sample data
This is how I changed the code below to take in the data from Excel I inputted into Power BI:
library(ggplot2)
library(dplyr)
# transform into date
# NOTE(review): vars() is given dataset$From / dataset$To here, whereas the
# answer's version of this call passes the bare column names From and To.
dataset <- dataset %>%
mutate_at(vars(dataset$From, dataset$To),
.funs = function(tt) readr::parse_date(as.character(tt),
format = "%m/%d/%Y"))
# One horizontal segment per row: From -> To on x, the employee on y.
ggplot(dataset)+
geom_segment(aes(x=dataset$From, xend=dataset$To,
y=dataset$Employee_Name, yend=dataset$Employee_Name))
First of all, ideally you would share your data as a dput(dataset). If you can't share real data, you should make a minimal reproducible example and share that. See here
Here's your data
library(ggplot2)
library(dplyr)
# Sample data: one audit per row (start, end, audit title, employee).
df <-
  read.table(
    text =
      "01/03/2020 03/16/2020 Supply_Chain John_Smith
05/08/2020 08/20/2020 Business_Unit Karen_Scott"
  )
names(df) <- c("From", "To", "Audit_Title", "Employee_Name")

# transform into date
# across() replaces the superseded mutate_at(); base as.Date() parses the
# month/day/year strings, so no extra package is needed for this step.
df <- df %>%
  mutate(across(c(From, To), ~ as.Date(as.character(.x), format = "%m/%d/%Y")))
Now do the actual plot by selecting the proper x xend and having y be the employee (y=yend).
# One horizontal segment per audit: From -> To on x, the employee on y.
ggplot(data = df) +
  geom_segment(
    mapping = aes(
      x = From, xend = To,
      y = Employee_Name, yend = Employee_Name
    )
  )
Which produces
If you want fancy colors, labels and stuff go ahead and check the proper documentation for ggplot. See here
I have a huge data set containing bacteria samples (4 types of bacteria) from 10 water resources from 2010 until 2019. some values are missing so we need to not include them in the plot or analysis.
I want to plot a time series for each type of bacteria for each resource for all years.
What is the best way to do that?
library("ggplot2")
# Question code, kept as posted: read the tab-separated file and try to plot
# one of its columns over time.
BactData= read.csv('Råvannsdata_Bergen_2010_2018a.csv', sep='\t',header=TRUE)
summary(BactData,na.rm = TRUE)
# NOTE(review): no object named `df` is created in this snippet (the data was
# read into `BactData`); this is the line the reported error refers to.
df$Date = as.Date( df$Date, '%d/%m/%Y')
#require(ggplot2)
# NOTE(review): `data = df` again refers to `df`, while the y aesthetic reads
# the column from `BactData`.
ggplot( data = df, aes( Date,BactData$Svartediket_CB )) + geom_line()
#plot(BactData$Svartediket_CB,col='brown')
# Base plots of three further columns; only the y values are passed.
plot(BactData$Svartediket_CP,col='cyan')
plot(BactData$Svartediket_EC,col='magenta')
plot(BactData$Svartediket_IE,col='darkviolet')
using plot is not satisfactory because the x axis is just numbers not dates . Tried to use ggplot but got an error message. I am beginner in R.
Error message
Error in df$Date : object of type 'closure' is not subsettable
Data as CSV file with tab delimiter
This will do the trick
# Read the tab-separated file, keeping text columns as character.
# FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
BactData <- read.csv(
  "Råvannsdata_Bergen_2010_2018a.csv",
  sep = "\t", header = TRUE, stringsAsFactors = FALSE
)
# Name the first column "Date" so it can be referred to below.
colnames(BactData)[1] <- "Date"
library(lubridate)
BactData$Date <- dmy(BactData$Date) # converts strings to date class
# Line plot of one column against the parsed dates.
ggplot(data = BactData, aes(Date, Svartediket_CB)) +
  geom_line()
You can filter for any year using dplyr with lubridate. For example, 2017:
library(dplyr)
# Keep only the 2017 rows, then draw the same line plot.
BactData %>%
  filter(year(Date) == 2017) %>%
  ggplot(mapping = aes(x = Date, y = Svartediket_CB)) +
  geom_line()
Or for two years
library(dplyr)
# Keep the rows from 2017 and 2018; %in% tests membership in a set of years
# and extends to more years without chaining further `==` comparisons.
BactData %>%
  filter(year(Date) %in% c(2017, 2018)) %>%
  ggplot(aes(Date, Svartediket_CB)) +
  geom_line()
I am trying to add ticks to my x-axis in this graph to show all the months of the year:
My code is as follows:
library(ggplot2)
library(scales)
# Semi-transparent scatter of the readings over time plus a smoothed trend.
p <- ggplot(data = df_test, mapping = aes(x = time, y = reading))
p +
  geom_point(alpha = 0.25) +
  geom_smooth()
I have tried to use scale_x_date but have come across the following error:
Error: Invalid input: date_trans works with objects of class Date only
Here's the data frame I'm using:
hour reading date time
1 53 1/1/15 2015-01-01 01:00:00
2 55 1/1/15 2015-01-01 02:00:00
3 56 1/1/15 2015-01-01 03:00:00
The class of my time variable:
class(df_test$time)
"POSIXct" "POSIXt"
Use scale_x_date(breaks="month", labels=date_format("%b")). Here's an example.
library(quantmod)
# Download the SP500 series from FRED and keep the rows from 2015 onwards.
sp500 <- getSymbols("SP500", src = "FRED", auto.assign = FALSE)
sp500 <- sp500["2015-01-01::"]
# Turn the series into a data frame with its index as a POSIXct `time` column.
sp500 <- data.frame(
  time = as.POSIXct(index(sp500), origin = "1970-01-01"),
  sp500
)
class(sp500$time)
# [1] "POSIXct" "POSIXt"
library(ggplot2)
library(scales) # for date_format(...)
# scale_x_date() needs a Date, so the POSIXct column is converted in aes().
# date_breaks / date_labels are the documented arguments for "one tick per
# month, labelled with the abbreviated month name" and replace the superseded
# date_format() helper.
ggplot(sp500, aes(x = as.Date(time), y = SP500)) +
  geom_line() +
  scale_x_date(date_breaks = "1 month", date_labels = "%b")
You are trying to use a scale specific for Date on a POSIXct object. The solution is to cast the POSIXct object to a date using as.Date:
> Sys.time()
[1] "2015-09-16 09:52:42 CEST"
> as.Date(Sys.time())
[1] "2015-09-16"
To do this on your data.frame, I recommend using the dplyr package:
# as.Date() on a POSIXct value converts in UTC by default, which can move a
# timestamp near midnight onto the neighbouring day. format() uses the
# object's own time zone, so the calendar date is preserved.
df_test <- df_test %>% mutate(time = as.Date(format(time, "%Y-%m-%d")))
Combining the answers of both @PaulHiemstra and @jihoward, I've managed to figure out an answer.
First using the dplyr library to rework the data:
library(dplyr)
# as.Date() on a POSIXct value converts in UTC by default, which can move a
# timestamp near midnight onto the neighbouring day. format() uses the
# object's own time zone, so the calendar date is preserved.
df_test1 <- df_test %>% mutate(time = as.Date(format(time, "%Y-%m-%d")))
Then using scale_x_date:
library(ggplot2)
library(scales)
# Scatter of the readings with one x-axis tick per month, labelled with the
# abbreviated month name.
p <- ggplot(data = df_test1, mapping = aes(x = time, y = reading))
p +
  geom_point(alpha = 0.25) +
  scale_x_date(breaks = "month", labels = date_format("%b"))
Gives the result:
Since the most recent answers (2015) date_format() is deprecated (I believe). Replace it with label_date() or scales::label_date(). It may not be loaded in your namespace but should come with ggplot so scales:: might be necessary.
Here's a copy and paste of the accepted answer by @timothyylim with the change.
library(dplyr)
# as.Date() on a POSIXct value converts in UTC by default, which can move a
# timestamp near midnight onto the neighbouring day. format() uses the
# object's own time zone, so the calendar date is preserved.
df_test1 <- df_test %>% mutate(time = as.Date(format(time, "%Y-%m-%d")))
library(ggplot2)
library(scales)
# Same plot as before, with label_date() producing the month labels.
p <- ggplot(data = df_test1, mapping = aes(x = time, y = reading))
p +
  geom_point(alpha = 0.25) +
  scale_x_date(breaks = "month", labels = scales::label_date("%b"))
I am always struggling with this, so I think it is finally time to ask for some help...
I tried to make a reproducible example, but for some reason I cannot get my x$monthday in the %m-%d format :(.
# Question code, kept as posted: daily dates from 2012-10-01 to 2015-03-01.
x<-data.frame(seq(as.POSIXct('2012-10-01'), as.POSIXct('2015-03-01'), by= "day"))
names(x)<- "date"
x$month<- months(x$date)
# NOTE(review): x$date is already POSIXct, so this call presumably returns the
# full date-times rather than a month-day value - this is the step the asker
# reports as not working.
x$monthday<- as.POSIXct(x$date, format= "%m-%d")
# Keep the rows whose month name is October through February.
x1<- x[x$month== 'October' |x$month== 'November' | x$month== 'December' |x$month== 'January'|x$month== 'February', ]
# Use the row index as a stand-in measurement.
y<- 1: nrow(x1)
x2<-cbind(x1, y)
# Mean of y per distinct monthday value, then plot the means.
x3<- aggregate(list(y=x2$y), list(monthday=x2$monthday), mean)
plot(x3$monthday, x3$y)
The date has the format of %m/%d and is of a time series from October-March.
R orders the axis beautifully from January to December, which causes a big gap in between, because my data range from October-March.
How can I make my x axis order in the form from October-March?
Thank you very much in advance.
library(dplyr)
library(ggplot2)
library(lubridate)
# Fake data
dat <- data.frame(
  date = seq(as.POSIXct("2012-10-01"), as.POSIXct("2015-03-01"), by = "day")
)
set.seed(23)
dat$temperature <- cumsum(rnorm(nrow(dat)))

# Subset to October - February. Month numbers are compared instead of
# months() against month.name, so the filter does not depend on the session
# locale (months() returns localized names, month.name is always English).
dat <- dat[month(dat$date) %in% c(1:2, 10:12), ]

# Calculate mean daily temperature per calendar day, then attach a fake year
# (2014 for Oct-Dec, 2015 for Jan-Feb) so the days sort from October onwards.
# `.groups = "drop"` returns an ungrouped result.
dat <- dat %>%
  group_by(Month = month(date), Day = day(date)) %>%
  summarise(dailyMeanTemp = mean(temperature), .groups = "drop") %>%
  mutate(newDate = as.Date(ifelse(Month %in% 10:12,
    paste0("2014-", Month, "-", Day),
    paste0("2015-", Month, "-", Day)
  )))
The mutate function above creates a fake year, only so that we can keep the dates in "date" format and get them ordered from October to March. There's probably a better way to do it (maybe a function in the zoo or xts packages), but this seems to work.
# Line-and-point plot of the daily means against the fake-year dates.
ggplot(data = dat, mapping = aes(x = newDate, y = dailyMeanTemp)) +
  geom_line() +
  geom_point() +
  labs(x = "Month", y = "Mean Temperature")
Or, in base graphics:
# Base-graphics equivalent: daily means against the fake-year dates.
plot(x = dat$newDate, y = dat$dailyMeanTemp)