I am trying to order the facets of a ggplot graph built with facet_grid. Example:
Sorry, I know that my data is not logical, but it is good enough to show my problem with the facets.
My real data does not come from this code; I only use the block below to generate data for this example:
################ only to get data in my example ######################
# Build a small reproducible data set: one observation every six months
# from 2010-01-01 to 2013-01-01.
set.seed(12345)
Date <- seq(as.Date("2010/1/1"), as.Date("2013/1/1"), "6 months")
Y <- rnorm(n = length(Date), mean = 100, sd = 1)
df <- data.frame(Date, Y)
df$Year <- format(df$Date, "%Y")
# Derive the semester from the month number rather than from the "%b"
# abbreviation: "%b" is locale-dependent (e.g. "ene."/"jul." vs "Jan"/"Jul"),
# so matching on its text only labels the semesters under one locale.
df$Sem <- ifelse(as.integer(format(df$Date, "%m")) <= 6L, "1S", "2S")
# Month-year label; its month part is still locale-dependent.
df$MonthYear <- format(df$Date, "%b-%Y")
############## o #################
Variables that are in my real data are:
Sem, Semester and Y
I extract the year to sort in my second try:
# Pull the four-digit year out of MonthYear, then glue it onto the semester
# code to form the Semester label.
df <- mutate(df, extract_year = sub(".*(\\d{4}).*", "\\1", MonthYear))
df[["Semester"]] <- paste0(df[["Sem"]], df[["extract_year"]])
# Attempt 1: turn Semester into an ordered factor before faceting (this did
# not give the wanted order).
# NOTE(review): the levels are the unique labels sorted as strings and then
# reversed, so they are ordered by the leading semester code before the year,
# not chronologically.
df2 <- within(df,Semester <- ordered(Semester, levels = rev(sort(unique(Semester)))))
# One facet row per Semester value.
df2 %>%
ggplot(aes(y = Y )) +
geom_bar() +
facet_grid(Semester~.)
I also tried:
# Attempt 2: sort the rows by the extracted year before plotting.
# NOTE(review): arrange() only reorders rows; Semester in df2 is still an
# ordered factor, so its level order is presumably what the facets keep
# following -- confirm.
df2 %>%
arrange(extract_year) %>%
ggplot(aes(y = Y )) +
geom_bar() +
facet_grid(Semester~.)
I would like to get this order:
1S2010/2S2010/1S2011/2S2011/1S2012/2S2012
Basically, you have to put the factor levels in your desired order. One option is to arrange the data in that order and then use forcats::fct_inorder to set the levels of your Semester variable to the order in which they appear.
library(ggplot2)
library(dplyr)
## Build the Semester label: semester code ("1S"/"2S") followed by the year
df <- df %>%
  mutate(Sem = recode(Sem, Jan = "1S", Jul = "2S")) %>%
  mutate(Semester = paste0(Sem, Year))

# Sort the rows year-first (the regex rewrites e.g. "1S2010" as "20101S" to
# build the sort key), then freeze that row order as the factor level order
df <- arrange(df, gsub("^(\\dS)(\\d{4})$", "\\2\\1", Semester))
df$Semester <- forcats::fct_inorder(df$Semester)

# One facet row per Semester
ggplot(df, aes(y = Y)) +
  geom_bar() +
  facet_grid(Semester ~ .)
Related
# Download the country report. The URL must stay on a single line: a line
# break inside the quotes would become part of the string and break the
# address.
df <- read.csv(
  "https://raw.githubusercontent.com/ulklc/covid19-timeseries/master/countryReport/raw/rawReport.csv",
  stringsAsFactors = FALSE
)
# Sum the recovered column within each region. The data frame is passed
# directly; subset(df) without a condition returned it unchanged.
df6 <- aggregate(recovered ~ region, data = df, FUN = sum)
I calculated the number of recovered cases per region.
How do I create a line chart of this data?
If you want a line plot, you should probably keep the day variable.
# Parse the day column as Date
df[["day"]] <- as.Date(df[["day"]])
# Sum recovered for every day/region combination
df6 <- aggregate(recovered ~ day + region, data = df, FUN = sum)
library(ggplot2)
# One coloured line per region
ggplot(data = df6, mapping = aes(x = day, y = recovered, colour = region)) +
  geom_line()
Data:
# Download the country report and sum the recovered column per region
df <- read.csv(
  file = "https://raw.githubusercontent.com/ulklc/covid19-timeseries/master/countryReport/raw/rawReport.csv",
  stringsAsFactors = FALSE
)
df6 <- aggregate(recovered ~ region, data = subset(df), FUN = sum)
Plot in base R:
# One bar per region, labelled with the region name
with(df6, barplot(recovered, names.arg = region))
Plot using tidyverse:
library(tidyverse)
# geom_col() takes the bar heights directly from the recovered column
ggplot(df6, aes(x = region, y = recovered)) +
  geom_col()
Within each year, I want the bars ordered like this: A, B, C.
I have looked at this post: https://ilari.scheinin.fi/ggplot-2-0-and-the-missing-order-aesthetic/
but it doesn't work at all.
Here's my code:
# data --------------------------------------------------------------------
# Every vector below is built from quoted strings, so all columns of the
# resulting data frame hold text, including the three measures.
# NOTE(review): ID contains '7' twice -- confirm whether that is intended.
ID <- c('1','2','3','4','5','6','7','7','8','9','10','11')
TypeCourtier <- c('A','A','A','A','B','B','B','B','C','C','C','C')
année_survenance <- c('2009','2010','2011','2012','2009','2010','2011','2012','2009','2010','2011','2012')
moyenne_charge <- c('1515','1551','89754','9148','787','9848','8474','3465','7488','884','8948','8484')
mediane_charge <- c('8185','5919','20409','8979','7777','9294','87484','8488','1881','18819','8484','84444')
totalComptage <- c('9989','849444','848','684','9845','1448','9844','2151','7171','5051','3959','9896')
data <- data.frame(ID, TypeCourtier, année_survenance, moyenne_charge, mediane_charge,totalComptage)
# main --------------------------------------------------------------------
# NOTE(review): install.packages() runs on every execution of the script;
# installing once outside the script is the usual practice.
install.packages("ggplot2")
library(ggplot2)
library(magrittr)
library(dplyr)
# Make TypeCourtier an ordered factor with levels A < B < C.
data$TypeCourtier <- factor(data$TypeCourtier, levels = c("A","B","C"), ordered= TRUE)
# Sort the rows by broker type, then draw dodged bars per year, filled and
# labelled by TypeCourtier, with the legend hidden.
# NOTE(review): moyenne_charge is still text at this point, so the y axis is
# presumably treated as discrete rather than numeric -- confirm.
data %>%
ungroup() %>%
arrange(as.integer(TypeCourtier)) %>%
ggplot( aes(x=année_survenance, y = moyenne_charge, fill=TypeCourtier)) +
geom_bar(stat="identity", position=position_dodge())+
geom_text(aes(label=TypeCourtier),position = position_dodge(width = 0.9),vjust=-0.25) + theme(legend.position = "none")
The variable for the y-axis has to be numeric:
# Convert the y variable to numeric. Going through as.character() first means
# a factor column yields its labels rather than its integer codes.
data[["moyenne_charge"]] <- as.numeric(as.character(data[["moyenne_charge"]]))
I have been trying to plot min and max values of temperature. I actually wanted to plot using geom_area. My data can be downloaded from here.
library(dplyr)
library(ggplot2)
# Read the raw energy data; with stringsAsFactors = FALSE the date column
# arrives as character.
dat <- read.csv("energydata_complete.csv", stringsAsFactors = FALSE)
# Rename the sensor columns meaningfully (T* = temperature, RH_* = humidity).
dat <- dat %>%
  dplyr::rename(
    temp_kitchen = T1, temp_living = T2, temp_laundry = T3,
    temp_office = T4, temp_bath = T5, temp_build = T6, temp_iron = T7,
    temp_teen = T8, temp_parent = T9,
    hum_kitchen = RH_1, hum_living = RH_2, hum_laundry = RH_3,
    hum_office = RH_4, hum_bath = RH_5, hum_build = RH_6, hum_iron = RH_7,
    hum_teen = RH_8, hum_parent = RH_9
  )
# Parse the timestamps first: months() has no method for character vectors,
# so the month name has to be taken from the parsed date-time. A single
# as.POSIXct() call with the format replaces the strptime() + as.POSIXct()
# pair.
dat$date <- as.POSIXct(dat$date, format = "%Y-%m-%d %H:%M:%S")
# Month name of each observation, stored as a factor.
dat$month <- as.factor(months(dat$date))
I have created another dataframe with month and min and max temperature values of each room.
# Minimum and maximum temperature of each room within every month
temparature <- dplyr::summarise(
  dplyr::group_by(dat, month),
  min_temp_kitch = min(temp_kitchen),
  max_temp_kitch = max(temp_kitchen),
  min_temp_living = min(temp_living),
  max_temp_living = max(temp_living),
  min_temp_laundry = min(temp_laundry),
  max_temp_laundry = max(temp_laundry),
  min_temp_iron = min(temp_iron),
  max_temp_iron = max(temp_iron),
  min_temp_office = min(temp_office),
  max_temp_office = max(temp_office),
  min_temp_bath = min(temp_bath),
  max_temp_bath = max(temp_bath),
  min_temp_parent = min(temp_parent),
  max_temp_parent = max(temp_parent),
  min_temp_teen = min(temp_teen),
  max_temp_teen = max(temp_teen)
)
Now I am trying to plot the min and max temperature values from this dataframe for each room.
The code below did not produce any plot.
# Attempt with two stacked area layers: the monthly minimum and the monthly
# maximum kitchen temperature, both drawn against month.
# NOTE(review): month was created with as.factor(), so x is discrete here;
# presumably each level ends up as its own one-point group, which would
# explain the empty plot -- confirm.
ggplot() + geom_area(data = temparature,aes(x=month,y=min_temp_kitch), position = 'stack') +
geom_area(data = temparature,aes(x=month, y=max_temp_kitch), position = 'stack')
I then tried geom_ribbon, as below.
# Attempt with a ribbon spanning the monthly minimum to maximum kitchen
# temperature; the blue outline and alpha of 0.5 are fixed settings, not
# mapped aesthetics.
ggplot(temparature) +
geom_ribbon(aes(x=month, ymin = min_temp_kitch, ymax = max_temp_kitch), color='blue', alpha = 0.5)
This gave the following plot:
But I want something similar to this plot, with a point for each value.
Can someone please suggest how to do this?
You don't need to convert your dates to a factor, and you need to reshape the temperature data frame into long format:
library(dplyr)
library(ggplot2)
library(lubridate)
# Load the data and give the sensor columns readable names.
dat <- read.csv("energydata_complete.csv", stringsAsFactors = FALSE)
dat <- dat %>%
  rename('temp_kitchen'=T1,'temp_living'=T2,'temp_laundry'=T3,
         'temp_office'=T4,'temp_bath'=T5,'temp_build'=T6,'temp_iron'=T7,
         'temp_teen'=T8,'temp_parent'=T9,'hum_kitchen'=RH_1,'hum_living'=RH_2,
         'hum_laundry'=RH_3,'hum_office'=RH_4,'hum_bath'=RH_5,'hum_build'=RH_6,
         'hum_iron'=RH_7,'hum_teen'=RH_8,'hum_parent'=RH_9) %>%
  # Keep month as a date (each timestamp floored to the start of its month)
  # instead of a factor.
  mutate(month = floor_date(date(date), unit = 'months'))

# Minimum and maximum temperature of each room within every month.
temparature <- dat %>%
  group_by(month) %>%
  summarise(min_temp_kitch = min(temp_kitchen),
            max_temp_kitch = max(temp_kitchen),
            min_temp_living = min(temp_living),
            max_temp_living = max(temp_living),
            min_temp_laundry = min(temp_laundry),
            max_temp_laundry = max(temp_laundry),
            min_temp_iron = min(temp_iron),
            max_temp_iron = max(temp_iron),
            min_temp_office = min(temp_office),
            max_temp_office = max(temp_office),
            min_temp_bath = min(temp_bath),
            max_temp_bath = max(temp_bath),
            min_temp_parent = min(temp_parent),
            max_temp_parent = max(temp_parent),
            min_temp_teen = min(temp_teen),
            max_temp_teen = max(temp_teen))

# Long format: one row per month and statistic, with the statistic name in
# temp_min_max and its value in Temp. pivot_longer() replaces the superseded
# gather().
temp2 <- temparature %>%
  tidyr::pivot_longer(cols = -month,
                      names_to = "temp_min_max",
                      values_to = "Temp")

# Draw the kitchen minimum and maximum as two areas. position = 'identity'
# overlays them instead of stacking one on top of the other.
ggplot() +
  geom_area(data = temp2 %>%
              filter(temp_min_max %in% c('min_temp_kitch', 'max_temp_kitch')),
            aes(x = month, y = Temp, fill = temp_min_max, color = temp_min_max),
            position = 'identity')
I have a time series spanning several years. Using plot_ly, I need to plot it with mm/dd on the x-axis and one line per year on the y-axis. I have generated sample data here:
# Daily dates covering the sample period
date <- seq(as.Date("2010-11-22"), as.Date("2016-05-26"), by = "days")
# One random sales figure per date. Sizing the draw with length(date) keeps
# both columns the same length if the date range is ever changed (it is
# 2013 days for the range above).
sales <- runif(length(date), 2000, 6000)
df <- data.frame(date, sales)
I plotted this data and get this:
# A single red line of sales against date
add_lines(plot_ly(df, x = ~date), y = ~sales, color = I("red"))
Now, I tried to plot multiple y-axis using plot_ly:
# Attempt to draw two date ranges as separately coloured lines.
# NOTE(review): each logical vector is passed to add_lines() as an unnamed
# argument; presumably it is not applied as a row filter -- confirm against
# the add_lines() signature.
# NOTE(review): 2013-01-01 in the last condition is unquoted, so R evaluates
# it as arithmetic (2013 - 1 - 1) rather than as a date string.
plot_ly(df, x = ~date) %>% add_lines(y = ~sales,
df$date <= "2010-12-31",color=I("red")) %>%
add_lines(y = ~sales, df$date <= "2013-12-31" &
df$date >= 2013-01-01, color = I("green"))
but I got the wrong plot:
What's the mistake in that?
I want the plot like this:
To draw separate lines on the same graph, we have to split the data frame into groups with plotly::group_by. In your case, this is achieved by using lubridate to group by year:
library(plotly)
library(lubridate)
date <- seq(as.Date("2010-11-22"), as.Date("2016-05-26"), by = "days")
# Add some variations to distinguish lines: a per-year offset plus a slope
# over the day of the year. length(date) sizes the random draw, so it always
# matches the date vector.
sales <- runif(length(date), 10, 20) + year(date) * 5 + yday(date) / 5
df <- data.frame(date, sales)
# Group by calendar year and put the day of the year on the x axis, with the
# line colour mapped to the year.
df %>%
  mutate(year = year(date)) %>%
  group_by(year) %>%
  plot_ly(x = ~ yday(date)) %>%
  add_lines(y = ~ sales, color = ~ factor(year))
I have a large data frame of several variables (around 50) with first column as date and second column id.
My data roughly look like this:
# Sample data: four timestamps, each observed for ids 1 to 3, with two
# measured variables a and b
df <- data.frame(
  date = rep(
    c("01-04-2001 00:00", "01-05-2001 00:00",
      "01-06-2001 00:00", "01-07-2001 00:00"),
    each = 3
  ),
  id = rep(c(1, 2, 3), times = 4),
  a = as.numeric(1:12),
  b = c(2, 2.5, 3, 3.2, 4, 4.6, 5, 5.6, 8, 8.9, 10, 10.6)
)
I want a time series plot with one line for each of the three ids in the same graph, with variables a and b in separate graphs.
I tried ggplot but it didn't work. Please help me.
Do you mean something like this?
library(reshape)
library(lattice)
# Long format: one row per date, id and measured variable (a or b)
df2 <- melt(df, id.vars = c("date", "id"), measure.vars = c("a", "b"))
# One panel per variable, one line per id. The argument names are spelled out
# in full instead of relying on partial matching (`group`, `t`).
xyplot(value ~ date | variable, groups = id, data = df2, type = "l")
Addendum
# The following is from a comment by jbaums.
# It will create a single plot/file for each variable of df2: with a 1 x 1
# layout every panel goes on its own page, and the %02d in the file name is
# filled in with the page number.
png("plots%02d.png")
# print() draws the trellis object explicitly, so the files are also written
# when this code is source()d or run inside a function. Argument names are
# spelled out in full rather than partially matched.
print(
  xyplot(value ~ date | variable, groups = id, data = df2, type = "l",
         layout = c(1, 1),
         scales = list(alternating = FALSE, tck = 1:0))
)
dev.off()
You can also add relation='free' to scales so that y-axis limits are calculated separately for each plot.
Edit: After reading the comments, maybe you should try something like this:
library(tidyr)
# Long format: one row per date, id and variable. pivot_longer() replaces the
# superseded gather().
df2 <- pivot_longer(df, cols = -c(date, id),
                    names_to = "variable", values_to = "value")
vars <- unique(df2$variable)
library(ggplot2)
# Write one PNG per variable, named after the variable and using it as the
# y-axis label. Iterating over the values avoids the 1:length() pitfall when
# vars is empty.
for (v in vars) {
  p <- ggplot() +
    geom_line(data = subset(df2, variable == v),
              aes(date, value, group = id, color = factor(id))) +
    ylab(v)
  # Pass the plot explicitly: ggsave() is not a layer and cannot be added to
  # a plot with `+`.
  ggsave(filename = paste0(v, ".png"), plot = p)
}
This should save a PNG for each variable in your dataframe (and will change y label of every plot to variable name, as per your request)
Here's how to do it in ggplot, using the tidyr package to get it in the right format:
library(ggplot2)
library(tidyr)
library(dplyr)
# Sample data: four timestamps, each observed for ids 1 to 3, with two
# measured variables a and b
df <- data.frame(
  date = rep(
    c("01-04-2001 00:00", "01-05-2001 00:00",
      "01-06-2001 00:00", "01-07-2001 00:00"),
    each = 3
  ),
  id = rep(c(1, 2, 3), times = 4),
  a = as.numeric(1:12),
  b = c(2, 2.5, 3, 3.2, 4, 4.6, 5, 5.6, 8, 8.9, 10, 10.6)
)
Then using dplyr's group_by and do functions, we can save multiple plots.
# Reshape to long format, then write one PNG per variable.
df %>%
  pivot_longer(cols = -c(date, id),
               names_to = "variable", values_to = "value") %>%
  mutate(id = factor(id)) %>%
  group_by(variable) %>%
  do({
    # Inside do(), `.` is the data of the current group; its variable column
    # holds one repeated value, so take the first as the group's name.
    var_name <- as.character(.$variable[1])
    p <- ggplot(., aes(x = date, y = value, group = id, color = id)) +
      geom_line() +
      ggtitle(paste("variable =", var_name))
    out_file <- paste0(var_name, ".png")
    # Pass the plot explicitly: ggsave() is not a layer and cannot be added
    # to a plot with `+`.
    ggsave(filename = out_file, plot = p)
    # An unnamed do() expression must return a data frame; report the file
    # that was written for this group.
    data.frame(file = out_file)
  })