Multiple line plot with error bars for data coming from two tables - r

I have two data sheets, one with the points I want to plot (each point in the first data set is an average of different measurements), and the second data containing the standard deviations for each point.
Below I have attached an R script that creates a line plot from the first data set, which works fine. With this code I can create a plot like the following:
Now I want to use the second table (standard deviations) to create a plot similar to the previous one, but also showing error bars, i.e., graphically displaying the standard deviation of each measurement, like this:
library(ggplot2)
## Builds a multi-line plot from a wide data frame and returns the ggplot
## object, so the caller can modify it further before printing or saving.
##
## data: data frame whose row names identify the plotted series and whose
##       columns hold the values; each column becomes one x-axis position.
## Returns a ggplot object with one coloured line (plus points with a distinct
## shape) per row of `data`.
makeMultipleLinePlot <- function(data) {
  # Fail loudly when the dependency is missing; require() would only return
  # FALSE and let the function break later with a less helpful error.
  if (!requireNamespace("reshape2", quietly = TRUE)) {
    stop("Package 'reshape2' is required by makeMultipleLinePlot().",
         call. = FALSE)
  }
  # Row names are not kept by melt(), so copy them into a real column first
  data$id <- rownames(data)
  # Name the id column explicitly instead of letting melt() guess it
  melted <- reshape2::melt(data, id.vars = "id")
  colnames(melted) <- c("Measurement", "Month", "Percentage")
  ggplot(
    data = melted,
    aes(x = Month, y = Percentage, color = Measurement, group = Measurement)
  ) +
    geom_line(size = 1, alpha = 0.8) +
    geom_point(size = 4, aes(shape = Measurement))
}
## Loads one tab of a Google spreadsheet as a data frame. Assumes the tab
## holds a single table.
##
## url:   address of the spreadsheet.
## sheet: id (gid) of the tab to download.
## Returns a data frame; the first line of the sheet is used as the header and
## the first column supplies the row names. Strings are kept as character.
loadTableFromGoogleSheet <- function(url, sheet) {
  # Fail loudly when the dependency is missing; require() would only return
  # FALSE and let the next call break with "could not find function".
  if (!requireNamespace("gsheet", quietly = TRUE)) {
    stop("Package 'gsheet' is required by loadTableFromGoogleSheet().",
         call. = FALSE)
  }
  csv_text <- gsheet::gsheet2text(url, sheetid = sheet, format = "csv")
  read.csv(text = csv_text, stringsAsFactors = FALSE, header = TRUE,
           row.names = 1)
}
# Google spreadsheet that holds both tabs used below
url <- "docs.google.com/spreadsheets/d/10clnt9isJp_8Sr7A8ejhKEZXCQ279wGP4sdygsit1LQ"

# Tab ids (gid): the humidity data and the standard deviations of each of its
# measurements
gid.humidity <- 2080295295
gid.humidity_sd <- 1568896731

data.humidity <- loadTableFromGoogleSheet(url, gid.humidity)
data.humidity_sd <- loadTableFromGoogleSheet(url, gid.humidity_sd)

# Build the line plot first, then write it to disk
humidity_plot <- makeMultipleLinePlot(data.humidity)
ggsave(filename = "lineplot/humidity.pdf", plot = humidity_plot)
#ggsave(filename="lineplot/humidity.pdf", plot=makeMultipleErrorPlot(data.humidity,data.humidity_sd))

This tidies the two data frames, joins them, and plots the result using geom_errorbar:
library(dplyr)
library(tidyr)
library(ggplot2)
# Long format for the plotted values: one row per (measure, month)
df <- data.humidity %>%
  mutate(measure = row.names(.)) %>%
  pivot_longer(-measure, names_to = "month", values_to = "value")

# Same reshape for the standard deviations. Only the first two characters of
# each row name are kept as the key.
# NOTE(review): presumably the sd sheet's row names carry a suffix after the
# measure name - confirm against the sheet.
df_sd <- data.humidity_sd %>%
  mutate(measure = substr(row.names(.), 1, 2)) %>%
  pivot_longer(-measure, names_to = "month", values_to = "sd")

# Join on explicit keys (no "Joining, by = ..." message) and turn `month` into
# a factor that follows the column order of the sheets; left as character,
# ggplot would sort the x axis alphabetically.
month_order <- union(colnames(data.humidity), colnames(data.humidity_sd))
dfF <- full_join(df, df_sd, by = c("measure", "month")) %>%
  mutate(month = factor(month, levels = month_order))

ggplot(dfF, aes(month, value, group = measure, color = measure)) +
  geom_line(size = 1, alpha = 0.8) +
  geom_point(aes(shape = measure)) +
  # bars span one standard deviation above and below each point
  geom_errorbar(aes(ymin = value - sd, ymax = value + sd), width = .3)

Related

how to get geom_point and legend onto line plot in R?

This is my R-script, I've been trying to include a legend onto the line plot but it isn't working? Any guidance? I also can't seem to get the geom_point() working either (I've taken the code for it out below).
library(ggsignif)
library(readxl)
library(svglite)
library(tidyverse)
library(ggplot2)
library(tidyr)
library(dplyr)
# Download the supplementary workbook to a temporary file and read the
# "ExFig.5f" sheet from it.
url <-'https://static-content.springer.com/esm/art%3A10.1038%2Fs41586-020-2850-3/MediaObjects/41586_2020_2850_MOESM10_ESM.xlsx'
temp <-tempfile()
download.file(url, temp, mode='wb')
myData <- read_excel(path=temp, sheet = "ExFig.5f")
# Drop the column names; the columns are picked by position below.
names(myData) <- NULL
view(myData)
# Take columns 1, 2, 4, 6 and 8 one at a time and remove the first row of each.
Time_post_inj <- (myData[1])
Time_post_inj <- Time_post_inj[-c(1),]
dose_450_ug <- (myData[2])
dose_450_ug <- dose_450_ug[-c(1),]
dose_150_ug <- (myData[4])
dose_150_ug <- dose_150_ug[-c(1),]
dose_100_ug <- (myData[6])
dose_100_ug <- dose_100_ug[-c(1),]
dose_50_ug <- (myData[8])
dose_50_ug <- dose_50_ug[-c(1),]
# Give each single-column piece its final column name.
colnames(Time_post_inj) <-c("Time_Post_Injection")
colnames(dose_450_ug) <-c("dose_450_µg")
colnames(dose_150_ug) <-c("dose_150_µg")
colnames(dose_100_ug) <-c("dose_100_µg")
colnames(dose_50_ug) <-c("dose_50_µg")
# Combine the pieces into one wide data frame and convert every column to numeric.
Newdata <-data.frame(Time_post_inj, dose_450_ug, dose_150_ug, dose_100_ug, dose_50_ug)
Newdata$Time_Post_Injection <-as.numeric(Newdata$Time_Post_Injection)
Newdata$dose_450_µg <-as.numeric(Newdata$dose_450_µg)
Newdata$dose_150_µg <-as.numeric(Newdata$dose_150_µg)
Newdata$dose_100_µg <-as.numeric(Newdata$dose_100_µg)
Newdata$dose_50_µg <-as.numeric(Newdata$dose_50_µg)
str(Newdata)
# One geom_line() per dose column, each overriding only y. No colour (or other
# legend-producing aesthetic) is mapped to a variable, so ggplot has nothing to
# build a legend from. hCD4_occupancy is not one of the columns created above.
# NOTE(review): presumably it is there only to label the y axis - confirm.
ggplot(data=Newdata, aes(x=Time_Post_Injection, y=hCD4_occupancy, group = 1)) + geom_line(aes(y=dose_450_µg)) + geom_line(aes(y=dose_150_µg)) + geom_line(aes(y=dose_100_µg)) + geom_line(aes(y=dose_50_µg))
# Reshape to long format, then draw one coloured line per dose.
# Newdata has to be piped into pivot_longer(), and the leading "-" means
# "every column except Time_Post_Injection".
Newdata %>%
  tidyr::pivot_longer(-Time_Post_Injection, names_to = "DOSE", values_to = "VALUE") %>%
  ggplot2::ggplot(aes(Time_Post_Injection, VALUE, group = DOSE, color = DOSE)) +
  ggplot2::geom_line()
The following is a full reprex, meaning that if you copy and paste, it will reproduce the plot exactly as below. You can see I have simplified your parsing considerably too; this starts with the url and produces the plot with a lot less data wrangling:
library(ggplot2) # Only load packages you really need
# This format is a handy way of keeping a long string on a single page
url <- paste0("https://static-content.springer.com/esm/art%3A10.",
              "1038%2Fs41586-020-2850-3/MediaObjects/41586_2020",
              "_2850_MOESM10_ESM.xlsx")
temp <- tempfile()
download.file(url, temp, mode = "wb")
# Instead of loading an entire library to use one function, we can
# access read_excel by doing readxl::read_excel
myData <- readxl::read_excel(temp, sheet = "ExFig.5f")
# The sheet is now in memory, so the temporary download can be removed
unlink(temp)
# This single line subsets the data frame to chop out the first row
# and the empty columns. It also converts all columns to numeric
NewData <- as.data.frame(lapply(myData[-1, -c(3, 5, 7)], as.numeric))
names(NewData) <- c("Time_Post_Injection", "dose_450_ug",
                    "dose_150_ug", "dose_100_ug", "dose_50_ug")
# This switches your data to long format, which helps ggplot to work
# We put all the values in one column and have the dosages as labels
# in another column instead of having multiple columns. This allows us
# to map Color to the dosages.
NewData <- cbind(NewData[1], stack(NewData[-1]))
# Now we just tell ggplot to map colours to ind
ggplot(NewData, aes(x = Time_Post_Injection, y = values, color = ind)) +
  geom_line() +
  geom_point() +
  scale_color_discrete(name = "Dose") +
  labs(x = "Time Post Injection") +
  theme_bw()
Created on 2020-11-11 by the reprex package (v0.3.0)
Hi, the main problem is that you did not get your data into an easy-to-handle format:
library(dplyr)
library(tidyr)
library(ggplot2)
# Step 1: long format - one row per time point and dose column
long_doses <- tidyr::pivot_longer(
  Newdata,
  -Time_Post_Injection,
  names_to = "DOSE",
  values_to = "VALUE"
)
# Step 2: DOSE drives both grouping and colour, so a single geom_line() layer
# draws every dose (swap in or add geom_point() for markers)
ggplot2::ggplot(
  long_doses,
  aes(Time_Post_Injection, VALUE, group = DOSE, color = DOSE)
) +
  ggplot2::geom_line()

R-use dataframe's data to make histogram

I am trying to generate a histogram from below data:
There are five categories (screen.out to safety.out), and at the end there is a total (just a count of how many "1"s are in each category).
This is my target plot:
But I don't know how to generate my target plot. Can it just use total number to generate a plot(all categories in one picture just like the annex2)? or other method?
Thanks for watching.
# Leave out complaindata, then stack the remaining columns into key/value pairs
category_values <- gather(select(datatotal, -complaindata))
# One bar per original column name
ggplot(category_values, aes(key, value)) +
  geom_col() +
  labs(x = "x_name", y = "y_name")
This should give you the plot as designed in your image.
I've made an example with your data, but please next time don't post the images, copy and paste the actual code and data.
Fake data:
library(dplyr)
# Made-up example: three 0/1 columns of 20 rows each
data <- tibble(
  screen.out = rep(c(1, 0), times = c(19, 1)),
  voice.out = rep(c(0, 1), times = c(15, 5)),
  cs.out = rep(c(0, 1), each = 10)
)
The trick is now to aggregate all the columns (here 3) in two columns one with the numbers and one with the original column name (it's done by gather below).
Plot:
library(ggplot2)
# Stack every column into a key/value pair, then draw one bar per key.
# pivot_longer() is the successor of gather(); it is called through tidyr::
# because only dplyr and ggplot2 are attached in this example.
data %>%
  tidyr::pivot_longer(everything(), names_to = "key", values_to = "value") %>% # you can change these names
  ggplot(aes(key, value, fill = key)) + # as long as you update them here too
  geom_col()

R: using ggplot2 with a group_by data set

I can't quite figure this out. A CSV of 200+ rows assigned to data like so:
gid,bh,p1_id,p1_x,p1_y
90467,R,543333,80.184,98.824
90467,L,408045,74.086,90.923
90467,R,543333,57.629,103.797
90467,L,408045,58.589,95.937
Trying to group by p1_id and plot the mean values for p1_x and p1_y:
grp <- group_by(data, p1_id)
Trying to plot geom_point objects like so:
# grp$p1_x and grp$p1_y extract whole columns as plain vectors, so each mean()
# yields a single number for the entire table; the grouping is not used here.
geom_point(aes(mean(grp$p1_x), mean(grp$p1_y), color=grp$p1_id))
But that isn't showing unique plot points per distinct p1_id values.
What's the missing step here?
Why not calculate the mean first?
library(dplyr)
# One row per p1_id, holding the average of p1_x and of p1_y
by_id <- group_by(data, p1_id)
grp <- summarise(
  by_id,
  mean_p1x = mean(p1_x),
  mean_p1y = mean(p1_y)
)
Then plot:
library(ggplot2)
# One point per row of grp; p1_id is converted to a factor for the colour scale
ggplot(data = grp, mapping = aes(x = mean_p1x, y = mean_p1y)) +
  geom_point(mapping = aes(color = as.factor(p1_id)))
Edit: As per @eipi10, you can also pipe directly into ggplot:
# Same result in a single chain: summarise per p1_id, then hand the summary
# straight to ggplot
data %>%
  group_by(p1_id) %>%
  summarise(
    mean_p1x = mean(p1_x),
    mean_p1y = mean(p1_y)
  ) %>%
  ggplot(mapping = aes(x = mean_p1x, y = mean_p1y)) +
  geom_point(mapping = aes(color = as.factor(p1_id)))

How would one create scatterplots based on characteristics in multiple columns of a data frame?

# Sample data: 100 rows with two numeric columns and three one-letter
# categorical columns
age <- rnorm(100, 0:100)
freq <- rnorm(100, 0:1)
char1 <- stringi::stri_rand_strings(100, length = 1, pattern = "[abc]")
char2 <- stringi::stri_rand_strings(100, length = 1, pattern = "[def]")
# char3 is drawn once, from its own alphabet
char3 <- stringi::stri_rand_strings(100, length = 1, pattern = "[ghi]")
dftest <- data.frame(age, freq, char1, char2, char3)
# Five copies of the same data frame
dflist <- list(dftest, dftest, dftest, dftest, dftest)
This creates a sample data frame that demonstrates the problem I am having.
I would like to create scatterplots for age vs freq for each of the data frames in this list, but I want a different color for the points based on the value in columns "char#". I also need a separate trend line for values in each of these separate characteristics.
I also want to be able to do this based on combinations of different characteristics from different char columns. An example of this is 3*3=9 separate colors for each of the combinations, each with a different trend line.
How would this be done?
I hope this was reproducible and clear enough. I have only posted a few times, so I am still getting used to the format.
Thanks!
Let's start by creating a data frame that will allow us to show points with different colors:
# 200 rows; `id` is 1 for the first hundred rows and 2 for the second hundred
df2 <- data.frame(
  age = rnorm(n = 200, mean = 0:100),
  freq = rnorm(n = 200, mean = 0:1),
  id = rep(1:2, each = 100)
)
Then we can plot like so:
# Scatter plot of freq against age for the data frame built above, coloured by id
plot(df2$age, df2$freq, col = df2$id, pch = 16)
We set col (color) equal to id (this would represent each data frame). pch is the point type (solid dots).
You can try dplyr for data preparing and ggplot for plotting. All functions are loaded via the tidyverse package:
library(tidyverse)
# age vs freq plus trendline for char1
# (as_tibble() replaces the deprecated as.tbl())
as_tibble(dftest) %>%
  ggplot(aes(age, freq, color = char1)) +
  geom_point() +
  geom_smooth(method = "lm")
# age vs freq plus trendline for combinations of char columns
# (no plot shown for this one, as too many combinations make it too busy)
as_tibble(dftest) %>%
  unite(combi, char1, char2, char3, sep = "-") %>%
  ggplot(aes(age, freq, color = combi)) +
  geom_point() +
  geom_smooth(method = "lm")
# every data frame of the list in its own panel; df_source records which
# list element each row came from
dflist %>%
  bind_rows(.id = "df_source") %>%
  ggplot(aes(age, freq, color = char1)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  facet_wrap(~df_source)

Using gganimate and ggplot for a boxplot: Cumulative not working

I'm trying to produce an animation for a simulation model, and I want to show how the distribution of results changes as the simulation runs.
I've seen gganimate used for scatter plots but not for boxplots (or ideally violin plots). Here I've provided a reprex.
When I use sim_category (which is a bucket for a certain number of simulation runs) I want the result to be cumulative of all previous runs to show the total distribution.
In this example (and my actual code), cumulative = TRUE does not do this. Why is this?
library(gganimate)
library(animation)
library(ggplot2)
# Twelve simulated values: two IDs, two values per ID, three simulation buckets
df <- data.frame(
  ID = rep(rep(c(1, 2), each = 2), times = 3),
  value = c(10, 15, 5, 10, 7, 17, 4, 12, 9, 20, 6, 17),
  sim_category = rep(c(1, 2, 3), each = 4)
)
# Turn both keys into factors, with levels in order of first appearance
df$ID <- factor(df$ID, levels = unique(df$ID))
df$sim_category <- factor(df$sim_category, levels = unique(df$sim_category))
# Tell the animation package where the ImageMagick executable is
magick_path <- shQuote('C:/Program Files/ImageMagick-7.0.5-Q16/magick.exe')
ani.options(convert = magick_path)
# One frame per sim_category, with cumulative = TRUE requested
p <- ggplot(df, aes(ID, value, frame = sim_category, cumulative = TRUE)) +
  geom_boxplot(position = "identity")
gganimate(p)
gganimate's cumulative doesn't accumulate the data, it just keeps gif frames in subsequent frames as they appear. To achieve what you want, you have to do the accumulation before building the plot, something along the following lines:
library(tidyverse)
library(gganimate)
# tibble() replaces the deprecated data_frame()
df <- tibble(
  ID = factor(c(1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2), levels = 1:2),
  value = c(10, 15, 5, 10, 7, 17, 4, 12, 9, 20, 6, 17),
  sim_category = factor(c(1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3), levels = 1:3)
)
# Accumulate by hand: for each category k, take every row from categories
# 1..k and relabel it as k, so frame k holds all of the data seen up to k.
p <- df %>%
  pull(sim_category) %>%
  levels() %>%
  as.integer() %>%
  map_df(~ df %>%
    filter(sim_category %in% seq_len(.x)) %>%
    mutate(sim_category = .x)) %>%
  ggplot(aes(ID, value, frame = factor(sim_category))) +
  geom_boxplot(position = "identity")
gganimate(p)

Resources