how to get geom_point and legend onto line plot in R? - r

This is my R-script, I've been trying to include a legend onto the line plot but it isn't working? Any guidance? I also can't seem to get the geom_point() working either (I've taken the code for it out below).
library(ggsignif)
library(readxl)
library(svglite)
library(tidyverse)
library(ggplot2)
library(tidyr)
library(dplyr)
url <-'https://static-content.springer.com/esm/art%3A10.1038%2Fs41586-020-2850-3/MediaObjects/41586_2020_2850_MOESM10_ESM.xlsx'
temp <-tempfile()
download.file(url, temp, mode='wb')
myData <- read_excel(path=temp, sheet = "ExFig.5f")
names(myData) <- NULL
view(myData)
Time_post_inj <- (myData[1])
Time_post_inj <- Time_post_inj[-c(1),]
dose_450_ug <- (myData[2])
dose_450_ug <- dose_450_ug[-c(1),]
dose_150_ug <- (myData[4])
dose_150_ug <- dose_150_ug[-c(1),]
dose_100_ug <- (myData[6])
dose_100_ug <- dose_100_ug[-c(1),]
dose_50_ug <- (myData[8])
dose_50_ug <- dose_50_ug[-c(1),]
colnames(Time_post_inj) <-c("Time_Post_Injection")
colnames(dose_450_ug) <-c("dose_450_µg")
colnames(dose_150_ug) <-c("dose_150_µg")
colnames(dose_100_ug) <-c("dose_100_µg")
colnames(dose_50_ug) <-c("dose_50_µg")
Newdata <-data.frame(Time_post_inj, dose_450_ug, dose_150_ug, dose_100_ug, dose_50_ug)
Newdata$Time_Post_Injection <-as.numeric(Newdata$Time_Post_Injection)
Newdata$dose_450_µg <-as.numeric(Newdata$dose_450_µg)
Newdata$dose_150_µg <-as.numeric(Newdata$dose_150_µg)
Newdata$dose_100_µg <-as.numeric(Newdata$dose_100_µg)
Newdata$dose_50_µg <-as.numeric(Newdata$dose_50_µg)
str(Newdata)
ggplot(data=Newdata, aes(x=Time_Post_Injection, y=hCD4_occupancy, group = 1)) + geom_line(aes(y=dose_450_µg)) + geom_line(aes(y=dose_150_µg)) + geom_line(aes(y=dose_100_µg)) + geom_line(aes(y=dose_50_µg))
Newdata
tidyr::pivot_longer(Time_Post_Injection, names_to = "DOSE", values_to = "VALUE") %>%
ggplot2::ggplot(aes(Time_Post_Injection, VALUE, group = DOSE, color = DOSE)) + ggplot2::geom_line()

The following is a full reprex, meaning that if you copy and paste, it will reproduce the plot exactly as below. You can see I have simplified your parsing considerably too; this starts with the url and produces the plot with a lot less data wrangling:
library(ggplot2) # Only load packages you really need
# This format is a handy way of keeping a long string on a single page
url <- paste0("https://static-content.springer.com/esm/art%3A10.",
"1038%2Fs41586-020-2850-3/MediaObjects/41586_2020",
"_2850_MOESM10_ESM.xlsx")
temp <- tempfile()
download.file(url, temp, mode = 'wb')
# Instead of loading an entire library to use one function, we can
# access read_excel by doing readxl::read_excel
myData <- readxl::read_excel(temp, sheet = "ExFig.5f")
# This single line subsets the data frame to chop out the first row
# and the empty columns. It also converts all columns to numeric
NewData <- as.data.frame(lapply(myData[-1, -c(3, 5, 7)], as.numeric))
names(NewData) <-c("Time_Post_Injection", "dose_450_ug",
"dose_150_ug", "dose_100_ug", "dose_50_ug")
# This switches your data to long format, which helps ggplot to work
# We put all the values in one column and have the dosages as labels
# in another column instead of having multiple columns. This allows us
# to map Color to the dosages.
NewData <- cbind(NewData[1], stack(NewData[-1]))
# Now we just tell ggplot to map colours to ind
ggplot(NewData, aes(x = Time_Post_Injection, y = values, color = ind)) +
geom_line() +
geom_point() +
scale_color_discrete(name = "Dose") +
labs(x = "Time Pist Injection") +
theme_bw()
Created on 2020-11-11 by the reprex package (v0.3.0)

Hi the main problem is that you did not get your data into a easy to handle format
library(dplyr)
library(tidyr)
library(ggplot2)
Newdata %>%
# get data in easy to handle format
tidyr::pivot_longer(-Time_Post_Injection, names_to = "DOSE", values_to = "VALUE") %>%
# plot and use the new DOSE column as group and color so you do not need one geom per line! (you can change geom_line() to geom_point also())
ggplot2::ggplot(aes(Time_Post_Injection, VALUE, group = DOSE, color = DOSE)) +
ggplot2::geom_line()

Related

ggplot2 version of shepard plot, i.e. vegan::stressplot()?

There seems to be quite a bit of information for plotting NMDS outputs (i.e. NMDS1 vs NMDS1) using ggplot2 however I cannot find a way to plot the vegan::stressplot() (shepard's plot) using ggplot2.
Is there a way to produce a ggplot2 version of a metaMDS output?
Reproducible code
library(vegan)
set.seed(2)
community_matrix = matrix(
sample(1:100,300,replace=T),nrow=10,
dimnames=list(paste("community",1:10,sep=""),paste("sp",1:30,sep="")))
example_NMDS=metaMDS(community_matrix, k=2)
stressplot(example_NMDS)
Created on 2021-09-17 by the reprex package (v2.0.1)
Here's a workaround to plot a very similar plot using ggplot2.The trick was to get the structure of the stressplot(example_NMDS) and extract the data stored in that object. I used the tidyverse package that includes ggplot and other packages such as tidyr that contains the pivot_longer function.
library(vegan)
library(tidyverse)
# Analyze the structure of the stressplot
# Notice there's an x, y and yf list
str(stressplot(example_NMDS))
# Create a tibble that contains the data from stressplot
df <- tibble(x = stressplot(example_NMDS)$x,
y = stressplot(example_NMDS)$y,
yf = stressplot(example_NMDS)$yf) %>%
# Change data to long format
pivot_longer(cols = c(y, yf),
names_to = "var")
# Create plot
df %>%
ggplot(aes(x = x,
y = value)) +
# Add points just for y values
geom_point(data = df %>%
filter(var == "y")) +
# Add line just for yf values
geom_step(data = df %>%
filter(var == "yf"),
col = "red",
direction = "vh") +
# Change axis labels
labs(x = "Observed Dissimilarity", y = "Ordination Distance") +
# Add bw theme
theme_bw()

How to plot an Observed and Simulated timeseries as lines and points in ggplot?

I have timeseries of 4 simulated variables, with its 4 observed variables (observed variables have less data than simulated variables) as attached in the following link:
https://www.dropbox.com/s/sumgi6pqmjx70dl/nutrients2.csv?dl=0
I used the following code, The data is stored in "data 2" object
data2 <- read.table("C:/Users/Downloads/nutrients2.csv", header=T, sep=",")
library(lubridate)
data2$Date <- dmy(data2$Date)
library(reshape2)
data2 <- melt(data2, id=c("Date","Type"))
seg2 <- ggplot(data = data2, aes(x = Date, y = value, group = Type, colour = Type)) +
geom_line() +
facet_wrap(~ variable, scales = "free")
seg2
This give the plot (all variables in line)
Plot obtained
I need the observed data in points instead of interrupted lines, like this example
Plot desired
How to get a plot like this in ggplot, (simulated variables in line and observed variables in points or dots)?
One possible solution is to subset your dataset for geom_line and geom_point in order to use only sim and obs data respectively.
Then, if you pass shape = Type in your aes, you can remove dots for sim data in your legend by using scale_shape_manual:
(NB: I used melt function from data.table package because I found it more efficient for big dataset than the melt function reshape2)
library(lubridate)
df$Date <- dmy(df$Date)
library(data.table)
dt.m <- melt(setDT(df),measure = list(c("Nitrate","Ammonium","DIP","Chla")), value.name = "Values", variable.name = "Element")
library(ggplot2)
ggplot(dt.m, aes(x = Date, y = Values, group = Type, color = Type, shape = Type))+
geom_line(data = subset(dt.m, Type == "sim"))+
geom_point(data = subset(dt.m, Type == "obs"))+
scale_shape_manual(values = c(16,NA))+
facet_wrap(~Element, scales = "free")

Plot multiple distributions by year using ggplot Boxplot

I'm trying to evaluate the above data in a boxplot similar to this: https://www.r-graph-gallery.com/89-box-and-scatter-plot-with-ggplot2.html
I want the x axis to reflect my "Year" variable and each boxplot to evaluate the 8 methods as a distribution. Eventually I'd like to pinpoint the "Selected" variable in relation to that distribution but currently I just want this thing to render!
I figure out how to code my y variable and I get various errors no matter what I try. I think the PY needs to be as.factor but I've tried some code that way and I just get other errors.
anyway here is my code (Send Help):
# Libraries
library(tidyverse)
library(hrbrthemes)
library(viridis)
library(ggplot2)
library(readxl) # For reading in Excel files
library(lubridate) # For handling dates
library(dplyr) # for mutate and pipe functions
# Path to current and prior data folders
DataPath_Current <- "C:/R Projects/Box Plot Test"
Ult_sum <- read_excel(path = paste0(DataPath_Current, "/estimate.XLSX"),
sheet = "Sheet1",
range = "A2:J12",
guess_max = 100)
# just want to see what my table looks like
Ult_sum
# create a dataset - the below is code I commented out
# data <- data.frame(
# name=c(Ult_sum[,1]),
# value=c(Ult_sum[1:11,2:8])
#)
value <- Ult_sum[2,]
# Plot
Ult_sum %>%
ggplot( aes(x= Year, y= value, fill=Year)) +
geom_boxplot() +
scale_fill_viridis(discrete = TRUE, alpha=0.6) +
geom_jitter(color="black", size=0.4, alpha=0.9) +
theme_ipsum() +
theme(
legend.position="none",
plot.title = element_text(size=11)
) +
ggtitle("A boxplot with jitter") +
xlab("")
I do not see how your code matches the screenshot of your dataset. However, just a general hint: ggplot likes data in long format. I suggest you reshape your data using tidyr::reshape_long oder data.table::melt. This way you get 3 columns: year, method, value, of which the first two should be a factor. The resulting dataset can then be neatly used in aes() as aes(x=year, y=value, fill=method).
Edit: Added an example. Does this do what you want?
library(data.table)
library(magrittr)
library(ggplot2)
DT <- data.table(year = factor(rep(2010:2014, 10)),
method1 = rnorm(50),
method2 = rnorm(50),
method3 = rnorm(50))
DT_long <- DT %>% melt(id.vars = "year")
ggplot(DT_long, aes(x = year, y = value, fill = variable)) +
geom_boxplot()

R: Unexplainable behavior of ggplot inside a function

I have composed a function that develops histograms using ggplot2 on the numerical columns of a dataframe that will be passed to it. The function stores these plots into a list and then returns the list.
However when I run the function I get the same plot again and again.
My code is the following and I provide also a reproducible example.
hist_of_columns = function(data, class, variables_to_exclude = c()){
library(ggplot2)
library(ggthemes)
data = as.data.frame(data)
variables_numeric = names(data)[unlist(lapply(data, function(x){is.numeric(x) | is.integer(x)}))]
variables_not_to_plot = c(class, variables_to_exclude)
variables_to_plot = setdiff(variables_numeric, variables_not_to_plot)
indices = match(variables_to_plot, names(data))
index_of_class = match(class, names(data))
plots = list()
for (i in (1 : length(variables_to_plot))){
p = ggplot(data, aes(x= data[, indices[i]], color= data[, index_of_class], fill=data[, index_of_class])) +
geom_histogram(aes(y=..density..), alpha=0.3,
position="identity", bins = 100)+ theme_economist() +
geom_density(alpha=.2) + xlab(names(data)[indices[i]]) + labs(fill = class) + guides(color = FALSE)
name = names(data)[indices[i]]
plots[[name]] = p
}
plots
}
data(mtcars)
mtcars$am = factor(mtcars$am)
data = mtcars
variables_to_exclude = 'mpg'
class = 'am'
plots = hist_of_columns(data, class, variables_to_exclude)
If you check the list plots you will discover that it contains the same plot repeated.
Simply use aes_string to pass string variables into the ggplot() call. Right now, your plot uses different data sources, not aligned with ggplot's data argument. Below x, color, and fill are separate, unrelated vectors though they derive from same source but ggplot does not know that:
ggplot(data, aes(x= data[, indices[i]], color= data[, index_of_class], fill=data[, index_of_class]))
However, with aes_string, passing string names to x, color, and fill will point to data:
ggplot(data, aes_string(x= names(data)[indices[i]], color= class, fill= class))
Here is strategy using tidyeval that does what you are after:
library(rlang)
library(tidyverse)
hist_of_cols <- function(data, class, drop_vars) {
# tidyeval overhead
class_enq <- enquo(class)
drop_enqs <- enquo(drop_vars)
data %>%
group_by(!!class_enq) %>% # keep the 'class' column always
select(-!!drop_enqs) %>% # drop any 'drop_vars'
select_if(is.numeric) %>% # keep only numeric columns
gather("key", "value", -!!class_enq) %>% # go to long form
split(.$key) %>% # make a list of data frames
map(~ ggplot(., aes(value, fill = !!class_enq)) + # plot as usual
geom_histogram() +
geom_density(alpha = .5) +
labs(x = unique(.$key)))
}
hist_of_cols(mtcars, am, mpg)
hist_of_cols(mtcars, am, c(mpg, wt))

Multiple lineplot with Errobar for data coming from two tables

I have two data sheets, one with the points I want to plot (each point in the first data set is an average of different measurements), and the second data containing the standard deviations for each point.
Below I attached an R script to create lineplot from the first data which works fine. With the code i can create a plot like the following
Now I want to use the second table (standard deviations) to create a plot similar the previous, but now also showing a errorbar, i.e., that graphically displays the standard deviation of each measurements like this.
library(ggplot2)
##loads a dataframe and returns a ggplot object that can be externally modified and plotted
makeMultipleLinePlot <- function(data){
require(reshape2)
data$id <-rownames(data)
melted <- melt(data)
colnames(melted)<-c("Measurement","Month","Percentage")
g<-ggplot(data=melted,
aes(x=Month, y=Percentage, color=Measurement,group=Measurement)) +
geom_line(size=1,alpha=0.8) + geom_point(size=4,aes(shape=Measurement))
return(g)
}
##load a table from google sheets. assumes the sheet has a single table
loadTableFromGoogleSheet <- function(url, sheet) {
require(gsheet)
a <- gsheet2text(url,sheetid=sheet, format='csv')
data <- read.csv(text=a, stringsAsFactors=FALSE,header = TRUE,row.names = 1)
return(data)
}
#URL of the google spreadsheet
url <- "docs.google.com/spreadsheets/d/10clnt9isJp_8Sr7A8ejhKEZXCQ279wGP4sdygsit1LQ"
gid.humidity <- 2080295295 #gid of the google sheet containing humidity data
data.humidity<-loadTableFromGoogleSheet(url,gid.humidity)
gid.humidity_sd <- 1568896731 #gid of the google sheet containing standard deviations for each measurement in the humidity data
data.humidity_sd<-loadTableFromGoogleSheet(url,gid.humidity_sd)
ggsave(filename="lineplot/humidity.pdf", plot=makeMultipleLinePlot(data.humidity))
#ggsave(filename="lineplot/humidity.pdf", plot=makeMultipleErrorPlot(data.humidity,data.humidity_sd))
This tidy the two data.frame, join them and plot the result, using geom_errorbar:
library(dplyr)
library(tidyr)
library(ggplot2)
df <- data.humidity %>%
mutate(measure = row.names(.)) %>%
gather(month, value, -measure)
df_sd <- data.humidity_sd %>%
mutate(measure = substr(row.names(.), 1, 2)) %>%
gather(month, sd, -measure)
dfF <- full_join(df, df_sd)
#> Joining, by = c("measure", "month")
ggplot(dfF, aes(month, value, group = measure, color = measure))+
geom_line(size=1,alpha=0.8) +
geom_point(aes(shape = measure)) +
geom_errorbar(aes(ymin = value - sd, ymax = value + sd), width = .3)

Resources