how to put a plot and a table together using grid.arrange - r

I have a plot and a table, and I would like to combine them into a single figure. How should I do that?
Here is my code:
df<-structure(list(AEDECOD = c("Hypoxia", "Malignant pleural effusion",
"Decubitus ulcer", "Nausea"), ADY = c(13, 13, 13, 14)), row.names = c(NA,
-4L), class = "data.frame")
tbl <-structure(list(`Analysis Relative Day` = 13, `AE Type` = "SER",
`Adverse Event` = "Hypoxia/Malignant pleural effusion"), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame"))
p1<- ggplot(data =df, aes(x = ADY, y = AEDECOD))+ geom_point()
p2 <-grid.arrange(p1, tbl,
nrow = 2,as.table = TRUE)
print(p2)
I got the following error:
Error: Input must be a vector, not a <viewport> object.
If you know any other way to do the same thing, I would like to learn that as well.

We may use ggarrange after converting the tibble to ggtexttable
library(ggpubr)
ggarrange(p1, ggtexttable(tbl), nrow = 2)
Or using the OP's code
library(gridExtra)
grid.arrange(p1, ggtexttable(tbl),
nrow = 2,as.table = TRUE)
-output

Related

Bar chart with ID indicator

My data:
structure(list(Id = c(8378563200, 5577150313, 4702921684, 4388161847,
7086361926, 6962181067, 4445114986, 5553957443, 4319703577, 1503960366,
2026352035, 3977333714, 6117666160, 8792009665, 4020332650, 2347167796
), total_steps = c(8377719, 6477458, 7174818, 7710336, 6972600,
9412809, 4163404, 8276690, 5858684, 9390475, 4832044, 9227036,
3551544, 806370, 562272, 2570310), total_sleeps = c(427769, 336960,
349432, 285324, 337125, 430528, 334335, 445408, 384183, 279217,
439363, 246660, 241304, 189515, 86645, 120636), total_calories = c(3302554,
2620514, 2482164, 2205930, 1909368, 1904733, 1897616, 1802526,
1642368, 1407725, 1337280, 1271480, 1139616, 853605, 591680,
551730)), row.names = c(NA, -16L), class = c("tbl_df", "tbl",
"data.frame"))
I am looking for a way to build a bar chart in R with the Id indicator on top of each bar, showing the calories burned, total steps and sleep. Alternatively, I could facet the chart by the 24 respondents, showing the calories burned, total steps and sleep for each. Please tell me which way is better, or advise me on a better way to visualize it.
Maybe something like this:
Bring your data into long format with pivot_longer
transform Id and name to factor type
use ggplot
library(tidyverse)

# Reshape to long format (one row per Id/measure pair) so the three totals
# can be stacked in a single bar per Id.
df %>%
  pivot_longer(-Id) %>%
  mutate(
    # Treat Id as a discrete label rather than a continuous number.
    Id = factor(Id),
    # Fix the stacking/legend order of the three measures.
    name = factor(
      name,
      levels = c("total_steps", "total_sleeps", "total_calories")
    )
  ) %>%
  ggplot(aes(x = Id, y = value, fill = name)) +
  geom_col(position = position_stack()) +
  # `family` is a constant, so it is set outside aes(); the label is mapped
  # only here because geom_text() is the only layer that uses it.
  geom_text(
    aes(label = value),
    family = "serif",
    position = position_stack(vjust = 0.5)
  ) +
  theme_bw()
data:
df <- structure(list(Id = c(8378563200, 5577150313, 4702921684, 4388161847,
7086361926, 6962181067, 4445114986, 5553957443, 4319703577, 1503960366,
2026352035, 3977333714, 6117666160, 8792009665, 4020332650, 2347167796
), total_steps = c(8377719, 6477458, 7174818, 7710336, 6972600,
9412809, 4163404, 8276690, 5858684, 9390475, 4832044, 9227036,
3551544, 806370, 562272, 2570310), total_sleeps = c(427769, 336960,
349432, 285324, 337125, 430528, 334335, 445408, 384183, 279217,
439363, 246660, 241304, 189515, 86645, 120636), total_calories = c(3302554,
2620514, 2482164, 2205930, 1909368, 1904733, 1897616, 1802526,
1642368, 1407725, 1337280, 1271480, 1139616, 853605, 591680,
551730)), row.names = c(NA, -16L), class = c("tbl_df", "tbl",
"data.frame"))

How to plot layers of tupples on same plot in R?

I am trying to plot the time and NDVI for each region on the same plot. I think to do this I have to convert the date column from characters to time and then plot each layer. However I cannot figure out how to do this. Any thoughts?
list(structure(list(observation = 1L, HRpcode = NA_character_,
timeseries = NA_character_), row.names = c(NA, -1L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(observation = 1:6, time = c("2014-01-01",
"2014-02-01", "2014-03-01", "2014-04-01", "2014-05-01", "2014-06-01"
), ` NDVI` = c("0.3793765496776215", "0.21686891782421552", "0.3785652933528299",
"0.41027240624704164", "0.4035578030242673", "0.341299793064468"
)), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
)), structure(list(observation = 1:6, time = c("2014-01-01",
"2014-02-01", "2014-03-01", "2014-04-01", "2014-05-01", "2014-06-01"
), ` NDVI` = c("0.4071076986818826", "0.09090719657570319", "0.35214166081795284",
"0.4444311032927228", "0.5220702877666005", "0.5732370503295022"
)), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
)), structure(list(observation = 1:6, time = c("2014-01-01",
"2014-02-01", "2014-03-01", "2014-04-01", "2014-05-01", "2014-06-01"
), ` NDVI` = c("0.3412131556625801", "0.18815996897460135", "0.5218904976415136",
"0.6970128777711452", "0.7229657162729096", "0.535967435470161"
)), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
)))
111
First we need to clean your data. The first element in this list is empty
df = df[-1]
Now we need to make a data.frame
df = do.call(rbind, df)
I am going to add a region variable, change the name of NDVI to remove the space,
change ndvi into a numeric vector, and change time into a Date object
library(dplyr)

# Strip the leading space from the NDVI column name, tag each run of six rows
# with its region, and convert the character columns to numeric / Date.
df <- df %>%
  rename(ndvi = " NDVI") %>%
  mutate(
    region = factor(rep(1:3, each = 6)),
    ndvi = as.numeric(ndvi),
    time = as.Date(time)
  )
Now we can use ggplot2 to plot the data by region
library(ggplot2)

# One line per region, coloured by the region factor.
g <- ggplot(df, aes(x = time, y = ndvi, colour = region)) +
  geom_line()
g
Which gives this plot:
Here's an approach with lubridate to handle dates and dplyr to make the binding of the data.frames easier to understand.
Note that the group names are taken from the names of the list, and since those don't exist in the data you provided, we have to set them in advance.
library(lubridate)
library(ggplot2)
library(dplyr)

# bind_rows() takes the group labels from the list names, so name the
# elements first.
# NOTE(review): assumes `data` holds only the three region tibbles (any empty
# placeholder element already removed) -- confirm against the input list.
names(data) <- 1:3
data <- bind_rows(data, .id = "group")
data$time <- ymd(data$time)
# Drop the leading space from the column name. dplyr::rename() is used
# because setnames() belongs to data.table, which is not loaded here.
data <- rename(data, NDVI = " NDVI")
data$NDVI <- as.numeric(data$NDVI)
# The id column created by bind_rows() above is `group` (lower case).
ggplot(data, aes(x = time, y = NDVI, color = group)) + geom_line()

Need plotly colorbar at the bottom

Here's a dataframe, and the rendered plot using plotly,
I went through the attributes mentioned on plotly R reference guide but couldn't help myself.
require(plotly)
require(dplyr)
df <- structure(list(nn = c(5, 10, 20), Sparsity = c(93.7, 94.3, 94.9
), Method = c("adjcos", "adjcos", "adjcos"), Filtering = c("IBCF",
"IBCF", "IBCF"), error = c("MAE", "MAE", "MAE"), `5` = c(0.716433082,
0.746625993, 0.786221009), `10` = c(0.709316631, 0.737244112,
0.784197414), `20` = c(0.711888414, 0.73875951, 0.783307894),
`30` = c(0.716054462, 0.741430884, 0.78574634), `50` = c(0.722296008,
0.745516396, 0.788986052), `70` = c(0.727236608, 0.749367689,
0.792077439), `100` = c(0.73261171, 0.75337796, 0.795475065
), `150` = c(0.738988515, 0.75764295, 0.799090637), `200` = c(0.743606268,
0.760689524, 0.801416126)), row.names = c(NA, -3L), class = c("tbl_df",
"tbl", "data.frame"))
plot_ly(x = ~df$Sparsity, y = ~df$nn, z = ~as.matrix(df[,-c(1:5)])) %>%
add_surface()
Plot I obtained
I need the colorbar/scale (referenced here colorbar attributes) at the bottom of the plot, directly rendered from the plot_ly().
as shown below :

Subset Data.Frame With Multiple Conditions

End Goal:
Create a plot for each region of StressCumulative, BaseCumulative, StressQoQ, and BaseQoQ over the date range from rows 1:167.
Problem:
I'm having difficulty subsetting my data.frame. My issue is that the condition by which I'm subsetting is logical, and thus will only return the first element after the condition.
subset_region_1 <- subset.data.frame(HPF, HPF$region == 1, select = BaseCumulative, HPF$StressCumulative, StressQoQ, BaseQoQ)
Warning messages:
1: In if (drop) warningc("drop ignored") :
the condition has length > 1 and only the first element will be used
2: drop ignored
This returns only the first column, BaseCumulative.
Data:
Here you get a glimpse of what I'm working with. This is the table I am looking to subset from. My data.frame is in a tall format
I would like to create a subset in order to graph BaseCumulative, StressCumulative, BaseQoQ, and StressQoQ variables over the range of dates from rows 1:167. The date column uses the same dates for all 100 regions. My issue is that when I go to plot in ggplot, I get an error that my aes mappings are not of the same size. The full table has date = 18370 rows long, but the values repeat every 167 rows (for each unique region). Further, the BaseCumulative variable is also 18370 rows long but is unique for all regions, i.e. every 167 rows. I want to know how I can subset by region while obtaining the correct row size for the variables I am interested in measuring.
Data Pts:
#Rows 1-3 (Region 1 Sample):
dput(head(HPF[1:3, ]))
structure(list(region = c(1, 1, 1), path = c(1, 1, 1), date = c(20140215,
20140515, 20140815), index_value = c(1, 1.033852765, 1.041697122
), index = 0:2, counter = 1:3, BaseQoQ = c(NA, 0.033852765, 0.00758749917354029
), BaseCumulative = c(100, 103.3852765, 104.1697122), StressCumulative = c(110,
113.3852765, 114.1697122), StressQoQ = c(NA, 0.0307752409090909,
0.00691832065162346)), .Names = c("region", "path", "date", "index_value",
"index", "counter", "BaseQoQ", "BaseCumulative", "StressCumulative",
"StressQoQ"), row.names = c(NA, -3L), class = c("tbl_df", "tbl",
"data.frame"))
#Rows 168:200 (Region 2 Sample):
dput(head(HPF[168:200, ]))
structure(list(region = c(2, 2, 2, 2, 2, 2), path = c(1, 1, 1,
1, 1, 1), date = c(20140215, 20140515, 20140815, 20141115, 20150215,
20150515), index_value = c(1, 1.014162265, 1.01964828, 1.009372314,
1.007210703, 1.018695493), index = 0:5, counter = 1:6, BaseQoQ = c(NA,
0.014162265, 0.00540940556489744, -0.0100779515854232, -0.0021415398163972,
0.0114025694582001), BaseCumulative = c(100, 101.4162265, 101.964828,
100.9372314, 100.7210703, 101.8695493), StressCumulative = c(110,
111.4162265, 111.964828, 110.9372314, 110.7210703, 101.8695493
), StressQoQ = c(NA, 0.0128747863636363, 0.00492389230216839,
-0.00917785181610786, -0.00194849914020834, -0.0799443229370588
)), .Names = c("region", "path", "date", "index_value", "index",
"counter", "BaseQoQ", "BaseCumulative", "StressCumulative", "StressQoQ"
), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
))
Question:
How do I subset other columns in addition to specifying region == #? I have tried the following but then the issue is that values recycle for the dates and my charts are incorrect:
ggplot(HPF, aes(x = date, y= BaseCumulative, linetype = factor(region == 1))) +
geom_line() +
theme_light()
Further, I am also unsuccessful if I try to subset within the ggplot such as:
ggplot(HPF[HPF$region == 1, ], aes(x = HPF$date[1:167, ], y= HPF$BaseCumulative[1:167, ], linetype = factor(region == 1))) +
geom_line() +
theme_light()
Any help is appreciated.
I'm not entirely sure what you're trying to show in your plot; is this what you're after?
library(tidyverse)

# Stack columns 7:10 (the QoQ and cumulative series) into name/value pairs so
# each series becomes its own coloured line. pivot_longer() replaces the
# superseded gather().
df %>%
  pivot_longer(7:10, names_to = "what", values_to = "value") %>%
  ggplot(aes(date, value, colour = what)) +
  geom_line() +
  theme_light()
Explanation: Convert your data from wide to long format, then pass what as a colour (or linetype) aesthetic to get different line plots for columns 7, 8, 9, 10 in one plot.
If you want separate plots for region, you could add + facet_wrap(~ as.factor(region)), e.g.
# Same long-format plot, with one panel per region. pivot_longer() replaces
# the superseded gather().
df %>%
  pivot_longer(7:10, names_to = "what", values_to = "value") %>%
  ggplot(aes(date, value, colour = what)) +
  geom_line() +
  theme_light() +
  facet_wrap(~ as.factor(region))
Sample data
df1 <- structure(list(region = c(1, 1, 1), path = c(1, 1, 1), date = c(20140215,
20140515, 20140815), index_value = c(1, 1.033852765, 1.041697122
), index = 0:2, counter = 1:3, BaseQoQ = c(NA, 0.033852765, 0.00758749917354029
), BaseCumulative = c(100, 103.3852765, 104.1697122), StressCumulative = c(110,
113.3852765, 114.1697122), StressQoQ = c(NA, 0.0307752409090909,
0.00691832065162346)), .Names = c("region", "path", "date", "index_value",
"index", "counter", "BaseQoQ", "BaseCumulative", "StressCumulative",
"StressQoQ"), row.names = c(NA, -3L), class = c("tbl_df", "tbl",
"data.frame"));
df2 <- structure(list(region = c(2, 2, 2, 2, 2, 2), path = c(1, 1, 1,
1, 1, 1), date = c(20140215, 20140515, 20140815, 20141115, 20150215,
20150515), index_value = c(1, 1.014162265, 1.01964828, 1.009372314,
1.007210703, 1.018695493), index = 0:5, counter = 1:6, BaseQoQ = c(NA,
0.014162265, 0.00540940556489744, -0.0100779515854232, -0.0021415398163972,
0.0114025694582001), BaseCumulative = c(100, 101.4162265, 101.964828,
100.9372314, 100.7210703, 101.8695493), StressCumulative = c(110,
111.4162265, 111.964828, 110.9372314, 110.7210703, 101.8695493
), StressQoQ = c(NA, 0.0128747863636363, 0.00492389230216839,
-0.00917785181610786, -0.00194849914020834, -0.0799443229370588
)), .Names = c("region", "path", "date", "index_value", "index",
"counter", "BaseQoQ", "BaseCumulative", "StressCumulative", "StressQoQ"
), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
))
df <- rbind.data.frame(df1, df2);

Adding parameters to a ggplot produced plot in a function

Let's say I have a saved plot named my_plot, produced with ggplot. Also, let's say that the column in my_plot[[1]] data frame used for horizontal axis is named my_dates
Now, I want to add some vertical lines to the plot, which, of course, can be done by something like that:
my_plot +
geom_vline(aes(xintercept = my_dates[c(3, 8)]))
Since I perform this task quite on a regular basis, I want to write a function for that -- something like that:
# Attempted helper: add vertical lines to `given_plot` at the `my_dates`
# values found at positions `given_points` of the plot's data.
# NOTE: this version fails with "object 'given_plot' not found" -- the
# expression inside aes() is not evaluated here in the function body, so the
# local `given_plot` is presumably out of scope when ggplot evaluates it.
ggplot.add_lines <- function(given_plot, given_points) {
finale <- given_plot +
geom_vline(aes(xintercept = given_plot[[1]]$my_dates[given_points]))
return(finale)
}
Which, as it's probably obvious to everyone, doesn't work:
> ggplot.add_lines(my_plot, c(3, 5))
Error in eval(expr, envir, enclos) : object 'given_plot' not found
So, my question would be what am I doing wrong, and how can it be fixed? Below is some data for a reproducible example:
> dput(my_plot)
structure(list(data = structure(list(my_dates = c(1, 2, 3, 4,
5, 6, 7, 8, 9, 10), my_points = c(-2.20176409422924, -1.12872396340683,
-0.259703895194354, 0.634233385649338, -0.678983982973015, -1.83157126614836,
1.33360095418957, -0.120455389285709, -0.969431974863616, -1.20451262626184
)), .Names = c("my_dates", "my_points"), row.names = c(NA, -10L
), class = "data.frame"), layers = list(<environment>), scales = <S4 object of class structure("Scales", package = "ggplot2")>,
mapping = structure(list(x = my_dates, y = my_points), .Names = c("x",
"y"), class = "uneval"), theme = list(), coordinates = structure(list(
limits = structure(list(x = NULL, y = NULL), .Names = c("x",
"y"))), .Names = "limits", class = c("cartesian", "coord"
)), facet = structure(list(shrink = TRUE), .Names = "shrink", class = c("null",
"facet")), plot_env = <environment>, labels = structure(list(
x = "my_dates", y = "my_points"), .Names = c("x", "y"
))), .Names = c("data", "layers", "scales", "mapping", "theme",
"coordinates", "facet", "plot_env", "labels"), class = c("gg",
"ggplot"))
According to this post, below is my solution to this problem. The environment issue in the **ply and ggplot is annoying.
# Add vertical lines to `given_plot` at my_dates[given_points].
# substitute() replaces the symbol `given_points` in the quoted expression with
# its actual value, so aes() no longer refers to a function-local variable;
# eval() then evaluates the rewritten expression to build the plot.
ggplot.add_lines <- function(given_plot, given_points) {
finale <- eval(substitute( expr = {given_plot +
geom_vline(aes(xintercept = my_dates[given_points]))}, env = list(given_points = given_points)))
return(finale)
}
The following code runs well on my machine. (I cannot make your reproducible work on my machine...)
df <- data.frame(my_dates = 1:10, val = 1:10)
my_plot <- ggplot(df, aes(x = my_dates, y = val)) + geom_line()
my_plot <- ggplot.add_lines(my_plot, c(3, 5))
print(my_plot)
Update: The above solution fails when more than two points are used.
It seems that we can easily solve this problem by not including the aes (subsetting together with aes causes problems):
# Add vertical lines to a ggplot at the x positions stored at positions
# `given_points` of the plot data's `my_dates` column.
#   given_plot:   a ggplot object whose data has a `my_dates` column
#   given_points: indices into that column
# Returns the plot with the extra geom_vline() layer.
ggplot.add_lines <- function(given_plot, given_points) {
  # Look the data up by name instead of by position ([[1]]), which depends on
  # the internal layout of the ggplot object. xintercept is passed as a fixed
  # parameter (no aes()), so it is evaluated right here in the function.
  given_plot + geom_vline(xintercept = given_plot$data$my_dates[given_points])
}
I would take the following approach: extract the data.frame of interest, and pass it to the new layer,
df <- data.frame(my_dates = 1:10, val = rnorm(10))
my_plot <- ggplot(df, aes(x = my_dates, y = val)) + geom_line()
# Add vertical lines to plot `p` at the `my_dates` values found in rows
# `given_points` of the plot's data.
#   p:            a ggplot object whose data has a `my_dates` column
#   given_points: row indices selecting where to draw the lines
#   ...:          passed on to geom_vline() (e.g. lty, colour)
add_lines <- function(p, given_points = c(3, 5), ...) {
  # drop = FALSE keeps a data frame even if the plot data has a single column.
  d <- p[["data"]][given_points, , drop = FALSE]
  # aes_string() is deprecated in current ggplot2; the column name is fixed,
  # so a plain aes() mapping is equivalent.
  p + geom_vline(data = d, aes(xintercept = my_dates), ...)
}
add_lines(my_plot, c(3,5), lty=2)

Resources