How to plot layers of tupples on same plot in R? - r

I am trying to plot the time and NDVI for each region on the same plot. I think to do this I have to convert the date column from characters to time and then plot each layer. However I cannot figure out how to do this. Any thoughts?
list(structure(list(observation = 1L, HRpcode = NA_character_,
timeseries = NA_character_), row.names = c(NA, -1L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(observation = 1:6, time = c("2014-01-01",
"2014-02-01", "2014-03-01", "2014-04-01", "2014-05-01", "2014-06-01"
), ` NDVI` = c("0.3793765496776215", "0.21686891782421552", "0.3785652933528299",
"0.41027240624704164", "0.4035578030242673", "0.341299793064468"
)), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
)), structure(list(observation = 1:6, time = c("2014-01-01",
"2014-02-01", "2014-03-01", "2014-04-01", "2014-05-01", "2014-06-01"
), ` NDVI` = c("0.4071076986818826", "0.09090719657570319", "0.35214166081795284",
"0.4444311032927228", "0.5220702877666005", "0.5732370503295022"
)), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
)), structure(list(observation = 1:6, time = c("2014-01-01",
"2014-02-01", "2014-03-01", "2014-04-01", "2014-05-01", "2014-06-01"
), ` NDVI` = c("0.3412131556625801", "0.18815996897460135", "0.5218904976415136",
"0.6970128777711452", "0.7229657162729096", "0.535967435470161"
)), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
)))
111

First we need to clean your data. The first element in this list is empty
df = df[-1]
Now we need to make a data.frame
df = do.call(rbind, df)
I am going to add a region variable, change the name of NDVI to remove the space,
change ndvi into a numeric vector, and change time into a Date object
library(dplyr)
df = df %>%
mutate(region = factor(rep(1:3, rep(6, 3)))) %>%
rename(ndvi = ' NDVI') %>%
mutate(ndvi = as.numeric(ndvi)) %>%
mutate(time = as.Date(time))
Now we can use ggplot2 to plot the data by region
library(ggplot2)
g = df %>%
ggplot(aes(x = time, y = ndvi, col = region)) +
geom_line()
g
Which gives this plot:

Here's an approach with lubridate to handle dates and dplyr to make the binding of the data.frames easier to understand.
Note that the group names are taken from the names of the list, and since those don't exist in the data you provided, we have to set them in advance.
library(lubridate)
library(ggplot2)
library(dplyr)
names(data) <- 1:3
data <- bind_rows(data, .id = "group")
data$time <- ymd(data$time)
setnames(data," NDVI","NDVI")
data$NDVI <- as.numeric(data$NDVI)
ggplot(data, aes(x=time,y=NDVI,color=Group)) + geom_line()

Related

How to fill dataframe in R with months and NA values

This is my dataframe:
df <- structure(list(month_date = structure(c(19117, 19149, 19180,
19212, 19244, 19275), class = "Date"), Values = c(9693, 10227,
10742, 11672, 10565, 10080)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
I need to increse the column month_date until "2023-12-01" with "NA" values.
The output should be a datframe with months until "2023-12-01" and on the Values column filled by "NA" values starting on "2022-11-01".
How can I do this?
library(tidyr)
complete(df, month_date = seq(min(month_date), as.Date("2023-12-01"),
by = '1 day'))
You can also create a separate dataframe/tibble if for some reason you do not want to use tidyr()
add <- data.frame(month_date = seq.Date(as.Date("2022-11-01"), as.Date("2023-12-01"), by = "month"), Values = NA)
final <- rbind(df, add)

Re-organizing nested list elements in R

I have a long list that has elements including charactor and numeric vectors. I want to re-organize them to the data frame format.
Let's say this is the structure of two first elements of the list_a:
list_a <- list(ENSG00000040608 = list(var = c("chr22_20230714_G_A_b38",
"chr22_20230737_G_A_b38", "chr22_20231229_T_A_b38", "chr22_20231474_G_A_b38",
"chr22_20231667_C_G_b38", "chr22_20231957_G_C_b38", "chr22_20231969_G_T_b38",
"chr22_20232125_G_A_b38", "chr22_20232392_G_A_b38", "chr22_20232643_A_C_b38"
), coeff = c(0.0000953181301665087, -0.00124036704551427, 0.000808061558738542,
0.000387528601423933, -0.000120624028990859, -0.00119982510044018,
-0.000120623899338185, -0.000435011907222715, 0.000715410285903684,
-0.000347899088267475)), ENSG00000040608 = list(var = c("chr22_20230714_G_A_b38",
"chr22_20230737_G_A_b38", "chr22_20231229_T_A_b38", "chr22_20231474_G_A_b38",
"chr22_20231667_C_G_b38", "chr22_20231957_G_C_b38", "chr22_20231969_G_T_b38",
"chr22_20232125_G_A_b38", "chr22_20232392_G_A_b38", "chr22_20232643_A_C_b38"
), coeff = c(0.0000953181301665087, -0.00124036704551427, 0.000808061558738542,
0.000387528601423933, -0.000120624028990859, -0.00119982510044018,
-0.000120623899338185, -0.000435011907222715, 0.000715410285903684,
-0.000347899088267475)))
The desire output as list_b:
list_b <- list(ENSG00000040608 = structure(list(chr22_20230714_G_A_b38 = -0.000347899088267475,
chr22_20230737_G_A_b38 = 0.0000953181301665087, chr22_20231229_T_A_b38 = -0.00124036704551427,
chr22_20231474_G_A_b38 = 0.000808061558738542, chr22_20231667_C_G_b38 = 0.000387528601423933,
chr22_20231957_G_C_b38 = -0.000120624028990859, chr22_20231969_G_T_b38 = -0.00119982510044018,
chr22_20232125_G_A_b38 = -0.000120623899338185, chr22_20232392_G_A_b38 = -0.000435011907222715,
chr22_20232643_A_C_b38 = -0.000347899088267475), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame")), ENSG00000040608 = structure(list(
chr22_20230714_G_A_b38 = -0.000347899088267475, chr22_20230737_G_A_b38 = 0.0000953181301665087,
chr22_20231229_T_A_b38 = -0.00124036704551427, chr22_20231474_G_A_b38 = 0.000808061558738542,
chr22_20231667_C_G_b38 = 0.000387528601423933, chr22_20231957_G_C_b38 = -0.000120624028990859,
chr22_20231969_G_T_b38 = -0.00119982510044018, chr22_20232125_G_A_b38 = -0.000120623899338185,
chr22_20232392_G_A_b38 = -0.000435011907222715, chr22_20232643_A_C_b38 = -0.000347899088267475), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame")))
Also I need to have those numbers as integer.
I would appreciate your guidance.
base R
lapply(list_a, function(x) {
df <- as.data.frame(t(x[[2]]))
colnames(df) <- x[[1]]
df
})
tidyverse
library(tidyverse)
map(list_a, ~ .x %>%
as_tibble() %>%
pivot_wider(names_from = "var", values_from = "coeff"))

Bar chart with ID indicator

My data:
structure(list(Id = c(8378563200, 5577150313, 4702921684, 4388161847,
7086361926, 6962181067, 4445114986, 5553957443, 4319703577, 1503960366,
2026352035, 3977333714, 6117666160, 8792009665, 4020332650, 2347167796
), total_steps = c(8377719, 6477458, 7174818, 7710336, 6972600,
9412809, 4163404, 8276690, 5858684, 9390475, 4832044, 9227036,
3551544, 806370, 562272, 2570310), total_sleeps = c(427769, 336960,
349432, 285324, 337125, 430528, 334335, 445408, 384183, 279217,
439363, 246660, 241304, 189515, 86645, 120636), total_calories = c(3302554,
2620514, 2482164, 2205930, 1909368, 1904733, 1897616, 1802526,
1642368, 1407725, 1337280, 1271480, 1139616, 853605, 591680,
551730)), row.names = c(NA, -16L), class = c("tbl_df", "tbl",
"data.frame"))
I am looking for a way to build a bar chart in R Languages with the Id indicator on top of each bar chart showing the calories burned, total steps and sleeps. Or I can try to facet them with 24 respondents indicating the calories burned, total steps and sleep. Please tell me which way is better or consult me on a better way to visualize it.
Maybe something like this:
Bring you data in lang format with pivot_longer
transform Id and name to factor type
use ggplot
library(tidyverse)
df %>%
pivot_longer(
-Id
) %>%
mutate(Id = factor(Id),
name=factor(name, levels = c("total_steps", "total_sleeps", "total_calories"))) %>%
ggplot(aes(x=Id, y=value, fill=name, label=value))+
geom_col(position=position_stack()) +
geom_text(aes(label = value,family = "serif"), position = position_stack(vjust = 0.5))+
theme_bw()
data:
df <- structure(list(Id = c(8378563200, 5577150313, 4702921684, 4388161847,
7086361926, 6962181067, 4445114986, 5553957443, 4319703577, 1503960366,
2026352035, 3977333714, 6117666160, 8792009665, 4020332650, 2347167796
), total_steps = c(8377719, 6477458, 7174818, 7710336, 6972600,
9412809, 4163404, 8276690, 5858684, 9390475, 4832044, 9227036,
3551544, 806370, 562272, 2570310), total_sleeps = c(427769, 336960,
349432, 285324, 337125, 430528, 334335, 445408, 384183, 279217,
439363, 246660, 241304, 189515, 86645, 120636), total_calories = c(3302554,
2620514, 2482164, 2205930, 1909368, 1904733, 1897616, 1802526,
1642368, 1407725, 1337280, 1271480, 1139616, 853605, 591680,
551730)), row.names = c(NA, -16L), class = c("tbl_df", "tbl",
"data.frame"))

Merging data frames to plot multiple time series using plyr

I want to plot multiple time series on one plot. Currently I can plot them all individually but not together. How can I join the data because the years are split by decimals.
What I basically want to end up with is this Plotting multiple time-series in ggplot (See plot in the first answer)
library(tidyverse)
library(plyr)
theme_set(theme_bw(10))
Sydney1<-read.csv("Sydney1.csv",header=TRUE)
Sydney2<-read.csv("Sydney2.csv",header=TRUE)
Eden<-read.csv("Eden.csv",header=TRUE)
StonyBay<-read.csv("Stonybay.csv",header=TRUE)
LowHead<-read.csv("Lowhead.csv",header=TRUE)
Hobart<-read.csv("Hobart.csv",header=TRUE)
Devonport<-read.csv("Devonport.csv",header=TRUE)
Freemantle<-read.csv("Freemantle.csv",header=TRUE)
ggplot(Sydney1,aes(x=Year,y=SLR))+geom_line(aes(color="Sydney1"))
ggplot(Sydney2,aes(x=Year,y=SLR))+geom_line(aes(color="Sydney2"))
ggplot(Eden,aes(x=Year,y=SLR))+geom_line(aes(color="Eden"))
ggplot(StonyBay,aes(x=Year,y=SLR))+geom_line(aes(color="StonyBay"))
ggplot(LowHead,aes(x=Year,y=SLR))+geom_line(aes(color="Lowhead"))
ggplot(Hobart,aes(x=Year,y=SLR))+geom_line(aes(color="Hobart"))
ggplot(Devonport,aes(x=Year,y=SLR))+geom_line(aes(color="Devonport"))
ggplot(Freemantle,aes(x=Year,y=SLR))+geom_line(aes(color="Freemantle"))
#Sydney 1
structure(list(Year = c(1886.0417, 1886.125, 1886.2083, 1886.2917,
1886.375, 1886.4583), SLR = c(6819L, 6942L, 6980L, 6958L, 7015L,
6892L)), row.names = c(NA, 6L), class = "data.frame")
#Sydney 2
structure(list(Year = c(1914.4583, 1914.5417, 1914.625, 1914.7083,
1914.7917, 1914.875), SLR = c(7022L, 6963L, 6915L, 6924L, 6866L,
6956L)), row.names = c(NA, 6L), class = "data.frame")
#Eden
structure(list(Year = c(1986.7917, 1986.875, 1986.9583, 1987.0417,
1987.125, 1987.2083), SLR = c(7003L, 6942L, 6969L, 7067L, NA,
7015L)), row.names = c(NA, 6L), class = "data.frame")
#Stony Bay
structure(list(Year = c(1993.0417, 1993.125, 1993.2083, 1993.2917,
1993.375, 1993.4583), SLR = c(6826L, 6868L, 6796L, 6862L, 6893L,
6951L)), row.names = c(NA, 6L), class = "data.frame")
#Low head
structure(list(Year = c(2010.125, 2010.2083, 2010.2917, 2010.375,
2010.4583, 2010.5417), SLR = c(6971L, 6968L, 7030L, 7088L, 7063L,
7035L)), row.names = c(NA, 6L), class = "data.frame")
#Hobart
structure(list(Year = c(1987.875, 1987.9583, 1988.0417, 1988.125,
1988.2083, 1988.2917), SLR = c(6916L, 6870L, 6930L, 6870L, 6820L,
6817L)), row.names = c(NA, 6L), class = "data.frame")
#Devonport
structure(list(Year = c(1989.875, 1989.9583, 1990.0417, 1990.125,
1990.2083, 1990.2917), SLR = c(6976L, 7025L, 7030L, 7046L, 6999L,
7055L)), row.names = c(NA, 6L), class = "data.frame")
#Freemantle
structure(list(Year = c(1897.0417, 1897.125, 1897.2083, 1897.2917,
1897.375, 1897.4583), SLR = c(6542L, 6524L, 6557L, 6655L, 6648L,
6729L)), row.names = c(NA, 6L), class = "data.frame")
Using the data in the Note at the end first accumulate the series in a list L -- we assume any data frame having column names Year and SLR is to be added -- and then convert that to a single zoo object and plot it using autoplot.zoo which uses ggplot2. Remove the facet = NULL argument if you want them plotted in separate facets.
library(ggplot2)
library(zoo)
is_city_df <- function(x) is.data.frame(x) && identical(names(x), c("Year", "SLR"))
L <- Filter(is_city_df, mget(ls()))
z <- do.call("merge", lapply(L, read.zoo))
autoplot(z, facet = NULL)
Note
We assume the following inputs:
Sydney1 <-
structure(list(Year = c(1886.0417, 1886.125, 1886.2083, 1886.2917,
1886.375, 1886.4583), SLR = c(6819L, 6942L, 6980L, 6958L, 7015L,
6892L)), row.names = c(NA, 6L), class = "data.frame")
Sydney2 <-
structure(list(Year = c(1914.4583, 1914.5417, 1914.625, 1914.7083,
1914.7917, 1914.875), SLR = c(7022L, 6963L, 6915L, 6924L, 6866L,
6956L)), row.names = c(NA, 6L), class = "data.frame")
Eden <-
structure(list(Year = c(1986.7917, 1986.875, 1986.9583, 1987.0417,
1987.125, 1987.2083), SLR = c(7003L, 6942L, 6969L, 7067L, NA,
7015L)), row.names = c(NA, 6L), class = "data.frame")
Freemantle <-
structure(list(Year = c(1897.0417, 1897.125, 1897.2083, 1897.2917,
1897.375, 1897.4583), SLR = c(6542L, 6524L, 6557L, 6655L, 6648L,
6729L)), row.names = c(NA, 6L), class = "data.frame")
Assuming that your global environment only contains the dataframes you want to plot:
bind_rows(mget(ls()), .id = "City") %>%
ggplot(aes(x = Year, y = SLR, color = City)) +
geom_line()
Instead of creating multiple objects in the global env, it can be read all at once in a loop
library(dplyr)
library(purrr)
library(ggplot2)
library(readr)
files <- c("Sydney1.csv", "Sydney2.csv", "Eden.csv", "Stonybay.csv",
"Lowhead.csv", "Hobart.csv", "Devonport.csv", "Freemantle.csv")
map_dfr(files, ~ read_csv(.x) %>%
mutate(City = .x)) %>%
ggplot(aes(x = Year, y = SLR, color = City)) +
geom_line())
Try this:
Sydney1 %>% mutate(city = 'sydney1') %>%
bind_rows(Sydney2 %>% mutate(city = 'sydney2')) %>%
bind_rows(Eden %>% mutate(city = 'eden')) %>%
bind_rows(Freemantle %>% mutate(city = 'freemantle')) %>%
ggplot(aes(x=Year, y=SLR, color=city)) + geom_line() + facet_wrap(~city, scale='free')

values become NA after use left_join() function in r

The value in another data frame becomes NA after I used left_join() function. And I check the answer at here[dplyr::left_join produce NA values for new joined columns.
I also specify the by arguement but failed.
I don't know why.
qx_p2 <- structure(list(province = c("安徽", "安徽", "安徽", "安徽", "安徽"
), date = c("2020-01-21", "2020-01-22", "2020-01-23", "2020-01-24",
"2020-01-25"), PRS = c(1013.9035387141, 1011.48779584751, 1014.28302402211,
1019.16970261716, 1018.92203467498), PRS_Sea = c(1024.73084750567,
1022.22210612717, 1025.02632842026, 1029.97905104403, 1029.77650132275
), PRS_Max = c(1014.26828869048, 1011.80445613662, 1014.51628117914,
1019.43671957672, 1019.31935504063), PRS_Min = c(1013.7138513322,
1011.13447054516, 1013.86811271731, 1018.75406934996, 1018.62469257842
), WIN_S_Max = c(2.30187606292517, 2.08586132369615, 2.76893908257748,
4.22074853552532, 3.63427225056689), WIN_S_Inst_Max = c(3.44360343442933,
3.09963836923658, 4.28499952758881, 6.68930898053666, 5.80619165721844
), WIN_D_INST_Max = c(116.878029336735, 218.745851048753, 120.88310303288,
72.1640447845805, 72.0331526360544), WIN_D_Avg_2mi = c(116.23329724764,
210.524530689871, 113.104009452075, 68.7694017991261, 70.322008604388
), WIN_S_Avg_2mi = c(1.77558118386243, 1.49959490740741, 2.20936874055178,
3.47942613851096, 2.99431642101285), WIN_D_S_Max = c(116.68018866665,
218.180671371681, 120.40502999811, 71.0831467309146, 68.3670670351474
), TEM = c(3.81968088624339, 5.16464226662887, 6.82721856103553,
5.98099596088435, 4.8940626181028), TEM_Max = c(4.04776301492819,
5.35075514928193, 6.97597470238095, 6.15192401266062, 5.07960293839758
), TEM_Min = c(3.49020455404384, 4.95346053004535, 6.65049142573696,
5.85618067365835, 4.76455794123205), RHU = c(85.9359859221466,
96.1710766250945, 91.749678760393, 88.3347741874528, 80.693040202192
), VAP = c(6.98015376984127, 8.55406509826153, 9.08114866780046,
8.27843124055178, 6.98599714191232), RHU_Min = c(83.965092356387,
95.6411387471655, 90.9997401738473, 87.3134436413454, 79.2219635770975
), PRE_1h = c(0.102133763227513, 0.422205333522298, 1488.33246492347,
0.0715384070294785, 372.116791028911)), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -5L))
covid_p2 <- structure(list(province = c("安徽", "安徽", "安徽", "安徽", "安徽"
), date = c("2020/1/21", "2020/1/22", "2020/1/23", "2020/1/24",
"2020/1/25"), 新增确诊 = c(0L, 1L, 14L, 24L, 21L)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -5L))
dat2 <- covid_p2 %>% left_join(qx_p2, by = c('province' = 'province', 'date' = 'date'))
dat2
Your date columns are character columns and do not have the same format:
qx_p2$date
# "2020-01-21" "2020-01-22" "2020-01-23" "2020-01-24" "2020-01-25"
covid_p2$date
# "2020/1/21" "2020/1/22" "2020/1/23" "2020/1/24" "2020/1/25"
You can get them in the same format by, for example, applying as.Date(...):
covid_p2$date <- as.Date(covid_p2$date)
qx_p2$date <- as.Date(qx_p2$date)
After that, your join works.

Resources