How could I select special part of dataset? - r

i have a dataset: 
 
x = data.frame(store=c("store1", "store1", "store1","store2","store2", "store3", "store3", "store4", "store4", "store4"),  
                  
                 pos=c("room1", "room2", "room2", "room1", "room1", "room1", "room1", "room2", "room2", "room3"),  
                  
                 error=c("error1", "error2", "error2", "error5", "error6", "error2", "error3", "error1", "error3", "error2"),  
               time = c("10:00:14", "10:00:44", "10:20:31", "10:24:11", "10:55:14", "10:20:10", "10:44:12", "10:04:34", "12:34:55", "10:12:17") 
                  
 )   
 
I want to select rows which have error2 or error5 in error column and maximum time in time column for each store and pos. How could i do it?
So new dataset must be like this:
x_new = data.frame(store=c("store1","store2", "store3", "store4"),
pos=c("room2", "room1", "room1", "room3"),
error=c("error2", "error5", "error2", "error2"),
time = c("10:20:31", "10:24:11", "10:20:10", "10:12:17")
)

library(tidyverse)
library(chron)
x %>%
mutate(time = chron::as.times(time)) %>%
group_by(store, pos, error) %>%
filter(error %in% c("error2", "error5")) %>%
summarise(time = max(time, na.rm = T))

Related

I'm trying to clean my data and the following error keeps appearing:

M_aff <- API_info$affil_info names(API_info$affil_info) <- c("entry_number", "doi", "affiliation_name", "affiliation_city", "affiliation_country", "affiliation_id") filter(is.na(as.numeric(gsub("([0-9]+).*$", "\1", affiliation_name)))) %>% mutate(affiliation_name = str_replace_all(affiliation_name, "&", "&"), affiliation_name = stringi::stri_trans_general(affiliation_name, "ASCII"), affiliation_name = str_replace_all(affiliation_name, "\.|\(|\)|\-|\\", " "), affiliation_name = trimws(affiliation_name, which = "both")) %>% filter(str_detect(affiliation_name, "netflix")) %>% distinct(entry_number, affiliation_id, .keep_all = T) %>% left_join(cleaned_data %>% select(entry_number, citations, year), by = "entry_number")
ERROR:Error in is.factor(x) : object 'affiliation_name' not found
The M_aff is the set of observations with 6 variables.
The "netflix" file is the file with the raw data.
I also am not sure if i should be using filter, I want to clean my data (remove the false positives): the "netflix" file that is a list of documents published by netflix.
I extracted the Netflix file as following:
api_key <- '26d72e4e014f3c18226cd4bf29725557' set_api_key(api_key) netflix <- scopus_search(api_key ='26d72e4e014f3c18226cd4bf29725557', query = "affil(netflix)", count=1) df_netflix = gen_entries_to_df(netflix$entries) save(df_netflix, file = paste(getwd(), 'netflix/input_data/df_netflix.RData', sep='/'))
Then I applied the following code to clean:
cleaned_data <- df_netflix$df %>%
select("eid", "dc:title", "dc:creator", "prism:publicationName", "prism:issn", "prism:eIssn", "prism:coverDate", "prism:doi",
"citedby-count", "subtypeDescription", "entry_number") %>%
rename(title = "dc:title",
creator = "dc:creator",
journal = "prism:publicationName",
date = "prism:coverDate",
doi = "prism:doi",
citations = "citedby-count",
doc_type = "subtypeDescription",
issn_og = "prism:issn",
eissn_og = "prism:eIssn") %>%
mutate(year = str_sub(date,start=1, end=4),
issn = case_when(is.na(issn_og) & !is.na(eissn_og) ~ eissn_og,
!is.na(issn_og) & is.na(eissn_og) ~ issn_og,
!is.na(issn_og) & !is.na(eissn_og) ~ paste(issn_og, eissn_og, sep=', '),
TRUE ~ 'NA'
)) %>%
left_join(sjrdata::sjr_journals %>% select(year, issn, sjr, sjr_best_quartile, h_index), by=c('year','issn'))
openxlsx::write.xlsx(cleaned_data, file= paste(getwd(), 'netflix/output_data/netflix.xlsx', sep='/'))
save(cleaned_data, file = paste(getwd(), 'netflix/input_data/cleaned_data.RData', sep='/'))
api_key <- '26d72e4e014f3c18226cd4bf29725557'
set_api_key(api_key)
doi_not_na <-
cleaned_data %>%
as_tibble() %>%
select(entry_number, doi) %>%
filter(!is.na(doi))
API_info <- get_api_info(doi_not_na)
save(API_info, file = paste(getwd(), "netflix/input_data/API_info_netflix.RData", sep = "/"))
Then I was trying to continue to clean data as following:
M_aff <-
API_info$affil_info
names(API_info$affil_info) <- c("entry_number", "doi", "affiliation_name", "affiliation_city", "affiliation_country", "affiliation_id")
filter(is.na(as.numeric(gsub("([0-9]+).*$", "\\1", affiliation_name)))) %>%
mutate(affiliation_name = str_replace_all(affiliation_name, "&", "&"),
affiliation_name = stringi::stri_trans_general(affiliation_name, "ASCII"),
affiliation_name = str_replace_all(affiliation_name, "\\.|\\(|\\)|\\-|\\\\", " "),
affiliation_name = trimws(affiliation_name, which = "both")) %>%
filter(str_detect(affiliation_name, "netflix")) %>%
distinct(entry_number, affiliation_id, .keep_all = T) %>%
left_join(cleaned_data %>%
select(entry_number, citations, year),
by = "entry_number")
true_positive= M_aff%>%
filter(grepl(pattern = 'netflix', x = tolower(affiliation_id)))%>%
select(doi)%>%
distinct()%>%
pull(doi)
M_aff=M_aff%>%
filter(doi %in% true_positive)
API_info$author_info= API_info$author_info%>%
filter(doi %in% true_positive)
API_info$subject_info=API_info$subject_info%>%
filter(doi %in% true_positive)
API_info$authkey_info= API_info$authkey_info%>%
filter(doi %in% true_positive)
save(M_aff, file = paste(getwd(), "netflix/input_data/M_aff.RData", sep = "/"))
openxlsx::write.xlsx(M_aff, file= paste(getwd(), 'netflix/output_data/netflix_aff.xlsx', sep='/'))
openxlsx::write.xlsx(API_info$author_info, file= paste(getwd(), 'netflix/output_data/netflix_auth.xlsx', sep='/'))
openxlsx::write.xlsx(API_info$subject_info, file= paste(getwd(), 'netflix/output_data/netflix_subject.xlsx', sep='/'))
openxlsx::write.xlsx(API_info$authkey_info, file= paste(getwd(), 'netflix/output_data/netflix_authkey.xlsx', sep='/'))

loading combined data in R

I have got several Excel files (see link: we.tl/t-qJl3kVcY0j) that I combine together.
Each Excel file have columns from A to AF as below:
t-ph
Load
HR
BF
V'E
V'O2
V'CO2
d O2/dW
RER
EqO2
EqCO2
PETCO2
VES (ml)
VESi (ml/m²)
FC (bpm)
QC (l/min)
IC (l/min/m²)
PAS (mmHg)
PAD (mmHg)
PAM (mmHg)
ICT
TEV (ms)
RPD (%)
WCI (kg.m/m²)
RVSi (dyn.s/cm5.m²)
RVS (dyn.s/cm5)
VTD est (ml)
FE est (%)
O2Hb
HHb
tHb
HbDiff
My previous code was working, but since I have added columns AC (HHb) to AF (HbDiff), I can't load them anymore. I have tried to match the number of columns, the title, but still not. I can't produce a reproducible example since I can't load my data.
Here is the code I used:
pacman::p_load(tidyverse, readxl, ggpubr)
library(dplyr)
library(ggplot2)
library(afex) ##statistic package
# load data and format
load_files <- function(files){
temp <- read_excel(files) %>%
select(-(c(8:11, 14:15, 18:23))) %>%
mutate(id = pull(.[4,1])) %>% ##ID
mutate(body_mass = pull(.[7,3])) %>% ##body mass
mutate(training = pull(.[4,2])) %>% ##training group
set_names(c("time", "power", "hr", "fr", "VE", "absVO2", "VCO2", "PETCO2", "VES", "QC", "IC", "WCI", "RVSi", "RVS", "VTD", "FE", "O2Hb", "HHb", "tHb", "HbDiff", "id", "body_mass", "training")) %>%
slice(86:which(grepl("ration", VE))-1) %>% ##until recovery period
mutate_at(vars(1:16), as.numeric) %>%
mutate_at(vars(18), as.numeric) %>%
mutate(time = format(as.POSIXct(Sys.Date() + time), "%H:%M", tz="UTC"),
absVO2 = absVO2/1000,
VCO2 = VCO2/1000)
}
# apply function to all files
df <- map_df(file_list, load_files)
# remove those with who have less than four similar power
df <- df %>%
mutate(len_seq = rep(rle(power)$lengths, rle(power)$lengths)) %>%
filter(len_seq == 4) %>%
mutate(seq_id = rep(1:(n()/4), each = 4)) %>%
group_by(id) %>%
select(-seq_id)%>%
select(-(20))
# group data
df_sum <- df %>%
type.convert(as.is = TRUE) %>%
group_by(id, power, training) %>%
summarise_if(is.numeric, mean) %>%
group_by(id) %>%
mutate(percent_absVO2 = absVO2/max(absVO2)*100,
percent_power = power/max(power)*100,
percent_QC = QC/max(QC)*100,
percent_SV = VES/max(VES)*100,
percent_VCO2 = VCO2/max(VCO2)*100,
percent_VE = VE/max(VE)*100) %>%
mutate(VE_VO2 = VE/absVO2,
VE_VCO2 = VE/VCO2) %>%
mutate(RER = VCO2/absVO2, VT = VE/fr) %>%
mutate(relVO2 = absVO2/body_mass*1000,
percent_relVO2 = relVO2/max(relVO2)*100) %>%
mutate(BF = VE/VT) %>%
mutate(mech_perf = (power/(((0.003*power+0.1208)*1000*body_mass)/60))*100) %>%
mutate(group = ifelse(grepl(".*-PRD-C", id), "CAD", "Healthy")) %>%
mutate(temps = ifelse(grepl(".*-PRD-C1", id), "1", ifelse(grepl(".*-PRD-S1", id), "1", "2")))
Then the outcome I get:
Error in `set_names()`:
! The size of `nm` (23) must be compatible with the size of `x` (20).
Run `rlang::last_error()` to see where the error occurred.
Thank you very much for your precious help.

How to convert the nested for loop in apply function

Here I've nested loop i don't know how to convert this into apply can anyone tell me how to convert this in to apply function
plans_achievements <- function(pa_m,pa_q){
if(nrow(pa_m)==0 & nrow(pa_q==0)){
df = data.frame(a = c(""), b = c("No Data Available"))
colnames(df)=""
}else{
pa_m= pa_m%>% select(inc,month_year,Plans,Achievements,quarter_year)
colnames(pa_mon)[2] = "Period"
pa_q= pa_q%>% select(inc,quarter_year,Plans,Achievements)
colnames(pa_qtr)[2] = "Period"
df = data.frame(inc=c(""),Period=c(""),Plans=c(""),Achievements=c(""))
for (q in unique(pa_q$Period)){
df1 = pa_q[pa_q$Period==q,]
df1$Period = paste0("<span style=\"color:#288D55\">",df1$Period,"</span>")
df1$Plans = paste0("<span style=\"color:#288D55\">",df1$Plans,"</span>")
df1$Achievements = paste0("<span style=\"color:#288D55\">",df1$Achievements,"</span>")
df = rbind(df,df1)
for (m in unique(pa_m$quarter_year)){
if(m==q){
df2 = pa_m[pa_m$quarter_year==q,][-5]
df = rbind(df,df2)
}
}
}
df = df[-1,]
}
return(df)
}
DT::datatable(plans_achievements(pa_m[pa_m$inc=="vate",],pa_q[pa_q$inc=="vate",]), rownames = F,escape = FALSE,selection=list(mode="single",target="row"),options = list(pageLength = 50,scrollX = TRUE,dom = 'tp',ordering=F,columnDefs = list(list(visible=FALSE, targets=c(0)),list(className = 'dt-left', targets = '_all'))))
I could not imagine why you would want to do this, what about vectorizing the function instead?
Note a few typing errors (pa_mon instead of pa_m etc.) has been corrected below
plans_achievements2 <- function(pa_m,pa_q){
if(nrow(pa_m)==0 & nrow(pa_q==0)){
df = data.frame(a = c(""), b = c("No Data Available"))
colnames(df)=""
}else{
pa_m= pa_m%>% select(inc,month_year,Plans,Achievements,quarter_year)
colnames(pa_m)[2] = "Period"
pa_q= pa_q%>% select(inc,quarter_year,Plans,Achievements)
colnames(pa_q)[2] = "Period"
df <- pa_q
df$Period <- paste0("<span style=\"color:#288D55\">",df$Period,"</span>")
df$Plans = paste0("<span style=\"color:#288D55\">",df$Plans,"</span>")
df$Achievements = paste0("<span style=\"color:#288D55\">",df$Achievements,"</span>")
return(rbind(df, pa_m[pa_m$quarter_year %in% pa_q$Period,-5])[-1, ])
}
return(df)
}

data.frame to json in R

I have a data.frame that looks like this:
test <- data.frame(ID = c('1','1','1','1','1','1','1','1','2','2','2','2','2','2','2','2',
'3','3','3','3','3','3','3','3','4','4','4','4','4','4','4','4',
'5','5','5','5','5','5','5','5','6','6','6','6','6','6','6','6'),
CAT = c('CAT1','CAT1','CAT1','CAT1','CAT2','CAT2','CAT2','CAT2',
'CAT1','CAT1','CAT1','CAT1','CAT2','CAT2','CAT2','CAT2',
'CAT1','CAT1','CAT1','CAT1','CAT2','CAT2','CAT2','CAT2',
'CAT1','CAT1','CAT1','CAT1','CAT2','CAT2','CAT2','CAT2',
'CAT1','CAT1','CAT1','CAT1','CAT2','CAT2','CAT2','CAT2',
'CAT1','CAT1','CAT1','CAT1','CAT2','CAT2','CAT2','CAT2'),
CODE = c('code1','code2','code3','code4','code1','code2','code3','code4',
'code1','code2','code3','code4','code1','code2','code3','code4',
'code1','code2','code3','code4','code1','code2','code3','code4',
'code1','code2','code3','code4','code1','code2','code3','code4',
'code1','code2','code3','code4','code1','code2','code3','code4',
'code1','code2','code3','code4','code1','code2','code3','code4'),
DATE = c('date1', 'date2', 'date3','date4','date1','date2','date3','date4',
'date1', 'date2', 'date3','date4','date1','date2','date3','date4',
'date1', 'date2', 'date3','date4','date1','date2','date3','date4',
'date1', 'date2', 'date3','date4','date1','date2','date3','date4',
'date1', 'date2', 'date3','date4','date1','date2','date3','date4',
'date1', 'date2', 'date3','date4','date1','date2','date3','date4'),
stringsAsFactors = F)
I would like to have like following:
[
{"id": 1,
"CAT1": ['code1', 'code2','code3', 'code4'],
"CAT1_dates": ['date1', 'date2','date3','date4'],
"CAT2": ['code1', 'code2','code3', 'code4'],
"CAT2_dates": ['date1', 'date2','date3','date4'],
}
{"id": 2,
"CAT1": ['code1', 'code2','code3', 'code4'],
"CAT1_dates": ['date1', 'date2','date3','date4'],
"CAT2": ['code1', 'code2','code3', 'code4'],
"CAT2_dates": ['date1', 'date2','date3','date4'],
}
]
I understood that i need to write a function to do that job. I was not successfull.
From dataFrame to grouped Json in R
convert date frame to json in R
One method using dplyr (for nesting and pivoting) and jsonlite:
library(dplyr)
library(tidyr) # pivot_wider
# library(jsonlite)
test %>%
group_by(ID, CAT) %>%
summarize(x = list(CODE), x_dates = list(DATE)) %>%
pivot_wider(ID, names_from = "CAT", values_from = c("x", "x_dates"),
names_glue = "{CAT}{gsub('^x','',.value)}") %>%
ungroup() %>%
jsonlite::toJSON(pretty = TRUE)
# [
# {
# "ID": "1",
# "CAT1": ["code1", "code2", "code3", "code4"],
# "CAT2": ["code1", "code2", "code3", "code4"],
# "CAT1_dates": ["date1", "date2", "date3", "date4"],
# "CAT2_dates": ["date1", "date2", "date3", "date4"]
# },
# {
# "ID": "2",
# "CAT1": ["code1", "code2", "code3", "code4"],
# "CAT2": ["code1", "code2", "code3", "code4"],
# "CAT1_dates": ["date1", "date2", "date3", "date4"],
# "CAT2_dates": ["date1", "date2", "date3", "date4"]
# },
# ...truncated...
# ]
It's certainly feasible to do this in base R or data.table if needed, though admittedly not as smoothly as that.

Add new variable (column) in the fly to a reactive dataframe in Shiny

I am trying to add in a reactive dataframe the outpust of both a non-linear regression model and a multivariate analysis. I managed to create the reactive dataframe which is updated anytime I filter my data. I now want to update the model outputs whenever I filter the dataframe and add the prediction values of the model to the reactive dataframe. Below is a subset of the dataset I am using as well as the ui and server files I use to create the shiny App.
Load package
library (shiny)
library(ggvis)
library(dplyr)
library(rbokeh)
library (minpack.lm)
library (hydroGOF)
library(caret)
The dataframe I use:
Flux_Data_df<- structure(list(Site_ID = structure(c(1L, 3L, 5L, 7L, 8L), .Label = c("AR-Slu",
"AR-Vir", "AU-Tum", "AU-Wac", "BE-Bra", "BE-Jal", "BE-Vie", "BR-Cax",
"BR-Ma2", "BR-Sa1", "BR-Sa3", "BW-Ma1", "CA-Ca1", "CA-Ca2", "CA-Ca3",
"CA-Gro", "Ca-Man", "CA-NS1", "CA-NS2", "CA-NS3", "CA-NS4", "CA-NS5",
"CA-NS6", "CA-NS7", "CA-Oas", "CA-Obs", "CA-Ojp", "CA-Qcu", "CA-Qfo",
"CA-SF1", "CA-SF2", "CA-SF3", "CA-SJ1", "CA-SJ2", "CA-SJ3", "CA-TP1",
"CA-TP2", "CA-TP3", "CA-TP4", "CA-Wp1", "CN-Bed", "CN-Cha", "CN-Din",
"CN-Ku1", "CN-Qia", "CZ-Bk1", "De-Bay", "DE-Hai", "DE-Har", "DE-Lkb",
"DE-Meh", "DE-Obe", "DE-Tha", "DE-Wet", "DK-Sor", "ES-Es1", "FI-Hyy",
"FI-Sod", "FR-Fon", "FR-Hes", "FR-Lbr", "FR-Pue", "GF-Guy", "ID-Pag",
"IL-Yat", "IS-Gun", "IT-Col", "IT-Cpz", "IT-Lav", "IT-Lma", "IT-Noe",
"IT-Non", "IT-Pt1", "IT-Ro1", "IT-Ro2", "IT-Sro", "JP-Tak", "JP-Tef",
"JP-Tom", "MY-Pso", "NL-Loo", "PA-Spn", "PT-Esp", "RU-Fyo", "RU-Skp",
"RU-Zot", "SE-Abi", "SE-Fla", "SE-Nor", "SE-Sk1", "SE-Sk2", "SE-St1",
"UK-Gri", "UK-Ham", "US-Bar", "US-Blo", "US-Bn1", "US-Bn2", "Us-Bn3",
"US-Dk2", "US-Dk3", "US-Fmf", "US-Fuf", "US-Fwf", "US-Ha1", "US-Ha2",
"US-Ho1", "US-Ho2", "US-Lph", "US-Me1", "US-Me3", "US-Me4", "US-Me6",
"US-Moz", "US-NC1", "US-Nc2", "US-NR1", "US-Oho", "US-So2", "US-So3",
"US-Sp1", "US-Sp2", "US-Sp3", "US-Syv", "US-Umb", "US-Wbw", "US-Wcr",
"US-Wi0", "US-Wi1", "US-Wi2", "US-Wi4", "US-Wi8", "VU-Coc", "CA-Cbo",
"CN-Lao", "ID-Buk", "JP-Fuj", "RU-Ab", "RU-Be", "RU-Mix"), class = "factor"),
Ecosystem = structure(c(5L, 3L, 5L, 5L, 3L), .Label = c("DBF",
"DNF", "EBF", "ENF", "MF", "SHB", "WSA"), class = "factor"),
Climate = structure(c(3L, 3L, 3L, 3L, 4L), .Label = c("Arid",
"Continental", "Temperate", "Tropical"), class = "factor"),
Management = structure(c(4L, 2L, 3L, 4L, 4L), .Label = c("High",
"Low", "Moderate", "None"), class = "factor"), Stand_Age = c(50,
99, 77.0833333333333, 66.2, 97), NEP = c(1262.24986565392,
251.665998718498, 89.590110051402, 467.821910494384, 560),
GPP = c(2437.9937774539, 1837.82835206203, 1353.91140903122,
1740.68843840394, 3630), NEP_GPP = c(0.517741217113419, 0.143353622997247,
0.0760076059028116, 0.270737440100469, 0.1542699725), Uncert = c(7.29706486170583,
12.3483066698996, 7.59406340226036, 8.2523670901841, 12.1
), Gap_filled = c(0.953310527540233, 0.969648973753497, 0.9395474605477,
0.923408280276339, 1), MAT = c(19.0438821722383, 9.67003296799878,
10.7728316162948, 8.2796213684244, 27.341666667), MAT_An = c(-0.0413522012578611,
0.840055031446541, 0.705896226415094, 0.805524109014675,
0.191666666666667), MAT_Trend = c(0.0119577487502016, 0.0196238509917756,
0.0305871364833632, 0.0381007095629741, 0.0194619147449338
), MAP = c(351.700001291931, 1107.49999958277, 844.158499979666,
998.205467054248, 2279.5), MAP_CRU = c(592.2, 850.925, 852.591666666667,
1098.98, 2279.5), SPI_CRU_Mean = c(-0.352735702252502, 0.188298093749456,
0.0830157542916604, 0.397632136136383, 1.31028809089487),
MAP_An = c(4.14188988095238, -15.8198660714286, 5.39074900793651,
2.28799107142857, 1.55565476190476), MAP_Trend = c(1.38787584993337,
0.147192657259031, 0.747167885331603, 0.104885622031644,
0.841903850753408), CEC_Total_1km = c(14.05, 10.25, 17.975,
21, 9.95), Clay_Silt = c(36.65, 42.125, 32.275, 55, 54.825
), Clay_1km = c(26.425, 31.425, 11.25, 22.45, 38.075), Silt_1km = c(10.225,
10.7, 21.025, 32.55, 16.75), Sand_1km = c(63.35, 57.325,
67.65, 45, 45.275), NOy = c(1.73752826416889, 2.76055219091326,
4.96187381895543, 5.06857284157762, 0.90948457442513), NHx = c(2.50363534311763,
2.99675999696687, 11.2747222582845, 13.9207300067467, 1.53292533883169
), Soil_C_1km = c(3.6, 17, 23.575, 26.65, 8.15), Lat = c(-33.4648,
-35.6566, 51.3092, 50.3051, -1.72000003), Long = c(-66.4598,
148.1516, 4.5206, 5.9981, -51.4500008)), .Names = c("Site_ID",
"Ecosystem", "Climate", "Management", "Stand_Age", "NEP", "GPP",
"NEP_GPP", "Uncert", "Gap_filled", "MAT", "MAT_An", "MAT_Trend",
"MAP", "MAP_CRU", "SPI_CRU_Mean", "MAP_An", "MAP_Trend", "CEC_Total_1km",
"Clay_Silt", "Clay_1km", "Silt_1km", "Sand_1km", "NOy", "NHx",
"Soil_C_1km", "Lat", "Long"), row.names = c(NA, 5L), class = "data.frame")
Choose x and y variable to choose
axis_vars <- c(
"NEP observed [gC.m-2.y-1]" = "NEP",
"NEP predicted [gC.m-2.y-1]" = "prediction",
"CUEe" = "NEP_GPP",
"GPP [gC.m-2.y-1]" = "GPP",
"Forest Age [years]" = "Stand_Age",
"MAT [°C]" = "MAT",
"SPI" = "SPI_CRU_Mean",
"MAP [mm.y-1]" = "MAP",
"MAP trend [mm.y-1]" = "MAP_Trend",
"MAT tremd [°C.y-1]" = "MAT_Trend",
"Clay content [kg.kg-1]" = "Clay_1km",
"N deposition [kg N.ha-1.y-1]" = "NHx"
)
The ui file:
ui<- actionLink <- function(inputId, ...) {
tags$a(href='javascript:void',
id=inputId,
class='action-button',
...)
}
shinyUI(fluidPage(
titlePanel("Data exploration"),
p('Interactive tool for data exploration'),
em('by, ', a('Simon Besnard', href = 'http://www.bgc-jena.mpg.de/bgi/index.php/People/SimonBesnard')),
fluidRow(
column(4,
wellPanel(
selectInput("xvar", "X-axis variable", axis_vars, selected = "Stand_Age"),
selectInput("yvar", "Y-axis variable", axis_vars, selected = "NEP")
),
wellPanel(
h4("Filter data"),
sliderInput("Gap_filled", "Fraction gap filling", 0, 1, value = c(0, 1)),
sliderInput("Uncert", "Uncertainties", 0, 45, value = c(0, 45),
step = 1),
sliderInput("Stand_Age", "Forest age [years]", 0, 400, value = c(0, 400),
0, 400, 400, step = 5),
sliderInput("GPP", "GPP [gC.m-2.y-1]", 0, 4000, value = c(0, 4000),
0, 4000, 4000, step = 100),
sliderInput("MAT", "MAT [°C]", -10, 30, value = c(-10, 30),
-10, 30, 30, step = 1),
sliderInput("MAP", "MAP [mm.y-1]", 0, 4000, value = c(0, 4000),
0, 4000, 400, step = 100),
checkboxGroupInput("Management", "Intensity of management", c("None", "Low", "Moderate", "High"),
selected= c("None", "Low", "Moderate", "High"), inline = T),
checkboxGroupInput("Climate", "Type of climate",
c("Arid", "Continental", "Temperate", "Tropical"),
selected=c("Arid", "Continental", "Temperate", "Tropical"), inline=T),
checkboxGroupInput("Ecosystem",
label="PFTs",
choices=list("DBF", "DNF", "EBF", "ENF", "MF", "SHB"),
selected=c("DBF", "DNF", "EBF", "ENF", "MF","SHB"), inline=T)
)),
mainPanel(
navlistPanel(
tabPanel("Plot", rbokehOutput("rbokeh")),
tabPanel("Statistics", tableOutput("summaryTable")),
tabPanel("Variable importance", plotOutput("Var_Imp")),
tabPanel("Spatial distribution - Flux tower", rbokehOutput("Map_Site"))
),
downloadLink('downloadData', 'Download'))
))
)
And the server file:
server<- shinyServer(function(input, output, session) {
# A reactive expression for filtering dataframe
Update_df <- reactive({
# Lables for axes
xvar_name <- names(axis_vars)[axis_vars == input$xvar]
yvar_name <- names(axis_vars)[axis_vars == input$yvar]
xvar <- prop("x", as.symbol(input$xvar))
yvar <- prop("y", as.symbol(input$yvar))
Flux_Data_df %>%
filter(
Gap_filled >= input$Gap_filled[1] &
Gap_filled <= input$Gap_filled[2] &
Uncert > input$Uncert[1] &
Uncert < input$Uncert[2] &
Stand_Age >= input$Stand_Age[1] &
Stand_Age <= input$Stand_Age[2] &
GPP > input$GPP[1] &
GPP < input$GPP[2] &
MAT > input$MAT[1] &
MAT < input$MAT[2] &
MAP > input$MAP[1] &
MAP < input$MAP[2]) %>%
filter(
Management %in% input$Management &
Climate %in% input$Climate &
Ecosystem %in% input$Ecosystem) %>% as.data.frame()
})
# A reactive expression to add model predicion to a new dataframe
Update_df<- reactive({
for(id in unique(Update_df()$Site_ID)){
lm.Age<- try(nlsLM(NEP~offset + A*(1-exp(k*Stand_Age)), data = Update_df()[Update_df()$Site_ID != id,],
start = list(A= 711.5423, k= -0.2987, offset= -444.2672),
lower= c(A = -Inf, k = -Inf, offset= -1500), control = list(maxiter = 500), weights = 1/Uncert), silent=TRUE);
Update_df()$f_Age[Update_df()$Site_ID == id] <- predict(object = lm.Age, newdata = Update_df()[Update_df()$Site_ID == id,])
} %>% as.data.frame()
})
#Plot scatter plot
output$rbokeh <- renderRbokeh({
plot_data<- Update_df()
g<- figure() %>%
ly_points(x = input$xvar, y = input$yvar, data=plot_data, hover= c(Site_ID, year)) %>%
x_axis("x", label = names(axis_vars)[axis_vars == input$xvar]) %>%
y_axis("y", label = names(axis_vars)[axis_vars == input$yvar])
return(g)
})
output$Map_Site <- renderRbokeh({
plot_data<- Update_df()
p<- gmap(lat=0, lng=0, zoom = 2, width = 600, height = 600, map_type ="hybrid") %>%
ly_points(x=Long, y=Lat, data = plot_data, hover= c(Site_ID), col = "red", size=5) %>%
tool_box_select() %>%
tool_lasso_select() %>%
tool_reset()
return(p)
})
output$downloadData <- downloadHandler(
filename = function() {
paste('data-', Sys.Date(), '.csv', sep='')
},
content = function(con) {
write.csv(data, con)
}
)
})
shinyApp(ui, server)
Basically, I would like to add a prediction column to the updated dataframe anytime a filtering action is done in the shiny app based on the filtering set-up in the ui file. Anyone can help me out with it?
Here is the way the server.R file should be done:
# Provide R code to build the object.
shinyServer(function(input, output, session) {
# A reactive expression for filtering dataframe
Update_df1 <- reactive({
Flux_Data_df %>%
filter(
Gap_filled >= input$Gap_filled[1] &
Gap_filled <= input$Gap_filled[2] &
Uncert > input$Uncert[1] &
Uncert < input$Uncert[2] &
Stand_Age >= input$Stand_Age[1] &
Stand_Age <= input$Stand_Age[2] &
GPP > input$GPP[1] &
GPP < input$GPP[2] &
MAT > input$MAT[1] &
MAT < input$MAT[2] &
MAP > input$MAP[1] &
MAP < input$MAP[2]) %>%
filter(
Management %in% input$Management &
Disturbance %in% input$Disturbance &
Climate %in% input$Climate &
Ecosystem %in% input$Ecosystem) %>% as.data.frame()
})
# A reactive expression to add model predicion to a new dataframe
Age<-reactive({
prediction<- Update_df1()
for(id in unique(prediction$Site_ID)){
lm_Age<- try(nlsLM(NEP~offset + A*(1-exp(k*Stand_Age)), data = prediction[prediction$Site_ID != id,],
start = list(A= 711.5423, k= -0.2987, offset= -444.2672),
lower= c(A = -Inf, k = -Inf, offset= -1500), control = list(maxiter = 500), weights = 1/Uncert), silent=TRUE)
prediction$f_Age[prediction$Site_ID == id] <- predict(object = lm_Age, newdata = prediction[prediction$Site_ID == id,])
}
return(prediction)
})
Final_df<-reactive({
df<- Age()
for(id in unique(df$Site_ID)){
lm_NEP<- lm(NEP~ (f_Age + Stand_Age + GPP)^2 +
Clay_1km + GPP:MAP + SPI_CRU_Mean:NHx + Stand_Age:NHx,
data = df[df$Site_ID != id,], weights = 1/Uncert)
df$prediction[df$Site_ID == id] <- predict(object = lm_NEP, newdata = df[df$Site_ID == id,])
}
return(df)
})
Model_Performance<- reactive({
Stat<- data.frame(matrix(ncol = 3, nrow = 1))
colnames(Stat)<- c("R2", "MEF", "RMSE")
Stat$R2<- round(cor(Final_df()$prediction, Final_df()$NEP, use="complete")^2, digits = 2)
Stat$RMSE <- round(rmse(Final_df()$prediction, Final_df()$NEP), digits = 2)
Stat$MEF<-round(NSE(Final_df()$prediction, Final_df()$NEP, na.rm=TRUE), digits=2)
return(Stat)
})
Var_Imp<- reactive({
Imp<- data.frame(matrix(ncol = 7, nrow = 1))
colnames(Imp)<- c("Age", "GPP*Age", "GPP*MAP", "Clay content", "Ndepo*SPI", "GPP", "Ndepo*Age")
VarImp_NEP<- varImp(lm(NEP ~ (f_Age + Stand_Age + GPP)^2 +
Clay_1km + GPP:MAP + SPI_CRU_Mean:NHx + Stand_Age:NHx,
data=Final_df(), weights = 1/Uncert))
Imp$Age<- (VarImp_NEP$Overall[1] + VarImp_NEP$Overall[2] + VarImp_NEP$Overall[5])/ sum(VarImp_NEP$Overall)
Imp["GPP*Age"]<- (VarImp_NEP$Overall[6] + VarImp_NEP$Overall[7])/ sum(VarImp_NEP$Overall)
Imp["GPP*MAP"]<- VarImp_NEP$Overall[8]/ sum(VarImp_NEP$Overall)
Imp["Clay content"]<- VarImp_NEP$Overall[4]/ sum(VarImp_NEP$Overall)
Imp["Ndepo*SPI"]<- VarImp_NEP$Overall[9]/ sum(VarImp_NEP$Overall)
Imp["GPP"]<- VarImp_NEP$Overall[3]/ sum(VarImp_NEP$Overall)
Imp["Ndepo*Age"]<- VarImp_NEP$Overall[10]/ sum(VarImp_NEP$Overall)
Imp<- gather(Imp)
colnames(Imp)<- c("Variable", "Percentage")
Imp$Percentage<- round(Imp$Percentage*100, digits = 1)
return(Imp)
})
#Plot Univariate
output$Univariate <- renderRbokeh({
plot_data<- Final_df()
plot_data$Stand_Age<- round(plot_data$Stand_Age, digits = 0)
plot_data$Stand_Age<- round(plot_data$Stand_Age, digits = 0)
g<- figure() %>%
ly_points(x = input$xvar, y = input$yvar, data=plot_data, hover= c(Site_ID, Stand_Age)) %>%
x_axis("x", label = names(axis_vars)[axis_vars == input$xvar]) %>%
y_axis("y", label = names(axis_vars)[axis_vars == input$yvar])
return(g)
})
#Plot model performance
output$Model_perf <- renderRbokeh({
plot_data<- Final_df()
plot_data$Stand_Age<- round(plot_data$Stand_Age, digits = 0)
g<- figure() %>%
ly_points(x = prediction, y = NEP, data=plot_data, hover= c(Site_ID, Stand_Age, Ecosystem)) %>%
ly_abline(a=0, b=1) %>%
x_axis("NEP predicted [gC.m-2.y-1]") %>%
y_axis("NEP observed [gC.m-2.y-1]") %>%
x_range(c(-700, 1500)) %>%
y_range(c(-700, 1500))
return(g)
})
#Plot Variable importance
output$Var_Imp <- renderRbokeh({
plot_data<- Var_Imp()
g<- figure() %>%
ly_points(x =Percentage, y = Variable, data=plot_data, hover= c(Percentage)) %>%
x_axis("Percentage [%]") %>%
y_axis("")
return(g)
})
output$Map_Site <- renderRbokeh({
plot_data<- Final_df()
plot_data$Stand_Age<- round(plot_data$Stand_Age, digits = 0)
p<- gmap(lat=0, lng=0, zoom = 2, width = 600, height = 1000, map_type ="hybrid") %>%
ly_points(x=Long, y=Lat, data = plot_data, hover= c(Site_ID, Stand_Age), col = "red", size=5) %>%
tool_box_select() %>%
tool_lasso_select() %>%
tool_reset() %>%
tool_resize()
return(p)
})
output$Update_data = renderDataTable({
Final_df()
})
output$Summary_Table = renderDataTable({
Model_Performance()
})
output$downloadData <- downloadHandler(
filename = function() {paste('Updated.csv', sep='') },
content = function(file) {
write.csv(Final_df(), file)
}
)
})

Resources