Insert a blank column in dataframe - r

I would like to insert a blank column in between "Delta = delta" and "Card = vars" in the dataframe below. I would also like to sort the output by the column "Model_Avg_Error" in the dataframe as well.
df = data.frame(Card = vars, Model_Avg_Error = model_error, Forecast = forecasts, Delta = delta, ,Card = vars, Model_Avg_Error = model_error,
Forecast = forecasts, Delta = delta)
# save
write.csv(df, file = file.path(proj_path, "output.csv"), row.names = F)
This was the error received from above:
Error in data.frame(Card = vars, Model_Avg_Error = model_error, Forecast = forecasts, :
argument is missing, with no default

You can add your blank column, re-order, and sort using the code below:
df$blankVar <- NA #blank column
df[c("Card", "blankVar", "Model_Avg_Error", "Forecast", "Delta")] #re-ordering columns by name
df[order(df$Model_Avg_Error),] #sorting by Model_Avg_Error

Here's a general way to add a new, blank column
library(tibble)
# Adds after the second column
iris %>% add_column(new_col = NA, .after = 2)
# Adds after a specific column (in this case, after Sepal.Width)
iris %>% add_column(new_col = NA, .after = "Sepal.Width")

Related

MCA : how to assign colors and shapes to variables categories based on their variables?

I have this data frame
df <- data.frame(subjects = 12:23,
Why_are_you_not_happy =
c(1,2,"1,2,5",5,1,2,"3,4",3,2,"1,5",3,4),
why_are_you_sad =
c("1,2,3",1,2,3,"4,5,3",2,1,4,3,1,1,1))
df
that is coverted into this format
df1 <- df %>%
separate(Why_are_you_not_happy,
sep = ",", into = c("Why_are_you_not_happy_1",
"Why_are_you_not_happy_2",
"Why_are_you_not_happy_3")) %>%
separate(why_are_you_sad,
sep = ",", into = c("why_are_you_sad_1",
"why_are_you_sad_2",
"why_are_you_sad_3"))
when applying the MCA function we take all the columns except the first one
library(FactoMineR)
library(factoextra)
#> Loading required package: ggplot2
results <- MCA(df1[,2:7])
using the following function, I wish to assign a specific shape and a specific colour for each group of categories (meaing for each variable )
fviz_mca_var(results,
repel = TRUE,
ggtheme = theme_minimal())

Error in m == q : R comparison (1) is possible only for atomic and list types

The whole function which i need to convert the for loop in to apply for optimization
plans_achievements <- function(pa_m,pa_q){
if(nrow(pa_m)==0 & nrow(pa_q==0)){
df = data.frame(a = c(""), b = c("No Data Available"))
colnames(df)=""
}else{
pa_m= pa_m%>% select(inc,month_year,Plans,Achievements,quarter_year)
colnames(pa_mon)[2] = "Period"
pa_q= pa_q%>% select(inc,quarter_year,Plans,Achievements)
colnames(pa_qtr)[2] = "Period"
df = data.frame(inc=c(""),Period=c(""),Plans=c(""),Achievements=c(""))
for (q in unique(pa_q$Period)){
df1 = pa_q[pa_q$Period==q,]
df1$Period = paste0("<span style=\"color:#288D55\">",df1$Period,"</span>")
df1$Plans = paste0("<span style=\"color:#288D55\">",df1$Plans,"</span>")
df1$Achievements = paste0("<span style=\"color:#288D55\">",df1$Achievements,"</span>")
df = rbind(df,df1)
for (m in unique(pa_m$quarter_year)){
if(m==q){
df2 = pa_m[pa_m$quarter_year==q,][-5]
df = rbind(df,df2)
}
}
}
df = df[-1,]
}
return(df)
}
The apply which i tried
my_fun <- function(q){
df1 = pa_qtr[pa_qtr$Period==q,]
df1$Period = paste0("<span style=\"color:#288D55\">",df1$Period,"</span>")
df1$Plans = paste0("<span style=\"color:#288D55\">",df1$Plans,"</span>")
df1$Achievements = paste0("<span style=\"color:#288D55\">",df1$Achievements,"</span>")
df = rbind(df,df1)
}
df = do.call(rbind,lapply(unique(pa_qtr$Period), my_fun))
my_fun2 <- function(m,my_fun){
if (m == q) {
df2 = pa_mon[pa_mon$qtr_yr == q, ][-5]
df = rbind(df,df2)
}
}
df = do.call(cbind,lapply(unique(pa_mon$qtr_yr), my_fun2))
DT::datatable(plans_achievements(pa_m[pa_m$inc=="vate",],pa_q[pa_q$inc=="vate",]), rownames = F,escape = FALSE,selection=list(mode="single",target="row"),options = list(pageLength = 50,scrollX = TRUE,dom = 'tp',ordering=F,columnDefs = list(list(visible=FALSE, targets=c(0)),list(className = 'dt-left', targets = '_all'))))
Why you get the error comparison is possible only for atomic and list types
I will answer your original question first:
You get the error because you haven't defined q as a variable inside the function my_fun2. Since you haven't defined this variable, R will look for it in the global environment. There R will find the function q() (used to quit R). So you get the error message comparison (1) is possible only for atomic and list types because R thinks you are trying to compare a number m with the function q.
Here is a small example to make it easy to see:
# Run this in a clean environment
m <- 1
m == b # Understandable error message - "b" is not found
m == q # Your error - because R thinks you are comparing m to a function
You fix this error by making sure that q is defined inside your function. Either by creating it inside the function, or by supplying it as an input argument.
A possible solution for your problem
As I understand your code, you want to format, merge and sort the values in pa_q and pa_m, to display them in a html table.
Under is a possible solution, using tidyverse and vectorized operations, rather than a loop or apply functions. Vectorized functions are typically your fastest option in R, as I know you want to optimize your code.
library(dplyr)
plans_achievements <- function(pa_m, pa_q) {
# I've modified the logic a bit: there is no need to wrap the full function in
# an else statement, since we can return early if the data has no rows
if (nrow(pa_m) == 0 && nrow(pa_q == 0)) {
df = data.frame(a = c(""), b = c("No Data Available"))
colnames(df) = ""
return(df)
}
pa_q <-
pa_q %>%
# Select and rename the columns vi need
select(inc, Period = quarter_year, Plans, Achievements, date) %>%
# Format the values
mutate(
Period = paste0("<span style=\"color:#288D55\">", Period,"</span>"),
Plans = paste0("<span style=\"color:#288D55\">", Plans,"</span>"),
Achievements = paste0("<span style=\"color:#288D55\">", Achievements,"</span>")
)
pa_m <-
pa_m %>%
# Select and rename the columns we need
select(inc, Period = month_year, Plans, Achievements, date) #%>%
# Combine the datasets
bind_rows(
pa_q,
pa_m
) %>%
# Make sure that R understand date as a date value
mutate(
date = lubridate::dmy(date)
) %>%
# Sort by date
arrange(desc(date)) %>%
# Remove columns we do not need
select(-date, -inc)
}
DT::datatable(
plans_achievements(
pa_m[pa_m$inc=="vate",],
pa_q[pa_q$inc=="vate",]
),
rownames = FALSE,
escape = FALSE,
selection = list(mode = "single", target = "row"),
options = list(
pageLength = 50,
scrollX = TRUE,
dom = 'tp',
ordering = FALSE,
columnDefs = list(
list(className = 'dt-left', targets = '_all')
)
)
)
Hopefully this solves your problem.

Importing a dataframe from a function to Shiny server

I need to get the dataframe from a function in rShiny server. But that function returns a Plot and the return value cannot be changed as the plots are used in the future use.
have not pasted the whole code as its like 200 lines each for the function and also for the rshiny server.
Hist_Read_data4 <- full_join(Hist_Read_data1,Hist_Read_data_opst, by = c("timestamp"))%>%
arrange(timestamp)%>%
subset(timestamp >= as.POSIXct(start_timestamp, origin = "1970-01-01") & timestamp <= as.POSIXct(end_timestamp, origin = "1970-01-01"))%>%
mutate(value.y = na.locf(value.y, na.rm = FALSE))%>%
mutate(value.y = fct_explicit_na(value.y, na_level = "None"))%>%
mutate(value.x = na.locf(value.x, na.rm=FALSE))%>%
mutate(new_value = abs(value.x - lag(value.x)))%>%
mutate(new_value = replace_na(new_value, 0))%>%
mutate(new_value = cumsum(new_value))
plot <- ggplot() +
geom_path(data = Hist_Read_data4, mapping = aes(x = timestamp, y=value.x, color = value.y), na.rm = TRUE, linejoin = 'round' , size=1.5, group = 1)
//Hist_Read_data4 is the dataframe which i need to return//
//plot is the return value of the function//
output$HoverText <- renderText({
coordinfo <- input$PlotHover
nearpts <- nearPoints(Hist_Read_data4, coordinfo, xvar= "timestamp", yvar = "value.y", threshold = 20)
})
need Hist_Read_data4 in inside nearpoints. But it cannot be accessed as its inside a function named chooseDevice() in a separate script file named data_funcs.R
I do not want to change the return value of the chooseDevice function from plot to returning this dataframe as it will complicate the whole code and 2 months work will be wasted.

How to scale point size and color at the same time in ggvis?

Considering a data.frame like this:
df <- data.frame(t = rep(seq(from=as.POSIXct('00:15:00',format='%H:%M:%S'),
to=as.POSIXct('24:00:00',format='%H:%M:%S'),by='15 min'),times=2),
y = c(rnorm(96,10,10),rnorm(96,40,5)),
group = factor(rep(1:2,each=96)),
type = factor(rep(1:3,each=64)))
Using ggvis, I want to generate a point-line plot in which the line is grouped by group. The size of points with type==3 should be 100 while the size of points withtype==1 and type==2 are all 50. The colour of the points should be green, blue and red corresponding to type1,type2 and type3. Here is my ggvis code:
df <- data.frame(df,id=1:nrow(df))
all_values <- function(x) {
if(is.null(x)) return(NULL)
row <- df[df$id == x$id, ]
paste0(names(row), ": ", format(row), collapse = "<br />")
}
ggvis(data=df,x=~t,y=~y,stroke=~group) %>%
layer_points(fill=~type,size=~type, key:=~id, fillOpacity := 0.5,
fillOpacity.hover := 0.8,size.hover := 500) %>%
scale_nominal("size",domain = c(1,2,3), range = c(50,50,100)) %>%
scale_nominal("fill",domain = c(1,2,3), range = c('green','blue','red')) %>%
layer_lines() %>%
add_tooltip(all_values,'click') %>%
add_legend(scales=c("fill","size"), properties = legend_props(legend = list(y = 150))) %>%
set_options(duration = 0) %>%
add_axis(type="x",format="%H:%M")
I get the error of Error: length(x) not less than or equal to 2.
Why this happened and how can I fix it?
It turns out that scale_nominal("size",domain = c(1,2,3), range = c(50,50,100)) should be replaced by scale_nominal("size",domain = c(1,2,3), range = c('50','50','100')).
The culprit for the error is more than 2 values defined for range. The definition for range suggests : For numeric values, the range can take the form of a two-element array with minimum and maximum values.
For ordinal data, the range may by an array of desired output values, which are mapped to elements in the specified domain. In this case, value should be defined in character.
This should resolve your error.

Warning message from ggfortify::ggbiplot

I am trying to use ggbiplotfrom ggfortify package. It seems its working fine but I am getting warning message as follows,
mdl <- pls::plsr(mpg ~ ., data = mtcars, scale = T)
scrs <- data.frame(pls::scores(mdl)[])
loads <- data.frame(pls::loadings(mdl)[])
ggfortify::ggbiplot(scrs, loads,
label.label = rownames(scrs), asp = 1, label = T, label.size = 3,
loadings = T, loadings.label = T, loadings.label.label = rownames(loads))
Warning messages:
1: In if (value %in% columns) { :
the condition has length > 1 and only the first element will be used
2: In if (value %in% columns) { :
the condition has length > 1 and only the first element will be used
Have I taken any wrong step or is it a bug.
According to the ggbiplot documentation, the label.label= parameter expects the column names from which to pull the names; it does not expect a vector of names. Same goes for loadings.label.label=. (ggplot and most tidyverse functions don't like rownames very much -- better to make them a proper column)
scrs$ID <- rownames(scrs)
loads$ID <- rownames(loads)
ggfortify::ggbiplot(scrs, loads,
label.label = "ID", asp = 1, label = T, label.size = 3,
loadings = T, loadings.label = T, loadings.label.label = "ID")

Resources