The objective is to populate a new column (df$final.count) according to multiple conditions. An example data frame below:
structure(list(item = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L), .Label = c("a", "b"), class = "factor"), raw.count = c(16,
300, 203, 6, 5, 40, 20, 16, 300, 203), loc = structure(c(4L,
2L, 2L, 2L, 2L, 3L, 3L, 4L, 2L, 3L), .Label = c(" ", "in", "out",
"NA"), class = "factor"), side = structure(c(4L, 2L, 3L, 2L,
3L, 4L, 3L, 4L, 2L, 4L), .Label = c("F", "L", "R", "NA"), class = "factor"),
recount = c(15, NA, NA, 7, NA, NA, 16, 15, NA, NA), final.count = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), EXPECTED = c(15, 60, 120,
7, 5, 40, 16, 15, 300, 203)), row.names = c(NA, 10L), class = "data.frame")
The objective is to populate a new column (df$final.count) according to the following conditions affecting multiple columns:
if there is a number in df$recount THAN df$recount should be used in df$final.count unconditional to other column values
if there is no number (NA) in df$recount AND df$raw.count > 10 AND df$loc is "in" AND df$side is "L" THAN function 0.2*df$raw.count should be used to populate df$final.count
if there is no number (NA) in df$recount AND df$raw.count > 10 AND df$loc is "in" AND df$side is "R" THAN function 0.6*df$raw.count should be used to populate df$final.count (NOTE only side is different)
if df$raw.count =<10 than df$raw.count should be used exept if 1 above holds
if df$loc is "out" than df$final.count <- df$raw.count unconditional to other column values exept if 1 above holds
I have tried various versions of if / else if in a loop, for example:
for (i in 1:nrow(df)) {
if(!is.na(df$recount[i]) {
df$final.count <- df$recount
}
else if(df$item[i] == "a" & df$raw.count[i] > 10 & df$loc[i] == "in" & df$side[i] == "L") {
df$final.count <- 0.2*df$raw.count[i]
}
else if(df$item[i] == "a" & df$raw.count[i] > 10 & df$loc[i] == "in" & df$side[i] == "R") {
df$final.count <- 0.6*df$raw.count[i]
}
else if(df$raw.count <= 10){
df$final.count <- df$raw.count
}
else(df$loc == "out") {
df$final.count <- df$raw.count
}
}
if you use a case_when() from the dplyr-package, it becomes more readable.. you can also loose the for.
library( dplyr )
df %>%
mutate( final.cond = case_when(
!is.na( recount ) ~ recount,
item == "a" & raw.count > 10 & loc == "in" & side == "L" ~ 0.2 * raw.count,
item == "a" & raw.count > 10 & loc == "in" & side == "R" ~ 0.6 * raw.count,
raw.count <= 10 ~ raw.count,
loc == "out" ~ raw.count,
TRUE ~ as.numeric(NA)
))
Related
So I am trying to create percentages to display on a plot.
Here is my dataset:
how_often_ByYear <- structure(list(Var1 = structure(c(1L, 2L, 3L, 1L, 2L, 3L), levels = c("A few times a year",
"Never been", "Once or twice"), class = "factor"), Var2 = structure(c(1L,
1L, 1L, 2L, 2L, 2L), levels = c("Year 1", "Year 2"), class = "factor"),
Freq = c(0, 122, 47, 1, 117, 50), percent = c(0, 72, 28,
1, 69, 30)), class = "data.frame", row.names = c(NA, -6L))
And here is my code:
how_often_ByYear <- Visitor_Data_ByYear %>%
dplyr::select(How_often_have_you_visited_us, Year) #selects column for question 16
#mutate_all(funs(gsub("[[:punct:]]", "", .))) #removes annoying symbols
how_often_ByYear <- table(how_often_ByYear$How_often_have_you_visited_us, how_often_ByYear$Year)
how_often_ByYear <- as.data.frame(how_often_ByYear)
how_often_ByYear <- how_often_ByYear %>%
mutate(percent = Freq/sum(Freq)*100) %>%
mutate_if(is.numeric, round, 0)
View(how_often_ByYear)
right now, the numbers include both year 1 and year 2, so my percentages add up to around 50 percent. How do I separate the percentages for each year so that I can report on both?
Thanks in advance for your help.
I have a series of commands that create a vibration of effects plot. Now, I want to assign the plot to an object (to later make it downloadable via Shiny). However, that does not seem possible. When I try to save the plot to an object, the object returns "Null" and likewise if I try to save it it saves an empty .png file.
See below for the function and some example data.
#some packages
if (!require("pacman")) install.packages("pacman")
pacman::p_load(MASS, tidyverse, ggplot2, dplyr, shiny, here, BayesFactor, ggpubr, effsize, DescTools, rqPen)
#plot of p value vs effect size vibration plot
#https://figshare.com/articles/Code_data_and_analysis_script_for_A_Traveler_s_Guide_to_the_Multiverse_Promises_Pitfalls_and_a_Framework_for_the_Evaluation_of_Analytic_Decisions_/12089736 main source
multiverse.vibration <- function(effsize, statistic, alpha = 0.05, threshold = 6, type = c("frequentist")){
#assign colours schemes
point.color <- rgb(0,76,153, alpha=80, maxColorValue=255)
contour.color = rgb(60,130,180, alpha=130, maxColorValue=255)
#vibrations
vibrations <- kde2d(effsize, -log10(statistic), n=50)
if (type == "frequentist"){
#do the plotting.
plot(effsize, -log10(statistic), type="n", las=1, xlab=expression(paste("Effect size")), ylab=expression(paste("-log"[10],"(",italic("p"),"-value)")), main="", cex.lab=1.35, cex.axis=1.2 ) ####the label of the y axis gets cut off by the picture for no reason whatsoever####
#add quantile lines
abline(v=as.numeric(quantile(effsize, probs=0.5)), lty=3, lwd=1.8, col="gray70")
abline(h=-log10(as.numeric(quantile(statistic, probs=0.5))), lty=3, lwd=1.8, col="gray70")
#add data points
points(effsize, -log10(statistic), pch=16, col=point.color, cex=1.5)
#add "vibrations"
contour(vibrations, drawlabels=FALSE, nlevels=5, lwd=1.7, col=contour.color, add=TRUE)
text(as.numeric(quantile(effsize, probs=0.5)), max(-log10(statistic)), "50", pos=2, col="gray40", cex=1)
text(max(effsize), -log10(as.numeric(quantile(statistic, probs=0.5))), "50", pos=3, col="gray40", cex=1)
#add alpha line and label
abline(h=-log10(alpha), lty=3, lwd=1.5, col="red")
text(min(effsize), -log10(alpha), expression(paste(alpha)), pos = 1, cex = 1, col = "red")
}
#...function simplified
}
#and below some data
df_multiverse <- structure(list(transformation = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("square",
"squareroot"), class = "factor"), datatrimming = structure(c(2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("mad",
"notrimming"), class = "factor"), fixedtrimming = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "nofixedtrimming", class = "factor"),
min = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), max = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), DispersionMeasure = c(NA,
2, 2.5, 3, 3.5, 4, 4.5, 5, NA, 2, 2.5, 3, 3.5, 4, 4.5, 5),
NumberOfTrials = c(2481, 2017, 2089, 2152, 2202, 2235, 2271,
2292, 2481, 2017, 2089, 2152, 2202, 2235, 2271, 2292), df = c(21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21
), t.value = c(0.834352731211477, -1.89143806501942, -2.06164045172582,
-2.29402139720537, -2.20170894686594, -1.30874979649765,
-1.46580636234517, -0.933033039387291, -0.381340656529586,
-2.65553835404059, -2.70367808996487, -2.88191068442976,
-2.89698876130645, -2.31203065738409, -2.40524937843272,
-1.99997820996895), p.value = c(0.413473232348569, 0.0724397922282673,
0.0518359697127152, 0.0322027617938105, 0.0390026786336539,
0.204761347160827, 0.157515139319996, 0.361407402521166,
0.706781450011369, 0.0147953018060795, 0.013300947944711,
0.00892256290108781, 0.0086233125398353, 0.0310102245266004,
0.0254623057912856, 0.0586025361696588), estimate = c(0.0513517727014905,
-0.138440596771433, -0.152826845040145, -0.172473124495872,
-0.150035258885051, -0.106059860414446, -0.0904972867538278,
-0.0636909905658258, -0.0224006885730891, -0.132591874705722,
-0.141473579509691, -0.162307800901886, -0.156924178280938,
-0.138723145332572, -0.124862443444392, -0.109932966289113
)), row.names = c("df", "df1", "df2", "df3", "df4", "df5",
"df6", "df7", "df8", "df9", "df10", "df11", "df12", "df13", "df14",
"df15"), class = "data.frame")
#and below a call
object <- multiverse.vibration(df_multiverse$estimate, df_multiverse$p.value, type = "frequentist")
#Now I try to save it
svg(file = "Figure 1.svg", width = 9, height = 9, antialias = "gray")
object
dev.off()
#empty file, does not save plot.
My goal is to save the plot to an object in a way that later allows me to download the object via some command.
Still very new to coding and R, I am working with some healthcare data in a data frame. There are 3 outcomes that I am interested in - Mobilised_D1, Diet_D1 and Catheter_rm_D1. I wish to create a fourth column called AnyTwo whereby if any 2 of the 3 outcomes are Y or all three outcomes are Y, then it will be T for AnyTwo.
I've managed to do this by using [] as below:
ERAS_limited[ERAS_limited$Mobilised_D1 == "Y" & ERAS_limited$Catheter_rm_D1 == "Y", "AnyTwo"] <- T
ERAS_limited[ERAS_limited$Diet_D1 == "Y" & ERAS_limited$Catheter_rm_D1 == "Y", "AnyTwo"] <- T
ERAS_limited[ERAS_limited$Diet_D1 == "Y" & ERAS_limited$Catheter_rm_D1 == "Y" & ERAS_limited$Mobilised_D1 == "Y", "AnyTwo"] <- T
dput(head(ERAS_limited))
structure(list(Mobilised_D1 = structure(c(2L, 2L, 1L, 1L, 1L,
2L), .Label = c("N", "Y"), class = "factor"), Diet_D1 = structure(c(2L,
2L, 2L, 2L, 1L, 2L), .Label = c("N", "Y"), class = "factor"),
Catheter_rm_D1 = structure(c(2L, 2L, 1L, 1L, 1L, 2L), .Label = c("N",
"Y"), class = "factor"), AnyTwo = c(TRUE, TRUE, FALSE, FALSE,
FALSE, TRUE)), row.names = c(NA, 6L), class = "data.frame")```
However, I would be keen to see if there is a more elegant way of doing this e.g. by writing a loop for my own education and curiosity.
We can use rowSums to create the logical vector
library(dplyr)
ERAS_limited %>%
mutate(AnyTwo = rowSums(.[-4] == "Y") >= 2)
In base R, it would be
ERAS_limited$AnyTwo <- rowSums(ERAS_limited[-4]) == "Y") >= 2
Calculation inside for loop & ifelse is working when I have 100-200 rows but not working when I have 20000 rows.
Can someone help me with the FOR loop and IFELSE if something is wrong or if there is some timeout happening in R studio when running for & if-else loop
Code:
#FROM HERE IT IS NOT WORKING WHEN WE HAVE 20000 ROWS OF DATA IN FINAL DATFRAME.
#WE ARE CREATING FINAL_V1 WHICH IS POPULATING ONLY 1 ROW
#New Dataframe with Null values
Final <- structure(list(Item = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "0S1576", class = "factor"),
LC = structure(1:6, .Label = c("MW92", "OY01", "RM11", "RS11",
"WK14", "WK15"), class = "factor"), Fiscal.Week = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "2019-W24", class = "factor"),
SS = c(15L, 7L, 5L, 9L, 2L, 2L), Freq = c(3, 6, 1, 2, 1,
1), agg = c(1, 1, 1, 1, 0, 0)), row.names = c(NA, -6L), class = "data.frame")
lctolc <- structure(list(Item = structure(c(1L, 1L, 1L, 1L, 1L), .Label = "0S1576", class = "factor"),
LC = structure(c(1L, 2L, 2L, 3L, 3L), .Label = c("MW92",
"OY01", "RM11"), class = "factor"), ToLC = structure(1:5, .Label = c("OY01",
"RM11", "RS11", "WK14", "WK15"), class = "factor")), row.names = c(NA,
-5L), class = "data.frame")
df <- as.data.frame(unique(Final$Item))
Final_v1 <- NA
j <- 1
i <- 1
#SS computations
#For 1 to no of rows in df(which is having no of unique items
for(j in 1:nrow(df)) {
#copying the data from Final to Final_v1(with charater type)
Final_v1 <- Final[Final$Item == as.character(df[j,1]),]
#for 1 to the no of rows in Final_v1
for(i in 1:nrow(Final_v1)) {
if(Final_v1[i,6] <= 0)
{
Final_v1[i,7] = Final_v1[i,4]}
else
{
if(Final_v1[i,5] == '1')
{
Final_v1[i,7]=0
}
else
{
Final_v1[i,7]=Final_v1[i,4]
}
SSNew <- Final_v1[i,7]
#Leftover distribution
LCS <- lctolc$ToLC[Final_v1$Item[i] == lctolc$Item & Final_v1$LC[i] == lctolc$LC]
inds <- Final_v1$LC %in% LCS
if (any(inds))
{ Final_v1$SS[inds]<- if (SSNew == 0) {Final_v1$SS[inds]==0} else {Final_v1$SS[inds]=Final_v1$SS[inds]} }
}
}
names(Final_v1)[7] <- "SSNew"
}
Can someone help why it is not performing for 20000rows
I have the following table:
structure(list(Id = structure(c(1L, 1L, 2L, 2L, 1L, 3L, 3L, 3L
), .Label = c("a", "b", "c"), class = "factor"), stops = c(1,
1, 1, 1, 1, 2, 2, 2)), .Names = c("Id", "stops"), row.names = c(NA,
-8L), class = "data.frame")
I would like to add to $stops new characters when the stop did not change but the $Id did.
For example, I would like to get:
structure(list(Id = structure(c(1L, 1L, 2L, 2L, 1L, 3L, 3L, 3L
), .Label = c("a", "b", "c"), class = "factor"), stops = structure(c(1L,
1L, 2L, 2L, 3L, 4L, 4L, 4L), .Label = c("1", "1-1", "1-2", "2"
), class = "factor")), .Names = c("Id", "stops"), row.names = c(NA,
-8L), class = "data.frame")
I just would like to do so if the Id is different than the previous one, and if the Stops is the same than the previous one...
I tried with mutate() but it seems I am quite far away to get something working here...
Here's a looples attempt using data.table
library(data.table)
setDT(df)[, `:=`(stops = as.character(stops), Idindx = rleid(Id))]
indx <- unique(df, by = "Idindx")[, counter := (1:.N) - 1L, by = rleid(stops)]
df[indx[counter > 0], stops := paste(stops, i.counter, sep = "-"), on = "Idindx"]
# Id stops Idindx
# 1: a 1 1
# 2: a 1 1
# 3: b 1-1 2
# 4: b 1-1 2
# 5: a 1-2 3
# 6: c 2 4
# 7: c 2 4
# 8: c 2 4
The first step is to create an unique index for each Id (as they aren't unique) and convert stops to a character (per your desired output)
Then, operating on unique indexes identify counts of same stops and join back to the original data
You could write a loop to solve your problem:
# Original data
data <- structure(list(Id = structure(c(1L, 1L, 2L, 2L, 1L, 3L, 3L, 3L
), .Label = c("a", "b", "c"), class = "factor"), stops = c(1,
1, 1, 1, 1, 2, 2, 2)), .Names = c("Id", "stops"), row.names = c(NA,
-8L), class = "data.frame")
# Add new column, which will be converted in the following loop
data$stops_new <- as.character(data$stops)
new <- 1
for(i in 2:nrow(data)) {
# Convert values of stops_new, if your specified conditions appear
if(data$Id[i] != data$Id[i - 1] & data$stops[i] == data$stops[i - 1]) {
data$stops_new[i] <- paste(data$stops_new[i], "-", new, sep = "")
# Repeat the convertion for all values with the same ID and stop-value
j <- i + 1
while(data$Id[i] == data$Id[j] & data$stops[i] == data$stops[j]) {
data$stops_new[j] <- paste(data$stops[i], "-", new, sep = "")
j <- j + 1
}
new <- new + 1
}
}
data
this is a base R solution.
create indicators showing you whether Id has changed (id.ind) and whether stops has changed (stops.ind) from the previous line (convention being that these indicators are set to "0", i.e. no change, for the first row):
stops.ind <- c(0, diff(dat$stops))
id.ind <- c(0, diff(as.numeric(dat$Id)))
create new stops vector:
stops <- new.stops <- dat$stops
row by row check whether a) there is a change in id and no change in stops or b) there is no change in either from the previous row. in case a) increase k by one and append "-k" to stops value b) use previous value of stops:
k <- 0
for(i in 2 : nrow(dat)){
if(id.ind[i] != 0 & stops.ind[i] == 0){
k <- k + 1
new.stops[i] <- paste0(stops[i], "-", k)
}
if(id.ind[i] == 0 & stops.ind[i] == 0)
new.stops[i] <- new.stops[i - 1]
}
new.stops
# [1] "1" "1" "1-1" "1-1" "1-2" "2" "2" "2"
new.dat <- data.frame(Id = dat$Id, stops = new.stops)