I have a data set containing the step count of cows from a 4 week trial where each animal was exposed to treatment A or treatment B at the beginning of week 2, and want to know how the step rate of the two treatment groups changed each week compared to week 1.
How do I add an offset to my model to do this?
The model I am running before adding the offset is this:
mod.1 <- glmmTMB(Step.count ~ Week*Treatment + (1|Cow.ID), data = data.df, family = poisson)
Here is an example of my data
data.1 <- data.frame(Cow.ID = rep(1, 20),
Week = sample(c(1,2,3,4), 20, replace = TRUE),
Treatment = sample(c("infected"), 20, replace = TRUE),
Step.count = rpois(20, 60.1))
data.2 <- data.frame(Cow.ID = rep(2, 20),
Week = sample(c(1,2,3,4), 20, replace = TRUE),
Treatment = sample(c("infected"), 20, replace = TRUE),
Step.count = rpois(20, 60.1))
data.3 <- data.frame(Cow.ID = rep(3, 20),
Week = sample(c(1,2,3,4), 20, replace = TRUE),
Treatment = sample(c("non-infected"), 20, replace = TRUE),
Step.count = rpois(20, 60.1))
data.4 <- data.frame(Cow.ID = rep(4, 20),
Week = sample(c(1,2,3,4), 20, replace = TRUE),
Treatment = sample(c("non-infected"), 20, replace = TRUE),
Step.count = rpois(20, 60.1))
sample.df <- rbind(data.1, data.2, data.3, data.4)
Hard to say without an example of your data, but assuming that you have a datafame something like this
library(dplyr)
cows <- tibble(
Cow.Id = rep(1:4, times = 5),
Week = rep(1:5, each = 4),
Step.count = floor(runif(20, 100,200)),
Treatment = rep(c('A','B','A','B'), times = 5),
)
Then, you can easily calculate a column of Step.count.offset for each cow like this:
cows.clean <- cows %>%
group_by(Cow.Id) %>%
arrange(Week) %>%
mutate(
Step.count.offset = Step.count - first(Step.count)
) %>%
ungroup()
Related
I have tried the following formula but it gives all nos even when I change the quantile value.
NOTE: I have 3 independent datasets that I want to apply the function.
outlier<-function(x1,x2){
q1<-quantile(x1 , .75, na.rm = TRUE)
if(x1>q1){x2<-"Yes"
}else{
x2<-"No"
}
}
I have tried x2<-ifelse(x1>q1,"Yes","No")
inside the function but it still doesn't work.
You can use an ifelse statement and create a new column using mutate.
library(dplyr)
set.seed(1)
df <- tibble(x1 = sample(c(1:10), size = 10, replace = T))
df %>%
mutate(x2 = ifelse(quantile(x1, 0.75, na.rm = T) < x1, "Yes", "No"))
If you want a function
library(dplyr)
set.seed(1)
df <- tibble(x1 = sample(c(1:10), size = 10, replace = T),
x2 = sample(c(1:10), size = 10, replace = T),
x3 = sample(c(1:10), size = 10, replace = T),
x4 = sample(c(1:10), size = 10, replace = T))
outlier<-function(dataframe, quant = 0.75, col = c("x1", "x2")){
dataframe %>%
mutate(across(all_of(col), ~ifelse(.x>quantile(.x,0.75), 'Yes', 'No'),
.names = '{col}_yes'))
}
outlier(dataframe = df,quant = 0.25)
DF <- data.frame(Height = rnorm(100, 170, 5),
Weight = rnorm(100, 55, 5))
BMI = function(height,weight){(weight/(height)^2*10000)}
DF$bmi = BMI(DF$Height,DF$Weight)
DF$weight_group <-
cut(
x = DF$Weight,
breaks = c(0,60,70,Inf),
include.lowest = TRUE,
labels = c("0-60", "61-70", "71+")
)
DF$BMI_group <-
cut(
x = DF$bmi,
breaks = c(0, 19.75582, Inf),
include.lowest = TRUE,
labels = c("Below Average", "Above Average")
)
This is my code. I cannot figure out how to just calculate the average of the last half of the data frame. I didn't know how to add in gender, to make 50 males and 50 females, so this is my work around.
Are you looking for such a solution?
DF <- data.frame(Height = rnorm(100, 170, 5),
Weight = rnorm(100, 55, 5),
Gender = c(rep("male", 50), rep("female", 50)))
BMI <- function(height,weight){(weight/(height)^2*10000)}
library(dplyr)
DF %>%
group_by(Gender) %>%
mutate(bmi = BMI(Height, Weight)) %>%
summarise(mean_bmi = mean(bmi))
# A tibble: 2 x 2
Gender mean_bmi
* <chr> <dbl>
1 female 19.4
2 male 19.6
We may use sample to create the column, subset the 'Gender' for 'F', and apply the BMI
DF$Gender <- sample(c("F", "M"), nrow(DF), replace = TRUE, prob = c(0.5, 0.5))
with(subset(DF, Gender == "F"), mean(BMI(Height, Weight)))
If we want to get the mean of 'BMI' by 'BMI_group'
subdf <- subset(DF, Gender == "F")
with(subdf, tapply(BMI(Height, Weight), BMI_group, FUN = mean))
Below Average Above Average
17.57841 21.43003
I have a huge dataset with several groups (factors with between 2 to 6 levels), and dichotomous variables (0, 1).
example data
DF <- data.frame(
group1 = sample(x = c("A","B","C","D"), size = 100, replace = T),
group2 = sample(x = c("red","blue","green"), size = 100, replace = T),
group3 = sample(x = c("tiny","small","big","huge"), size = 100, replace = T),
var1 = sample(x = 0:1, size = 100, replace = T),
var2 = sample(x = 0:1, size = 100, replace = T),
var3 = sample(x = 0:1, size = 100, replace = T),
var4 = sample(x = 0:1, size = 100, replace = T),
var5 = sample(x = 0:1, size = 100, replace = T))
I want to do a chi square for every group, across all the variables.
library(tidyverse)
library(rstatix)
chisq_test(DF$group1, DF$var1)
chisq_test(DF$group1, DF$var2)
chisq_test(DF$group1, DF$var3)
...
etc
I managed to make it work by using two nested for loops, but I'm sure there is a better solution
groups <- c("group1","group2","group3")
vars <- c("var1","var2","var3","var4","var5")
results <- data.frame()
for(i in groups){
for(j in vars){
test <- chisq_test(DF[,i], DF[,j])
test <- mutate(test, group=i, var=j)
results <- rbind(results, test)
}
}
results
I think I need some kind of apply function, but I can't figure it out
Here is one way to do it with apply. I am sure there is an even more elegant way to do it with dplyr. (Note that here I extract the p.value of the test, but you can extract something else or the whole test result if you prefer).
res <- apply(DF[,1:3], 2, function(x) {
apply(DF[,4:7], 2,
function(y) {chisq.test(x,y)$p.value})
})
Here's a quick and easy dplyr solution, that involves transforming the data into long format keyed by group and var, then running the chi-sq test on each combination of group and var.
DF %>%
pivot_longer(starts_with("group"), names_to = "group", values_to = "group_val") %>%
pivot_longer(starts_with("var"), names_to = "var", values_to = "var_val") %>%
group_by(group, var) %>%
summarise(chisq_test(group_val, var_val)) %>%
ungroup()
I am trying to combine bar plot and line plot on same visualization while the data for each plot comes from different data set. So the code I am trying:
library(dplyr)
library(echarts4r)
set.seed(600)
df1 <- data.frame(
#class = c(rep("1st", times = 60), rep("2nd", time = 30), rep("3rd", time = 30)),
week = rep(1:20, times = 3),
cat = c(rep("A", times = 20), rep("B", times = 20), rep("C", times = 20)),
count = round(runif(60, 1, 100))
)
df <- data.frame(
week = rep(1:20, times = 2),
cat = c(rep("D", times = 20), rep("E", times = 20)),
count = round(runif(40, 1, 100))
)
df1 %>%
group_by(cat) %>%
e_charts(week) %>%
e_bar(count, bind = cat) %>%
e_tooltip(
formatter = htmlwidgets::JS("
function(params){
return('<strong>' + params.name +
'</strong><br />week: ' + params.value[0] +
'<br />count: ' + params.value[1])
}
")
)
Trying to add line considering the data df on the viz. Below is what I am trying to achieve :
Here I have used echarts4rProxy() but is same thing possible outside Shiny?
Also is it possible to change the colors of bars and lines?
Thanks!!
Yes,
To go about it the way you do with 2 different datasets you can use e_data pass new data, it's just like e_charts but within the echarts4r pipe.
library(dplyr)
library(echarts4r)
set.seed(600)
df1 <- data.frame(
#class = c(rep("1st", times = 60), rep("2nd", time = 30), rep("3rd", time = 30)),
week = rep(1:20, times = 3),
cat = c(rep("A", times = 20), rep("B", times = 20), rep("C", times = 20)),
count = round(runif(60, 1, 100))
)
df <- data.frame(
week = rep(1:20, times = 2),
cat = c(rep("D", times = 20), rep("E", times = 20)),
count = round(runif(40, 1, 100))
)
df1 %>%
group_by(cat) %>%
e_charts(week) %>%
e_bar(count, bind = cat) %>%
e_data(data = group_by(df, cat), x = week) %>%
e_line(count) %>%
e_tooltip(
formatter = htmlwidgets::JS("
function(params){
return('<strong>' + params.name +
'</strong><br />week: ' + params.value[0] +
'<br />count: ' + params.value[1])
}
")
)
I'm trying to load data from Quandl with collapse = "monthly".
Some of the values are only available in a yearly or halve-yearly fashion.
Some other values are only available within certain periods of time.
This leaves me with a lot of inhomogeneous data.
How can I fill the yearly and halve-yearly data in a "Last observation carried forward" fashion and the remaining NAs with 0?
Here is my idea of the data I got and the one I want to have at the end:
library(tibble)
set.seed(4711)
# How do I get from:
#
df.start <- data_frame(
Date = seq.Date(as.Date("1990-01-01"), as.Date("1999-12-01"), "1 month"),
B = rep(NA, 120),
C = c(rep(NA, 50), rnorm(120 - 50)),
D = rep(c(rnorm(1), rep(NA, 11)), 10),
E = c(rep(NA, 24), rep(c(rnorm(1), rep(NA, 11)), 8)),
F = c(rep(NA, 45), rnorm(50), rep(NA, 25)),
G = c(rep(NA, 24), rep(c(rnorm(1), rep(NA, 11)), 6), rep(NA, 24)),
H = c(rep(NA, 10), rnorm(20), rep(NA, 16), rnorm(37), rep(NA, 37)),
I = rep(c(rnorm(1), rep(NA, 5)), 20)
)
#
# To:
#
df.end <- data_frame(
Date = seq.Date(as.Date("1990-01-01"), as.Date("1999-12-01"), "1 month"),
B = rep(0, 120),
C = c(rep(0, 50), rnorm(120 - 50)),
D = rep(rnorm(10), each = 12),
E = c(rep(0, 24), rep(rnorm(8), each = 12)),
F = c(rep(0, 45), rnorm(50), rep(0, 25)),
G = c(rep(0, 24), rep(rnorm(6), each = 12), rep(0, 24)),
H = c(rep(0, 10), rnorm(20), rep(0, 16), rnorm(37), rep(0, 37)),
I = rep(rnorm(20), each = 6)
)
#
# Automatically?
#
You can use fill to fill the NAs with the last non-empty value (except for the Date column), and then replace the remaining NAs by 0. We do these operations grouped by year.
library(tidyverse)
library(lubridate)
df.end <- df.start %>%
mutate(year = year(Date)) %>%
group_by(year) %>%
fill(., colnames(df.start[-1])) %>%
replace(., is.na(.), 0) %>%
ungroup() %>%
select(-year)