Estimated treatment plot in r - r

Given a data frame like the one below, how can I get an estimated treatment means plot in R?
I have tried using ggplot:
p <- ggplot(data2, aes(x = faca, y = res, colour = facb, shape = facb))
plot(p)
but I'm getting an empty plot. How can I fix this?
>data2
res faca facb obs
1 2.4 1 low 1
2 2.7 1 low 2
3 2.3 1 low 3
4 2.5 1 low 4
5 4.6 1 medium 1
6 4.2 1 medium 2
7 4.9 1 medium 3
8 4.7 1 medium 4
9 4.8 1 high 1
10 4.5 1 high 2
11 4.4 1 high 3
12 4.6 1 high 4
13 5.8 2 low 1
14 5.2 2 low 2
15 5.5 2 low 3
16 5.3 2 low 4
17 8.9 2 medium 1
18 9.1 2 medium 2
19 8.7 2 medium 3
20 9.0 2 medium 4
21 9.1 2 high 1
22 9.3 2 high 2
23 8.7 2 high 3
24 9.4 2 high 4
25 6.1 3 low 1
26 5.7 3 low 2
27 5.9 3 low 3
28 6.2 3 low 4
29 9.9 3 medium 1
30 10.5 3 medium 2
31 10.6 3 medium 3
32 10.1 3 medium 4
33 13.5 3 high 1
34 13.0 3 high 2
35 13.3 3 high 3
36 13.2 3 high 4

Related

How to calculate mean value of subset of rows for different groups?

Assuming the following data frame:
set.seed(2409)
df <- data.frame(group = rep(1:4, each=5), value = round(runif(20, 1, 10),0))
df
group value
1 1 4
2 1 9
3 1 7
4 1 1
5 1 6
6 2 5
7 2 8
8 2 5
9 2 5
10 2 3
11 3 6
12 3 1
13 3 4
14 3 4
15 3 9
16 4 6
17 4 5
18 4 7
19 4 7
20 4 4
I'm now interested in calculating the mean of the value column based on the first three (or n) rows for each group.
So, what I want to achieve is:
group value mean
1 1 4 6.666667
2 1 9 6.666667
3 1 7 6.666667
4 1 1 6.666667
5 1 6 6.666667
6 2 5 6.000000
7 2 8 6.000000
8 2 5 6.000000
9 2 5 6.000000
10 2 3 6.000000
11 3 6 3.666667
12 3 1 3.666667
13 3 4 3.666667
14 3 4 3.666667
15 3 9 3.666667
16 4 6 6.000000
17 4 5 6.000000
18 4 7 6.000000
19 4 7 6.000000
20 4 4 6.000000
I can get the values in the mean column e.g. by running:
sapply(split(df, df$group),
function(x) mean(x[1:3,]$value))
1 2 3 4
6.666667 6.000000 3.666667 6.000000
But I am pretty sure that there has to be a more elegant way to get these values maybe by using dplyr. It's easy to calculate the overall mean for each group:
df <- df %>%
group_by(group) %>%
mutate(mean = mean(value))
df
group value mean
<int> <dbl> <dbl>
1 1 4 5.4
2 1 9 5.4
3 1 7 5.4
4 1 1 5.4
5 1 6 5.4
6 2 5 5.2
7 2 8 5.2
8 2 5 5.2
9 2 5 5.2
10 2 3 5.2
11 3 6 4.8
12 3 1 4.8
13 3 4 4.8
14 3 4 4.8
15 3 9 4.8
16 4 6 5.8
17 4 5 5.8
18 4 7 5.8
19 4 7 5.8
20 4 4 5.8
But how do I consider only the first 3 rows here?
Thank you very much for your help!
If you need to do it repeatedly (programmatically), you can do
# Window sizes: one output column is produced for each n listed here.
means <- c(2,3,5)
df %>%
# Work group by group, so `value` below refers to a single group's values.
group_by(group) %>%
# Build a named list ("mean2", "mean3", "mean5") holding, for each window size z,
# the mean of the first z values of the group; as.data.frame() turns that list
# into columns which mutate() adds to the table.
mutate(as.data.frame(lapply(setNames(means, paste0("mean", means)),
function(z) mean(head(value,z))))) %>%
# Remove the grouping again before returning the result.
ungroup()
# # A tibble: 20 x 5
# group value mean2 mean3 mean5
# <int> <dbl> <dbl> <dbl> <dbl>
# 1 1 4 6.5 6.67 5.4
# 2 1 9 6.5 6.67 5.4
# 3 1 7 6.5 6.67 5.4
# 4 1 1 6.5 6.67 5.4
# 5 1 6 6.5 6.67 5.4
# 6 2 5 6.5 6 5.2
# 7 2 8 6.5 6 5.2
# 8 2 5 6.5 6 5.2
# 9 2 5 6.5 6 5.2
# 10 2 3 6.5 6 5.2
# 11 3 6 3.5 3.67 4.8
# 12 3 1 3.5 3.67 4.8
# 13 3 4 3.5 3.67 4.8
# 14 3 4 3.5 3.67 4.8
# 15 3 9 3.5 3.67 4.8
# 16 4 6 5.5 6 5.8
# 17 4 5 5.5 6 5.8
# 18 4 7 5.5 6 5.8
# 19 4 7 5.5 6 5.8
# 20 4 4 5.5 6 5.8

Subset rows in a list based on a value of variables

I have a list named lst which contains three iterations, and in each iteration there are 2 years of projections. I want to subset my data by gender (e.g. extract gender == 2) in such a way that I finally have two lists: one list with gender 1 and a second list with gender 2.
iteration1 <- list(year1 =data.frame(age=c(10,11,12,13), district=c(1,2,3,4),gender=c(1,2,2,1)
,weight=c(12.2,11.3,11.2,10.1)),
year2 =data.frame(age=c(10,11,12,13,10,10), district=c(1,2,3,4,2,1),gender=c(1,2,2,1,1,1),weight=c(12.2,11.3,11.2,10.1,12.2,13.1)))
iteration2 <- list(year1 =data.frame(age=c(10,11,12,13), district=c(1,2,3,4),gender=c(2,2,1,1)
,weight=c(12.2,11.3,11.2,10.1)),
year2 =data.frame(age=c(10,11,12,13,13,13,12), district=c(1,2,3,4,1,3,3),gender=c(2,2,1,1,2,2,2),weight=c(12.2,11.3,11.2,10.1,10.9,11.9,15.1)))
iteration3 <- list(year1 =data.frame(age=c(10,11,12,13), district=c(1,2,3,4),gender=c(2,2,1,1)
,weight=c(12.2,11.3,11.2,10.1)),
year2 =data.frame(age=c(10,11,12,13,10,10,11,12), district=c(1,2,3,4,4,3,2,2),gender=c(2,2,1,1,2,2,1,2),weight=c(12.2,11.3,11.2,10.1,13.5,12.8,13.9,14.9)))
lst <- list(iteration1 = iteration1, iteration2 = iteration2, iteration3= iteration3 )
I hope this is what you have in mind:
# filter() comes from dplyr; with purrr alone the call would resolve to
# stats::filter and fail.
library(dplyr)
library(purrr)

# Build one combined data frame per gender code (1 and 2): for every iteration
# and every year inside it, keep the rows of that gender and row-bind them all.
map(1:2, function(a) {
  lst %>%
    map_dfr(~ .x %>%
      map_dfr(~ .x %>%
        filter(gender == a)))
}) %>%
  # Name the elements "gender 1", "gender 2", ... by position.
  set_names(paste("gender", seq_along(.)))
$`gender 1`
age district gender weight
1 10 1 1 12.2
2 13 4 1 10.1
3 10 1 1 12.2
4 13 4 1 10.1
5 10 2 1 12.2
6 10 1 1 13.1
7 12 3 1 11.2
8 13 4 1 10.1
9 12 3 1 11.2
10 13 4 1 10.1
11 12 3 1 11.2
12 13 4 1 10.1
13 12 3 1 11.2
14 13 4 1 10.1
15 11 2 1 13.9
$`gender 2`
age district gender weight
1 11 2 2 11.3
2 12 3 2 11.2
3 11 2 2 11.3
4 12 3 2 11.2
5 10 1 2 12.2
6 11 2 2 11.3
7 10 1 2 12.2
8 11 2 2 11.3
9 13 1 2 10.9
10 13 3 2 11.9
11 12 3 2 15.1
12 10 1 2 12.2
13 11 2 2 11.3
14 10 1 2 12.2
15 11 2 2 11.3
16 10 4 2 13.5
17 10 3 2 12.8
18 12 2 2 14.9
You may also do this
library(tidyverse)
# Flatten the nested list into one data frame (the years of each iteration are
# row-bound, then the iterations are row-bound), then split it by `gender`.
map_dfr(lst, \(x) map_dfr(x, ~.x)) %>% split(.$gender)
#> $`1`
#> age district gender weight
#> 1 10 1 1 12.2
#> 4 13 4 1 10.1
#> 5 10 1 1 12.2
#> 8 13 4 1 10.1
#> 9 10 2 1 12.2
#> 10 10 1 1 13.1
#> 13 12 3 1 11.2
#> 14 13 4 1 10.1
#> 17 12 3 1 11.2
#> 18 13 4 1 10.1
#> 24 12 3 1 11.2
#> 25 13 4 1 10.1
#> 28 12 3 1 11.2
#> 29 13 4 1 10.1
#> 32 11 2 1 13.9
#>
#> $`2`
#> age district gender weight
#> 2 11 2 2 11.3
#> 3 12 3 2 11.2
#> 6 11 2 2 11.3
#> 7 12 3 2 11.2
#> 11 10 1 2 12.2
#> 12 11 2 2 11.3
#> 15 10 1 2 12.2
#> 16 11 2 2 11.3
#> 19 13 1 2 10.9
#> 20 13 3 2 11.9
#> 21 12 3 2 15.1
#> 22 10 1 2 12.2
#> 23 11 2 2 11.3
#> 26 10 1 2 12.2
#> 27 11 2 2 11.3
#> 30 10 4 2 13.5
#> 31 10 3 2 12.8
#> 33 12 2 2 14.9
Created on 2021-06-19 by the reprex package (v2.0.0)
An option with bind_rows
library(dplyr)
library(purrr)
# Row-bind the years of every iteration, and the iterations themselves, into a
# single table ...
map_dfr(lst, bind_rows) %>%
# ... then break that table into a list with one tibble per gender value.
group_split(gender)
-output
[[1]]
# A tibble: 15 x 4
age district gender weight
<dbl> <dbl> <dbl> <dbl>
1 10 1 1 12.2
2 13 4 1 10.1
3 10 1 1 12.2
4 13 4 1 10.1
5 10 2 1 12.2
6 10 1 1 13.1
7 12 3 1 11.2
8 13 4 1 10.1
9 12 3 1 11.2
10 13 4 1 10.1
11 12 3 1 11.2
12 13 4 1 10.1
13 12 3 1 11.2
14 13 4 1 10.1
15 11 2 1 13.9
[[2]]
# A tibble: 18 x 4
age district gender weight
<dbl> <dbl> <dbl> <dbl>
1 11 2 2 11.3
2 12 3 2 11.2
3 11 2 2 11.3
4 12 3 2 11.2
5 10 1 2 12.2
6 11 2 2 11.3
7 10 1 2 12.2
8 11 2 2 11.3
9 13 1 2 10.9
10 13 3 2 11.9
11 12 3 2 15.1
12 10 1 2 12.2
13 11 2 2 11.3
14 10 1 2 12.2
15 11 2 2 11.3
16 10 4 2 13.5
17 10 3 2 12.8
18 12 2 2 14.9
Using purrr:
library(dplyr)
library(purrr)

# Keep the iteration -> year nesting of `lst`, but retain in each year's data
# frame only the rows for the requested gender.
lst1 <- map(lst, function(iteration) {
  map(iteration, function(year_df) filter(year_df, gender == 1))
})
lst2 <- map(lst, function(iteration) {
  map(iteration, function(year_df) filter(year_df, gender == 2))
})
lst1
$iteration1
$iteration1$year1
age district gender weight
1 10 1 1 12.2
2 13 4 1 10.1
$iteration1$year2
age district gender weight
1 10 1 1 12.2
2 13 4 1 10.1
3 10 2 1 12.2
4 10 1 1 13.1
$iteration2
$iteration2$year1
age district gender weight
1 12 3 1 11.2
2 13 4 1 10.1
$iteration2$year2
age district gender weight
1 12 3 1 11.2
2 13 4 1 10.1
$iteration3
$iteration3$year1
age district gender weight
1 12 3 1 11.2
2 13 4 1 10.1
$iteration3$year2
age district gender weight
1 12 3 1 11.2
2 13 4 1 10.1
3 11 2 1 13.9
lst2
$iteration1
$iteration1$year1
age district gender weight
1 11 2 2 11.3
2 12 3 2 11.2
$iteration1$year2
age district gender weight
1 11 2 2 11.3
2 12 3 2 11.2
$iteration2
$iteration2$year1
age district gender weight
1 10 1 2 12.2
2 11 2 2 11.3
$iteration2$year2
age district gender weight
1 10 1 2 12.2
2 11 2 2 11.3
3 13 1 2 10.9
4 13 3 2 11.9
5 12 3 2 15.1
$iteration3
$iteration3$year1
age district gender weight
1 10 1 2 12.2
2 11 2 2 11.3
$iteration3$year2
age district gender weight
1 10 1 2 12.2
2 11 2 2 11.3
3 10 4 2 13.5
4 10 3 2 12.8
5 12 2 2 14.9
Using base R, you'd get the same output with the following lines:
# Base R equivalent: walk the iteration -> year nesting of `lst` and keep, in
# each year's data frame, only the rows for the requested gender.
lst1 <- lapply(
  X = lst,
  FUN = function(iteration) {
    lapply(X = iteration, FUN = function(year_df) subset(year_df, gender == 1))
  }
)
lst2 <- lapply(
  X = lst,
  FUN = function(iteration) {
    lapply(X = iteration, FUN = function(year_df) subset(year_df, gender == 2))
  }
)

R - DataFrames and operation with rows

suppose I have the next data frame.
table<-data.frame(group=c(0,5,10,15,20,25,30,35,40,0,5,10,15,20,25,30,35,40,0,5,10,15,20,25,30,35,40),plan=c(1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3),price=c(1,4,5,6,8,9,12,12,12,3,5,6,7,10,12,20,20,20,5,6,8,12,15,20,22,28,28))
group plan price
1 0 1 1
2 5 1 4
3 10 1 5
4 15 1 6
5 20 1 8
6 25 1 9
7 30 1 12
8 35 1 12
9 40 1 12
10 0 2 3
11 5 2 5
12 10 2 6
13 15 2 7
14 20 2 10
15 25 2 12
16 30 2 20
17 35 2 20
18 40 2 20
19 0 3 5
20 5 3 6
21 10 3 8
22 15 3 12
23 20 3 15
24 25 3 20
25 30 3 22
26 35 3 28
27 40 3 28
So, I want to aggregate the rows so that, for each "plan", the records with "group" greater than 20 are combined two at a time (each record averaged with the next one) and, when the largest value is repeated, it is kept only once, without duplicates.
The example below shows what the result would look like.
data.frame(group=c(0,5,10,15,20,30,0,5,10,15,20,30,0,5,10,15,20,30,40),plan=c(1,1,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,3,3),price=c(1,4,5,6,8.5,12,3,5,6,7,11,20,5,6,8,12,17.5,25,28))
group plan price
1 0 1 1.0
2 5 1 4.0
3 10 1 5.0
4 15 1 6.0
5 20 1 8.5
6 30 1 12.0
7 0 1 3.0
8 5 2 5.0
9 10 2 6.0
10 15 2 7.0
11 20 2 11.0
12 30 2 20.0
13 0 3 5.0
14 5 3 6.0
15 10 3 8.0
16 15 3 12.0
17 20 3 17.5
18 30 3 25.0
19 40 3 28.0
Thanks!
You could try this using the dplyr package:
library(dplyr)

table %>%
  # Groups below 20 are kept as they are; from 20 upwards each group is snapped
  # down to its multiple of ten, so 20/25, 30/35, ... share one bucket.
  mutate(group = ifelse(group < 20, group, 10 * floor(group / 10))) %>%
  group_by(plan, group) %>%
  # One row per plan and bucket, holding the average price of the bucket.
  summarise(price = mean(price)) %>%
  ## Keep the last row per plan only if the price is different from the previous average price
  group_by(plan) %>%
  # price == lag(price) is NA when there is no previous row (or a price is NA),
  # and filter() drops rows whose condition is NA. coalesce() maps that NA to
  # FALSE so such rows (e.g. a plan with a single row) are kept.
  filter(!(row_number() == n() & coalesce(price == lag(price), FALSE))) %>%
  # Return an ungrouped table so later operations are not silently per-plan.
  ungroup()
This returns:
plan group price
<dbl> <dbl> <dbl>
1 1 0 1.0
2 1 5 4.0
3 1 10 5.0
4 1 15 6.0
5 1 20 8.5
6 1 30 12.0
7 2 0 3.0
8 2 5 5.0
9 2 10 6.0
10 2 15 7.0
11 2 20 11.0
12 2 30 20.0
13 3 0 5.0
14 3 5 6.0
15 3 10 8.0
16 3 15 12.0
17 3 20 17.5
18 3 30 25.0
19 3 40 28.0
How about:
# Example data: three plans, each with groups 0, 5, ..., 40 and a price.
dat <- data.frame(
  group = c(0, 5, 10, 15, 20, 25, 30, 35, 40,
            0, 5, 10, 15, 20, 25, 30, 35, 40,
            0, 5, 10, 15, 20, 25, 30, 35, 40),
  plan = c(1, 1, 1, 1, 1, 1, 1, 1, 1,
           2, 2, 2, 2, 2, 2, 2, 2, 2,
           3, 3, 3, 3, 3, 3, 3, 3, 3),
  price = c(1, 4, 5, 6, 8, 9, 12, 12, 12,
            3, 5, 6, 7, 10, 12, 20, 20, 20,
            5, 6, 8, 12, 15, 20, 22, 28, 28)
)
# Separate the rows with group <= 20 (kept unchanged) from those with
# group > 20 (averaged in blocks below).
s <- split(dat, ifelse(dat$group > 20, ">20", "<=20"))
s20 <- s[[">20"]] # easier to read
# Positions in s20 of the rows whose group is a multiple of ten; each of these
# rows closes one block and supplies the group/plan of the averaged row.
tens <- which(s20$group %% 10 == 0)
tens
# [1] 2 4 6 8 10 12
# Block label for every row of s20. Assumes all blocks contain the same number
# of rows (here 2, e.g. groups 25 and 30).
subgroup <- rep(seq_along(tens), each = nrow(s20) / length(tens))
subgroup
# [1] 1 1 2 2 3 3 4 4 5 5 6 6
ToAddBack <- s20[tens, ]
# Mean price per block, in block order; as.vector() strips the array attributes
# so a plain numeric vector is stored in the column.
ToAddBack$price <- as.vector(tapply(s20$price, subgroup, mean))
newdat <- rbind(s[["<=20"]], ToAddBack)
# Restore the plan-then-group ordering of the original table.
finaldat <- newdat[order(newdat$plan, newdat$group), ]
Note that finaldat is a little different from your example, as I think you left out some rows by accident:
finaldat
group plan price
1 0 1 1.0
2 5 1 4.0
3 10 1 5.0
4 15 1 6.0
5 20 1 8.0
7 30 1 10.5
9 40 1 12.0
10 0 2 3.0
11 5 2 5.0
12 10 2 6.0
13 15 2 7.0
14 20 2 10.0
16 30 2 16.0
18 40 2 20.0
19 0 3 5.0
20 5 3 6.0
21 10 3 8.0
22 15 3 12.0
23 20 3 15.0
25 30 3 21.0
27 40 3 28.0

How to get correct ticklabs in a 3d-scatterplot in R?

Please see this example. Look at the y axis. The data there has only two levels: 1 and 2. But in the plot, 6 tick marks are drawn on that axis. How can I fix that? The x axis has the same problem.
The data
extra group ID
1 0.7 1 1
2 -1.6 1 2
3 -0.2 1 3
4 -1.2 1 4
5 -0.1 1 5
6 3.4 1 6
7 3.7 1 7
8 0.8 1 8
9 0.0 1 9
10 2.0 1 10
11 1.9 2 1
12 0.8 2 2
13 1.1 2 3
14 0.1 2 4
15 -0.1 2 5
16 4.4 2 6
17 5.5 2 7
18 1.6 2 8
19 4.6 2 9
20 3.4 2 10
The script
require('mise')
require('scatterplot3d')
mise() # clear the workspace
# example data
print(sleep)
# plot it
scatterplot3d(x=sleep$ID,
x.ticklabs=levels(sleep$ID),
y=sleep$group,
y.ticklabs=levels(sleep$group),
z=sleep$extra)
The result
How about this:
scatterplot3d(x=sleep$ID, y=sleep$extra, z=sleep$group, lab.z = c(1, 2))

Getting 2 Scatterplots and Histograms from a set of Data

so I have the following set of Data
> sleep
extra group ID
1 0.7 1 1
2 -1.6 1 2
3 -0.2 1 3
4 -1.2 1 4
5 -0.1 1 5
6 3.4 1 6
7 3.7 1 7
8 0.8 1 8
9 0.0 1 9
10 2.0 1 10
11 1.9 2 1
12 0.8 2 2
13 1.1 2 3
14 0.1 2 4
15 -0.1 2 5
16 4.4 2 6
17 5.5 2 7
18 1.6 2 8
19 4.6 2 9
20 3.4 2 10
My task is to generate two scatterplots that show the effects of drugs 1 and 2 (group), and also two histograms. I've been trying different things but seriously have no clue, and I cannot use ggplot as I'm not able to install anything on the lab computers! Please help!
Something like this, maybe:
# Open a PDF graphics device; every plot drawn until dev.off() goes into this file.
pdf("my_plots.pdf")
# One pass per distinct value of sleep$group.
for (g in unique(sleep$group)) {
# Plot `extra` against `ID` using only the rows of the current group.
with(sleep[sleep$group==g,], plot(ID, extra, main=paste0("Group = ",g)))
# Histogram of the current group's `extra` values.
hist(sleep$extra[sleep$group==g], main=paste0("Group = ",g))
}
# Close the PDF device opened above.
dev.off()
Per @rawr's comment, you can also have two or four plots on a single page by adding par(mfrow=c(1,2)) or par(mfrow=c(2,2)) before running the code above.

Resources