I have a data frame and I need to find the largest product of numbers in it that lie along the same direction (up, down, left, right, or diagonally).
The dataframe is as follows:
structure(list(V1 = c(8L, 49L, 81L, 52L, 22L, 24L, 32L, 67L,
24L, 21L, 78L, 16L, 86L, 19L, 4L, 88L, 4L, 20L, 20L, 1L), V2 = c(2L,
49L, 49L, 70L, 31L, 47L, 98L, 26L, 55L, 36L, 17L, 39L, 56L, 80L,
52L, 36L, 42L, 69L, 73L, 70L), V3 = c(22L, 99L, 31L, 95L, 16L,
32L, 81L, 20L, 58L, 23L, 53L, 5L, 0L, 81L, 8L, 68L, 16L, 36L,
35L, 54L), V4 = c(97L, 40L, 73L, 23L, 71L, 60L, 28L, 68L, 5L,
9L, 28L, 42L, 48L, 68L, 83L, 87L, 73L, 41L, 29L, 71L), V5 = c(38L,
17L, 55L, 4L, 51L, 99L, 64L, 2L, 66L, 75L, 22L, 96L, 35L, 5L,
97L, 57L, 38L, 72L, 78L, 83L), V6 = c(15L, 81L, 79L, 60L, 67L,
3L, 23L, 62L, 73L, 0L, 75L, 35L, 71L, 94L, 35L, 62L, 25L, 30L,
31L, 51L), V7 = c(0L, 18L, 14L, 11L, 63L, 45L, 67L, 12L, 99L,
76L, 31L, 31L, 89L, 47L, 99L, 20L, 39L, 23L, 90L, 54L), V8 = c(40L,
57L, 29L, 42L, 89L, 2L, 10L, 20L, 26L, 44L, 67L, 47L, 7L, 69L,
16L, 72L, 11L, 88L, 1L, 69L), V9 = c(0L, 60L, 93L, 69L, 41L,
44L, 26L, 95L, 97L, 20L, 15L, 55L, 5L, 28L, 7L, 3L, 24L, 34L,
74L, 16L), V10 = c(75L, 87L, 71L, 24L, 92L, 75L, 38L, 63L, 17L,
45L, 94L, 58L, 44L, 73L, 97L, 46L, 94L, 62L, 31L, 92L), V11 = c(4L,
17L, 40L, 68L, 36L, 33L, 40L, 94L, 78L, 35L, 3L, 88L, 44L, 92L,
57L, 33L, 72L, 99L, 49L, 33L), V12 = c(5L, 40L, 67L, 56L, 54L,
53L, 67L, 39L, 78L, 14L, 80L, 24L, 37L, 13L, 32L, 67L, 18L, 69L,
71L, 48L), V13 = c(7L, 98L, 53L, 1L, 22L, 78L, 59L, 63L, 96L,
0L, 4L, 0L, 44L, 86L, 16L, 46L, 8L, 82L, 48L, 61L), V14 = c(78L,
43L, 88L, 32L, 40L, 36L, 54L, 8L, 83L, 61L, 62L, 17L, 60L, 52L,
26L, 55L, 46L, 67L, 86L, 43L), V15 = c(52L, 69L, 30L, 56L, 40L,
84L, 70L, 40L, 14L, 33L, 16L, 54L, 21L, 17L, 26L, 12L, 29L, 59L,
81L, 52L), V16 = c(12L, 48L, 3L, 71L, 28L, 20L, 66L, 91L, 88L,
97L, 14L, 24L, 58L, 77L, 79L, 32L, 32L, 85L, 16L, 1L), V17 = c(50L,
4L, 49L, 37L, 66L, 35L, 18L, 66L, 34L, 34L, 9L, 36L, 51L, 4L,
33L, 63L, 40L, 74L, 23L, 89L), V18 = c(77L, 56L, 13L, 2L, 33L,
17L, 38L, 49L, 89L, 31L, 53L, 29L, 54L, 89L, 27L, 93L, 62L, 4L,
57L, 19L), V19 = c(91L, 62L, 36L, 36L, 13L, 12L, 64L, 94L, 63L,
33L, 56L, 85L, 17L, 55L, 98L, 53L, 76L, 36L, 5L, 67L), V20 = c(8L,
0L, 65L, 91L, 80L, 50L, 70L, 21L, 72L, 95L, 92L, 57L, 58L, 40L,
66L, 69L, 36L, 16L, 54L, 48L)), .Names = c("V1", "V2", "V3",
"V4", "V5", "V6", "V7", "V8", "V9", "V10", "V11", "V12", "V13",
"V14", "V15", "V16", "V17", "V18", "V19", "V20"), class = "data.frame", row.names = c(NA,
-20L))
The dataframe looks like this:
> newjd
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
1 8 2 22 97 38 15 0 40 0 75 4 5 7 78 52 12 50 77 91 8
2 49 49 99 40 17 81 18 57 60 87 17 40 98 43 69 48 4 56 62 0
3 81 49 31 73 55 79 14 29 93 71 40 67 53 88 30 3 49 13 36 65
4 52 70 95 23 4 60 11 42 69 24 68 56 1 32 56 71 37 2 36 91
5 22 31 16 71 51 67 63 89 41 92 36 54 22 40 40 28 66 33 13 80
6 24 47 32 60 99 3 45 2 44 75 33 53 78 36 84 20 35 17 12 50
7 32 98 81 28 64 23 67 10 26 38 40 67 59 54 70 66 18 38 64 70
8 67 26 20 68 2 62 12 20 95 63 94 39 63 8 40 91 66 49 94 21
9 24 55 58 5 66 73 99 26 97 17 78 78 96 83 14 88 34 89 63 72
10 21 36 23 9 75 0 76 44 20 45 35 14 0 61 33 97 34 31 33 95
11 78 17 53 28 22 75 31 67 15 94 3 80 4 62 16 14 9 53 56 92
12 16 39 5 42 96 35 31 47 55 58 88 24 0 17 54 24 36 29 85 57
13 86 56 0 48 35 71 89 7 5 44 44 37 44 60 21 58 51 54 17 58
14 19 80 81 68 5 94 47 69 28 73 92 13 86 52 17 77 4 89 55 40
15 4 52 8 83 97 35 99 16 7 97 57 32 16 26 26 79 33 27 98 66
16 88 36 68 87 57 62 20 72 3 46 33 67 46 55 12 32 63 93 53 69
17 4 42 16 73 38 25 39 11 24 94 72 18 8 46 29 32 40 62 76 36
18 20 69 36 41 72 30 23 88 34 62 99 69 82 67 59 85 74 4 36 16
19 20 73 35 29 78 31 90 1 74 31 49 71 48 86 81 16 23 57 5 54
20 1 70 54 71 83 51 54 69 16 92 33 48 61 43 52 1 89 19 67 48
Any suggestion is highly appreciated.
## Coerce the data frame to a matrix so row()/col() masks can be applied
m <- as.matrix(df)
## Build one logical mask per straight line through the 20 x 20 grid
ll <- c(
  lapply(1:20, function(k) row(m) == k),                  ## Rows
  lapply(1:20, function(k) col(m) == k),                  ## Columns
  lapply(-19:19, function(k) row(m) - col(m) == k),       ## NW-SE diagonals
  lapply(-19:19, function(k) row(m) + col(m) == 21 - k)   ## SW-NE diagonals
)
## Calculate product of each row, column, and diagonal
pp <- vapply(ll, function(mask) prod(m[mask]), numeric(1))
max(pp)
# [1] 1.824798e+35
m[ll[[which.max(pp)]]]
# [1] 75 87 71 24 92 75 38 63 17 45 94 58 44 73 97 46 94 62 31 92
Or, more generally:
## Find the row, column, or full diagonal of a matrix whose elements have the
## largest product.
##
## Arguments:
##   m  A numeric matrix, or an object that as.matrix() converts to one
##      (for example a data frame whose columns are all numeric).
## Value:
##   The linear (column-major) indices of the elements on the winning line,
##   so that as.matrix(m)[result] gives the values themselves. On ties the
##   first candidate wins, in the order: rows, columns, diagonals where
##   col - row is constant, diagonals where col + row is constant.
maxProdElements <- function(m) {
  ## Coerce first: the linear indexing m[i] used below addresses cells only
  ## on a matrix; on a data frame it would select whole columns instead.
  m <- as.matrix(m)
  mm <- nrow(m)
  nn <- ncol(m)
  ## Build the index matrices once instead of once per candidate line.
  rr <- row(m)
  cc <- col(m)
  offsets <- (-mm + 1):(nn - 1)
  ll <- c(
    lapply(seq_len(mm), function(X) which(rr == X)),
    lapply(seq_len(nn), function(X) which(cc == X)),
    lapply(offsets, function(X) which(cc == (rr + X))),
    lapply(offsets, function(X) which((nn + 1 - cc) == (rr + X)))
  )
  ## vapply() guarantees exactly one double per candidate line.
  pp <- vapply(ll, function(X) prod(m[X]), numeric(1))
  ll[[which.max(pp)]]
}
## Try it with a 2 by 5 matrix
M <- matrix(1:10, nrow = 2, ncol = 5)
## Values on the winning line
M[maxProdElements(M)]
# [1] 2 4 6 8 10
## ... and their product
prod(M[maxProdElements(M)])
# [1] 3840
Related
ind
set
inst_0
inst_1
inst_2
Inst_3
inst_4
inst_5
0
1
20
30
50
55
58
60
0
2
34
44
46
67
89
70
0
3
37
89
78
80
90
98
0
4
23
45
67
89
87
89
1
1
34
56
65
78
77
89
1
2
23
32
45
55
66
77
1
3
35
69
88
99
98
57
1
4
23
45
56
78
89
99
2
1
23
34
55
55
77
88
2
2
12
44
55
67
88
90
2
3
12
66
77
91
44
99
2
4
45
55
88
31
56
100
I have a data frame like the one above, and I would like to make a plot showing a trend like the one in the graph below (which was made for only 4 individuals in the same set) for combinations such as Ind0-set1, Ind1-set1, Ind2-set2, ..., Ind0-set2, Ind1-set2. My second question is: how can I plot multiple line graphs, one for each set, in a single graph?
I am not sure whether I should use ggplot2 or whether it can be done with the plot function too.
If you want to do this using ggplot2 then the first step would be to reshape your data to long or tidy format using e.g. tidyr::pivot_longer:
library(tidyr)
library(dplyr)
library(ggplot2)
# Reshape to long
# Convert all column names to lower case first, so every instrument column
# carries the same "inst_" prefix
dat <- rename_with(dat, tolower)
# One row per (ind, set, inst) combination; the prefix is stripped from inst
dat <- pivot_longer(
  dat,
  cols = -c(ind, set),
  names_to = "inst",
  names_prefix = "inst_",
  values_to = "value"
)
After doing so you could create a plot showing all individuals for all sets by using facetting:
# One panel per set; within a panel, one coloured line per individual
ggplot(
  data = dat,
  mapping = aes(x = inst, y = value, colour = factor(ind), group = ind)
) +
  geom_line() +
  geom_point() +
  facet_wrap(~ set)
Or you could filter your data for your desired combinations to create a plot for e.g. just one set like so:
# Keep only the rows belonging to set 1, then draw one line per individual
dat_filtered <- dat[dat$set == 1, , drop = FALSE]
ggplot(
  data = dat_filtered,
  mapping = aes(x = inst, y = value, colour = factor(ind), group = ind)
) +
  geom_line() +
  geom_point()
DATA
dat <- data.frame(
ind = c(0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L),
set = c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L),
inst_0 = c(20L, 34L, 37L, 23L, 34L, 23L, 35L, 23L, 23L, 12L, 12L, 45L),
inst_1 = c(30L, 44L, 89L, 45L, 56L, 32L, 69L, 45L, 34L, 44L, 66L, 55L),
inst_2 = c(50L, 46L, 78L, 67L, 65L, 45L, 88L, 56L, 55L, 55L, 77L, 88L),
Inst_3 = c(55L, 67L, 80L, 89L, 78L, 55L, 99L, 78L, 55L, 67L, 91L, 31L),
inst_4 = c(58L, 89L, 90L, 87L, 77L, 66L, 98L, 89L, 77L, 88L, 44L, 56L),
inst_5 = c(60L, 70L, 98L, 89L, 89L, 77L, 57L, 99L, 88L, 90L, 99L, 100L)
)
I want to convert values 0<x<9 & x<50.2 to NA. The first five columns of my data frame must not be changed; I only want to replace values in columns 6 to 60. I tried to do it in two steps as follows, but it also replaced values I don't intend to change:
BdsDf[BdsDf > 50.2][6:60] <- NA; BdsDf[BdsDf < 9][6:60] <- NA
Here is one way:
# test data: 60 columns of 100 normal draws (mean 50, sd 25)
df <- data.frame(lapply(1:60, function(x) {
  rnorm(100, 50, 25)
}))
# Work on a copy of columns 6 to 60, blank out the matching cells, and write
# the copy back; columns 1 to 5 are never touched
late_cols <- df[, 6:60]
late_cols[0 < late_cols & late_cols < 9 & late_cols < 50.2] <- NA
df[, 6:60] <- late_cols
With naniar, you can use replace_with_na_at to select the specific columns and add the 2 conditions.
library(dplyr)
library(naniar)
BdsDf %>%
  # Pick the target columns by name: every column except the first five.
  # (`-1:5` parses as `(-1):5`, i.e. c(-1, 0, 1, ..., 5), which mixes
  # negative, zero and positive positions instead of dropping columns 1 to 5.)
  replace_with_na_at(.vars = names(BdsDf)[-(1:5)],
                     condition = ~ .x < 9 | .x > 50.2)
Or in base R, you can do the following with the sample dataset. For your data, you would just change 6:8 to 6:60 (like with the commented out portion below).
# Flag every cell in columns 6 to 8 that is below 9 or above 50.2 ...
out_of_range <- BdsDf[6:8] < 9 | BdsDf[6:8] > 50.2
# ... and blank those cells out, leaving columns 1 to 5 untouched
BdsDf[, 6:8][out_of_range] <- NA
# BdsDf[,6:60][BdsDf[6:60] > 50.2 | BdsDf[6:60] < 9] <- NA
Output
X1 X2 X3 X4 X5 X6 X7 X8
1 75 86 91 16 6 NA NA NA
2 84 68 8 85 19 NA 38 NA
3 18 52 5 17 59 NA 28 NA
4 97 86 45 17 31 NA NA 28
5 41 95 60 80 49 NA 24 NA
6 47 56 65 35 44 18 NA 19
7 29 46 3 22 36 15 NA 10
8 48 50 60 38 47 NA NA 35
9 91 20 50 5 24 40 47 19
10 85 84 15 71 96 NA NA 26
Data
BdsDf <- structure(list(X1 = c(75L, 84L, 18L, 97L, 41L, 47L, 29L, 48L,
91L, 85L), X2 = c(86L, 68L, 52L, 86L, 95L, 56L, 46L, 50L, 20L,
84L), X3 = c(91L, 8L, 5L, 45L, 60L, 65L, 3L, 60L, 50L, 15L),
X4 = c(16L, 85L, 17L, 17L, 80L, 35L, 22L, 38L, 5L, 71L),
X5 = c(6L, 19L, 59L, 31L, 49L, 44L, 36L, 47L, 24L, 96L),
X6 = c(66L, 0L, 84L, 3L, 84L, 18L, 15L, 60L, 40L, 67L), X7 = c(73L,
38L, 28L, 6L, 24L, 91L, 79L, 7L, 47L, 88L), X8 = c(92L, 57L,
66L, 28L, 85L, 19L, 10L, 35L, 19L, 26L)), class = "data.frame", row.names = c(NA,
-10L))
This question already has answers here:
Add (insert) a column between two columns in a data.frame
(18 answers)
Closed 4 years ago.
Suppose i have dataset
df=structure(list(SaleCount = c(7L, 35L, 340L, 260L, 3L, 31L, 420L,
380L, 45L, 135L, 852L, 1L, 34L, 360L, 140L, 14L, 62L, 501L, 560L,
0L, 640L, 0L, 0L, 16L, 0L), DocumentNum = c(36L, 4L, 41L, 41L,
36L, 4L, 41L, 41L, 33L, 33L, 33L, 36L, 4L, 41L, 41L, 33L, 33L,
33L, 62L, 63L, 62L, 63L, 36L, 4L, 41L)), .Names = c("SaleCount",
"DocumentNum"), class = "data.frame", row.names = c(NA, -25L))
I need to create a new column, but it must be the second column in the data frame.
If I do this:
df["MY_NEW_COLUMN"] <- NA .
the new column is the third one.
How can I create it so that it is the second column?
I.e., I expect this output:
SaleCount newcolumn DocumentNum
1 7 NA 36
2 35 NA 4
3 340 NA 41
4 260 NA 41
5 3 NA 36
6 31 NA 4
7 420 NA 41
8 380 NA 41
9 45 NA 33
10 135 NA 33
11 852 NA 33
12 1 NA 36
13 34 NA 4
14 360 NA 41
15 140 NA 41
16 14 NA 33
17 62 NA 33
18 501 NA 33
19 560 NA 62
20 0 NA 63
21 640 NA 62
22 0 NA 63
23 0 NA 36
24 16 NA 4
25 0 NA 41
Of course sometimes I need to create a fourth column by order and so on.
You can use the dplyr library and the select function.
library(dplyr)
df=structure(list(SaleCount = c(7L, 35L, 340L, 260L, 3L, 31L, 420L,
380L, 45L, 135L, 852L, 1L, 34L, 360L, 140L, 14L, 62L, 501L, 560L,
0L, 640L, 0L, 0L, 16L, 0L), DocumentNum = c(36L, 4L, 41L, 41L,
36L, 4L, 41L, 41L, 33L, 33L, 33L, 36L, 4L, 41L, 41L, 33L, 33L,
33L, 62L, 63L, 62L, 63L, 36L, 4L, 41L)), .Names = c("SaleCount",
"DocumentNum"), class = "data.frame", row.names = c(NA, -25L))
# Append the new all-NA column (it lands in the last position) ...
df$MY_NEW_COLUMN <- NA
# ... then list the columns in the order they should appear
df %>%
  select(SaleCount, MY_NEW_COLUMN, DocumentNum)
This is a question for an R Programming class, but I have been working on it for several hours, over a period of a few days. I have done internet searches and referenced three different books. I have tried very hard to solve it on my own. I am finally asking for help.
I was given a csv, which I read into the program. This is the resulting dataframe, named df:
name hw0 hw1 hw2 hw3 hw4 hw5 hw6 quiz1 quiz2 quiz3 quiz4 quiz5 quiz6 term1
1 20 14 30 100 50 60 36 12 15 30 15 25 25 100
2 A 20 13 30 100 50 60 30 11 15 0 14 25 25 100
3 B 20 14 30 100 50 60 36 8 11 24 8 13 9 95
4 C 20 14 28 100 50 60 36 12 4 25 13 24 14 95
5 D 20 12 30 100 50 0 33 7 15 26 12 22 0 100
6 E 20 14 30 90 30 0 0 10 15 30 15 21 15 100
7 F 20 13 30 100 48 0 36 12 15 30 15 25 23 95
8 G 20 14 26 85 40 42 33 11 15 23 11 17 16 90
9 H 20 0 0 85 50 0 0 0 15 0 0 15 10 85
10 I 20 14 15 0 10 48 30 11 0 27 11 14 16 60
11 J 20 14 29 80 35 0 36 11 13 24 12 14 0 70
12 K 20 14 29 97 50 60 36 4 7 19 11 20 15 100
13 L 20 14 30 100 45 0 36 10 6 26 8 16 7 80
14 M 20 14 30 100 50 60 36 7 15 28 14 25 25 100
15 N 20 11 0 95 20 0 0 8 14 26 7 9 0 95
16 O 20 12 28 97 0 40 0 11 10 27 11 15 15 70
17 P 20 13 0 90 45 0 20 4 13 30 10 20 17 90
18 Q 20 14 30 100 45 0 36 0 12 21 11 14 17 75
term2 term3 exam1 exam2 exam3 final
1 100 100 100 100 95 100
2 100 100 97 97 80 97
3 100 100 83 85 73 73
4 100 100 88 75 56 77
5 100 0 90 87 72 81
6 100 80 92 82 69 79
7 100 100 90 95 87 90
8 100 0 89 79 81 78
9 90 100 62 83 42 75
10 90 72 78 78 66 81
11 0 0 79 77 51 78
12 100 100 79 77 57 81
13 0 100 68 74 76 76
14 100 100 99 98 82 99
15 0 0 70 70 52 61
16 0 0 63 66 0 0
17 100 100 75 72 56 64
18 90 75 72 84 54 63
QUESTION:
checkStudent <- function(df, studentName);
This function extracts a particular student's grades data from a data frame and returns them.
REQUIRED FORMAT:
checkStudent <- function(df, studentName)
{
}
TIPS PROVIDED:
inputs:
df -- a data frame that contains all the grades data
studentName -- name of a student
return:
all the grades for the student whose name is given as studentName
purpose:
extracting a particular student's grades data from a data frame and returning them
PROJECT TESTER- line of code and expected results:
checkStudent(df,"A")
name hw0 hw1 hw2 hw3 hw4 hw5 hw6 quiz1 quiz2 quiz3 quiz4 quiz5
2 A 20 13 30 100 50 60 30 11 15 0 14 25
quiz6 term1 term2 term3 exam1 exam2 exam3 final
2 25 100 100 100 97 97 80 97
I feel like I have been given everything and still can't get it right. I have tried:
checkStudent <- function(df, studentName)
{
name <- studentName
df["name", ]
}
and
checkStudent <- function(df, studentName)
{
subset(df, "name" == studentName, 1:21)
}
and numerous other lines of code, too many to list.
Please help. I am truly stuck.
Again, this needs to be done strictly in R. If it matters, I'm using RStudio. Thank you so much.
You're really close.
Variables in R should never be wrapped in quotes; they should always be free-standing. Additionally, your code is just printing the row; it is not returning it.
Here's a slightly modified version of your first attempt, without the quotes.
checkStudent <- function(df, studentName)
{
  # Index the rows by row name: this only finds the student when the data
  # frame's row names are the student names.
  return(df[studentName, ])
}
Edit: Oops, I realized your rows aren't named as the students.
You'll need to make it more like this:
checkStudent <- function(df, studentName)
{
  # TRUE for each row whose name column equals the requested student
  is_student <- df$name == studentName
  # which() turns the flags into row positions
  return(df[which(is_student), ])
}
Try with logical subsetting:
# Logical subsetting: keep the rows of x whose "name" entry equals y
checkStudent <- function(x, y) {
  keep <- x["name"] == y
  x[keep, ]
}
Test:
checkStudent(df,"A")
# name hw0 hw1 hw2 hw3 hw4 hw5 hw6 quiz1 quiz2 quiz3 quiz4 quiz5 quiz6 term1 term2 term3 exam1 exam2 exam3 final
#1 A 20 13 30 100 50 60 30 11 15 0 14 25 25 100 100 100 97 97 80 97
data:
df <- structure(list(name = structure(1:17, .Label = c("A", "B", "C",
"D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P",
"Q"), class = "factor"), hw0 = c(20L, 20L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L), hw1 = c(13L,
14L, 14L, 12L, 14L, 13L, 14L, 0L, 14L, 14L, 14L, 14L, 14L, 11L,
12L, 13L, 14L), hw2 = c(30L, 30L, 28L, 30L, 30L, 30L, 26L, 0L,
15L, 29L, 29L, 30L, 30L, 0L, 28L, 0L, 30L), hw3 = c(100L, 100L,
100L, 100L, 90L, 100L, 85L, 85L, 0L, 80L, 97L, 100L, 100L, 95L,
97L, 90L, 100L), hw4 = c(50L, 50L, 50L, 50L, 30L, 48L, 40L, 50L,
10L, 35L, 50L, 45L, 50L, 20L, 0L, 45L, 45L), hw5 = c(60L, 60L,
60L, 0L, 0L, 0L, 42L, 0L, 48L, 0L, 60L, 0L, 60L, 0L, 40L, 0L,
0L), hw6 = c(30L, 36L, 36L, 33L, 0L, 36L, 33L, 0L, 30L, 36L,
36L, 36L, 36L, 0L, 0L, 20L, 36L), quiz1 = c(11L, 8L, 12L, 7L,
10L, 12L, 11L, 0L, 11L, 11L, 4L, 10L, 7L, 8L, 11L, 4L, 0L), quiz2 = c(15L,
11L, 4L, 15L, 15L, 15L, 15L, 15L, 0L, 13L, 7L, 6L, 15L, 14L,
10L, 13L, 12L), quiz3 = c(0L, 24L, 25L, 26L, 30L, 30L, 23L, 0L,
27L, 24L, 19L, 26L, 28L, 26L, 27L, 30L, 21L), quiz4 = c(14L,
8L, 13L, 12L, 15L, 15L, 11L, 0L, 11L, 12L, 11L, 8L, 14L, 7L,
11L, 10L, 11L), quiz5 = c(25L, 13L, 24L, 22L, 21L, 25L, 17L,
15L, 14L, 14L, 20L, 16L, 25L, 9L, 15L, 20L, 14L), quiz6 = c(25L,
9L, 14L, 0L, 15L, 23L, 16L, 10L, 16L, 0L, 15L, 7L, 25L, 0L, 15L,
17L, 17L), term1 = c(100L, 95L, 95L, 100L, 100L, 95L, 90L, 85L,
60L, 70L, 100L, 80L, 100L, 95L, 70L, 90L, 75L), term2 = c(100L,
100L, 100L, 100L, 100L, 100L, 100L, 90L, 90L, 0L, 100L, 0L, 100L,
0L, 0L, 100L, 90L), term3 = c(100L, 100L, 100L, 0L, 80L, 100L,
0L, 100L, 72L, 0L, 100L, 100L, 100L, 0L, 0L, 100L, 75L), exam1 = c(97L,
83L, 88L, 90L, 92L, 90L, 89L, 62L, 78L, 79L, 79L, 68L, 99L, 70L,
63L, 75L, 72L), exam2 = c(97L, 85L, 75L, 87L, 82L, 95L, 79L,
83L, 78L, 77L, 77L, 74L, 98L, 70L, 66L, 72L, 84L), exam3 = c(80L,
73L, 56L, 72L, 69L, 87L, 81L, 42L, 66L, 51L, 57L, 76L, 82L, 52L,
0L, 56L, 54L), final = c(97L, 73L, 77L, 81L, 79L, 90L, 78L, 75L,
81L, 78L, 81L, 76L, 99L, 61L, 0L, 64L, 63L)), .Names = c("name",
"hw0", "hw1", "hw2", "hw3", "hw4", "hw5", "hw6", "quiz1", "quiz2",
"quiz3", "quiz4", "quiz5", "quiz6", "term1", "term2", "term3",
"exam1", "exam2", "exam3", "final"), row.names = c(NA, -17L), class = "data.frame")
I am a newbie in R and I am having trouble summarizing data. I tried to follow tutorials on the internet, but unfortunately I got errors all the time.
I have a matrix where my response factor is "concentration"
In my experiment I have 3 treatments (a, b and c) and 5 replicates for each treatment. And I get the concentrations of 8 products (PRO1 - PRO8).
TRA PRO1 PRO2 PRO3 PRO4 PRO5 PRO6 PRO7 PRO8
1 a 83 85 59 46 64 8 76 74
2 a 61 71 73 15 87 95 61 9
3 a 78 12 35 23 56 95 67 11
4 a 48 30 75 94 57 15 58 58
5 a 51 92 30 60 22 9 64 5
6 b 46 17 66 79 30 99 3 38
7 b 40 25 11 18 66 25 55 38
8 b 34 94 83 63 30 100 56 31
9 b 3 81 26 73 32 56 4 12
10 b 18 40 13 51 4 44 75 4
11 c 68 28 20 15 13 56 5 82
12 c 50 85 65 85 13 13 34 69
13 c 75 37 11 55 58 69 85 67
14 c 71 30 83 46 87 67 59 70
15 c 10 76 50 20 98 81 57 76
I tried summaryBy, doBy and similar functions; however, they did not work for me.
How can I arrange my matrix so that I can run these functions and get the mean and sd? My intention is to make a barplot with error bars to see the differences between the treatments for each product.
Thanks
You may try
library(ggplot2)
library(dplyr)
library(tidyr)
# Reshape to long form (one row per treatment/product measurement), then
# compute the mean and standard deviation for each treatment-product pair
# and draw dodged bars with +/- 1 SD error bars.
df1 %>%
  # pivot_longer() is the current replacement for the superseded gather()
  pivot_longer(-TRA, names_to = "Var", values_to = "Val") %>%
  group_by(TRA, Var) %>%
  # Drop the grouping once summarised so a plain table reaches ggplot()
  summarise(Mean = mean(Val), SD = sd(Val), .groups = "drop") %>%
  ggplot(aes(x = TRA, y = Mean, fill = Var)) +
  # geom_col() plots the precomputed means directly
  geom_col(position = position_dodge()) +
  geom_errorbar(
    aes(ymin = Mean - SD, ymax = Mean + SD),
    width = .2,
    position = position_dodge(.9)
  )
data
df1 <- structure(list(TRA = c("a", "a", "a", "a", "a", "b", "b", "b",
"b", "b", "c", "c", "c", "c", "c"), PRO1 = c(83L, 61L, 78L, 48L,
51L, 46L, 40L, 34L, 3L, 18L, 68L, 50L, 75L, 71L, 10L), PRO2 = c(85L,
71L, 12L, 30L, 92L, 17L, 25L, 94L, 81L, 40L, 28L, 85L, 37L, 30L,
76L), PRO3 = c(59L, 73L, 35L, 75L, 30L, 66L, 11L, 83L, 26L, 13L,
20L, 65L, 11L, 83L, 50L), PRO4 = c(46L, 15L, 23L, 94L, 60L, 79L,
18L, 63L, 73L, 51L, 15L, 85L, 55L, 46L, 20L), PRO5 = c(64L, 87L,
56L, 57L, 22L, 30L, 66L, 30L, 32L, 4L, 13L, 13L, 58L, 87L, 98L
), PRO6 = c(8L, 95L, 95L, 15L, 9L, 99L, 25L, 100L, 56L, 44L,
56L, 13L, 69L, 67L, 81L), PRO7 = c(76L, 61L, 67L, 58L, 64L, 3L,
55L, 56L, 4L, 75L, 5L, 34L, 85L, 59L, 57L), PRO8 = c(74L, 9L,
11L, 58L, 5L, 38L, 38L, 31L, 12L, 4L, 82L, 69L, 67L, 70L, 76L
)), .Names = c("TRA", "PRO1", "PRO2", "PRO3", "PRO4", "PRO5",
"PRO6", "PRO7", "PRO8"), class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15"))