I've got a Data Frame (df) with 4 Columns and n rows
df <- structure(list(x = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 1L, 2L), y = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L), pch = c(9L, 10L, 11L, 7L, 12L, 9L,
7L, 5L, 8L, 1L, 8L, 2L, 5L, 8L, 5L), col = c(7L, 8L, 3L, 3L,
4L, 6L, 3L, 4L, 2L, 1L, 7L, 5L, 4L, 7L, 6L)), class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15"))
x y pch col
1 1 1 9 7
2 2 1 10 8
3 3 1 11 3
4 4 1 7 3
5 5 1 12 4
6 6 1 9 6
7 7 1 7 3
8 8 1 5 4
9 9 1 8 2
10 10 1 1 1
11 11 1 8 7
12 12 1 2 5
13 13 1 5 4
14 1 2 8 7
15 2 2 5 6
and I want to compare the x and y values with another Data Frame that has a single row (df2)
df2 <- structure(list(V1 = 7, V2 = 1), class = "data.frame", row.names = c(NA,
-1L))
V1 V2
1 7 1
and if they are the same, I want to take the matching entry (in this case row number 7) and draw it into my grid with the pch and col that are given in the first Data Frame.
My attempt was to compare them with an if statement, but I don't know how to get the matching row from the first Data Frame. In this case x = 7, y = 1, pch = 7 and col = 3
if(input$V1 == playfield$x && input$V2 == playfield$y)
{
}
Appreciate every help or idea.
You can just do:
# Keep only the rows of playfield whose x equals input$V1 and whose y equals
# input$V2; the elementwise & builds the logical row mask, and the trailing
# comma keeps all columns (so pch and col come along with the match).
new_df <- playfield[playfield$x == input$V1 & playfield$y == input$V2,]
You could use right_join from dplyr
library(dplyr)
# Match df$x against df2$V1 and df$y against df2$V2. A right join keeps every
# row of df2, so the result is the df row(s) for each requested (x, y) pair.
right_join(df, df2, by=c("x"="V1", "y"="V2"))
output:
x y pch col
1 7 1 7 3
data:
df <- structure(list(x = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 1L, 2L), y = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L), pch = c(9L, 10L, 11L, 7L, 12L, 9L,
7L, 5L, 8L, 1L, 8L, 2L, 5L, 8L, 5L), col = c(7L, 8L, 3L, 3L,
4L, 6L, 3L, 4L, 2L, 1L, 7L, 5L, 4L, 7L, 6L)), class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15"))
df2 <- structure(list(V1 = 7, V2 = 1), class = "data.frame", row.names = c(NA,
-1L))
I have
>head(p)
study treatment response
1 14 SSA 3
2 1 SSTR 4
3 14 SSA 3
4 6 SSTR 3
5 10 SSA 4
I want to count the responses for each study and then use bind_rows to also obtain the counts for all studies combined.
Thus, I have
p %>% as_tibble() %>%
mutate(nystudie=as.character(study),
best.resp =as.factor(response)) %>%
bind_rows(., mutate(., nystudie="All")) %>%
count(nystudie, best.resp)
Yielding
# A tibble: 27 x 3
nystudie best.resp n
<chr> <fct> <int>
1 1 3 2
2 1 4 3
3 10 4 2
4 11 3 1
However, I want to do a facet_wrap in ggplot using this tibble, stratified by p$treatment, à la + facet_wrap(~ treatment) + ...
Therefore, I am seeking help on how to optimize my script so the expected output gives something like:
# A tibble: 27 x 3
nystudie best.resp n treatment
<chr> <fct> <int> <fct>
1 1 3 2 "SSTR"
2 1 4 3 "SSTR"
3 10 4 2 "SSTR"
4 11 3 1 "SSA"
Data
p <- structure(list(study = structure(c(13L, 2L, 1L, 4L, 4L, 8L, 1L,
3L, 1L, 4L, 12L, 1L, 13L, 1L, 8L, 1L, 6L, 4L, 9L, 13L, 14L, 1L,
8L, 12L, 5L, 11L, 13L, 8L, 4L, 8L, 9L, 4L, 11L, 1L, 4L, 9L, 4L,
15L, 11L, 9L, 12L, 2L, 11L, 6L, 12L, 12L, 8L, 10L, 4L, 2L), .Label = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "22"), class = "factor"), treatment = structure(c(2L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L,
1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L,
1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L,
1L), .Label = c("SSTR", "SSA"), class = "factor"), response = c("1",
"3", "4", "3", "3", "3", "3", "3", "3", "3", "4", "4", "4", "3",
"2", "4", "4", "4", "4", "4", "4", "4", "1", "3", "3", "4", "4",
"1", "4", "1", "4", "4", "4", "3", "3", "2", "3", "4", "4", "2",
"3", "3", "3", "4", "3", "4", "2", "4", "4", "3")), row.names = c(NA,
-50L), class = "data.frame")
Try this:
#Code
# Duplicate every row under the label "All" so that overall totals are counted
# alongside the per-study groups, then summarise each (nystudie, best.resp) group.
p %>%
mutate(nystudie=as.character(study),
best.resp =as.factor(response)) %>%
bind_rows(., mutate(., nystudie="All")) %>%
group_by(nystudie,best.resp) %>%
# N is the size of the whole group; unique(treatment) yields one row per
# treatment present in the group, so N is repeated on each of those rows.
# NOTE(review): summarise() returning more than one row per group is
# deprecated as of dplyr 1.1.0 (reframe() is the documented replacement).
summarise(N=n(),Val=unique(treatment))
Output:
# A tibble: 28 x 4
# Groups: nystudie, best.resp [26]
nystudie best.resp N Val
<chr> <fct> <int> <fct>
1 1 3 4 SSTR
2 1 4 4 SSTR
3 10 4 1 SSA
4 11 3 1 SSA
5 11 4 3 SSA
6 12 3 3 SSA
7 12 4 2 SSA
8 13 1 1 SSA
9 13 4 3 SSA
10 14 4 1 SSA
# ... with 18 more rows
I'm trying to use pivot_wider to create multiple columns/variables containing values, but I get NAs in columns where I shouldn't.
Here is a representative sample of the data:
df <- structure(list(Condition = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Control", "Retraction1",
"Retraction2"), class = "factor"), First = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Journalist",
"Police", "Reviewer", "Spokesperson"), class = "factor"), Second = structure(c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Journalist",
"Police", "Reviewer", "Spokesperson"), class = "factor"), Third = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Journalist",
"Police", "Reviewer", "Spokesperson"), class = "factor"), Fourth = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("Journalist",
"Police", "Reviewer", "Spokesperson"), class = "factor"), ID = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35",
"36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46",
"47", "48", "49", "50", "51", "52", "53", "54", "55", "56", "57",
"58", "59", "60", "61", "62", "63", "64", "65", "66", "67", "68",
"69", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79",
"80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "90",
"91", "92", "93", "94", "95", "96", "97", "98", "99", "100",
"101"), class = "factor"), Scenario = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 1L, 2L, 3L, 4L), .Label = c("J", "P", "R",
"S"), class = "factor"), Estimate = structure(c(4L, 8L, 7L, 11L,
9L, 12L, 10L, 2L, 5L, 6L, 4L, 7L, 11L, 9L, 12L, 10L, 2L, 3L,
5L, 6L, 4L, 8L, 7L, 11L, 9L, 12L, 10L, 2L, 5L, 6L, 4L, 8L, 7L,
11L, 9L, 12L, 10L, 2L, 5L, 6L, 1L, 1L, 1L, 1L), .Label = c("CompMean",
"P.H.Reps.", "P.H.Reps..1", "P.Rel.", "P.Rel1.Reps.", "P.Rel2.Reps.",
"P.Rep1.nH.nRel.", "P.Rep1.nH.Rel.", "P.Rep2.nH.nRel.nRep1.",
"P.Rep2.nH.nRel.Rep1.", "P.Rep2.nH.Rel.nRep1.", "P.Rep2.nH.Rel.Rep1."
), class = "factor"), value = c(90L, 8L, 82L, 11L, 82L, 11L,
82L, 100L, 99L, NA, 62L, 11L, 91L, 12L, 91L, 5L, 82L, 91L, 80L,
NA, 92L, 12L, 61L, 18L, 90L, 21L, 81L, 96L, 92L, NA, 91L, 10L,
72L, 22L, 62L, 21L, 73L, 99L, 98L, NA, 7L, 7L, 7L, 7L)), row.names = c(NA,
-44L), class = c("tbl_df", "tbl", "data.frame"))
head(df)
This is data from one subject. There should be NAs only in the P.Rel2.Reps. column and in no other.
However, there are NAs in some of the other columns when I use pivot wider like so:
pivot_wider(df, names_from = Estimate, values_from = value)
Here is an example of how the data look after pivoting wider.
df2 <- structure(list(Condition = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("Control", "Retraction1", "Retraction2"
), class = "factor"), First = structure(c(2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L), .Label = c("Journalist", "Police", "Reviewer",
"Spokesperson"), class = "factor"), Second = structure(c(3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("Journalist",
"Police", "Reviewer", "Spokesperson"), class = "factor"), Third = structure(c(1L,
1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Journalist",
"Police", "Reviewer", "Spokesperson"), class = "factor"), Fourth = structure(c(4L,
4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Journalist",
"Police", "Reviewer", "Spokesperson"), class = "factor"), ID = structure(c(1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L), .Label = c("1", "2", "3",
"4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15",
"16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26",
"27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37",
"38", "39", "40", "41", "42", "43", "44", "45", "46", "47", "48",
"49", "50", "51", "52", "53", "54", "55", "56", "57", "58", "59",
"60", "61", "62", "63", "64", "65", "66", "67", "68", "69", "70",
"71", "72", "73", "74", "75", "76", "77", "78", "79", "80", "81",
"82", "83", "84", "85", "86", "87", "88", "89", "90", "91", "92",
"93", "94", "95", "96", "97", "98", "99", "100", "101"), class = "factor"),
Scenario = structure(c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L), .Label = c("J", "P", "R", "S"), class = "factor"), P.Rel. = c(90L,
62L, 92L, 91L, 57L, 81L, 71L, 80L, 40L, 75L), P.Rep1.nH.Rel. = c(8L,
NA, 12L, 10L, 31L, NA, 19L, 17L, 25L, NA), P.Rep1.nH.nRel. = c(82L,
11L, 61L, 72L, 89L, 15L, 79L, 84L, 76L, 25L), P.Rep2.nH.Rel.nRep1. = c(11L,
91L, 18L, 22L, 35L, 64L, 30L, 22L, 25L, 50L), P.Rep2.nH.nRel.nRep1. = c(82L,
12L, 90L, 62L, 62L, 13L, 45L, 53L, 25L, 50L), P.Rep2.nH.Rel.Rep1. = c(11L,
91L, 21L, 21L, 15L, 52L, 9L, 10L, 100L, 50L), P.Rep2.nH.nRel.Rep1. = c(82L,
5L, 81L, 73L, 67L, 22L, 60L, 61L, 100L, 25L), P.H.Reps. = c(100L,
82L, 96L, 99L, 81L, 40L, 71L, 76L, 75L, 90L), P.Rel1.Reps. = c(99L,
80L, 92L, 98L, 81L, 80L, 89L, 79L, 75L, 76L), P.Rel2.Reps. = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), P.H.Reps..1 = c(NA,
91L, NA, NA, NA, 80L, NA, NA, NA, 100L), CompMean = c(7L,
7L, 7L, 7L, 7L, 7L, 7L, 6L, 4L, 7L)), row.names = c(NA, -10L
), class = c("tbl_df", "tbl", "data.frame"))
head(df2)
I have seen there is a similar post on this topic but it doesn't answer why NAs are being produced in my situation.
Do I need to add some other argument?
Looking at the data it looks like you have some corrupted data at one place. You can correct it by
# Relabel the stray "P.H.Reps..1" entries as "P.Rep1.nH.Rel.". Both strings are
# existing levels of the Estimate factor, so the replacement does not create NAs.
df$Estimate <- replace(df$Estimate, df$Estimate == "P.H.Reps..1", "P.Rep1.nH.Rel.")
and then use pivot_wider, which will give you NAs in only one column, i.e. P.Rel2.Reps.
tidyr::pivot_wider(df, names_from = Estimate, values_from = value)
NA values will result for any combination of categories for the new pivoted columns that aren't present in the original long data frame. For example, let's look at the rows of the long data frame with Estimate=="P.Rep1.nH.Rel.":
df %>% filter(Estimate=="P.Rep1.nH.Rel.")
Condition First Second Third Fourth ID Scenario Estimate value
1 Control Police Reviewer Journalist Spokesperson 1 J P.Rep1.nH.Rel. 8
2 Control Police Reviewer Journalist Spokesperson 1 R P.Rep1.nH.Rel. 12
3 Control Police Reviewer Journalist Spokesperson 1 S P.Rep1.nH.Rel. 10
Now look at the results of pivot_wider (I've kept only the relevant columns for brevity). Note in the output below that there's a missing value in the P.Rep1.nH.Rel. column. The missing value occurs when Scenario=="P" because the long data frame doesn't have a row for P.Rep1.nH.Rel. with Scenario=="P", resulting in a missing value in the wide data frame. Missing values are occurring in the P.H.Reps..1 column for a similar reason, as there's only one row with Estimate=="P.H.Reps..1" in the long data frame and it has Scenario=="P". Thus, the values are missing for the other three scenarios.
pivot_wider(df, names_from = Estimate, values_from = value) %>%
select(Condition:Scenario, P.Rep1.nH.Rel., P.H.Reps..1)
Condition First Second Third Fourth ID Scenario P.Rep1.nH.Rel. P.H.Reps..1
1 Control Police Reviewer Journalist Spokesperson 1 J 8 NA
2 Control Police Reviewer Journalist Spokesperson 1 P NA 91
3 Control Police Reviewer Journalist Spokesperson 1 R 12 NA
4 Control Police Reviewer Journalist Spokesperson 1 S 10 NA
This may be a data error, as suggested by @RonakShah, but if the data are correct then the NA values will naturally result when pivoting to wide format. You can fill the missing values with some other value by adding the argument values_fill=list(value=0) to pivot_wider (you can of course use any fill value you wish; I've just used 0 for illustration). Note that even if you use the values_fill argument, explicit missing values in the original long data will still be preserved in the wide data frame. Only missing values that result from the pivoting operation will be filled with a different value.
This question already has answers here:
Extract the maximum value within each group in a dataframe [duplicate]
(3 answers)
Closed 6 years ago.
I have a time series dataset DF where the first column is the timestep and the second column is the cellNo.. How can I drop all rows except the one with max(DF$cellNo.) for each timestep?
> head(DF, n=100)
timestep cellNo.
1 1 1
2 1 2
3 1 3
4 1 4
5 1 5
6 1 6
7 1 7
8 1 8
9 1 9
10 1 10
11 1 11
12 1 12
13 1 13
14 1 14
15 1 15
16 1 16
17 1 17
18 1 18
19 1 19
20 1 20
21 1 21
22 1 22
23 1 23
24 1 24
25 1 25
26 1 26
27 1 27
28 1 28
29 1 29
30 1 30
31 1 31
32 1 32
33 2 1
34 2 2
35 2 3
36 2 4
37 2 5
38 2 6
39 2 7
40 2 8
41 2 9
42 2 10
43 2 11
44 2 12
45 2 13
46 2 14
47 2 15
48 2 16
49 2 17
50 2 18
51 2 19
52 2 20
53 2 21
54 2 22
55 2 23
56 2 24
57 2 25
58 2 26
59 2 27
60 2 28
61 2 29
62 2 30
63 2 31
64 2 32
65 3 1
66 3 2
67 3 3
68 3 4
69 3 5
70 3 6
71 3 7
72 3 8
73 3 9
74 3 10
75 3 11
76 3 12
77 3 13
78 3 14
79 3 15
80 3 16
81 3 17
82 3 18
83 3 19
84 3 20
85 3 21
86 3 22
87 3 23
88 3 24
89 3 25
90 3 26
91 3 27
92 3 28
93 3 29
94 3 30
95 3 31
96 3 32
97 4 1
98 4 2
99 4 3
100 4 4
If you want only max(cellNo.) per timestep, you could do:
# Formula interface: apply max to cellNo. within each distinct timestep of DF,
# returning one row per timestep.
aggregate(cellNo.~timestep, DF, max)
# timestep cellNo.
# 1 1 32
# 2 2 32
# 3 3 32
# 4 4 4
Try this
# dput your data
df <- structure(list(timestep = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L), cellNo. = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L,
24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 1L, 2L, 3L, 4L)), .Names = c("timestep", "cellNo."), class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35",
"36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46",
"47", "48", "49", "50", "51", "52", "53", "54", "55", "56", "57",
"58", "59", "60", "61", "62", "63", "64", "65", "66", "67", "68",
"69", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79",
"80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "90",
"91", "92", "93", "94", "95", "96", "97", "98", "99", "100"))
library(dplyr)
# One row per timestep holding the largest cellNo.; the result column is named max.
df %>% group_by(timestep) %>% summarise(max = max(cellNo.))
#Source: local data frame [4 x 2]
#timestep max
# (int) (int)
#1 1 32
#2 2 32
#3 3 32
#4 4 4
With data.table
library(data.table)
# NOTE(review): df1 is not defined anywhere in the visible text; presumably it
# stands for the question's data frame (DF) - confirm before running.
# setDT() converts the data frame to a data.table by reference; max(cellNo.) is
# then computed per timestep and returned in a column named Max.
setDT(df1)[, .(Max = max(cellNo.)), timestep]
That's my data:
> head(data)
id C1 C2 C3 B1 B2 B3 Name
12 3 12 8 1 3 12 Agar
14 4 11 9 5 12 14 LB
18 7 17 6 7 14 16 YEF
20 9 15 4 3 11 17 KAN
so I used a melt function from reshape2 package to reorganize my data. Now it looks like that:
dt <- melt(data, measure.vars=2:7)
> head(dt)
n v variable value rt
1 id Name p C1 1
2 12 Agar p 3 2
3 14 LB p 4 3
4 18 YEF p 7 6
5 20 KAN p 9 3
6 id Name u C2 1
I did some calculations on my data and now there is an extra column. Let's call it "rt". I'd like to transform my data back to its previous "state" while keeping this extra column. Do you know any function which would be useful?
dput(dt)
structure(list(n = structure(c(5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L,
3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L, 1L, 2L, 3L, 4L), class = "factor", .Label = c("12", "14",
"18", "20", "id")), v = structure(c(4L, 1L, 3L, 5L, 2L, 4L, 1L,
3L, 5L, 2L, 4L, 1L, 3L, 5L, 2L, 4L, 1L, 3L, 5L, 2L, 4L, 1L, 3L,
5L, 2L, 4L, 1L, 3L, 5L, 2L), class = "factor", .Label = c("Agar",
"KAN", "LB", "Name", "YEF")), variable = structure(c(1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L), .Label = c("p",
"u", "k", "l", "t", "h"), class = "factor"), value = c("C1",
"3", "4", "7", "9", "C2", "12", "11", "17", "15", "C3", "8",
"9", "6", "4", "B1", "1", "5", "7", "3", "B2", "3", "12", "14",
"11", "B3", "12", "14", "16", "17")), .Names = c("n", "v", "variable",
"value"), row.names = c(NA, -30L), class = "data.frame")
In the "reshape2" universe, melt and *cast go hand-in-hand.
Here's an example of melting a data.frame and dcasting it back to its original form. You would need to take a similar approach with your data.
# Small wide-format example: id column A plus two measure columns B and C.
mydf <- data.frame(A = LETTERS[1:3], B = 1:3, C = 4:6)
mydf
# A B C
# 1 A 1 4
# 2 B 2 5
# 3 C 3 6
library(reshape2)
# Melt to long format, keeping A as the id column; the names of the remaining
# columns go into `variable` and their contents into `value`.
mDF <- melt(mydf, id.vars="A")
mDF
# Cast back to wide format: one row per A, one column per level of `variable`,
# filled from the `value` column.
dcast(mDF, A ~ variable, value.var="value")
# A B C
# 1 A 1 4
# 2 B 2 5
# 3 C 3 6
In the dcast step, think of the items before the ~ as being the "id" variables, and those coming after as being the resulting column names. value.var should be the column from which the values will fill in the resulting "grid" created by the id variables and column names.