group by , summarise by frequency R [duplicate] - r

This question already has answers here:
How to select the rows with maximum values in each group with dplyr? [duplicate]
(6 answers)
Closed 2 years ago.
I want to group by data set based on some IDs, then leave the grouped data that has largest value in the column. Here is a description of my data set.
BSTN ASTN1 BSTN2 ASTN2 BSTN3 ASTN3 BSTN4 ASTN4 BSTN5 ASTN TRNID TRNID2 TRNID3 TRNID4 TRNID5 count
1 150 0 0 0 0 0 0 0 0 152 1674 0 0 0 0 1
2 150 0 0 0 0 0 0 0 0 152 1676 0 0 0 0 2
3 150 0 0 0 0 0 0 0 0 152 1678 0 0 0 0 2
4 150 0 0 0 0 0 0 0 0 152 1680 0 0 0 0 13
5 150 0 0 0 0 0 0 0 0 152 1682 0 0 0 0 3
6 150 0 0 0 0 0 0 0 0 152 1684 0 0 0 0 4
I want to group and summarise this data into a single row based on IDs the first 10 columns BSTN ASTN1 BSTN2 ASTN2 BSTN3 ASTN3 BSTN4 ASTN4 BSTN5 ASTN.
Then for the rest of the columns, TRNID TRNID2 TRNID3 TRNID4 TRNID5 I would like to replace them with the row with maximum value in column count.
What I want as my final output would look as below.
BSTN ASTN1 BSTN2 ASTN2 BSTN3 ASTN3 BSTN4 ASTN4 BSTN5 ASTN TRNID TRNID2 TRNID3 TRNID4 TRNID5 count
150 0 0 0 0 0 0 0 0 152 1680 0 0 0 0 13
How would summarise my data? I have 2,931,959 rows with more groups of BSTN, ASTNs.
dput(head(A_Routetable2))
structure(list(BSTN = c(150, 150, 150, 150, 150, 150), ASTN1 = c(0,
0, 0, 0, 0, 0), BSTN2 = c(0, 0, 0, 0, 0, 0), ASTN2 = c(0, 0,
0, 0, 0, 0), BSTN3 = c(0, 0, 0, 0, 0, 0), ASTN3 = c(0, 0, 0,
0, 0, 0), BSTN4 = c(0, 0, 0, 0, 0, 0), ASTN4 = c(0, 0, 0, 0,
0, 0), BSTN5 = c(0, 0, 0, 0, 0, 0), ASTN = c(152, 152, 152, 152,
152, 152), TRNID = c(1674, 1676, 1678, 1680, 1682, 1684), TRNID2 = c(0,
0, 0, 0, 0, 0), TRNID3 = c(0, 0, 0, 0, 0, 0), TRNID4 = c(0, 0,
0, 0, 0, 0), TRNID5 = c(0, 0, 0, 0, 0, 0), count = c(1L, 2L,
2L, 13L, 3L, 4L)), row.names = c(NA, -6L), groups = structure(list(
BSTN = c(150, 150, 150, 150, 150, 150), ASTN1 = c(0, 0, 0,
0, 0, 0), BSTN2 = c(0, 0, 0, 0, 0, 0), ASTN2 = c(0, 0, 0,
0, 0, 0), BSTN3 = c(0, 0, 0, 0, 0, 0), ASTN3 = c(0, 0, 0,
0, 0, 0), BSTN4 = c(0, 0, 0, 0, 0, 0), ASTN4 = c(0, 0, 0,
0, 0, 0), BSTN5 = c(0, 0, 0, 0, 0, 0), ASTN = c(152, 152,
152, 152, 152, 152), TRNID = c(1674, 1676, 1678, 1680, 1682,
1684), TRNID2 = c(0, 0, 0, 0, 0, 0), TRNID3 = c(0, 0, 0,
0, 0, 0), TRNID4 = c(0, 0, 0, 0, 0, 0), .rows = structure(list(
1L, 2L, 3L, 4L, 5L, 6L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 6L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))

You can group_by position and then select row with max value in count.
library(dplyr)
df %>% group_by(across(1:10)) %>% slice(which.max(count))
# BSTN ASTN1 BSTN2 ASTN2 BSTN3 ASTN3 BSTN4 ASTN4 BSTN5 ASTN TRNID TRNID2 TRNID3 TRNID4 TRNID5 count
# <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
#1 150 0 0 0 0 0 0 0 0 152 1680 0 0 0 0 13
Or group by range of columns
df %>% group_by(across(BSTN:ASTN)) %>%slice(which.max(count))
The dput shared by OP is grouped which results an error with across. We can ungroup the data first and run the above which runs without any error. However functions in the previous version of dplyr work without any error on it. For example - group_by_at
A_Routetable2 %>% group_by_at(1:10) %>% slice(which.max(count))

Related

Merge Tables in R

I'm trying to merge two data.frames in R so that the values in test_1 are overwritten if they exist in test_2
Every time I try and join/merge them I end up with a bunch of NAs.
I can't work out a simple logic for if.na then use test_2. Is that what I'd need to do, or is there an easier way?
test_1 = structure(list(rn = c("Red", "Blue",
"Green", "Yellow", "Pink", "Gold"
), X2022.08.01 = c(0, 0, 0, 0, 0, 0), X2022.08.02 = c(0, 0, 0,
0, 0, 0), X2022.08.03 = c(0, 0, 0, 0, 0, 0), X2022.08.04 = c(0,
0, 0, 0, 0, 0), X2022.08.05 = c(0, 0, 0, 0, 0, 0), X2022.08.08 = c(0,
0, 0, 0, 0, 0), X2022.08.09 = c(0, 0, 0, 0, 0, 0), X2022.08.10 = c(0,
0, 0, 0, 0, 0), X2022.08.11 = c(0, 0, 0, 0, 0, 0), X2022.08.12 = c(0,
0, 0, 0, 0, 0), X2022.08.15 = c(0, 0, 0, 0, 0, 0), X2022.08.16 = c(0,
0, 0, 0, 0, 0), X2022.08.17 = c(0, 0, 0, 0, 0, 0), X2022.08.18 = c(0,
0, 0, 0, 0, 0), X2022.08.19 = c(0, 0, 0, 0, 0, 0), X2022.08.22 = c(0,
0, 0, 0, 0, 0), X2022.08.23 = c(0, 0, 0, 0, 0, 0), X2022.08.24 = c(0,
0, 0, 0, 0, 0), X2022.08.25 = c(0, 0, 0, 0, 0, 0), X2022.08.26 = c(0,
0, 0, 0, 0, 0), X2022.08.29 = c(0, 0, 0, 0, 0, 0), X2022.08.30 = c(0,
0, 0, 0, 0, 0), X2022.08.31 = c(0, 0, 0, 0, 0, 0)), row.names = c(NA,
6L), class = "data.frame")
test_2 = structure(list(rn = c("Blue", "Pink",
"Red", "Yellow", "Green", "Gold"
), X2022.08.01 = c(10, 10, 10, 10, 10, 10), X2022.08.03 = c(10, 10, 10,
10, 10, 10), X2022.08.04 = c(10, 10, 10, 10, 10, 10), X2022.08.05 = c(10,
10, 10, 10, 10, 10), X2022.08.26 = c(10, 10, 10, 10, 10, 10)), row.names = c(NA,
6L), class = "data.frame")
the desired output would look like this:
test_output = structure(list(rn = c("Red", "Blue",
"Green", "Yellow", "Pink", "Gold"
), X2022.08.01 = c(10, 10, 10, 10, 10, 10), X2022.08.02 = c(0, 0, 0,
0, 0, 0), X2022.08.03 = c(10, 10, 10, 10, 10, 10), X2022.08.04 = c(10,
10, 10, 10, 10, 10), X2022.08.05 = c(10, 10, 10, 10, 10, 10), X2022.08.08 = c(0,
0, 0, 0, 0, 0), X2022.08.09 = c(0, 0, 0, 0, 0, 0), X2022.08.10 = c(0,
0, 0, 0, 0, 0), X2022.08.11 = c(0, 0, 0, 0, 0, 0), X2022.08.12 = c(0,
0, 0, 0, 0, 0), X2022.08.15 = c(0, 0, 0, 0, 0, 0), X2022.08.16 = c(0,
0, 0, 0, 0, 0), X2022.08.17 = c(0, 0, 0, 0, 0, 0), X2022.08.18 = c(0,
0, 0, 0, 0, 0), X2022.08.19 = c(0, 0, 0, 0, 0, 0), X2022.08.22 = c(0,
0, 0, 0, 0, 0), X2022.08.23 = c(0, 0, 0, 0, 0, 0), X2022.08.24 = c(0,
0, 0, 0, 0, 0), X2022.08.25 = c(0, 0, 0, 0, 0, 0), X2022.08.26 = c(10,
10, 10, 10, 10, 10), X2022.08.29 = c(0, 0, 0, 0, 0, 0), X2022.08.30 = c(0,
0, 0, 0, 0, 0), X2022.08.31 = c(0, 0, 0, 0, 0, 0)), row.names = c(NA,
6L), class = "data.frame")
If you use the column names of the source as indices on both sides of the assignment, you can get replacement with
test_1[ ,colnames(test_2)] <- test_2[ , colnames(test_2)]
test_1
rn X2022.08.01 X2022.08.02 X2022.08.03 X2022.08.04 X2022.08.05 X2022.08.08 X2022.08.09
1 Blue 10 0 10 10 10 0 0
2 Pink 10 0 10 10 10 0 0
3 Red 10 0 10 10 10 0 0
4 Yellow 10 0 10 10 10 0 0
5 Green 10 0 10 10 10 0 0
6 Gold 10 0 10 10 10 0 0
X2022.08.10 X2022.08.11 X2022.08.12 X2022.08.15 X2022.08.16 X2022.08.17 X2022.08.18 X2022.08.19
1 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 0
6 0 0 0 0 0 0 0 0
X2022.08.22 X2022.08.23 X2022.08.24 X2022.08.25 X2022.08.26 X2022.08.29 X2022.08.30 X2022.08.31
1 0 0 0 0 10 0 0 0
2 0 0 0 0 10 0 0 0
3 0 0 0 0 10 0 0 0
4 0 0 0 0 10 0 0 0
5 0 0 0 0 10 0 0 0
6 0 0 0 0 10 0 0 0
I doubt there could be a more simple method.. It could even work with a more limited number of rows as long as the indices area proper subset of the column and row names of the target matrix or dataframe. Note: I'm not understanding the issue with NA's. There were not NA's in either structure.

Filtering a data frame by column names

i have two dataframes with another sizes but partly same row and column names.
I want to filter tpm_datExpr by finding the common column and row names with datTraits.
So I want my tpm_datExpr dataframe to have 278 columns.
> colnames(tpm_datExpr)[1:10]
[1] "D5247_S53_L006" "D5248_S54_L006" "D5249_S67_L008" "E02874_L1_S1_L001"
[5] "E02875_L1_S2_L001" "E02876_L1_S3_L001" "E02877_L1_S4_L001" "E02878_L1_S5_L001"
[9] "E02879_L1_S6_L001" "E02880_L1_S7_L001"
> rownames(datTraits)[1:10]
[1] "D5247_S53_L006" "D5248_S54_L006" "D5249_S67_L008" "E02874_L1_S1_L001"
[5] "E02875_L1_S2_L001" "E02876_L1_S3_L001" "E02877_L1_S4_L001" "E02878_L1_S5_L001"
[9] "E02879_L1_S6_L001" "E02880_L1_S7_L001"
> ncol(tpm_datExpr)
[1] 623
> nrow(datTraits)
[1] 278
You may use intersect on rownames and names. Example:
df1[intersect(rownames(df1), rownames(df2)),
intersect(names(df1), names(df2))]
# X1 X5 X6
# 2 0 0 0
# 4 0 0 0
# 8 0 0 0
# 9 0 0 0
Data:
df1 <- structure(list(X1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), X2 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0), X3 = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0), X4 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), X5 = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0), X6 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), class = "data.frame", row.names = c(NA,
-10L))
df2 <- structure(list(X1 = c(0, 0, 0, 0), X5 = c(0, 0, 0, 0), X6 = c(0,
0, 0, 0)), row.names = c(2L, 4L, 8L, 9L), class = "data.frame")

Sorting overlapping categorical variables in {gtsummary}

require(gtsummary)
test <- structure(list(`1` = c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0), `2` = c(1,0, 0, 0, 0, 1, 0, 1, 0, 0), `3` = c(0, 0, 0, 0, 0, 0, 0, 0, 0,0), `4` = c(1, 1, 0, 0, 1, 0, 0, 0, 0, 0), `5` = c(1, 0, 1, 1,0, 1, 1, 0, 0, 0), `6` = c(0, 0, 0, 1, 0, 0, 1, 0, 0, 0), `7` = c(0,0, 0, 0, 0, 0, 0, 0, 0, 0), `8` = c(0, 0, 0, 0, 0, 0, 0, 0, 0,0), `9` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `10` = c(0, 0, 0,0, 0, 0, 0, 0, 0, 1)), row.names = c(NA, -10L), class = c("tbl_df","tbl", "data.frame"))
In this example data, I have 10 categorical variables.
`1` `2` `3` `4` `5` `6` `7` `8` `9` `10`
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0 1 0 1 1 0 0 0 0 0
2 0 0 0 1 0 0 0 0 0 0
3 0 0 0 0 1 0 0 0 0 0
4 0 0 0 0 1 1 0 0 0 0
5 0 0 0 1 0 0 0 0 0 0
6 0 1 0 0 1 0 0 0 0 0
7 0 0 0 0 1 1 0 0 0 0
8 0 1 0 0 0 0 0 0 0 0
9 1 0 0 0 0 0 0 0 0 0
10 0 0 0 0 0 0 0 0 0 1
Since they can overlap each other, I have put them in different columns,
using 0 and 1, indicatting "yes" or "no" to having (or not having) the categorical variable.
When test %>% tbl_summary(), it creates:
I would like to sort this by frequency, but
test %>% tbl_summary(sort = list(everything() ~ "frequency"))
does not work.
Is there anyway to do this?
Thank you in advance.
The tbl_summary(sort=) argument sorts levels within a variable, not the order the variables appear in the table. Variables are appear in the table in the same order they appear in the data frame.
We can update the order in the data frame using the code below.
library(gtsummary)
#> #Uighur
packageVersion("gtsummary")
#> [1] '1.5.0'
test <- structure(list(`1` = c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0), `2` = c(1,0, 0, 0, 0, 1, 0, 1, 0, 0), `3` = c(0, 0, 0, 0, 0, 0, 0, 0, 0,0), `4` = c(1, 1, 0, 0, 1, 0, 0, 0, 0, 0), `5` = c(1, 0, 1, 1,0, 1, 1, 0, 0, 0), `6` = c(0, 0, 0, 1, 0, 0, 1, 0, 0, 0), `7` = c(0,0, 0, 0, 0, 0, 0, 0, 0, 0), `8` = c(0, 0, 0, 0, 0, 0, 0, 0, 0,0), `9` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `10` = c(0, 0, 0,0, 0, 0, 0, 0, 0, 1)), row.names = c(NA, -10L), class = c("tbl_df","tbl", "data.frame"))
# order variables by prevelence
prev <- purrr::map_dbl(test, mean) %>% sort(decreasing = TRUE)
test %>%
select(all_of(names(prev))) %>%
tbl_summary() %>%
as_kable() # convert to kable for SO
Characteristic
N = 10
5
5 (50%)
2
3 (30%)
4
3 (30%)
6
2 (20%)
1
1 (10%)
10
1 (10%)
3
0 (0%)
7
0 (0%)
8
0 (0%)
9
0 (0%)
Created on 2021-12-10 by the reprex package (v2.0.1)

Include all variables in tsibble formula

I want to fit a linear regression model using the tsibble package and I have a bunch of dummy variables that I want to include in my analysis. A sample dataset would be the following:
library(tsibble)
library(dplyr)
library(fable)
ex = structure(list(id = c("KEY1", "KEY1", "KEY1", "KEY1", "KEY1",
"KEY1", "KEY1", "KEY1", "KEY1", "KEY1", "KEY1", "KEY1", "KEY1",
"KEY1", "KEY1"), sales = c(0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0), date = structure(c(15003, 15004, 15005, 15006, 15007,
15008, 15009, 15010, 15011, 15012, 15013, 15014, 15015, 15016,
15017), class = "Date"), wday = c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L), dummy_1 = c(0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), dummy_2 = c(0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0), dummy_3 = c(0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0)), row.names = c(NA, -15L), key = structure(list(
id = "KEY1", .rows = list(1:15)), row.names = c(NA, -1L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), index = structure("date", ordered = TRUE), index2 = "date", interval = structure(list(
year = 0, quarter = 0, month = 0, week = 0, day = 1, hour = 0,
minute = 0, second = 0, millisecond = 0, microsecond = 0,
nanosecond = 0, unit = 0), class = "interval"), class = c("tbl_ts",
"tbl_df", "tbl", "data.frame"))
> ex
# A tsibble: 15 x 7 [1D]
# Key: id [1]
id sales date wday dummy_1 dummy_2 dummy_3
<chr> <dbl> <date> <int> <dbl> <dbl> <dbl>
1 KEY1 0 2011-01-29 1 0 0 0
2 KEY1 5 2011-01-30 2 0 0 0
3 KEY1 0 2011-01-31 3 0 0 1
4 KEY1 0 2011-02-01 4 1 0 0
5 KEY1 0 2011-02-02 5 0 0 0
6 KEY1 0 2011-02-03 6 0 0 0
7 KEY1 0 2011-02-04 7 0 1 0
8 KEY1 0 2011-02-05 1 0 0 0
9 KEY1 0 2011-02-06 2 0 0 0
10 KEY1 0 2011-02-07 3 0 0 0
11 KEY1 0 2011-02-08 4 0 0 0
12 KEY1 0 2011-02-09 5 0 0 0
13 KEY1 0 2011-02-10 6 0 0 0
14 KEY1 0 2011-02-11 7 0 0 0
15 KEY1 0 2011-02-12 1 0 0 0
They are too many dummies to specify manually so I was hoping for something faster. Normally I would use the . symbol in the formula in the following way:
fit = ex %>%
model(TSLM(sales ~ trend() + season() + .))
But this does not work:
Warning message:
1 error encountered for TSLM(sales ~ trend() + season() + .)
[1] '.' in formula and no 'data' argument
Is there a systematic tsibble way around this or do I have to create the formula on the fly using the names of the dataset?
We could create a formula with reformulate using the 'dummy' column names
nm1 <- names(ex)[startsWith(names(ex), 'dummy')]
ex %>%
model(lm = TSLM(reformulate(c(nm1, 'trend()', 'season()'), 'sales') ))

Plot the specific rows on the same graph

I use that function to plot all of the rows from my data frame into pdf file.
library(gplots)
# open the pdf file
pdf(file='Plots of all rows.pdf')
x <- 1:(ncol(tbl_alles)-1)
for(i in 1:nrow(tbl_alles)){
# plot onto a new pdf page
plot(x=x,y=tbl_alles[i,-1],type='b',main=tbl_alles[i,1],xlab='X',ylab='Y')
}
# close the pdf file
dev.off()
How to just plot like 3 rows on the same graph with different colours ?
As a legend use the name of the row...
my data:
> dput(head(tbl_alles))
structure(list(`10` = c(0, 0, 0, 0, 0, 0), `20` = c(0, 0, 0,
0, 0, 0), `52.5` = c(0, 0, 0, 0, 0, 0), `81` = c(0, 0, 1, 0,
0, 0), `110` = c(0, 0, 0, 0, 0, 0), `140.5` = c(0, 0, 0, 0, 0,
0), `189` = c(0, 0, 0, 0, 0, 0), `222.5` = c(0, 0, 0, 0, 0, 0
), `278` = c(0, 0, 0, 0, 0, 0), `340` = c(0, 0, 0, 0, 0, 0),
`397` = c(0, 1, 0, 0, 0, 0), `453.5` = c(0, 0.66069369, 0,
0, 0, 1), `529` = c(0, 0.521435654, 0, 0, 1, 0), `580` = c(0,
0.437291195, 0, 0, 1, 0), `630.5` = c(0, 0.52204783, 0, 0,
0, 0), `683.5` = c(0, 0.52429838, 0, 0, 0, 0), `735.5` = c(1,
0.3768651, 0, 1, 0, 0), `784` = c(0, 0, 0, 0, 0, 0), `832` = c(0,
0, 0, 0, 0, 0), `882.5` = c(0, 0, 0, 0, 0, 0), `926.5` = c(0,
0, 0, 0, 0, 0), `973` = c(0, 0, 0, 0, 0, 0), `1108` = c(0,
0, 0, 0, 0, 0), `1200` = c(0, 0, 0, 0, 0, 0)), .Names = c("10",
"20", "52.5", "81", "110", "140.5", "189", "222.5", "278", "340",
"397", "453.5", "529", "580", "630.5", "683.5", "735.5", "784",
"832", "882.5", "926.5", "973", "1108", "1200"), row.names = c("at1g01050.1",
"at1g01080.1", "at1g01090.1", "at1g01220.1", "at1g01420.1", "at1g01470.1"
), class = "data.frame")
That's an output which I would like to see. Just put both plots on the some graph with different colours.
Edit:
> tbl_alles[1066,]
10 20 52.5 81 110 140.5 189 222.5 278 340 397 453.5 529 580 630.5 683.5 735.5 784 832
at3g01510.1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
882.5 926.5 973 1108 1200
at3g01510.1 0 0 0 0 0
> tbl_alles[2269,]
10 20 52.5 81 110 140.5 189 222.5 278 340 397 453.5 529 580 630.5 683.5 735.5 784 832
at5g26570.1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1
882.5 926.5 973 1108 1200
at5g26570.1 0 0 0 0 0
> tbl_alles[109,]
10 20 52.5 81 110 140.5 189 222.5 278 340 397 453.5 529 580 630.5 683.5 735.5
at1g10760.1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.5707348 0.8569183
784 832 882.5 926.5 973 1108 1200
at1g10760.1 0.9070698 1 0 0 0 0 0
That's what I would like to achieve:
I always forget about matplot, I'm not familiar with gplot, so I think it fits this well
Here's a small df I made to replicate your problem
df<-structure(list(`10` = c(-0.371350351694367, -1.25087314646208,
-0.657843532649542), `20` = c(-1.1851240454548, -2.78219491586262,
0.710133251591709), `52.5` = c(1.68304915266843, -0.496047552485386,
0.450948253352661), `81` = c(0.129735967979554, 0.254350517817915,
-0.18288033694209), `110` = c(-1.13792416656106, 0.685305257987392,
-0.00333217347885503), `140.5` = c(-3.60035706471287, 0.147934251860607,
1.21899119774361), `189` = c(0.20579357437275, -0.287714362235557,
-0.116174227557464), `222.5` = c(-2.27228706498774, 1.14330151676478,
0.437320821753322), `278` = c(0.0184198982292088, 1.03280897369263,
-0.809374548090546), `340` = c(1.10314005160547, -0.423635755487127,
1.69263287858465), `397` = c(1.11000715987197, -0.666713965188873,
-0.616331157049669), `453.5` = c(-0.21763982556254, -0.668405154120432,
-0.101290244002727), `529` = c(-1.23612437632145, 0.596727546451954,
1.65926804193271), `580` = c(-1.36914007441172, 0.553436632958187,
0.370452646201169), `630.5` = c(-1.28739943545904, 1.51605377604701,
0.535674548844182), `683.5` = c(0.0380431318762389, 0.259944835916881,
-1.12267356731606), `735.5` = c(0.307116139352162, 0.619942543650423,
1.43847332323359), `784` = c(0.517011770731407, -0.271348876993244,
0.382706886840812), `832` = c(0.358756221875511, 0.902328658122764,
2.19653579973421), `882.5` = c(0.60565816196684, -1.69443962691366,
-0.338433483486653), `926.5` = c(2.15044754686289, -0.979574461038407,
0.116260893315264), `973` = c(-1.3051680247044, -0.735063827396212,
-1.55018820456708), `1108` = c(-0.108476761260576, 1.21094890415222,
-1.04130290709525), `1200` = c(-0.963125050259433, -1.12921931676616,
-0.357160373571803)), .Names = c("10", "20", "52.5", "81", "110",
"140.5", "189", "222.5", "278", "340", "397", "453.5", "529",
"580", "630.5", "683.5", "735.5", "784", "832", "882.5", "926.5",
"973", "1108", "1200"), row.names = c("at1g01050.1", "at1g01080.1",
"at1g01090.1"), class = "data.frame")
Now your code, notice I use matplot, transpose the df, convert it to a matrix (for matplot), and specify a type for each of the data sets we'll be plotting
library(gplots)
# open the pdf file
pdf(file='Plots of all rows.pdf')
rowsToPlot<-c(1,2,3)
# plot onto a new pdf page
matplot(as.matrix(t(df[rowsToPlot,])),type=rep("l", length(rowsToPlot)), col=rainbow(length(rowsToPlot)))
# close the pdf file
dev.off()
And my picture

Resources