Display second Y axis using dygraph - r

I am trying to have two Y axes with different scales. The second Y axis scale doesn't show, and the data are plotted almost entirely off screen. This is what I have:
dygraph(bmsp1, main = "Black MO SP")%>%
dyAxis("y", label = "Depth (m) ", valueRange = c(0, 1.0))%>%
dyAxis("y2", label = "Temp (c) ", valueRange = c(0, 25.0))
Plot of depth and temp
I also tried this but get the error:
dygraph(bmsp1, main = "Black MO SP")%>%
+ dyAxis("y", label = "Depth (m) ", valueRange = c(0, 1.0))%>%
+ dyAxis("y2", label = "Temp (c) ", valueRange = c(0, 25.0))%>%
+ dyAxis("Temp", axis('y2'))
Error in dyAxis(., "Temp", axis("y2"))
I haven't figured out how to add the data using the dput() (file size too large). Here is snapshot from head()
> head(bmsp1)
Depth Temp (c)
2015-09-30 09:00:00 0.003 21.378
2015-09-30 09:15:00 0.228 17.475
2015-09-30 09:30:00 0.228 17.475
2015-09-30 09:45:00 0.224 17.475
2015-09-30 10:00:00 0.225 17.475
2015-09-30 10:15:00 0.224 17.475
Here is dput() for 75 rows (I think).
> dput(head(bmsp1, 75))
structure(c(0.003, 0.228, 0.228, 0.224, 0.225, 0.224, 0.227,
0.226, 0.23, 0.218, 0.223, 0.224, 0.229, 0.226, 0.226, 0.222,
0.228, 0.233, 0.233, 0.233, 0.232, 0.225, 0.217, 0.209, 0.204,
0.212, 0.222, 0.212, 0.23, 0.224, 0.216, 0.228, 0.231, 0.23,
0.223, 0.223, 0.232, 0.224, 0.223, 0.225, 0.224, 0.219, 0.215,
0.211, 0.211, 0.215, 0.221, 0.213, 0.216, 0.222, 0.222, 0.224,
0.217, 0.212, 0.214, 0.212, 0.209, 0.21, 0.207, 0.207, 0.206,
0.205, 0.204, 0.204, 0.203, 0.198, 0.197, 0.199, 0.194, 0.184,
0.179, 0.189, 0.195, 0.192, 0.19, 21.378, 17.475, 17.475, 17.475,
17.475, 17.475, 17.475, 17.475, 17.475, 17.475, 17.475, 17.475,
17.475, 17.475, 17.475, 17.475, 17.475, 17.475, 17.475, 17.475,
17.475, 17.57, 17.57, 17.57, 17.57, 17.57, 17.475, 17.57, 17.475,
17.475, 17.475, 17.475, 17.475, 17.475, 17.475, 17.475, 17.475,
17.475, 17.379, 17.379, 17.379, 17.379, 17.379, 17.379, 17.379,
17.379, 17.284, 17.284, 17.284, 17.284, 17.284, 17.284, 17.189,
17.189, 17.189, 17.189, 17.094, 17.094, 17.094, 17.094, 16.999,
16.999, 16.999, 16.999, 16.903, 16.903, 16.903, 16.903, 16.903,
16.808, 16.808, 16.808, 16.808, 16.713, 16.713), .indexTZ = "UTC", .indexCLASS = c("POSIXct",
"POSIXt"), tclass = c("POSIXct", "POSIXt"), tzone = "UTC", class = c("xts",
"zoo"), index = structure(c(1443603600, 1443604500, 1443605400,
1443606300, 1443607200, 1443608100, 1443609000, 1443609900, 1443610800,
1443611700, 1443612600, 1443613500, 1443614400, 1443615300, 1443616200,
1443617100, 1443618000, 1443618900, 1443619800, 1443620700, 1443621600,
1443622500, 1443623400, 1443624300, 1443625200, 1443626100, 1443627000,
1443627900, 1443628800, 1443629700, 1443630600, 1443631500, 1443632400,
1443633300, 1443634200, 1443635100, 1443636000, 1443636900, 1443637800,
1443638700, 1443639600, 1443640500, 1443641400, 1443642300, 1443643200,
1443644100, 1443645000, 1443645900, 1443646800, 1443647700, 1443648600,
1443649500, 1443650400, 1443651300, 1443652200, 1443653100, 1443654000,
1443654900, 1443655800, 1443656700, 1443657600, 1443658500, 1443659400,
1443660300, 1443661200, 1443662100, 1443663000, 1443663900, 1443664800,
1443665700, 1443666600, 1443667500, 1443668400, 1443669300, 1443670200
), tzone = "UTC", tclass = c("POSIXct", "POSIXt")), .Dim = c(75L,
2L), .Dimnames = list(NULL, c("Depth", "Temp")))

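This will work. Your last line was missing an "=" (it should be axis = 'y2'), and the series is assigned to the second axis with dySeries() rather than dyAxis().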
# Build the chart step by step: configure both axes, then attach the
# "Temp" series to the second (y2) axis so it is drawn on its own scale.
depth_temp_plot <- dygraph(bmsp1, main = "Black MO SP")
depth_temp_plot <- dyAxis(
  depth_temp_plot, "y",
  label = "Depth", valueRange = c(0, 1.0), independentTicks = TRUE
)
depth_temp_plot <- dyAxis(
  depth_temp_plot, "y2",
  label = "Temp ", valueRange = c(0, 25.0), independentTicks = TRUE
)
depth_temp_plot <- dySeries(depth_temp_plot, "Temp", axis = "y2")
# Print the widget
depth_temp_plot

Related

How do you filter out individuals on a figure after creating a PCA plot in Factoextra?

I am a research student coming to grips with R for the first time.
I am trying to make a PCA plot from a series of body measurements; the specimen names and a subspecies tag (BIN) are in separate columns. The BIN column contains the BIN ID for each sample.
The difficulty I am facing is filtering out individuals with certain BINs.
My desired output is a PCA plot identical to the one below, but displaying only the named BINs ("ACZ5516", "ADF3772") and not the remaining BINs.
Revised image
#import data set
Anotylus<-read.csv("DataSO.csv", header = TRUE, sep = ",",
row.names = 1)
#row.names sets specimen ID as specimen name
#set BIN as factor
Anotylus$BIN<-as.factor(Anotylus$BIN)
# Number of BINs and number of individuals in each
table(Anotylus["BIN"])
#create PCA of data set, excludes column for BIN (column 12)
Ano.pca<-PCA(Anotylus[,c(1:11)], graph = FALSE)
#visualise PCA with all individuals in the d.f.
fviz_pca_ind(Ano.pca,
geom.ind = "point",
col.ind = Anotylus$BIN,
repel = TRUE,
legend.title = "BIN",
addEllipses = TRUE)
#With individuals from selected BINs
top<-list(name=c("ACZ5516", "ADF3772"))
fviz_pca_ind(Ano.pca,
geom.ind = "point",
col.ind = Anotylus$BIN,#
select.ind = top,
repel = TRUE,
legend.title = "BIN",
addEllipses = TRUE)
#no samples visible at all
#would like to see only the two named BINs
I have tried using a subset of the data, but then the variation explained by the principal components changes and produces a different result.
How do I filter the individuals displayed to a curated list?
Any advice or guidance is deeply appreciated!
Best,
Dante
Sample data set below
> dput(Anotylus)
structure(list(Total.Anten.Length..mm. = c(0.66, 0.635, 0.676,
0.559, 1.249, 0.675, 0.704, 0.649, 0.661, 0.795, 0.836, 0.888,
0.941, 0.781, 0.899, 0.918, 0.854, 0.834, 0.888, 0.884, 0.879,
0.776, 0.954, 0.853, 0.96, 0.527, 0.515, 0.653, 0.491, 0.474,
0.538, 0.694, 1.01, 0.53, 0.641, 0.509, 0.918, 0.849, 0.452,
0.536), Body.Length...mm. = c(1.842, 1.664, 1.901, 1.917, 3.061,
1.961, 1.862, 1.99, 1.85, 1.449, 2.455, 2.077, 2.578, 2.478,
2.798, 2.589, 2.291, 2.882, 2.472, 2.55, 2.53, 2.757, 2.689,
2.166, 2.894, 1.944, 1.48, 2.385, 1.715, 1.674, 1.532, 2.27,
2.598, 1.677, 1.67, 1.68, 2.374, 2.877, 1.699, 1.656),
Eye.Area..mm2. = c(0.01,
0.009, 0.01, 0.006, 0.026, 0.007, 0.01, 0.01, 0.009, 0.006, 0.016,
0.014, 0.015, 0.018, 0.02, 0.016, 0.019, 0.015, 0.013, 0.011,
0.015, 0.014, 0.017, 0.014, 0.012, 0.007, 0.006, 0.02, 0.007,
0.006, 0.005, 0.013, 0.013, 0.006, 0.007, 0.005, 0.013, 0.006,
0.008, 0.005), Eye.Width..mm. = c(0.046, 0.036, 0.054, 0.033,
0.071, 0.04, 0.046, 0.047, 0.044, 0.05, 0.059, 0.053, 0.073,
0.063, 0.068, 0.051, 0.044, 0.07, 0.064, 0.061, 0.054, 0.042,
0.038, 0.059, 0.059, 0.043, 0.046, 0.079, 0.037, 0.035, 0.037,
0.054, 0.047, 0.045, 0.045, 0.028, 0.05, 0.037, 0.043, 0.045),
Head.Width..mm. = c(0.359, 0.362, 0.377, 0.317, 0.731, 0.456,
0.38, 0.414, 0.359, 0.453, 0.568, 0.449, 0.519, 0.517, 0.516,
0.515, 0.512, 0.513, 0.511, 0.456, 0.503, 0.474, 0.598, 0.453,
0.574, 0.309, 0.306, 0.574, 0.314, 0.298, 0.295, 0.386, 0.557,
0.289, 0.318, 0.306, 0.505, 0.291, 0.298, 0.263),
Pronotum.Width..mm. = c(0.413,
0.455, 0.439, 0.352, 0.741, 0.462, 0.467, 0.461, 0.442, 0.493,
0.573, 0.549, 0.584, 0.617, 0.632, 0.61, 0.614, 0.624, 0.631,
0.533, 0.587, 0.562, 0.609, 0.522, 0.621, 0.342, 0.341, 0.598,
0.336, 0.314, 0.331, 0.467, 0.547, 0.343, 0.342, 0.317, 0.545,
0.328, 0.329, 0.284), Pronotum.Length..mm. = c(0.304, 0.326,
0.334, 0.24, 0.48, 0.317, 0.303, 0.329, 0.302, 0.36, 0.418,
0.383, 0.424, 0.428, 0.399, 0.442, 0.404, 0.461, 0.435, 0.376,
0.393, 0.403, 0.373, 0.41, 0.435, 0.259, 0.247, 0.403, 0.257,
0.252, 0.23, 0.387, 0.388, 0.248, 0.26, 0.215, 0.336, 0.223,
0.231, 0.247), Elytra.Width..mm. = c(0.558, 0.552, 0.586,
0.43, 0.854, 0.506, 0.528, 0.586, 0.548, 0.54, 0.75, 0.716,
0.794, 0.816, 0.746, 0.82, 0.786, 0.8, 0.722, 0.69, 0.758,
0.766, 0.736, 0.668, 0.852, 0.468, 0.462, 0.741, 0.461, 0.323,
0.406, 0.637, 0.617, 0.41, 0.366, 0.422, 0.718, 0.42, 0.408,
0.278), Elytra.Length..mm. = c(0.469, 0.437, 0.386, 0.346,
0.631, 0.428, 0.464, 0.451, 0.445, 0.532, 0.583, 0.543, 0.558,
0.62, 0.625, 0.623, 0.613, 0.605, 0.623, 0.588, 0.606, 0.48,
0.568, 0.568, 0.598, 0.373, 0.352, 0.516, 0.365, 0.326, 0.327,
0.502, 0.464, 0.346, 0.344, 0.319, 0.519, 0.346, 0.329, 0.346
), Pronotum.Value = c(0.288, 0.319, 0.306, 0.331, 0.179,
0.278, 0.224, 0.211, 0.204, 0.273, 0.26, 0.33, 0.241, 0.218,
0.203, 0.209, 0.241, 0.227, 0.31, 0.236, 0.341, 0.288, 0.283,
0.263, 0.279, 0.173, 0.162, 0.22, 0.183, 0.209, 0.193, 0.185,
0.236, 0.181, 0.172, 0.227, 0.275, 0.164, 0.21, 0.217),
Elytra.Value = c(0.314,
0.319, 0.393, 0.243, 0.205, 0.297, 0.21, 0.205, 0.244, 0.359,
0.288, 0.335, 0.375, 0.291, 0.243, 0.238, 0.288, 0.283, 0.351,
0.271, 0.48, 0.415, 0.325, 0.294, 0.193, 0.182, 0.271, 0.237,
0.216, 0.246, 0.214, 0.193, 0.233, 0.205, 0.18, 0.262, 0.225,
0.176, 0.303, 0.251), BIN = structure(c(1L, 1L, 1L, 3L, 8L,
1L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 7L, 3L, 3L, 6L, 3L, 3L, 3L, 2L, 5L, 3L, 3L,
3L, 5L, 3L, 3L, 3L), .Label = c("ACZ5516", "ACZ5742", "ADF3772",
"ADF4138", "ADG1201", "ADH9095", "ADI3175", "ADR2790"), class =
"factor")), row.names = c("CCDB-22214-D03",
"CCDB-22214-D06", "CCDB-22214-D08", "CCDB-22214-G09", "CCDB-22214-
H02",
"CCDB-22214-H09", "CCDB-22215-A11", "CCDB-22215-A12", "CCDB-22215-
F04",
"CCDB-23850-B07", "CCDB-23851-C04", "CCDB-23851-C05", "CCDB-23851-
C11",
"CCDB-23851-C12", "CCDB-23851-D02", "CCDB-23851-D03", "CCDB-23851-
D04",
"CCDB-23851-D06", "CCDB-23851-E08", "CCDB-23851-E09", "CCDB-23851-
E11",
"CCDB-23851-F03", "CCDB-23851-G05", "CCDB-23851-G09", "CCDB-23858-
B08",
"CCDB-23858-G12", "CCDB-23858-H01", "CCDB-23859-B10", "CCDB-23859-
E07",
"CCDB-23859-E10", "CCDB-23859-E11", "CCDB-25504-E04", "CCDB-25505-
E02",
"CCDB-25510-B12", "CCDB-25510-D02", "CCDB-25510-E09", "CCDB-25511-
B06",
"CCDB-25511-B12", "CCDB-25511-E11", "CCDB-25512-E12"), class =
"data.frame")
Apparently factoextra "produces ggplot2-based elegant data visualization with less typing". From what I can tell, fviz_pca_ind essentially plots the PCA coordinates of each individual point and computes a multivariate normal distribution, which it draws as an ellipse.
Here's the replication of the plot you have attached in stripped down ggplot code:
# Plotting data: the BIN identifier of every individual next to that
# individual's coordinates from the PCA (Ano.pca$ind$coord)
df <- cbind.data.frame(BIN = Anotylus$BIN, Ano.pca$ind$coord)
# Scatter of the first two dimensions, coloured by BIN, with a normal ellipse
pca_plot <- ggplot(data = df, mapping = aes(x = Dim.1, y = Dim.2, colour = BIN))
pca_plot +
  geom_point() +
  stat_ellipse(type = "norm")
Note that, as there are only 1 or 2 points for every BIN other than ACZ5516 and ADF3772, ggplot2 reports "Too few points to calculate an ellipse" for them, and so no ellipse is plotted for those BINs.
In order to "hide" the other BIN in your figure, you can either just plot the BIN you wanted or you can create a new grouping (ACZ5516, ADF3772 and others) in the plotting data and set the points you do not want to focus on in less visible colour.
library(dplyr)
# BINs to keep in focus
keep_bins <- c("ACZ5516", "ADF3772")

# Option 1: plot only the individuals belonging to the selected BINs
selected <- filter(df, BIN %in% keep_bins)
ggplot(selected, aes(x = Dim.1, y = Dim.2, colour = BIN)) +
  geom_point() +
  stat_ellipse(type = "norm")

# Option 2: keep every individual, but collapse all other BINs into "Others"
df2 <- mutate(
  df,
  BIN = ifelse(BIN %in% keep_bins, as.character(BIN), "Others")
)
# Ellipses are computed from the selected BINs only (taken from the original df)
ggplot(df2, aes(x = Dim.1, y = Dim.2, colour = BIN)) +
  geom_point() +
  stat_ellipse(data = filter(df, BIN %in% keep_bins), type = "norm") +
  scale_colour_manual(values = c("darkgreen", "orange", "gray"))

Multiple t-test on independent group with a large dataframe

I've seen many similar posts but the vast majority of them are at least 3 years old and I'm not really sure they apply to my situations, so here we go.
A colleague asked for my help on a multiple t-test on her project.
Basically she has 20 observation x 30 variable dataframe that looks like this:
| Group | Lipid 1 | Lipid 2 | ... | Lipid 28|
| -------- | -------------- |
| A |
|B |
| |
|B |
What we want to do is a group comparison for each lipid (meaning a t-test for Lipid 1 between groups A and B, then a t-test for Lipid 2, and so on).
We do not want to compare Lipids between them.
And of course, we'd like to not have to copy/paste the same 3 lines of code, especially since we've got 2 other dataframe with the same variable but different conditions.
I've tried one solution I saw in here but it gives me an error I'm not sure to understand:
sapply(foetal[,2:20], function(i) t.test(i ~ foetal$ID))
Error in if (stderr < 10 * .Machine$double.eps * max(abs(mx), abs(my))) stop("data are essentially constant") : missing value where TRUE/FALSE needed In addition: Warning messages: 1: In mean.default(x) : l'argument n'est ni numérique, ni logique : renvoi de NA 2: In var(x) : NAs introduced by coercion 3: In mean.default(y) : l'argument n'est ni numérique, ni logique : renvoi de NA 4: In var(y) : Error in if (stderr < 10 * .Machine$double.eps * max(abs(mx), abs(my))) stop("data are essentially constant") : missing value where TRUE/FALSE needed
Another solution I saw would be to use the gather function to get one column with the lipid names and one column with the value of each lipid, then create a list column, spread the data frame, and mutate a new column containing the p-value of the t-test.
tips %>%
select(tip, total_bill, sex) %>%
gather(key = variable, value = value, -sex) %>%
group_by(sex, variable) %>%
summarise(value = list(value)) %>%
spread(sex, value) %>%
group_by(variable) %>%
mutate(p_value = t.test(unlist(Female), unlist(Male))$p.value,
t_value = t.test(unlist(Female), unlist(Male))$statistic)
(https://sebastiansauer.github.io/multiple-t-tests-with-dplyr/)
I'm honestly not sure what to do. Does anyone have tips or anything?
Here's the dput() for the data.... Not really sure why it's necessary though...
dput(dummy)
structure(list(ID = c("A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B"),
Lipid.1 = c(0.737, 0.419, 0.468, 0.805, 1.036, 0.825, 0.286,
1.166, 0.898, 0.504, 1.433, 0.41, 0.325, 0.866, 0.337, 0.876,
0.636, 0.953, 0.481, 0.602), Lipid.2 = c(0.001, 0.017, 0.013,
0.025, 0.018, 0.003, 0.007, NA, 0.01, 0.002, 0.01, 0.022,
0.005, NA, 0.018, NA, 0.015, 0.016, NA, 0.01), Lipid.3 = c(0.035,
0.018, 0.036, 0.024, 0.023, 0.027, 0.036, 0.037, 0.013, 0.037,
0.03, 0.04, 0.038, 0.033, 0.016, 0.034, 0.029, 0.033, 0.018,
0.029), Lipid.4 = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), Lipid.5 = c(0.09,
0.099, 0.12, 0.058, 0.136, 0.103, 0.153, 0.148, 0.047, 0.085,
0.098, 0.133, 0.099, 0.121, 0.084, 0.065, 0.11, 0.088, 0.065,
0.043), Lipid.6 = c(0.39, 0.555, 0.568, 0.6, 0.626, 0.378,
0.657, 0.57, 0.271, 0.41, 0.474, 0.617, 0.491, 0.738, 0.459,
0.365, 0.499, 0.388, 0.271, 0.275), Lipid.7 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), Lipid.8 = c(0.186, 0.197, 0.191, 0.125, 0.209,
0.107, 0.174, 0.143, 0.055, 0.134, 0.148, 0.193, 0.184, 0.213,
0.134, 0.085, 0.165, 0.215, 0.163, 0.061), Lipid.9 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, "0,007"), Lipid.10 = c("0,242", "0,254", "0,134",
"0,226", "0,243", "0,122", "0,082", "0,119", "0,098", "0,093",
"0,27", "0,284", "0,258", "0,236", "0,173", "0,106", "0,138",
"0,066", "0,072", "0,081"), Lipid.11 = c("0,053", "0,114",
"0,038", "0,094", "0,073", "0,067", "0,028", "0,022", "0,021",
"0,05", "0,085", "0,102", "0,122", "0,096", "0,027", "0,03",
NA, "0,078", "0,066", NA), Lipid.12 = c(0.223, 0.261, 0.258,
0.212, 0.168, 0.101, 0.191, 0.09, 0.195, 0.082, 0.155, 0.2,
0.167, 0.231, 0.145, 0.089, 0.239, 0.141, 0.106, 0.124),
Lipid.13 = c(0.737, 0.763, 0.707, 0.587, 0.545, 0.317, 0.74,
0.602, 0.481, 0.531, 0.632, 0.448, 0.62, 0.766, 0.397, 0.623,
0.997, 0.578, 0.418, 0.412), Lipid.14 = c(0.683, 0.666, 0.507,
0.366, 0.443, 0.266, 0.493, 0.345, 0.368, 0.355, 0.432, 0.411,
0.491, 0.565, 0.357, 0.285, 0.604, 0.426, 0.538, 0.295),
Lipid.15 = c(0.911, 1.017, 0.503, 0.76, 0.741, 0.486, 0.648,
0.581, 0.955, 0.515, 0.932, 0.707, 0.626, 0.928, 0.836, 0.537,
0.654, 0.351, 0.498, 0.529), Lipid.16 = c(0.148, 0.116, 0.069,
0.104, 0.091, 0.064, 0.093, 0.123, 0.11, 0.097, 0.283, 0.076,
0.095, 0.194, 0.06, 0.061, 0.086, 0.051, 0.064, 0.059), Lipid.17 = c("0,155",
"0,274", "0,149", "0,127", "0,174", "nd", "0,109", "0,134",
"0,1", "0,09", "0,25", "0,112", "0,088", "0,243", "0,092",
"0,073", "0,153", "0,12", "0,14", "0,06"), Lipid.18 = c(3.143,
3.441, 4.359, 1.945, 2.573, 2.267, 3.585, 3.405, 2.296, 1.998,
3.468, 2.98, 3.626, 3.635, 3.236, 2.092, 2.586, 2.08, 1.718,
1.736), Lipid.19 = c(37.993, 36.148, 40.244, 30.395, 37.339,
35.742, 47.316, 47.555, 34.351, 32.377, 38.694, 39.413, 36.114,
41.235, 32.779, 32.222, 36.418, 36.918, 33.334, 31.421),
Lipid.20 = c(6.613, 5.913, 9.662, 3.789, 7.485, 6.297, 8.254,
8.07, 4.905, 5.686, 7.742, 7.533, 6.875, 7.908, 7.022, 5.446,
6.1, 6.782, 6.062, 6.089), Lipid.21 = c(7.235, 6.759, 8.331,
4.931, 6.558, 4.186, 5.99, 5.629, 3.066, 3.439, 7.102, 7.655,
6.606, 7.858, 5.804, 3.135, 3.218, 3.639, 2.975, 3.13), Lipid.22 = c(6.453,
6.664, 9.048, 4.341, 8.03, 7.599, 10.24, 10.954, 5.873, 6.687,
8.005, 8.908, 6.708, 8.06, 5.931, 6.083, 5.734, 5.587, 5.388,
6.088), Lipid.23 = c(4.943, 3.164, 5.153, 2.51, 4.071, 5.255,
7.636, 8.376, 4.726, 5.56, 4.762, 5.044, 4.549, 4.875, 4.57,
5.147, 4.396, 4.031, 3.556, 4.38), Lipid.24 = c(3.973, 4.279,
5.928, 3.066, 4.95, 4.667, 7.949, 7.268, 4.948, 3.72, 5.137,
5.539, 4.006, 5.276, 3.909, 4.163, 4.954, 5.02, 3.961, 4.201
), Lipid.25 = c(7.638, 5.224, 8.417, 3.902, 7.267, 6.007,
8.256, 7.457, 4.801, 4.86, 7.581, 8.173, 7.57, 8.591, 7.482,
5.091, 5.651, 6.577, 5.415, 5.76), Lipid.26 = c(10.225, 8.293,
13.188, 5.607, 10.993, 4.491, 5.767, 5.011, 3.589, 3.145,
11.471, 12.183, 9.686, 12.562, 9.697, 3.34, 4.186, 4.485,
3.23, 4.229), Lipid.27 = c(5.848, 4.856, 6.503, 3.534, 5.358,
8.933, 14.034, 12.806, 7.781, 8.094, 6.765, 6.867, 5.539,
7.772, 5.883, 7.832, 8.607, 7.586, 6.628, 7.563), Lipid.28 = c(32.941,
30.579, 31.358, 15.861, 30.353, 25.222, 35.662, 34.035, 20.338,
24.682, 30.698, 34.024, 31.608, 37.539, 24.901, 20.131, 23.126,
30.803, 25.639, 18.935)), class = "data.frame", row.names = c(NA,
-20L))
If you would like to have the full t-test output, you could just loop over the columns:
If we start with your df:
data <- structure(list(ID = c("A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B"),
Lipid.1 = c(0.737, 0.419, 0.468, 0.805, 1.036, 0.825, 0.286,
1.166, 0.898, 0.504, 1.433, 0.41, 0.325, 0.866, 0.337, 0.876,
0.636, 0.953, 0.481, 0.602), Lipid.2 = c(0.001, 0.017, 0.013,
0.025, 0.018, 0.003, 0.007, NA, 0.01, 0.002, 0.01, 0.022,
0.005, NA, 0.018, NA, 0.015, 0.016, NA, 0.01), Lipid.3 = c(0.035,
0.018, 0.036, 0.024, 0.023, 0.027, 0.036, 0.037, 0.013, 0.037,
0.03, 0.04, 0.038, 0.033, 0.016, 0.034, 0.029, 0.033, 0.018,
0.029), Lipid.4 = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), Lipid.5 = c(0.09,
0.099, 0.12, 0.058, 0.136, 0.103, 0.153, 0.148, 0.047, 0.085,
0.098, 0.133, 0.099, 0.121, 0.084, 0.065, 0.11, 0.088, 0.065,
0.043), Lipid.6 = c(0.39, 0.555, 0.568, 0.6, 0.626, 0.378,
0.657, 0.57, 0.271, 0.41, 0.474, 0.617, 0.491, 0.738, 0.459,
0.365, 0.499, 0.388, 0.271, 0.275), Lipid.7 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), Lipid.8 = c(0.186, 0.197, 0.191, 0.125, 0.209,
0.107, 0.174, 0.143, 0.055, 0.134, 0.148, 0.193, 0.184, 0.213,
0.134, 0.085, 0.165, 0.215, 0.163, 0.061), Lipid.9 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, "0,007"), Lipid.10 = c("0,242", "0,254", "0,134",
"0,226", "0,243", "0,122", "0,082", "0,119", "0,098", "0,093",
"0,27", "0,284", "0,258", "0,236", "0,173", "0,106", "0,138",
"0,066", "0,072", "0,081"), Lipid.11 = c("0,053", "0,114",
"0,038", "0,094", "0,073", "0,067", "0,028", "0,022", "0,021",
"0,05", "0,085", "0,102", "0,122", "0,096", "0,027", "0,03",
NA, "0,078", "0,066", NA), Lipid.12 = c(0.223, 0.261, 0.258,
0.212, 0.168, 0.101, 0.191, 0.09, 0.195, 0.082, 0.155, 0.2,
0.167, 0.231, 0.145, 0.089, 0.239, 0.141, 0.106, 0.124),
Lipid.13 = c(0.737, 0.763, 0.707, 0.587, 0.545, 0.317, 0.74,
0.602, 0.481, 0.531, 0.632, 0.448, 0.62, 0.766, 0.397, 0.623,
0.997, 0.578, 0.418, 0.412), Lipid.14 = c(0.683, 0.666, 0.507,
0.366, 0.443, 0.266, 0.493, 0.345, 0.368, 0.355, 0.432, 0.411,
0.491, 0.565, 0.357, 0.285, 0.604, 0.426, 0.538, 0.295),
Lipid.15 = c(0.911, 1.017, 0.503, 0.76, 0.741, 0.486, 0.648,
0.581, 0.955, 0.515, 0.932, 0.707, 0.626, 0.928, 0.836, 0.537,
0.654, 0.351, 0.498, 0.529), Lipid.16 = c(0.148, 0.116, 0.069,
0.104, 0.091, 0.064, 0.093, 0.123, 0.11, 0.097, 0.283, 0.076,
0.095, 0.194, 0.06, 0.061, 0.086, 0.051, 0.064, 0.059), Lipid.17 = c("0,155",
"0,274", "0,149", "0,127", "0,174", "nd", "0,109", "0,134",
"0,1", "0,09", "0,25", "0,112", "0,088", "0,243", "0,092",
"0,073", "0,153", "0,12", "0,14", "0,06"), Lipid.18 = c(3.143,
3.441, 4.359, 1.945, 2.573, 2.267, 3.585, 3.405, 2.296, 1.998,
3.468, 2.98, 3.626, 3.635, 3.236, 2.092, 2.586, 2.08, 1.718,
1.736), Lipid.19 = c(37.993, 36.148, 40.244, 30.395, 37.339,
35.742, 47.316, 47.555, 34.351, 32.377, 38.694, 39.413, 36.114,
41.235, 32.779, 32.222, 36.418, 36.918, 33.334, 31.421),
Lipid.20 = c(6.613, 5.913, 9.662, 3.789, 7.485, 6.297, 8.254,
8.07, 4.905, 5.686, 7.742, 7.533, 6.875, 7.908, 7.022, 5.446,
6.1, 6.782, 6.062, 6.089), Lipid.21 = c(7.235, 6.759, 8.331,
4.931, 6.558, 4.186, 5.99, 5.629, 3.066, 3.439, 7.102, 7.655,
6.606, 7.858, 5.804, 3.135, 3.218, 3.639, 2.975, 3.13), Lipid.22 = c(6.453,
6.664, 9.048, 4.341, 8.03, 7.599, 10.24, 10.954, 5.873, 6.687,
8.005, 8.908, 6.708, 8.06, 5.931, 6.083, 5.734, 5.587, 5.388,
6.088), Lipid.23 = c(4.943, 3.164, 5.153, 2.51, 4.071, 5.255,
7.636, 8.376, 4.726, 5.56, 4.762, 5.044, 4.549, 4.875, 4.57,
5.147, 4.396, 4.031, 3.556, 4.38), Lipid.24 = c(3.973, 4.279,
5.928, 3.066, 4.95, 4.667, 7.949, 7.268, 4.948, 3.72, 5.137,
5.539, 4.006, 5.276, 3.909, 4.163, 4.954, 5.02, 3.961, 4.201
), Lipid.25 = c(7.638, 5.224, 8.417, 3.902, 7.267, 6.007,
8.256, 7.457, 4.801, 4.86, 7.581, 8.173, 7.57, 8.591, 7.482,
5.091, 5.651, 6.577, 5.415, 5.76), Lipid.26 = c(10.225, 8.293,
13.188, 5.607, 10.993, 4.491, 5.767, 5.011, 3.589, 3.145,
11.471, 12.183, 9.686, 12.562, 9.697, 3.34, 4.186, 4.485,
3.23, 4.229), Lipid.27 = c(5.848, 4.856, 6.503, 3.534, 5.358,
8.933, 14.034, 12.806, 7.781, 8.094, 6.765, 6.867, 5.539,
7.772, 5.883, 7.832, 8.607, 7.586, 6.628, 7.563), Lipid.28 = c(32.941,
30.579, 31.358, 15.861, 30.353, 25.222, 35.662, 34.035, 20.338,
24.682, 30.698, 34.024, 31.608, 37.539, 24.901, 20.131, 23.126,
30.803, 25.639, 18.935)), class = "data.frame", row.names = c(NA,
-20L))
Clean up the df:
# Remove the columns that cannot be tested: Lipid.4 and Lipid.7 contain only
# NA, and Lipid.9 has a single non-missing value.
data$Lipid.4 <- NULL
data$Lipid.7 <- NULL
data$Lipid.9 <- NULL
# Get the lipid column names (we don't need the ID column for the loops)
all_lipids <- setdiff(colnames(data), "ID")
# Some lipid columns are stored as text with a decimal comma (e.g. "0,242").
# Convert every such column to numeric instead of repeating the same two
# lines per column.
for (column in all_lipids) {
  if (is.character(data[[column]])) {
    values <- gsub(",", ".", data[[column]], fixed = TRUE) # comma to dot
    # "nd" is a non-numeric placeholder (presumably "not determined"); set it
    # to NA explicitly rather than relying on a coercion warning.
    # %in% is used because it never returns NA for missing entries.
    values[values %in% "nd"] <- NA
    data[[column]] <- as.numeric(values) # convert from string to numeric
  }
}
# Now loop over each column and perform a t-test of that lipid between the
# groups in ID
for (column in all_lipids) {
  print(column)
  print(t.test(data[, column] ~ data$ID))
}
You get for each lipid:
[1] "Lipid.1"
Welch Two Sample t-test
data: data[, column] by data$ID
t = 0.15843, df = 17.391, p-value = 0.8759
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.2766112 0.3216112
sample estimates:
mean in group A mean in group B
0.7144 0.6919
And just a final comment: you perform a lot of comparisons. You may want to consider correcting for multiple testing.
Let's start with the fact that the data you pasted in is dirty! Instead of numbers, you have strings. For example, Lipid.10:
Lipid.10 = c("0,242", "0,254", "0,134",
"0,226", "0,243", "0,122", "0,082", "0,119", "0,098", "0,093",
"0,27", "0,284", "0,258", "0,236", "0,173", "0,106", "0,138",
"0,066", "0,072", "0,081")
Besides, you have variables that only contain NA values
Lipid.4 = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_)
So I had to clean them up a bit. This is the cleaned data (assign it to df to run the code below):
structure(list(ID = c("A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B"),
Lipid.1 = c(0.737, 0.419, 0.468, 0.805, 1.036, 0.825, 0.286,
1.166, 0.898, 0.504, 1.433, 0.41, 0.325, 0.866, 0.337, 0.876,
0.636, 0.953, 0.481, 0.602), Lipid.2 = c(0.001, 0.017, 0.013,
0.025, 0.018, 0.003, 0.007, NA, 0.01, 0.002, 0.01, 0.022,
0.005, NA, 0.018, NA, 0.015, 0.016, NA, 0.01), Lipid.3 = c(0.035,
0.018, 0.036, 0.024, 0.023, 0.027, 0.036, 0.037, 0.013, 0.037,
0.03, 0.04, 0.038, 0.033, 0.016, 0.034, 0.029, 0.033, 0.018,
0.029), Lipid.4 = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), Lipid.5 = c(0.09,
0.099, 0.12, 0.058, 0.136, 0.103, 0.153, 0.148, 0.047, 0.085,
0.098, 0.133, 0.099, 0.121, 0.084, 0.065, 0.11, 0.088, 0.065,
0.043), Lipid.6 = c(0.39, 0.555, 0.568, 0.6, 0.626, 0.378,
0.657, 0.57, 0.271, 0.41, 0.474, 0.617, 0.491, 0.738, 0.459,
0.365, 0.499, 0.388, 0.271, 0.275), Lipid.7 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), Lipid.8 = c(0.186, 0.197, 0.191, 0.125, 0.209,
0.107, 0.174, 0.143, 0.055, 0.134, 0.148, 0.193, 0.184, 0.213,
0.134, 0.085, 0.165, 0.215, 0.163, 0.061), Lipid.9 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 0.007), Lipid.10 = c(0.242, 0.254, 0.134, 0.226,
0.243, 0.122, 0.082, 0.119, 0.098, 0.093, 0.27, 0.284, 0.258,
0.236, 0.173, 0.106, 0.138, 0.066, 0.072, 0.081), Lipid.11 = c(0.053,
0.114, 0.038, 0.094, 0.073, 0.067, 0.028, 0.022, 0.021, 0.05,
0.085, 0.102, 0.122, 0.096, 0.027, 0.03, NA, 0.078, 0.066,
NA), Lipid.12 = c(0.223, 0.261, 0.258, 0.212, 0.168, 0.101,
0.191, 0.09, 0.195, 0.082, 0.155, 0.2, 0.167, 0.231, 0.145,
0.089, 0.239, 0.141, 0.106, 0.124), Lipid.13 = c(0.737, 0.763,
0.707, 0.587, 0.545, 0.317, 0.74, 0.602, 0.481, 0.531, 0.632,
0.448, 0.62, 0.766, 0.397, 0.623, 0.997, 0.578, 0.418, 0.412
), Lipid.14 = c(0.683, 0.666, 0.507, 0.366, 0.443, 0.266,
0.493, 0.345, 0.368, 0.355, 0.432, 0.411, 0.491, 0.565, 0.357,
0.285, 0.604, 0.426, 0.538, 0.295), Lipid.15 = c(0.911, 1.017,
0.503, 0.76, 0.741, 0.486, 0.648, 0.581, 0.955, 0.515, 0.932,
0.707, 0.626, 0.928, 0.836, 0.537, 0.654, 0.351, 0.498, 0.529
), Lipid.16 = c(0.148, 0.116, 0.069, 0.104, 0.091, 0.064,
0.093, 0.123, 0.11, 0.097, 0.283, 0.076, 0.095, 0.194, 0.06,
0.061, 0.086, 0.051, 0.064, 0.059), Lipid.17 = c(0.155, 0.274,
0.149, 0.127, 0.174, NA, 0.109, 0.134, 0.1, 0.09, 0.25, 0.112,
0.088, 0.243, 0.092, 0.073, 0.153, 0.12, 0.14, 0.06), Lipid.18 = c(3.143,
3.441, 4.359, 1.945, 2.573, 2.267, 3.585, 3.405, 2.296, 1.998,
3.468, 2.98, 3.626, 3.635, 3.236, 2.092, 2.586, 2.08, 1.718,
1.736), Lipid.19 = c(37.993, 36.148, 40.244, 30.395, 37.339,
35.742, 47.316, 47.555, 34.351, 32.377, 38.694, 39.413, 36.114,
41.235, 32.779, 32.222, 36.418, 36.918, 33.334, 31.421),
Lipid.20 = c(6.613, 5.913, 9.662, 3.789, 7.485, 6.297, 8.254,
8.07, 4.905, 5.686, 7.742, 7.533, 6.875, 7.908, 7.022, 5.446,
6.1, 6.782, 6.062, 6.089), Lipid.21 = c(7.235, 6.759, 8.331,
4.931, 6.558, 4.186, 5.99, 5.629, 3.066, 3.439, 7.102, 7.655,
6.606, 7.858, 5.804, 3.135, 3.218, 3.639, 2.975, 3.13), Lipid.22 = c(6.453,
6.664, 9.048, 4.341, 8.03, 7.599, 10.24, 10.954, 5.873, 6.687,
8.005, 8.908, 6.708, 8.06, 5.931, 6.083, 5.734, 5.587, 5.388,
6.088), Lipid.23 = c(4.943, 3.164, 5.153, 2.51, 4.071, 5.255,
7.636, 8.376, 4.726, 5.56, 4.762, 5.044, 4.549, 4.875, 4.57,
5.147, 4.396, 4.031, 3.556, 4.38), Lipid.24 = c(3.973, 4.279,
5.928, 3.066, 4.95, 4.667, 7.949, 7.268, 4.948, 3.72, 5.137,
5.539, 4.006, 5.276, 3.909, 4.163, 4.954, 5.02, 3.961, 4.201
), Lipid.25 = c(7.638, 5.224, 8.417, 3.902, 7.267, 6.007,
8.256, 7.457, 4.801, 4.86, 7.581, 8.173, 7.57, 8.591, 7.482,
5.091, 5.651, 6.577, 5.415, 5.76), Lipid.26 = c(10.225, 8.293,
13.188, 5.607, 10.993, 4.491, 5.767, 5.011, 3.589, 3.145,
11.471, 12.183, 9.686, 12.562, 9.697, 3.34, 4.186, 4.485,
3.23, 4.229), Lipid.27 = c(5.848, 4.856, 6.503, 3.534, 5.358,
8.933, 14.034, 12.806, 7.781, 8.094, 6.765, 6.867, 5.539,
7.772, 5.883, 7.832, 8.607, 7.586, 6.628, 7.563), Lipid.28 = c(32.941,
30.579, 31.358, 15.861, 30.353, 25.222, 35.662, 34.035, 20.338,
24.682, 30.698, 34.024, 31.608, 37.539, 24.901, 20.131, 23.126,
30.803, 25.639, 18.935)), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
The rest is easy.
library(tidyverse)
# Run a two-sample t-test of `val` between the groups in `ID`.
#
# data: a data frame with a numeric column `val` and a grouping column `ID`.
#
# Returns a one-row tibble with the t statistic (`t`), the p-value (`p`) and
# the standard error (`stderr`). If t.test() fails (for example because `val`
# contains only NA values), a one-row tibble of NAs is returned instead so
# that the results for all variables can still be combined.
ft <- function(data) {
  tryCatch(
    {
      tout <- t.test(data$val ~ data$ID)
      tibble(
        t = tout$statistic,
        p = tout$p.value,
        stderr = tout$stderr
      )
    },
    error = function(msg) {
      # Typed NAs keep the columns numeric, matching the successful case
      tibble(t = NA_real_, p = NA_real_, stderr = NA_real_)
    }
  )
}
# Reshape to long format (lipid name in `Lipid`, measurement in `val`) and
# nest the rows of each lipid into its own data frame
nested_lipids <- df %>%
  pivot_longer(starts_with("Lipid"), names_to = "Lipid", values_to = "val") %>%
  group_by(Lipid) %>%
  nest()

# Apply ft() to each nested data frame and expand the one-row results
nested_lipids %>%
  mutate(testt = map(data, ft)) %>%
  select(Lipid, testt) %>%
  unnest(testt)
output
# A tibble: 28 x 4
# Groups: Lipid [28]
Lipid t p stderr
<chr> <dbl> <dbl> <dbl>
1 Lipid.1 0.158 0.876 0.142
2 Lipid.2 -0.870 0.399 0.00350
3 Lipid.3 -0.377 0.711 0.00372
4 Lipid.4 NA NA NA
5 Lipid.5 0.930 0.366 0.0143
6 Lipid.6 0.730 0.475 0.0614
7 Lipid.7 NA NA NA
8 Lipid.8 -0.180 0.859 0.0223
9 Lipid.9 NA NA NA
10 Lipid.10 -0.200 0.844 0.0355
# ... with 18 more rows
Customize the ft function as needed.
I had to use the tryCatch function in ft because of variables that contain only NA values.
You can also use the multtest library in R for multiple two-sample t-tests, as shown in the following code:
library(multtest)
# Transpose the data so that each row is a variable (ID first, then one row
# per lipid) and each column is a sample
df <- as.data.frame(t(as.matrix(dummy)))
# Drop the ID row and coerce every sample column to numeric; entries that are
# not plain numbers (e.g. "0,242" or "nd") become NA
X <- apply(as.matrix.noquote(df[2:nrow(df),]), 2, as.numeric)
cl <- ifelse(df[1,] == 'A', 1, 0) # class labels: 1 for group A, 0 otherwise
# One statistic per row of X, i.e. per lipid
# NOTE(review): test='t' should be multtest's two-sample Welch t-statistic - confirm in the package docs
welch_t_stat <- mt.teststat(X, cl, test='t')
welch_t_stat
# [1] 0.15843467 -0.86954194 -0.37680666 NA 0.92978706 0.72969094 NA -0.17962582 NA NA NA
# [12] 0.69705527 0.16001073 0.15733921 0.59540273 -0.05557413 NA 0.52706460 0.99860493 -0.14561137 0.58894166 1.25114061
# [23] 1.03458080 0.86540315 -0.62788116 -0.28806189 0.60206042 0.12954702
As can be seen from the above result, there are 28 Welch t-tests performed for 28 lipids in the dataframe.
Since you obtained individual t-statistics, now, you can compute the p-values and apply FWER corrections with Bonferroni / Holm or FDR corrections with Benjamini & Hochberg methods (useful when you have large number of tests):
# Two-sided raw p-values, assuming the statistics are standard normal.
# lower.tail = FALSE takes the upper tail directly, which avoids the loss of
# precision of 1 - pnorm(x) when the statistic is large.
raw_p <- 2 * pnorm(abs(welch_t_stat), lower.tail = FALSE)
# or use pt() with appropriate df
# Multiple-testing corrections to apply to the raw p-values
procedures <- c("Bonferroni", "Holm", "BH")
adjusted <- mt.rawp2adjp(raw_p, procedures)

Find point of systematic decrease in R

I have the following data frame:
df <- structure(list(x = c(1059.6, 1061.4, 1063.4, 1064.9, 1066.3,
1068, 1069.8, 1071.4, 1072.9, 1074.4, 1075.9, 1077.5, 1079.1,
1080.5, 1082.1, 1083.8, 1085.1, 1086.7, 1088.1, 1089.5, 1091.6,
1093.1, 1094.5, 1095.8, 1097.1, 1098.4, 1099.8, 1101.1, 1102.5,
1103.9, 1105.3, 1106.6, 1108, 1109.4, 1110.8, 1112.2, 1113.7,
1115.2, 1116.5, 1117.9, 1119.1, 1120.4, 1121.8, 1123.1, 1124.8,
1126.2, 1127.4, 1128.8, 1130.2, 1131.8, 1133.3, 1134.6, 1138.5,
1141.2, 1142.4, 1143.6, 1144.8, 1146.8, 1148.2, 1149.6, 1150.9,
1152.2, 1153.4, 1154.7, 1155.9, 1157.1, 1158.3, 1159.5, 1161.9,
1163.4, 1164.7, 1166, 1167.2, 1169, 1170.3, 1171.5, 1172.8, 1173.9,
1175.1, 1176.8, 1178, 1179.2, 1180.3, 1181.6, 1182.8, 1184.1,
1185.8, 1187, 1188.2, 1189.4, 1190.5, 1191.8, 1193, 1194.3, 1195.5,
1205.8, 1206.9, 1208, 1209, 1210.2, 1211.3, 1212.4, 1213.6, 1214.7,
1217.2, 1218.6, 1222.3, 1223.6, 1224.7, 1225.9, 1227.1, 1228.2,
1229.3, 1230.4, 1231.6, 1232.7, 1233.6, 1234.6, 1235.7, 1236.9,
1238.4, 1239.5, 1240.6, 1241.6, 1242.7, 1243.7, 1244.8, 1245.9,
1247, 1248.1, 1249.2, 1250.3, 1251.3, 1252.6, 1253.7, 1254.8,
1255.8, 1256.8, 1257.8, 1258.8, 1261.4, 1262.5, 1263.5, 1264.5,
1265.6, 1266.6, 1267.8, 1268.8, 1270.1, 1271.1, 1272.1, 1273.2,
1274.1, 1275.2, 1276.3, 1279, 1280, 1281, 1282.1, 1283.1, 1284.1,
1285, 1286, 1287, 1288, 1289, 1290, 1291.1, 1292.3, 1293.3, 1294.4,
1298.6, 1299.6, 1300.5, 1301.5, 1302.5, 1303.5, 1304.6, 1305.5,
1306.4, 1307.6, 1308.6, 1309.7, 1310.7, 1311.7, 1312.7, 1315.2,
1316.3, 1317.3, 1318.3, 1319.3, 1320.3, 1321.3, 1322.3, 1323.2,
1326.8, 1327.8, 1329, 1330, 1331, 1332, 1333, 1333.9, 1335, 1336,
1337.3, 1338.3, 1339.3, 1340.5, 1341.6, 1342.7, 1343.8, 1344.9,
1345.9, 1346.8, 1347.8, 1348.8, 1350, 1351.1, 1352, 1353.3, 1354.3,
1355.3, 1356.2, 1357.1, 1358, 1359.2, 1360.2, 1364.4, 1365.5,
1366.6, 1367.6, 1368.7, 1369.8, 1371, 1372, 1373, 1374.1, 1375,
1376, 1376.9, 1377.8, 1378.7, 1379.6, 1380.5, 1381.4, 1382.3,
1383.3, 1384.2, 1385.2, 1387.6, 1388.5, 1389.5, 1390.4, 1391.4,
1392.5, 1393.6, 1394.6, 1395.6, 1397, 1397.9, 1398.8, 1399.8,
1400.6, 1401.6, 1402.5, 1403.4, 1404.2, 1405.1, 1407.4, 1408.3,
1409.2, 1410.1, 1411.2, 1412.2, 1413.2, 1414.2, 1415.6, 1416.7,
1417.8, 1418.9, 1420.2, 1421.5, 1424.6, 1425.7, 1427, 1428.1,
1429.3, 1430.7, 1431.9, 1433.1, 1434.5, 1435.7, 1436.8, 1438,
1439.4, 1440.6, 1441.9, 1443, 1444.4, 1445.6, 1447.3, 1448.5,
1449.7, 1450.9, 1452.1, 1453.2, 1454.5, 1455.6, 1456.8, 1458.1,
1459.3, 1460.3, 1461.4, 1462.4, 1463.9, 1465.1, 1466.3, 1469.8,
1471.1, 1472.6, 1473.8, 1475, 1476.2, 1477.5, 1479.1, 1480.7,
1482, 1483.2, 1484.9, 1486.2, 1487.5, 1488.8, 1490, 1491.3, 1492.4,
1503, 1504.3, 1506.3, 1507.5, 1508.8, 1510.2, 1511.4, 1512.5,
1513.8, 1515.6, 1517.1, 1520.1, 1523.9, 1526.5, 1527.9, 1529.8,
1531.2, 1532.4, 1533.7, 1536, 1537.4, 1538.8, 1540.2, 1541.5,
1542.9, 1544.2, 1545.6, 1546.9, 1548.3, 1549.7, 1551.1, 1552.7,
1554.1, 1556.4, 1557.8, 1559.2, 1560.6, 1562, 1563.4, 1564.7,
1566.2, 1567.5, 1568.9, 1570.2, 1571.4, 1573.9, 1576.7, 1581.5,
1582.8, 1584.7, 1586.2, 1587.7, 1589.3, 1591, 1592.8, 1594.7,
1596.4, 1598.5, 1600.6, 1602.4, 1604.6, 1606.9, 1609, 1611, 1612.6,
1614.4, 1616.3, 1618.6, 1620.6, 1622.4, 1624.5, 1627.2, 1629.3,
1631.4, 1635, 1636.9, 1638.6, 1640.5, 1642.1, 1643.7, 1645.5,
1647.1, 1648.7, 1650.9, 1653, 1655.2, 1657.1, 1659.1, 1661.5,
1663.6, 1665.9, 1668.1, 1671.7, 1674, 1676.2, 1678.1, 1679.7,
1681.6, 1683.6, 1685.7, 1688, 1693.7, 1695.7, 1697.6, 1699.7,
1701.7, 1704.1), y = c(1.876, 2.027, 2.087, 2.231, 2.18, 1.922,
1.921, 1.851, 1.961, 2.035, 2.043, 2.043, 1.838, 2.032, 2.112,
1.976, 2.046, 2.117, 2.062, 2.07, 1.748, 1.917, 2.092, 2.283,
2.158, 2.119, 2.023, 1.971, 1.882, 2.058, 2.141, 2.241, 2.079,
1.946, 1.959, 2.117, 1.923, 2.015, 2.066, 1.98, 2.091, 1.929,
1.987, 1.852, 1.935, 2.127, 1.982, 2.182, 2.099, 2.03, 1.912,
1.998, 2.491, 2.359, 2.188, 1.965, 1.906, 1.772, 1.927, 2.077,
2.381, 2.191, 2.089, 2.086, 2.017, 2.028, 1.832, 1.88, 2.053,
2.177, 1.995, 2.045, 2.116, 1.961, 1.99, 2.227, 2.235, 2.208,
2.249, 1.992, 2.045, 2.152, 2.237, 2.239, 2.247, 2.114, 1.956,
2.042, 1.926, 2.396, 2.184, 2.208, 2.016, 2.177, 2.29, 2.469,
2.502, 2.115, 2.081, 2.091, 2.188, 2.118, 2.179, 2.067, 1.962,
2.181, 2.246, 2.526, 2.145, 1.961, 2.299, 2.306, 2.34, 2.133,
1.974, 1.997, 2.47, 2.24, 2.247, 2.137, 1.965, 2.232, 2.225,
2.417, 2.362, 2.155, 2.034, 2.151, 2.176, 2.183, 2.372, 2.145,
2.284, 1.967, 2.299, 2.299, 2.183, 2.292, 2.193, 2.249, 2.32,
2.333, 2.286, 2.216, 2.233, 2.453, 2.373, 2.284, 2.074, 2.014,
2.153, 2.353, 2.465, 2.373, 2.181, 2.424, 2.334, 2.349, 2.39,
2.513, 2.526, 2.268, 2.098, 2.326, 2.385, 2.306, 2.378, 2.126,
2.191, 2.363, 2.222, 2.723, 2.686, 2.4, 2.251, 2.121, 2.104,
2.16, 2.333, 2.151, 2.116, 2.136, 2.293, 2.281, 2.313, 2.374,
2.585, 2.521, 2.656, 2.66, 2.399, 2.442, 2.413, 2.528, 2.212,
2.58, 2.667, 2.153, 2.736, 2.486, 2.406, 2.39, 2.403, 2.504,
2.502, 2.158, 2.617, 2.434, 2.364, 2.497, 2.456, 2.263, 2.432,
2.562, 2.453, 2.249, 2.18, 2.141, 2.324, 2.176, 2.184, 2.153,
2.332, 2.202, 2.332, 2.125, 2.156, 2.189, 2.71, 2.458, 2.502,
2.285, 2.527, 2.437, 2.418, 2.507, 2.087, 2.321, 2.701, 2.486,
2.389, 2.335, 2.26, 2.108, 2.164, 2.286, 2.103, 2.257, 2.137,
2.076, 2.378, 2.637, 2.446, 2.448, 2.539, 2.253, 2.099, 2.59,
2.405, 2.219, 2.542, 2.532, 2.507, 2.439, 2.463, 2.342, 2.329,
2.436, 2.511, 2.557, 2.603, 2.5, 2.428, 2.204, 2.307, 2.174,
2.193, 1.793, 2.116, 2.107, 2.209, 1.967, 1.834, 2.713, 2.647,
2.379, 2.229, 2.11, 1.964, 1.985, 2.162, 1.996, 2.074, 1.994,
1.839, 1.838, 1.743, 1.668, 1.91, 1.735, 1.714, 1.421, 1.767,
1.816, 1.755, 1.755, 1.698, 1.608, 1.556, 1.511, 1.394, 1.425,
1.579, 1.495, 1.627, 1.305, 1.471, 1.469, 1.67, 1.697, 1.42,
1.483, 1.274, 1.341, 1.235, 1.295, 1.401, 1.463, 1.313, 1.176,
1.333, 1.373, 1.299, 1.086, 1.139, 1.237, 1.303, 1.143, 1.13,
1.114, 1.096, 1.248, 1.302, 1.19, 1.069, 1.1, 1.027, 0.897, 1.09,
0.922, 1.116, 0.963, 1.011, 1.053, 1.025, 0.985, 0.981, 1.025,
1.117, 1.141, 1.135, 1.068, 0.982, 1.028, 1.06, 1.004, 1.112,
1.108, 1.04, 0.857, 0.91, 0.98, 1.081, 1.025, 0.996, 0.931, 1,
1.074, 0.987, 0.996, 1.125, 0.9, 0.607, 1.17, 1.08, 1, 0.909,
0.841, 0.924, 0.818, 0.846, 0.732, 1.006, 0.717, 0.594, 0.786,
0.685, 0.619, 0.684, 0.69, 0.633, 0.564, 0.689, 0.555, 0.445,
0.696, 0.677, 0.729, 0.541, 0.362, 0.312, 0.568, 0.711, 0.515,
0.622, 0.583, 0.631, 0.645, 0.696, 0.535, 0.424, 0.469, 0.519,
0.511, 0.485, 0.436, 0.412, 0.351, 0.556, 0.255, 0.519, 0.399,
0.497, 0.477, 0.564, 0.462, 0.433, 0.616, 0.547, 0.42, 0.499,
0.415, 0.368)), row.names = c(NA, -443L), class = c("tbl_df",
"tbl", "data.frame"), .Names = c("x", "y"))
Plot:
And I need to find the point where y starts to systematically decrease.
I know that the real point is x == 1405. However, is there a way to automatically detect it?
I am not expecting to find the exact x point. A really good approximation would do the job.
I already tried to perform a break point analysis with the segmented package, but with not much success. The best number I could get was x == 1363, but I am looking for a closer approximation.
Here's how to get a fitted smooth of the data using loess. When you say "starts to systematically decrease," I think you mean something like "when the slope gets negative beyond a certain threshold," since it seems to me that it visually peaks and starts to decline around the 1350's. I could manually get the peak to occur later by smoothing more than default, using span = 0.4.
library(broom)
# Local-regression (loess) smooth of y on x; span = 0.4 smooths more than the
# default so that the peak of the fitted curve moves later.
fit <- loess(formula = y ~ x, data = df, span = 0.4)
# Original observations plus the fitted values, standard errors and residuals.
df_aug <- augment(fit)
Using that model, the peak looks to be around the 1370's.
library(dplyr)
library(ggplot2)
# Row(s) of the augmented data at which the fitted curve reaches its maximum.
filter(df_aug, .fitted == max(.fitted))
# # A tibble: 1 x 5
# y x .fitted .se.fit .resid
# <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 2.09 1373 2.39 0.0181 -0.307
I presume you could get a better result if you can more definitively describe what model should be used to define "systematically decrease."
You might alternatively extract the slope and acceleration from the loess curve, but it's not clear that'd get you much closer to your expected result:
# Finite-difference slope of the fitted curve between consecutive points, and
# the finite-difference change of that slope ("curve"), both with respect to x.
df_aug_slope <- mutate(
  df_aug,
  slope = (.fitted - lag(.fitted)) / (x - lag(x)),
  curve = (slope - lag(slope)) / (x - lag(x))
)

# Raw points, loess fit (red), slope x 100 (blue) and curve x 1000 (green);
# the multipliers presumably just bring the derivatives onto the scale of y.
# The dashed vertical line marks x = 1405, the break point named in the question.
ggplot(data = df_aug_slope, mapping = aes(x = x)) +
  geom_point(mapping = aes(y = y)) +
  geom_line(mapping = aes(y = .fitted), colour = "red") +
  geom_line(mapping = aes(y = slope * 100), colour = "blue") +
  geom_line(mapping = aes(y = curve * 1000), colour = "green") +
  geom_vline(xintercept = 1405, linetype = "dashed") +
  theme_minimal()

How to force ggplot to order x-axis or y axis as we want in the plot?

I have a data frame that looks like this:
df<- structure(list(phenelzine = c(-0.0269, 0.0217, 0.4609, -0.0011,
0.0016, -0.0334, 0.5159, 0.4687, -0.5715, -0.1466, 0.1096, 0.1006,
-0.1021, 0.0093, -0.2616, 0.1496, 0.4463, -0.0833, 0.0573, 0.0602,
0.1345, -0.0911, -0.1444, -0.0624, -0.2009, -0.0579, 0.1701,
-0.161, -0.376, 0.1239, -0.1529, -0.1217, 0.2659, 0.1975, 0.0598,
-0.1397, 0.1959, -0.0342, -0.0771, 0.0017, 0.0562, 0.0605, -0.0976,
0.3257, -0.0385, -0.0687, 0.3087, 0.0992, -0.2253, -0.3171, -0.055,
0.2071, -0.1928, -0.113, -0.2142), denatonium.benzoate = c(-0.1734,
-0.2142, 0.2142, -0.1376, -0.0129, 0.0254, 0.06, 0.1768, 0.2295,
0.1772, 0.1978, -0.0556, 0.0971, 0.0979, 0.2073, 0.061, 0.2782,
-0.048, -0.0825, 0.1922, -0.0395, 0.1594, -0.2117, -0.0999, 5e-04,
-0.0595, -0.2083, 0.1004, -0.1279, -0.0613, -0.0576, -0.0567,
0.1006, -0.0213, -0.1109, 0.0313, 0.0641, -0.2919, -0.0119, 0.1644,
-0.1197, -0.1138, 0.078, -0.2729, -0.022, -0.1864, -0.0709, 0.0211,
0.0183, 0.0175, 0.1655, 0.0064, -0.0701, 0.0274, 0.1163), triamterene = c(-0.0253,
0.2896, 0.1606, -0.086, -0.1886, 0.0355, 0.0552, 0.0733, 0.0131,
0.6887, -0.0161, -0.3962, 0.1294, 0.0316, -0.1851, -0.1193, 0.0308,
-0.1663, 0.0417, -0.0568, 0.1047, 0.0162, 0.3497, 0.0708, -0.0317,
-0.1471, -0.1588, -0.4929, 0.078, 0.1263, -0.2024, -0.0021, -0.3611,
-0.0494, -0.4211, -0.0226, 0.5045, -0.0757, 0.0328, -0.0198,
-0.2622, 0.2297, -0.0454, 0.4094, 0.0826, -0.4326, 0.0316, 0.3048,
-0.2047, 0.1124, -0.2369, 0.1803, -0.1735, 0.2399, -0.0509),
talampicillin = c(-0.1761, -0.0355, 0.0295, 0.1979, 0.0545,
0.0673, -0.0797, -0.1566, 0.0496, 0.2623, -0.197, 0.0192,
-0.2673, 0.0687, -0.0058, -0.4072, -0.06, -0.2315, 0.2532,
-0.0395, 0.0911, -0.1348, -0.018, 0.3084, 0.3751, 0.1659,
-0.1882, 0.1874, -0.0979, -0.0829, -0.1693, 0.0179, -0.0264,
0.1013, 0.4272, 0.1253, -0.0733, 0, 0.1552, -0.3395, -0.0738,
0, 0, -0.1746, -0.2039, 0.2907, 0.0363, 0.2168, -0.1428,
-0.2898, 0.0793, 0.349, -0.043, 0.1547, -0.0557), triamcinolone = c(0.0115,
0.3329, -0.0752, 0.2784, -0.5543, 0.0139, -0.6692, -0.4599,
-4e-04, 0.0115, 0.0624, -0.2127, 0.1339, 0.1186, -0.0732,
0.1033, -0.0672, 0.0038, -0.0445, -0.2484, 0.0075, 0.1113,
0.3911, 0.0205, -0.0126, -0.0763, 0.0073, 0.4081, -0.2826,
-0.0348, 0.0957, -0.0069, 0.0103, 0.0607, 0.7144, 0.0012,
0.0216, 0.1009, -0.0395, -0.0432, 0.08, -0.1039, -0.38, 0.0532,
-0.0404, 0.4977, 0.0735, -0.0793, -0.4312, 0.0332, -0.0964,
0.1067, -0.0468, 0.0052, 0.1366), sulfaphenazole = c(-0.0886,
0.1081, 0.2002, 0.0625, 0.0403, 0.0256, -0.0074, -0.0678,
-0.0393, 0.3201, 0.4213, -0.3058, -0.1228, 0.0797, 0.0591,
-0.0541, -0.0721, -0.1914, -0.3058, 0.4353, 0.176, -0.1903,
0.142, -0.078, 0.0747, -0.3066, 0.2954, -0.2864, 0.0823,
-0.2976, 0.0354, 0.1075, 0.0696, 0.0413, -0.1505, -0.0958,
0.0996, 0.3221, -0.0582, 0.0723, 0.016, -0.3852, 0.0542,
0.0229, 0.1946, -0.0557, -0.0675, -0.2177, 0.2971, 0.3417,
-0.4023, -0.2623, 0.0103, -0.3864, 0.591), procyclidine = c(0.03,
0.0372, 0.335, 0.1666, -0.0048, -0.1153, 0.1449, -0.0488,
0.1038, -0.0245, 0.1008, 0.0194, -0.0315, -0.0325, 0.0161,
0.0316, 0.19, -0.3043, -0.1044, 0.2325, -0.0602, 0.0076,
0.1493, -0.049, 0.0593, -0.0343, 0.0667, 0.1323, 0.2388,
0.0962, -0.2329, -0.2198, 0.0395, -0.0078, 0.1016, 0.03,
-0.1293, -0.0076, -0.1876, -0.2648, 0.0044, 0.0609, 0.0403,
-0.0914, -0.1242, -0.0627, 0.067, 0.0665, 0.1154, 0.286,
-0.2009, 0.2039, -0.0567, -0.0365, -0.0198), pentoxifylline = c(0,
0.3439, -0.0614, 0.0181, -0.0149, -0.0216, -0.1211, -0.1816,
-0.0204, 0.1023, -0.0059, -0.008, -0.1121, 0.2029, 0.052,
-0.0935, 0, 0.0595, 0.0271, -0.0482, 0.0246, -0.1369, 0.1106,
0.4988, -0.0599, 0.0021, -0.0233, 0.2809, 5e-04, -0.0204,
0.0586, 0.0278, 0.0246, 0.0534, 0.0165, -0.1816, -0.071,
-0.0296, -0.1173, 0.0814, -0.0672, 0.0946, -0.2164, 0.0299,
-0.1341, 0.0439, 0.1124, 0.0125, -0.4091, -0.1134, 0.0098,
0.1957, 0.0044, 0.0922, 0.1851), suloctidil = c(-0.0772,
-0.1651, -0.0543, -0.037, 0.2182, -0.1884, 0.1866, 0.2013,
-0.3388, 0.0493, 0.0223, 0.3441, 0.0887, -0.1477, -0.0719,
-0.029, -0.0162, 0.0602, 0.1497, -0.0699, 0.1472, 0.3213,
-0.1266, 0.9143, 0.072, 0.4574, -0.1897, -0.7059, 0.3822,
-0.1055, -0.0744, 0.179, -0.5096, -0.4966, -0.8764, -0.3903,
0.174, 0.2181, -0.122, -0.0097, -0.2281, -0.1387, -0.0506,
0.1179, 0.1911, -0.9006, -0.0161, -0.2298, 0.1777, -0.1341,
-0.0264, -0.3478, -0.188, 0.035, 0.0441), etacrynic.acid = c(0.1017,
-0.4238, -0.0089, -0.1116, 0.1265, -0.0529, -0.121, -0.0243,
-0.0033, -1e-04, 0.0952, 0.4075, -0.3078, 0.1265, -0.0612,
-0.1239, 0.0381, 0.0491, 0.1062, -0.0398, 0.2044, 0.0341,
-0.0654, 0.2705, 0.0408, 0.0064, -0.0858, -0.19, -0.4934,
-0.0524, 2e-04, -0.1496, -0.1018, -0.3303, -0.665, -0.6764,
0, -0.0248, -0.091, 0.052, -0.0327, 0.2288, 0.8196, -0.1288,
0.038, -0.4468, 0.0013, -0.2137, 0.6752, 0.1275, 0.0198,
0.1209, -0.0735, 1e-04, -0.0706), diphenylpyraline = c(0.0495,
-0.1318, -0.0723, -0.0485, 0.0175, -0.1585, 0.0054, 0.0565,
-0.0637, 0.0366, 0.0155, 0.1003, -0.11, -0.129, 0.0673, -0.0519,
0.0284, -0.1663, 0.0224, -0.1806, -0.1265, 0.1559, -0.1337,
-0.0892, 0.08, -0.2713, -0.0064, -0.0193, -0.2862, -0.0627,
0.1242, 0.0607, -0.1815, -0.107, 0.038, -0.0171, 0.1978,
0.1513, -0.1016, -0.086, -0.0019, 0.042, -0.0073, -0.0751,
-0.0388, 0.0844, -0.0678, -0.0556, 0.1765, -0.0377, -0.0143,
0.3352, 0.0206, 0.1197, -0.1429), carbenoxolone = c(0.0769,
0.0987, 0.1992, -0.016, -0.0276, 0.2596, -0.0086, -0.0038,
-0.2432, -0.0237, 0.0696, -0.1436, 0.1942, -0.0411, -0.0948,
0.2645, -0.114, -0.0887, -0.6041, 0.0061, 0.0875, -0.1449,
-0.1735, -0.0606, -0.0785, 0.3483, 0.1196, 0.2659, 0.0614,
-0.1372, 0.1543, 0.0434, -0.0253, -0.0028, 0.0956, 0.3397,
-0.0771, 0.2627, -0.0028, 0.1337, 0.0046, -0.0757, -0.0112,
-0.084, 0.0312, 0.0935, -0.0709, 0.0829, -0.1061, -0.1504,
0.0122, 6e-04, 0.3138, 0.0632, 0.1019), arecoline = c(0.0185,
0.1999, -0.0313, -0.1868, -0.0626, 0.0298, 0.03, 0, -0.2209,
0.0101, -0.0693, -0.1656, -0.1048, 0.2098, 0.0393, -0.1354,
0.0328, -0.0311, -0.1967, -0.2653, 0.204, 0.1737, 0.36, -0.1034,
-0.3326, 0.0613, -0.2044, 0.1967, 0.0042, 0.2329, 0.2409,
-0.0335, 0.0068, 0.5101, 0.2002, 0.1867, 0.2134, -0.1072,
0.0854, 0.2414, -0.0653, 0.3851, 0.1358, -0.0102, 0.0268,
0.2497, -0.1376, 0.1798, 0.0427, 0.1593, -0.29, 0.0343, 0.2249,
-0.1301, -0.0987), chenodeoxycholic.acid = c(-0.0825, 0.2098,
-0.0911, -0.0287, 0.0473, 0.1348, -0.06, -0.0285, -0.1473,
-0.0162, -0.0863, 0.0652, -0.0256, 0.1345, -0.1175, -0.0141,
0.2985, 0.2963, 0.1925, -0.212, -0.2106, -0.1128, -0.3121,
0.0867, 0.0214, 0.0346, 0.0173, 0.1101, -0.3066, 0.1115,
0.0415, 0.285, 0.0787, -0.0985, -0.027, -0.2767, 0.1572,
-0.0518, 0.0815, 0.0168, -0.2047, -0.1517, -0.0076, -0.0359,
0.0596, -0.0706, 0.1006, 0.0099, 0.1661, -0.0435, 0.0331,
0.0996, 0.197, -0.0067, -0.0328), torasemide = c(0.0987,
0.1829, 0.1693, -0.0482, 0.0534, 0.1624, 0.0047, -0.1721,
0.248, 0.1715, -0.2109, -0.0909, 0.0513, -0.1358, 0.6297,
0.0762, -0.3473, -0.2189, -0.126, 0.3054, -0.0451, -0.2243,
0.0914, -0.0199, -6e-04, -0.1102, -0.0082, 0.0242, -0.053,
-0.0825, -0.1874, 0.1251, 0, -0.0457, -0.1179, -0.3169, 0.0559,
0.1999, 0.262, 0.0462, 0.1038, -0.0999, -0.1278, -0.1944,
0.0074, 0.1785, 0.0486, -0.027, -0.2748, -0.0428, 0.0696,
-0.1544, 0.117, 0.1665, 0), troglitazone = c(-0.1513, 0.2568,
0.1377, 0.2474, -0.0359, -0.0859, 0.2014, 0.096, 0.0751,
0.1304, -0.1376, -0.1718, 0.0063, 0.2732, -0.2237, 0.1951,
-0.0496, -0.0087, 0.0245, -0.1212, 0.194, -0.0971, -0.0184,
0.1071, 0.0129, -0.061, 0.1704, -0.0021, 0.3985, 0.0963,
0.1591, -0.381, 0.2277, 0.5395, 0.0611, -0.0204, 0.0614,
-0.2816, 0.0308, -0.0642, -0.1722, -0.0191, -0.0374, 0.0436,
-0.3296, 0.0379, -0.0517, 0.2978, 0.0834, -0.1304, 0.1039,
0.2279, 0.5988, 0.0424, -0.2677), mepenzolate.bromide = c(0.0392,
-0.0041, -0.0531, -0.1213, -0.0919, 0.3096, 0.0545, 0.0922,
0.112, 0.0606, -0.25, -0.0086, 0.2729, 0.1307, 0.2124, -0.039,
-0.1005, 0.0125, 0.0644, -0.0064, 0.0707, 0.1931, -0.5523,
-0.285, -0.3085, 0.0343, 0.0622, -0.1849, -0.0436, 0.1069,
0.072, 0.0312, 0.1016, -0.1611, -0.139, 0.0047, 0.0297, -0.1409,
-0.0834, -0.0087, 0.2839, -0.03, -0.0378, -0.0705, 0.0836,
-0.0824, 0.0803, -0.1378, 0.014, -0.0351, 0.0049, 0.0153,
-0.093, 0.0263, 0.0855), megestrol = c(-0.1106, 0.2547, -0.0488,
0, -0.0406, -0.0826, -0.1795, -0.3099, -0.0316, 0.1563, 0,
-0.1189, -0.0343, -0.0482, 0.8041, 0.5202, 0.0733, 0.0419,
0.2212, -0.0888, 0.0268, 0.0886, -0.0476, -0.3235, -0.1348,
-0.2665, 0.294, 0.122, -0.1695, 0.1186, 0.0927, 0, -0.0429,
-0.0492, 0.0963, -0.0934, 0.1289, 0.1447, -0.0308, -0.1145,
0.1861, -0.0464, 0.0992, 0, 0.0771, -0.1804, 0.0899, 0.1031,
0, 0.1035, 0.1404, 0, -0.1281, -0.2463, -0.2874), dexpropranolol = c(-0.1942,
-0.0051, 0.0561, 0.0166, 0.0029, -0.1707, 0.2173, 0.1178,
-0.0683, -0.2903, -0.2874, 0.1764, -0.1135, 0.2574, 0, -0.1194,
-0.4694, -0.285, 0.0364, -0.0362, 0.158, -0.0648, -0.0046,
0.1435, -0.0946, 0.11, 0.1712, 0.3853, 0.1692, -0.0977, 0.3577,
0.2089, 0.3066, -0.0788, 0.3092, 0.113, -0.1234, 0.5753,
0.0359, 0.1014, 0.2017, 0.2949, 0.019, -0.0855, 0.0428, 0.2171,
-0.0951, -0.0761, -0.1451, -0.2832, -0.2653, -0.166, 0.0376,
-0.0214, 0.0131), nipecotic.acid = c(0.2653, -0.3115, -0.0069,
0.118, -0.0998, -0.0289, 0.1051, 0.0132, -0.0028, -0.1072,
-0.1888, -0.0312, 0.3868, -0.0019, -0.1878, -0.0798, -0.3881,
-0.2162, -0.1189, 0.1569, -0.0537, 0.0196, -0.0013, -0.0818,
-0.1885, 0.2266, 0.2862, 0.1302, 0.0734, 0.0936, -0.0159,
-0.0974, -0.0253, 0.2637, -0.2069, 0.2298, -0.1518, -0.13,
-0.0344, -0.0623, -0.0307, 0.1186, -0.2535, 0.0387, 0.0374,
0.0443, -0.0783, 0.1588, -0.0239, 0.1104, 0.1518, 0.1921,
-0.0744, -0.0125, -0.19), omeprazole = c(0.0272, -0.3796,
-0.0125, 0.0585, -0.1073, -0.0761, 0.3838, 0.5699, 0.0027,
-0.3822, 0.1132, -0.1588, 0.2396, -0.217, 0.0434, 0.1336,
-0.0608, -0.0294, 0.1702, -0.2499, 0.0568, 0.0348, -0.0486,
0, 0.2792, -0.1318, 0.0249, 0.005, 0.1688, -0.3908, -0.2366,
-0.0204, -0.0672, -0.0181, -0.0514, 0.139, 0.0582, 0.109,
0.1018, -0.2249, 0.0432, -0.1882, 0.0818, -0.1082, -0.0456,
-0.0764, 0.4292, 0.0553, -0.1346, -0.2173, 0, 0.1391, 0.1908,
-0.0633, 0.0385), etanidazole = c(-0.1036, 0.0281, 0.0039,
0.0123, -0.2305, -0.0542, 0.0485, -0.0686, -0.1829, 0.2637,
-0.1158, -0.2029, 0.104, 0.3169, 0.1387, 0.0844, -0.1385,
-0.3046, 0.0706, -0.076, -0.1772, 0, -0.393, 0.0841, -0.2469,
-0.1261, 0.1085, 0.0265, 0.0141, 0.0183, 0.2398, -0.0353,
-0.5217, -0.1577, 0.4357, -0.1347, 0.0362, 0.4197, -0.0326,
0.1955, -0.0352, -0.0122, -0.0658, -0.1482, -0.1485, 0.3406,
0.0618, 0.0822, 0.2024, -0.3203, 0.1646, -0.1412, 0.3221,
-8e-04, 0.0472), acenocoumarol = c(-0.0087, 0.2005, -0.1051,
-0.067, -0.0655, -0.0467, -0.4081, -0.1085, -0.0556, 0.0145,
-0.0865, 0.1961, -0.1842, -0.0398, -0.0944, 0.046, 0.0632,
-0.1008, 0.0837, 0.0023, 0.0482, 0.0195, -0.2322, -0.0293,
0.0671, -8e-04, -0.013, -0.0905, -0.0019, -0.0333, 0.0421,
-0.0712, -0.0771, 0.1843, 0.0589, 0.0229, -0.0976, -0.0184,
0.1559, -0.0297, -0.1089, -0.0461, -0.0322, 0.1253, 0.0408,
-0.1147, 0.082, 0.1072, 0.1094, 0.1066, 0.104, 0.1947, 0.1116,
-0.1421, -0.0934), dacarbazine = c(-0.0065, -0.7082, 0.0979,
0.0048, -0.1409, 0.0541, -0.0793, -0.1102, -0.1513, -0.002,
-0.0898, 0.0443, 0.0686, -0.1122, -0.0239, 0.0126, -0.0093,
0.0366, 0.0061, -0.1217, 0.0259, -0.1117, -0.1178, -0.2467,
-0.1128, -0.0671, 0.0347, 0.4719, -0.3849, -0.0517, 0.0764,
0.2058, 0.3147, 0.3275, 0.551, 0.1175, 0.0383, -0.1004, 0.0425,
0.1717, 0.174, 0.0122, -0.1466, 0.0381, -0.0656, 0.2572,
-0.0851, 0.0996, -0.2686, -0.1501, 0.0994, 0.001, 0.0891,
0.0652, -0.2386), diphemanil.metilsulfate = c(-0.0312, -0.2611,
-0.0098, -0.216, 0.24, 0.0909, 0.1171, -0.156, 0.0986, -0.0946,
0.1027, -0.1465, 0.0611, 0.0637, 0.0677, -0.0017, -0.0884,
-0.0023, 0.012, -0.3062, -0.0396, -0.0197, -0.2654, -0.1362,
-0.1075, 0.0738, 0.0039, -0.047, -0.0562, -0.0751, 0.0048,
-0.0273, 0.1636, -0.003, 0.1565, 0.2033, -0.1181, -0.046,
0.0056, 0.0896, -0.0309, -0.0146, -0.0784, 0.002, 0.1111,
0.2156, 0.0915, 0.1174, 0.2337, -0.0151, -0.056, 0.0705,
0.0522, -0.0171, -0.3229), meprylcaine = c(-0.0382, -0.1127,
0, 0, 0.0964, 0.0097, -0.0231, -0.0747, 0.2147, 0.0618, -0.2313,
0.0878, -0.3681, 0.0497, -0.0105, 0.2084, 0.0492, 0.0423,
0.0634, -0.0457, -0.0883, 0.0104, 0.1429, -0.1485, 0.4838,
0.0139, 0.2511, -0.1276, -0.1517, -0.0574, -0.0525, -0.0053,
-0.0796, 0.1029, -0.0393, -0.0587, -0.1787, -0.2193, 0.1723,
0.1684, 0.1708, 0.1961, -0.027, 0.0921, -0.0293, -0.2186,
-0.1013, -0.043, 0.2408, -0.2149, 0.2316, -0.0559, 0.2917,
-0.0128, -0.2173), mevalolactone = c(-0.0393, -0.174, -0.1632,
0.4181, -0.038, 0.1698, -0.1771, -0.168, -0.3952, 0.1739,
0.1304, 0, -0.0317, -0.1822, -0.2021, -0.0743, 0.3155, 0.0782,
0.2533, -0.1159, 0.0135, 0.2274, -0.1785, -0.4384, -0.268,
0.1331, -0.4809, 0.6483, -0.7939, -0.1549, -0.0138, -0.0604,
0.043, 0.1208, 0.4608, 0.4028, 0.1985, 0.1172, 0.0787, 0.099,
-0.0329, 0.1553, -0.0503, -0.1355, 0.0382, 0.4655, -0.1948,
-0.1411, -0.0149, 0.0259, -0.4708, 0.0715, 0.0608, 0, 0.5629
), buspirone = c(-0.1276, -0.0097, -0.1365, 0.0801, -0.0499,
0.0499, 0.2245, 0.0739, -0.0485, -0.0042, -0.2356, -0.1325,
0, 0, -0.0756, 0.0099, 0.069, 0.062, 0.0404, 0.2093, -0.0973,
-0.237, -0.2371, 0.1403, -0.0553, -0.043, 0.0219, 0.0596,
0.0999, -0.1358, 0.1098, -0.06, 0.1429, -0.0758, 0.1528,
-0.0426, 0.1053, 0.2444, -0.0031, -0.2131, -0.1855, 0.144,
0.0037, 0.1078, 0.2742, 0.247, 0.0755, 0.0187, 0.0075, -0.0934,
-0.2814, 0.1086, -0.379, -0.1291, -0.0597), sulfafurazole = c(0.0336,
0.0083, 0.0309, 0.1891, -0.0991, 0.0259, 0.084, 0.0888, 0.0672,
-0.0859, -0.0196, -0.4085, 0.0676, 0.0357, -0.0486, -0.2493,
-0.053, -0.128, 0.103, -0.0471, 0.0839, 0.3382, 0.1353, 0.2906,
0.1022, -0.1326, -0.0457, 0.3148, 0.2374, 0.0719, 0.0469,
-0.2298, 0.1334, 0.1581, -0.1396, -0.2614, -2e-04, 0.0297,
-0.0349, -0.1307, -0.1516, -0.0526, 0.0468, 0.0534, -0.0268,
0.2099, -0.0794, 0.2252, 0.095, -0.1607, 0.1593, 0.0157,
0.0283, 0.1304, -8e-04), ciclopirox = c(0.0958, 0.2534, -0.1897,
-0.0457, 0.1704, -0.2632, -0.2596, -0.1076, 0.3008, 0.1348,
-0.0817, 0.2929, -0.2249, -0.1357, -0.0498, 0.3122, 0.1269,
0.0809, 0.05, -0.2514, -0.3665, -0.1104, 0.0793, 0.8682,
0.3056, 0.2149, -0.3415, 0.1955, 0.2518, -0.1525, 0.0429,
0.4573, -0.556, -0.2508, 0.0437, 0.0459, 0.13, 0.104, 8e-04,
-0.5053, -0.4061, -0.2273, 0.1871, 0.1646, -0.0224, 0.1323,
-0.1473, -0.0917, -0.214, 0.0844, -0.4498, 0.0313, 0.0385,
-0.1485, -0.0553)), .Names = c("phenelzine", "denatonium.benzoate",
"triamterene", "talampicillin", "triamcinolone", "sulfaphenazole",
"procyclidine", "pentoxifylline", "suloctidil", "etacrynic.acid",
"diphenylpyraline", "carbenoxolone", "arecoline", "chenodeoxycholic.acid",
"torasemide", "troglitazone", "mepenzolate.bromide", "megestrol",
"dexpropranolol", "nipecotic.acid", "omeprazole", "etanidazole",
"acenocoumarol", "dacarbazine", "diphemanil.metilsulfate", "meprylcaine",
"mevalolactone", "buspirone", "sulfafurazole", "ciclopirox"), row.names = c("200665_s_at",
"201125_s_at", "201150_s_at", "201162_at", "201560_at", "202133_at",
"202435_s_at", "202436_s_at", "202766_s_at", "203083_at", "203238_s_at",
"203640_at", "204223_at", "204468_s_at", "204589_at", "204619_s_at",
"205381_at", "205422_s_at", "205713_s_at", "205941_s_at", "208626_s_at",
"209581_at", "209747_at", "209875_s_at", "209894_at", "210004_at",
"210078_s_at", "210495_x_at", "210511_s_at", "210517_s_at", "210809_s_at",
"211071_s_at", "211597_s_at", "211709_s_at", "211719_x_at", "212464_s_at",
"212488_at", "212667_at", "213125_at", "213351_s_at", "213352_at",
"213413_at", "213656_s_at", "214770_at", "214927_at", "216442_x_at",
"217428_s_at", "217949_s_at", "218718_at", "219054_at", "219087_at",
"219179_at", "219922_s_at", "222379_at", "37022_at"), class = "data.frame")
I want to draw a heatmap of all the columns, based only on those rows that have a mean higher than 0.02.
What I do is as follows:
# Add the row names as a column, then melt to long format; the plot below
# reads the columns rowname, variable and value from the result.
# NOTE(review): melt() is presumably reshape2::melt(); no library() call for it is shown.
tab <- melt(transform(df, rowname = row.names(df)))
# Row means of df sorted in ascending order, kept as a one-column data frame.
t<- data.frame(sort(rowMeans(df)))
# Prints the rows whose mean exceeds 0.02 (the result is not assigned).
t[t[,1] > 0.02,, drop=FALSE]
library(ggplot2)
# Heatmap restricted to the rows whose mean exceeds 0.02.
ggplot(subset(tab, rowname %in% rownames(t)[t > 0.02]),
aes(x = rowname, y = variable, fill = value)) +
geom_tile() +
scale_fill_gradient2(high="red",mid="white",low="blue") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
But it does not do the job: it orders the x axis as it wants rather than by the row means. For example, I want the x axis to go from 0.02 to 0.05,
so that the order looks like
204619_s_at 0.02055667
211709_s_at 0.02357333
216442_x_at 0.02364000
201162_at 0.02389667
217949_s_at 0.03153333
201150_s_at 0.03251000
204468_s_at 0.03279000
211719_x_at 0.03447667
219922_s_at 0.03968667
212667_at 0.04798667
212488_at 0.04833333
209875_s_at 0.05173333
203083_at 0.05493667
210495_x_at 0.05858333
219179_at 0.05932667
on the axis, and not what it currently shows.
You can try
library(ggplot2)
# Names of the rows whose mean exceeds 0.02. `t` is already sorted by mean,
# so this vector is in ascending-mean order.
rn <- rownames(t)[t[, 1] > 0.02]
# Keep only the melted rows that belong to those row names.
tab1 <- tab[tab$rowname %in% rn, ]
# Fix the factor levels to the sorted names so the x axis follows that order.
tab1$rowname <- factor(tab1$rowname, levels = rn)
ggplot(data = tab1, mapping = aes(x = rowname, y = variable, fill = value)) +
  geom_tile() +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
Or keeping most of the steps within the %>%
library(dplyr)
library(tidyr)
library(ggplot2)
# Same heatmap built in one pipeline. `rn` (the row names ordered by mean)
# must already exist; it supplies the factor levels that order the x axis.
data.frame(rowname = row.names(df)) %>%
  bind_cols(df) %>%
  # Row means over every column except the first (rowname).
  filter(rowMeans(.[-1]) > 0.02) %>%
  gather(key = variable, value = value, -rowname) %>%
  mutate(rowname = factor(rowname, levels = rn)) %>%
  ggplot(mapping = aes(x = rowname, y = variable, fill = value)) +
  geom_tile() +
  scale_fill_gradient2(low = 'blue', mid = 'white', high = 'red') +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
  labs(x = 'x axis', y = 'y axis')

how to select a specific numbers of rows with multiple condition?

I have a data frame like below
df<- structure(list(phenelzine = c(-0.0269, 0.0217, 0.4609, -0.0011,
0.0016, -0.0334, 0.5159, 0.4687, -0.5715, -0.1466, 0.1096, 0.1006,
-0.1021, 0.0093, -0.2616), denatonium.benzoate = c(-0.1734, -0.2142,
0.2142, -0.1376, -0.0129, 0.0254, 0.06, 0.1768, 0.2295, 0.1772,
0.1978, -0.0556, 0.0971, 0.0979, 0.2073), triamterene = c(-0.0253,
0.2896, 0.1606, -0.086, -0.1886, 0.0355, 0.0552, 0.0733, 0.0131,
0.6887, -0.0161, -0.3962, 0.1294, 0.0316, -0.1851), talampicillin = c(-0.1761,
-0.0355, 0.0295, 0.1979, 0.0545, 0.0673, -0.0797, -0.1566, 0.0496,
0.2623, -0.197, 0.0192, -0.2673, 0.0687, -0.0058), triamcinolone = c(0.0115,
0.3329, -0.0752, 0.2784, -0.5543, 0.0139, -0.6692, -0.4599, -4e-04,
0.0115, 0.0624, -0.2127, 0.1339, 0.1186, -0.0732), sulfaphenazole = c(-0.0886,
0.1081, 0.2002, 0.0625, 0.0403, 0.0256, -0.0074, -0.0678, -0.0393,
0.3201, 0.4213, -0.3058, -0.1228, 0.0797, 0.0591), procyclidine = c(0.03,
0.0372, 0.335, 0.1666, -0.0048, -0.1153, 0.1449, -0.0488, 0.1038,
-0.0245, 0.1008, 0.0194, -0.0315, -0.0325, 0.0161), pentoxifylline = c(0,
0.3439, -0.0614, 0.0181, -0.0149, -0.0216, -0.1211, -0.1816,
-0.0204, 0.1023, -0.0059, -0.008, -0.1121, 0.2029, 0.052), suloctidil = c(-0.0772,
-0.1651, -0.0543, -0.037, 0.2182, -0.1884, 0.1866, 0.2013, -0.3388,
0.0493, 0.0223, 0.3441, 0.0887, -0.1477, -0.0719), etacrynic.acid = c(0.1017,
-0.4238, -0.0089, -0.1116, 0.1265, -0.0529, -0.121, -0.0243,
-0.0033, -1e-04, 0.0952, 0.4075, -0.3078, 0.1265, -0.0612), diphenylpyraline = c(0.0495,
-0.1318, -0.0723, -0.0485, 0.0175, -0.1585, 0.0054, 0.0565, -0.0637,
0.0366, 0.0155, 0.1003, -0.11, -0.129, 0.0673), carbenoxolone = c(0.0769,
0.0987, 0.1992, -0.016, -0.0276, 0.2596, -0.0086, -0.0038, -0.2432,
-0.0237, 0.0696, -0.1436, 0.1942, -0.0411, -0.0948), arecoline = c(0.0185,
0.1999, -0.0313, -0.1868, -0.0626, 0.0298, 0.03, 0, -0.2209,
0.0101, -0.0693, -0.1656, -0.1048, 0.2098, 0.0393), chenodeoxycholic.acid = c(-0.0825,
0.2098, -0.0911, -0.0287, 0.0473, 0.1348, -0.06, -0.0285, -0.1473,
-0.0162, -0.0863, 0.0652, -0.0256, 0.1345, -0.1175), torasemide = c(0.0987,
0.1829, 0.1693, -0.0482, 0.0534, 0.1624, 0.0047, -0.1721, 0.248,
0.1715, -0.2109, -0.0909, 0.0513, -0.1358, 0.6297), troglitazone = c(-0.1513,
0.2568, 0.1377, 0.2474, -0.0359, -0.0859, 0.2014, 0.096, 0.0751,
0.1304, -0.1376, -0.1718, 0.0063, 0.2732, -0.2237), mepenzolate.bromide = c(0.0392,
-0.0041, -0.0531, -0.1213, -0.0919, 0.3096, 0.0545, 0.0922, 0.112,
0.0606, -0.25, -0.0086, 0.2729, 0.1307, 0.2124), megestrol = c(-0.1106,
0.2547, -0.0488, 0, -0.0406, -0.0826, -0.1795, -0.3099, -0.0316,
0.1563, 0, -0.1189, -0.0343, -0.0482, 0.8041), dexpropranolol = c(-0.1942,
-0.0051, 0.0561, 0.0166, 0.0029, -0.1707, 0.2173, 0.1178, -0.0683,
-0.2903, -0.2874, 0.1764, -0.1135, 0.2574, 0), nipecotic.acid = c(0.2653,
-0.3115, -0.0069, 0.118, -0.0998, -0.0289, 0.1051, 0.0132, -0.0028,
-0.1072, -0.1888, -0.0312, 0.3868, -0.0019, -0.1878), omeprazole = c(0.0272,
-0.3796, -0.0125, 0.0585, -0.1073, -0.0761, 0.3838, 0.5699, 0.0027,
-0.3822, 0.1132, -0.1588, 0.2396, -0.217, 0.0434), etanidazole = c(-0.1036,
0.0281, 0.0039, 0.0123, -0.2305, -0.0542, 0.0485, -0.0686, -0.1829,
0.2637, -0.1158, -0.2029, 0.104, 0.3169, 0.1387), acenocoumarol = c(-0.0087,
0.2005, -0.1051, -0.067, -0.0655, -0.0467, -0.4081, -0.1085,
-0.0556, 0.0145, -0.0865, 0.1961, -0.1842, -0.0398, -0.0944),
dacarbazine = c(-0.0065, -0.7082, 0.0979, 0.0048, -0.1409,
0.0541, -0.0793, -0.1102, -0.1513, -0.002, -0.0898, 0.0443,
0.0686, -0.1122, -0.0239), diphemanil.metilsulfate = c(-0.0312,
-0.2611, -0.0098, -0.216, 0.24, 0.0909, 0.1171, -0.156, 0.0986,
-0.0946, 0.1027, -0.1465, 0.0611, 0.0637, 0.0677), meprylcaine = c(-0.0382,
-0.1127, 0, 0, 0.0964, 0.0097, -0.0231, -0.0747, 0.2147,
0.0618, -0.2313, 0.0878, -0.3681, 0.0497, -0.0105), mevalolactone = c(-0.0393,
-0.174, -0.1632, 0.4181, -0.038, 0.1698, -0.1771, -0.168,
-0.3952, 0.1739, 0.1304, 0, -0.0317, -0.1822, -0.2021), buspirone = c(-0.1276,
-0.0097, -0.1365, 0.0801, -0.0499, 0.0499, 0.2245, 0.0739,
-0.0485, -0.0042, -0.2356, -0.1325, 0, 0, -0.0756), sulfafurazole = c(0.0336,
0.0083, 0.0309, 0.1891, -0.0991, 0.0259, 0.084, 0.0888, 0.0672,
-0.0859, -0.0196, -0.4085, 0.0676, 0.0357, -0.0486), ciclopirox = c(0.0958,
0.2534, -0.1897, -0.0457, 0.1704, -0.2632, -0.2596, -0.1076,
0.3008, 0.1348, -0.0817, 0.2929, -0.2249, -0.1357, -0.0498
)), .Names = c("phenelzine", "denatonium.benzoate", "triamterene",
"talampicillin", "triamcinolone", "sulfaphenazole", "procyclidine",
"pentoxifylline", "suloctidil", "etacrynic.acid", "diphenylpyraline",
"carbenoxolone", "arecoline", "chenodeoxycholic.acid", "torasemide",
"troglitazone", "mepenzolate.bromide", "megestrol", "dexpropranolol",
"nipecotic.acid", "omeprazole", "etanidazole", "acenocoumarol",
"dacarbazine", "diphemanil.metilsulfate", "meprylcaine", "mevalolactone",
"buspirone", "sulfafurazole", "ciclopirox"), row.names = c("200665_s_at",
"201125_s_at", "201150_s_at", "201162_at", "201560_at", "202133_at",
"202435_s_at", "202436_s_at", "202766_s_at", "203083_at", "203238_s_at",
"203640_at", "204223_at", "204468_s_at", "204589_at"), class = "data.frame")
What I want to do is, after I melt the data, keep only those rows with t > 0.02, but not all of them: I want to be able to select the last 10, or the last 5, or any specific number I like. This should apply to all variables and not only one variable.
What I do is as follows:
# Row means of df, sorted in ascending order, as a one-column data frame
t<- data.frame(sort(rowMeans(df)))
# Names of the rows whose mean is higher than 0.02 (still in ascending-mean order)
rn <- rownames(t[t[,1] > 0.02,, drop=FALSE])
# Melt the data frame to long format, carrying the row names along as a column
# NOTE(review): melt() is presumably reshape2::melt(); no library() call for it is shown
tab <- melt(transform(df, rowname = row.names(df)))
# Keep only the melted rows whose row mean is higher than 0.02
tab1 <- subset(tab, rowname %in% rownames(t)[t > 0.02])
You can use tail() to get the last rows of a vector or a data.frame:
tail(tab1, n = 5) ## the last five rows of the melted, filtered data
tail(t, n = 5) ## the last five rows of the sorted row means (the five highest)

Resources