Analyze Data Frames In A List And Bind The Results - r

The data I have is a list of data frames. I want to loop through each of the data frames to find:
If there are columns with duplicate column names. If yes, then I
want to merge them by using rbind() in a parent data frame
called output and remove all other columns of such data frames.
I also want to check if there is any data frame that doesn't have duplicate
columns. If yes, then remove all the columns except the first one. Then
cbind() with output such that if rows are more or less than what was
created by (1) then zero should be added.
I tried using lapply(), but my logic for handling both of the above cases in one go isn't working. Any suggestions would help.
output <- lapply(data, function(x) {
})
Input Data List Containing Data Frames
list(A = structure(list(`A-DIODE` = c(1.2, 0.4), `A-DIODE` = c(1.3,
0.6)), row.names = c(NA, -2L), class = "data.frame"), B = structure(list(
`B-DIODE` = c(1.4, 0.8), `B-ACC1` = c(1.5, 1), `B-ACC2` = c(1.6,
1.2), `B-ANA0` = c(1.7, 1.4), `B-ANA1` = c(1.8, 1.6), `B-BRICKID` = c(1.9,
1.8), `B-CC0` = c(2L, 2L), `B-CC1` = c(2.1, 2.2), `B-DIGDN` = c(2.2,
2.4), `B-DIGDP` = c(2.3, 2.6), `B-DN1` = c(2.4, 2.8), `B-DN2` = c(2.5,
3), `B-DP1` = c(2.6, 3.2), `B-DP2` = c(2.7, 3.4), `B-SCL` = c(2.8,
3.6), `B-SDA` = c(2.9, 3.8), `B-USB0DN` = 3:4, `B-USB0DP` = c(3.1,
4.2), `B-USB1DN` = c(3.2, 4.4), `B-USB1DP` = c(3.3, 4.6),
`B-ACC1` = c(3.4, 4.8), `B-ACC2` = c(3.5, 5), `B-ANA0` = c(3.6,
5.2), `B-ANA1` = c(3.7, 5.4), `B-BRICKID` = c(3.8, 5.6),
`B-CC0` = c(3.9, 5.8), `B-CC1` = c(4L, 6L), `B-DIGDN` = c(4.1,
6.2), `B-DIGDP` = c(4.2, 6.4), `B-DN1` = c(4.3, 6.6), `B-DN2` = c(4.4,
6.8), `B-DP1` = c(4.5, 7), `B-DP2` = c(4.6, 7.2), `B-SCL` = c(4.7,
7.4), `B-SDA` = c(4.8, 7.6), `B-USB0DN` = c(4.9, 7.8), `B-USB0DP` = c(5L,
8L), `B-USB1DN` = c(5.1, 8.2), `B-USB1DP` = c(5.2, 8.4),
`B-NA` = c(5.3, 8.6), `B-ACC2PWRLKG_0v4` = c(5.4, 8.8), `B-ACC2PWRLKG_0v4` = c(5.5,
9), `B-P_IN_Leak` = c(5.6, 9.2)), row.names = c(NA, -2L), class = "data.frame"))
Desired Output
> A
A-DIODE
1.2
0.4
1.3
0.6
> B
B-DIODE
1.4
0.8
> Output
A-DIODE B-DIODE
1.2 1.4
0.4 0.8
1.3 0
0.6 0

Loop through the list, create a condition with if/else that checks the length of the unique column names and returns the unlisted single data.frame when there is only a single unique column or else return the first column. Finally, with cbind.fill (from rowr) bind the list of data.frame columns together, specifying the fill as 0
# Reduce every data frame in the list to a single column:
#   * all columns share one name -> stack their values into one column
#     named after the first column
#   * otherwise                  -> keep only the first column
lst2 <- lapply(lst1, function(frame) {
  distinct_names <- unique(names(frame))
  if (length(distinct_names) != 1) {
    return(frame[1])
  }
  stacked <- data.frame(unlist(frame))
  setNames(stacked, names(frame)[1])
})
# Bind the one-column data frames side by side, passing fill = 0 for the
# shorter ones.
do.call(rowr::cbind.fill, c(lst2, list(fill = 0)))
# A.DIODE B.DIODE
#1 1.2 1.4
#2 0.4 0.8
#3 1.3 0.0
#4 0.6 0.0

Related

ggplot2 Bubble Chart panel

I am working on trying to plot a bubble chart with multiple data variables on the same chart. I will try my best to describe what I would like the final output to appear and attempts from reading online and some questions posted on the forum.
I am just getting familiar with ggplot but if there is solution with another package, I am open to it.
C1 within circles represents color 1, C2 within squares represents color 2, C3 within triangles represents color 3.
I would think these will be 8 independent panels since the y-axis is different for each one.
Really appreciate the help.
The final outputs I would prefer is bubbles that have different size and color. I think individual panels of 8 x 3 would be ideal because the y axis for each panel can be changed. But I cannot figure out how to structure the data to allow for creating 8 x 3 panels with individual y axes and the symbol size. Thanks Closest I came across: R ggplot bubble chart localised bubbles display without in single chart
Preferred output:
library (tidyverse)
library (reshape2)
library(ggplot2)
data.tb <- structure(list(Name = structure(c(7L, 8L, 9L, 1L, 10L, 2L, 11L,
3L, 12L, 13L, 4L, 14L, 5L, 15L, 6L), .Label = c("avg_row3", "avg_row4",
"avg_row5", "avg_row6_7", "avg_row8", "avg_row9", "row1", "row2",
"row3", "row4", "row5", "row6", "row7", "row8", "row9"), class = "factor"),
col1 = c(6333, 8847, 1495292, 169, 28994.1, 3.3, 12857.6,
1.5, 107154, 230344, 38.15, 837364, 132.8, 1226140, 176.74
), col2 = c(20347, 40594, 6229886, 153.5, 122769.8, 3, 44653.4,
1.1, 362972, 944725, 32.21, 3488736, 118.16, 5108506, 158.06
), total_col1_2 = c(23301, 49441, 7725178, 156.3, 151763.9,
3.1, 57511, 1.2, 470126, 1175069, 33.28, 4326100, 120.78,
6334646, 161.4), col3 = c(3313, 4668, 751824.1, 161.1, 14689.2,
3.2, 6784.2, 1.5, 107154, 230344, 72.3, 421021, 162.49, 616496,
204.37), col4 = c(10220, 20940, 3053539.5, 145.8, 60675.8,
2.9, 23034, 1.1, 362972, 944725, 62.45, 1709982, 144.11,
2503902, 182.02), total_col3_4 = c(13533, 25608, 3805363.6,
148.6, 75365, 2.9, 29818.2, 1.2, 470126, 1175069, 64.25,
2131004, 147.46, 3120398, 186.1), col5 = c(3020, 4179, 743468.1,
177.9, 14304.9, 3.4, 6073.5, 1.5, 0, 0, 0, 416342, 99.63,
609644, 145.88), col6 = c(10127, 19654, 3176346.3, 161.6,
62094, 3.2, 21619.4, 1.1, 0, 0, 0, 1778754, 90.5, 2604604,
132.52), total_col5_6 = c(13147, 23833, 3919814, 164.5, 76398.9,
3.2, 27692.9, 1.2, 0, 0, 0, 2195096, 92.1, 3214248, 134.87
)), class = "data.frame", row.names = c(NA, -15L))
# Reshape to long format, keeping Name as the identifier column.
data_long.tb <- melt(data.tb, id.vars = "Name")

# Tag every row with the group of three source columns it came from.
data_long.tb <- mutate(
  data_long.tb,
  group_num = case_when(
    variable %in% c("col1", "col2", "total_col1_2") ~ "group1",
    variable %in% c("col3", "col4", "total_col3_4") ~ "group2",
    variable %in% c("col5", "col6", "total_col5_6") ~ "group3"
  )
)
My attempts:
# Attempt 1: every variable in a single panel; jittered points are coloured
# by variable and sized by value.
theme_set(theme_bw())
ggplot(data = data_long.tb, mapping = aes(x = variable, y = value)) +
  geom_jitter(mapping = aes(col = variable, size = value))

# Attempt 2: the same plot, split into one panel per group_num.
theme_set(theme_bw())
ggplot(data = data_long.tb, mapping = aes(x = variable, y = value)) +
  geom_jitter(mapping = aes(col = variable, size = value)) +
  facet_wrap(~ group_num)
Not sure this is quite it but hopefully closer.
library(tidyverse)
df %>%
  # Long format for ggplot: every column except Name is pivoted.
  pivot_longer(cols = -Name) %>%
  # Order Name by first appearance, reversed so the first one is at the bottom.
  mutate(Name = fct_rev(fct_inorder(Name))) %>%
  group_by(Name) %>%
  mutate(
    # Within each Name, x_pos counts up in row order.
    x_pos = row_number(),
    # The source column decides the marker shape.
    shape = case_when(
      name %in% c("col1", "col2", "total_col1_2") ~ "circle",
      name %in% c("col3", "col4", "total_col3_4") ~ "square",
      TRUE ~ "triangle"
    )
  ) %>%
  ungroup() %>%
  ggplot(
    mapping = aes(
      x = x_pos, y = Name, size = value, shape = shape, color = shape
    )
  ) +
  geom_point() +
  scale_size_area() +
  scale_color_manual(
    values = c(
      "circle" = "red",
      "square" = "forestgreen",
      "triangle" = "purple"
    )
  ) +
  scale_shape_manual(
    values = c("circle" = 19, "square" = 15, "triangle" = 17)
  ) +
  theme_minimal()
Sample data
df <- structure(list(Name = c("row1", "row2", "row3", "avg_row3", "row4",
"avg_row4", "row5", "avg_row5", "row6", "row7", "avg_row6_7",
"row8", "avg_row8", "row9", "avg_row9"), col1 = c(6333, 8847,
1495292, 169, 28994.1, 3.3, 12857.6, 1.5, 107154, 230344, 38.15,
837364, 132.8, 1226140, 176.74), col2 = c(20347, 40594, 6229886,
153.5, 122769.8, 3, 44653.4, 1.1, 362972, 944725, 32.21, 3488736,
118.16, 5108506, 158.06), total_col1_2 = c(23301, 49441, 7725178,
156.3, 151763.9, 3.1, 57511, 1.2, 470126, 1175069, 33.28, 4326100,
120.78, 6334646, 161.4), col3 = c(3313, 4668, 751824.1, 161.1,
14689.2, 3.2, 6784.2, 1.5, 107154, 230344, 72.3, 421021, 162.49,
616496, 204.37), col4 = c(10220, 20940, 3053539.5, 145.8, 60675.8,
2.9, 23034, 1.1, 362972, 944725, 62.45, 1709982, 144.11, 2503902,
182.02), total_col3_4 = c(13533, 25608, 3805363.6, 148.6, 75365,
2.9, 29818.2, 1.2, 470126, 1175069, 64.25, 2131004, 147.46, 3120398,
186.1), col5 = c(3020, 4179, 743468.1, 177.9, 14304.9, 3.4, 6073.5,
1.5, 0, 0, 0, 416342, 99.63, 609644, 145.88), col6 = c(10127,
19654, 3176346.3, 161.6, 62094, 3.2, 21619.4, 1.1, 0, 0, 0, 1778754,
90.5, 2604604, 132.52), total_col5_6 = c(13147, 23833, 3919814,
164.5, 76398.9, 3.2, 27692.9, 1.2, 0, 0, 0, 2195096, 92.1, 3214248,
134.87)), row.names = c(NA, -15L), class = c("tbl_df", "tbl",
"data.frame"))

Is there a way I can move my first column in this excel dataset to be the column that specifies the numbers 1 to 8 [duplicate]

This question already has answers here:
Convert the values in a column into row names in an existing data frame
(5 answers)
Closed 1 year ago.
I'd like to replace the first data column, named "Especies", and the species names below it (i.e. "Strix_varia", "Strix_rufipes", ...) with the numbers 1 to 8 enclosed in red in the linked image.
I'm working with Moran's I, and having the column "Especies" as data gives me incorrect results.
Any help will be great!
Thanks!
Heres my dput():
structure(list(Especies = c("Strix_varia", "Strix_rufipes", "Strix_occidentalis",
"Strix_aluco", "Strix_uralensis", "Strix_woodfordii", "Strix_leptogrammica",
"Strix_nebulosa"), Notas.segundo = c(2.9, 4.3, 2.9, 1.3, 1, 3,
3.1, 1.1), Notas.llamado = c(6.3, 13.5, 12.2, 5, 3, 6, 4, 9.3
), Duracion.llamado = c(2.9, 2.9, 5.3, 4, 4.5, 1.6, 1.5, 7.3),
Frecuencia.minima = c(149.4, 157.4, 167, 314.7, 75.3, 149.3,
212.2, 147.5), Frecuencia.maxima = c(518.6, 564.8, 594.3,
846.2, 394.9, 438.4, 396.8, 263.8), Ancho.banda = c(369.1,
407.3, 427.2, 531.5, 319.6, 289, 184.6, 116.3), Frecuencia.central = c(522.1,
551.8, 589.9, 844, 385.9, 429, 374.9, 255.2)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -8L))
Assuming that in your table, you have one row per species and species do not repeat, simple data$Especies = seq_along(data$Especies) will do the job. I would suggest keeping the original table so you remember what code belongs to the species, such as with data$id = seq_along(data$Especies).
# Sample table: one row per species, seven numeric measurements each.
data <- structure(list(Especies = c("Strix_varia", "Strix_rufipes", "Strix_occidentalis",
"Strix_aluco", "Strix_uralensis", "Strix_woodfordii", "Strix_leptogrammica",
"Strix_nebulosa"), Notas.segundo = c(2.9, 4.3, 2.9, 1.3, 1, 3,
3.1, 1.1), Notas.llamado = c(6.3, 13.5, 12.2, 5, 3, 6, 4, 9.3
), Duracion.llamado = c(2.9, 2.9, 5.3, 4, 4.5, 1.6, 1.5, 7.3),
Frecuencia.minima = c(149.4, 157.4, 167, 314.7, 75.3, 149.3,
212.2, 147.5), Frecuencia.maxima = c(518.6, 564.8, 594.3,
846.2, 394.9, 438.4, 396.8, 263.8), Ancho.banda = c(369.1,
407.3, 427.2, 531.5, 319.6, 289, 184.6, 116.3), Frecuencia.central = c(522.1,
551.8, 589.9, 844, 385.9, 429, 374.9, 255.2)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -8L))

# Option 1 - keep the species names and add the row number as a new `id` column.
data$id <- seq_along(data$Especies)

# Option 2 - overwrite the species names with the row numbers.
data$Especies <- seq_along(data$Especies)
If the species names are not unique and can repeat, seq_along() gives every row its own id, so two rows of the same species will get different ids. If that is not what you want, you can use factor():
# factor() maps equal species names to the same level, so repeated species
# share one numeric id.
data$id <- as.numeric(factor(data$Especies))
Alternatively, you can create an encoding of your own by creating a named vector and use it to translate species names to id:
# Build a lookup table species name -> id and use it to translate the column.
# The lookup vector is not called `names`, so base::names() is not masked.
species <- unique(data$Especies)
coding <- setNames(seq_along(species), species)
# Index by character so the lookup always goes through the element names;
# a factor or integer column would otherwise be used as positions.
data$id <- coding[as.character(data$Especies)]

How To Row Combine And Drop Columns In A List of Data Frames Based On Conditions

Below is the example data which is list containing different data frames. I want to get one data frame out of it based on following two conditions.
First:
For each data frame in the list, starting at column 1, keep rbind()ing columns that have exactly the same column name as the previous one. The moment a different column name is encountered, drop that column and all the columns after it, through the last one.
For example: If column 1 is named Banana, then column 2 is named Banana, but column 3 is Orange and then again column 4 is Banana. Then column 1 and 2 will rbind() and column 3 and 4 will be dropped.
Another example: If column 1 is named Banana then column 2 is named Orange, but column 3 is named Banana, then only column 1 will survive as starting column 2 the column name is different and I don't care about column 3 name even though it's same as column 1.
Second:
After I run the list of data frame through above condition, then I want to combine all the data frames in the list to get one data frame which I think can be achieved using following code.
Here, lst2 is output of first condition.
do.call(rowr::cbind.fill, c(lst2, list(fill = 0)))
Above code credit #akrun. Any suggestions will be helpful.
Sample Data
list(A = structure(list(`A-DIODE` = c(1.2, 0.4), `A-DIODE` = c(1.3,
0.6)), row.names = c(NA, -2L), class = "data.frame"), B = structure(list(
`B-DIODE` = c(1.4, 0.8), `B-ACC1` = c(1.5, 1), `B-ACC2` = c(1.6,
1.2), `B-ANA0` = c(1.7, 1.4), `B-ANA1` = c(1.8, 1.6), `B-BRICKID` = c(1.9,
1.8), `B-CC0` = c(2L, 2L), `B-CC1` = c(2.1, 2.2), `B-DIGDN` = c(2.2,
2.4), `B-DIGDP` = c(2.3, 2.6), `B-DN1` = c(2.4, 2.8), `B-DN2` = c(2.5,
3), `B-DP1` = c(2.6, 3.2), `B-DP2` = c(2.7, 3.4), `B-SCL` = c(2.8,
3.6), `B-SDA` = c(2.9, 3.8), `B-USB0DN` = 3:4, `B-USB0DP` = c(3.1,
4.2), `B-USB1DN` = c(3.2, 4.4), `B-USB1DP` = c(3.3, 4.6),
`B-ACC1` = c(3.4, 4.8), `B-ACC2` = c(3.5, 5), `B-ANA0` = c(3.6,
5.2), `B-ANA1` = c(3.7, 5.4), `B-BRICKID` = c(3.8, 5.6),
`B-CC0` = c(3.9, 5.8), `B-CC1` = c(4L, 6L), `B-DIGDN` = c(4.1,
6.2), `B-DIGDP` = c(4.2, 6.4), `B-DN1` = c(4.3, 6.6), `B-DN2` = c(4.4,
6.8), `B-DP1` = c(4.5, 7), `B-DP2` = c(4.6, 7.2), `B-SCL` = c(4.7,
7.4), `B-SDA` = c(4.8, 7.6), `B-USB0DN` = c(4.9, 7.8), `B-USB0DP` = c(5L,
8L), `B-USB1DN` = c(5.1, 8.2), `B-USB1DP` = c(5.2, 8.4),
`B-NA` = c(5.3, 8.6), `B-ACC2PWRLKG_0v4` = c(5.4, 8.8), `B-ACC2PWRLKG_0v4` = c(5.5,
9), `B-P_IN_Leak` = c(5.6, 9.2)), row.names = c(NA, -2L), class = "data.frame"))
Update 1
After #ØysteinS answer I realized that there should be a third condition too:
Third:
If there is only a single column in one of the data frames in the list, then only that column should be added to the parent data frame.
This should do the job:
data <- list(A = structure(list(`A-DIODE` = c(1.2, 0.4), `A-DIODE` = c(1.3,
0.6)), row.names = c(NA, -2L), class = "data.frame"), B = structure(list(
`B-DIODE` = c(1.4, 0.8), `B-ACC1` = c(1.5, 1), `B-ACC2` = c(1.6,
1.2), `B-ANA0` = c(1.7, 1.4), `B-ANA1` = c(1.8, 1.6), `B-BRICKID` = c(1.9,
1.8), `B-CC0` = c(2L, 2L), `B-CC1` = c(2.1, 2.2), `B-DIGDN` = c(2.2,
2.4), `B-DIGDP` = c(2.3, 2.6), `B-DN1` = c(2.4, 2.8), `B-DN2` = c(2.5,
3), `B-DP1` = c(2.6, 3.2), `B-DP2` = c(2.7, 3.4), `B-SCL` = c(2.8,
3.6), `B-SDA` = c(2.9, 3.8), `B-USB0DN` = 3:4, `B-USB0DP` = c(3.1,
4.2), `B-USB1DN` = c(3.2, 4.4), `B-USB1DP` = c(3.3, 4.6),
`B-ACC1` = c(3.4, 4.8), `B-ACC2` = c(3.5, 5), `B-ANA0` = c(3.6,
5.2), `B-ANA1` = c(3.7, 5.4), `B-BRICKID` = c(3.8, 5.6),
`B-CC0` = c(3.9, 5.8), `B-CC1` = c(4L, 6L), `B-DIGDN` = c(4.1,
6.2), `B-DIGDP` = c(4.2, 6.4), `B-DN1` = c(4.3, 6.6), `B-DN2` = c(4.4,
6.8), `B-DP1` = c(4.5, 7), `B-DP2` = c(4.6, 7.2), `B-SCL` = c(4.7,
7.4), `B-SDA` = c(4.8, 7.6), `B-USB0DN` = c(4.9, 7.8), `B-USB0DP` = c(5L,
8L), `B-USB1DN` = c(5.1, 8.2), `B-USB1DP` = c(5.2, 8.4),
`B-NA` = c(5.3, 8.6), `B-ACC2PWRLKG_0v4` = c(5.4, 8.8), `B-ACC2PWRLKG_0v4` = c(5.5,
9), `B-P_IN_Leak` = c(5.6, 9.2)), row.names = c(NA, -2L), class = "data.frame"))

# Collapse one data frame to a single column.
#
# Starting at column 1, every following column is row-bound underneath for as
# long as its name equals the name of the first column; the first column with
# a different name and everything after it is dropped.
#
# df: a data frame.
# Returns a one-column data frame (same column name as df's first column).
# A data frame without any columns is returned unchanged instead of failing
# with "undefined columns selected".
stack_leading_duplicates <- function(df) {
  if (ncol(df) == 0L) {
    return(df)
  }
  same_as_first <- names(df) == names(df)[[1L]]
  # Length of the leading run of matching names; column 1 always matches
  # itself, so this is at least 1.
  run_length <- match(FALSE, same_as_first,
                      nomatch = length(same_as_first) + 1L) - 1L
  pieces <- lapply(seq_len(run_length), function(i) df[, i, drop = FALSE])
  # One rbind() call over all pieces instead of growing the result in a loop.
  do.call(rbind, pieces)
}

# Use lapply to apply the same function to each data frame in the list.
combined_frames <- lapply(data, stack_leading_duplicates)
# The code suggested in the question works as expected for the last task:
# bind the one-column data frames side by side, passing fill = 0.
do.call(what = rowr::cbind.fill, args = c(combined_frames, list(fill = 0)))
#> A.DIODE B.DIODE
#> 1 1.2 1.4
#> 2 0.4 0.8
#> 3 1.3 0.0
#> 4 0.6 0.0
One easy option would be to loop through the list, get the run-length id of the column names, extract only the columns whose id equals 1, unlist them, convert the result to a data.frame named after the first column, and then bind the list of data.frames together with cbind.fill
library(data.table)
# For each data frame keep only the leading run of identically named columns
# (run-length id 1), stack their values into one column and name that column
# after the first column.
lst1 <- lapply(data, function(frame) {
  in_first_run <- rleid(names(frame)) == 1
  stacked <- data.frame(unlist(frame[in_first_run]))
  setNames(stacked, names(frame)[1])
})
# Bind the one-column data frames side by side, passing fill = 0.
do.call(rowr::cbind.fill, c(lst1, list(fill = 0)))
# A.DIODE B.DIODE
#1 1.2 1.4
#2 0.4 0.8
#3 1.3 0.0
#4 0.6 0.0

How To Change Column Names of Data Frames In A List

I have a list of data frames and want to change part of column name of each of the sub data frame this list holds.
list(A = structure(list(`A-DIODE` = c(1.2, 0.4), `A-DIODE` = c(1.3,
0.6)), row.names = c(NA, -2L), class = "data.frame"), B = structure(list(
`B-DIODE` = c(1.4, 0.8), `B-ACC1` = c(1.5, 1), `B-ACC2` = c(1.6,
1.2), `B-ANA0` = c(1.7, 1.4), `B-ANA1` = c(1.8, 1.6), `B-BRICKID` = c(1.9,
1.8), `B-CC0` = c(2L, 2L), `B-CC1` = c(2.1, 2.2), `B-DIGDN` = c(2.2,
2.4), `B-DIGDP` = c(2.3, 2.6), `B-DN1` = c(2.4, 2.8), `B-DN2` = c(2.5,
3), `B-DP1` = c(2.6, 3.2), `B-DP2` = c(2.7, 3.4), `B-SCL` = c(2.8,
3.6), `B-SDA` = c(2.9, 3.8), `B-USB0DN` = 3:4, `B-USB0DP` = c(3.1,
4.2), `B-USB1DN` = c(3.2, 4.4), `B-USB1DP` = c(3.3, 4.6),
`B-ACC1` = c(3.4, 4.8), `B-ACC2` = c(3.5, 5), `B-ANA0` = c(3.6,
5.2), `B-ANA1` = c(3.7, 5.4), `B-BRICKID` = c(3.8, 5.6),
`B-CC0` = c(3.9, 5.8), `B-CC1` = c(4L, 6L), `B-DIGDN` = c(4.1,
6.2), `B-DIGDP` = c(4.2, 6.4), `B-DN1` = c(4.3, 6.6), `B-DN2` = c(4.4,
6.8), `B-DP1` = c(4.5, 7), `B-DP2` = c(4.6, 7.2), `B-SCL` = c(4.7,
7.4), `B-SDA` = c(4.8, 7.6), `B-USB0DN` = c(4.9, 7.8), `B-USB0DP` = c(5L,
8L), `B-USB1DN` = c(5.1, 8.2), `B-USB1DP` = c(5.2, 8.4),
`B-NA` = c(5.3, 8.6), `B-ACC2PWRLKG_0v4` = c(5.4, 8.8), `B-ACC2PWRLKG_0v4` = c(5.5,
9), `B-P_IN_Leak` = c(5.6, 9.2)), row.names = c(NA, -2L), class = "data.frame"))
I want to change A- in above data frame in a list to Z- and then B- in another data frame in the list to P-.
I tried the code below, but it doesn't seem to work. Any suggestions on how I can do this?
names(data$`A-DIODE`) <- "Z-DIODE"
names(data$`B-DIODE`) <- "P-DIODE"
....
....
You could use lapply() with a vectorized function from the stringi package to do the replacement.
library(stringi)
# Rename the columns of every data frame in the list: a leading "A-" becomes
# "Z-" and a leading "B-" becomes "P-".
lapply(x0, function(frame) {
  # With vectorize_all = FALSE every pattern/replacement pair is applied to
  # each column name, rather than pairing patterns with names element-wise.
  setNames(
    frame,
    stri_replace_all_regex(
      names(frame),
      pattern = c("^A-", "^B-"),
      replacement = c("Z-", "P-"),
      vectorize_all = FALSE
    )
  )
})
You can use the lapply function. lapply will allow you to change the names in each of dataframe in the list.
In your above example, lets name your list as data. A and B are the dataframes nested in your list. To change the column names for particular names in the dataframes in the list
# Rename two specific columns wherever they occur in the data frames of the
# list; data frames without those columns pass through with unchanged names.
data <- lapply(data, function(frame) {
  col_names <- names(frame)
  col_names[col_names %in% "A-DIODE"] <- "Z-DIODE"
  col_names[col_names %in% "B-DIODE"] <- "P-DIODE"
  names(frame) <- col_names
  frame
})

How to plot dynamically progressive plot properly in R

I have this dataframe called mydf. My code below plots the hybrid combination of plot for the efficiency in Y axis. What I want to do is replace the measurement in X axis for each sample combination (each line) to be represented by the measurement columns. So for efficiency1 I want it to be represented precisely by the values in measurement1 column instead of general 1 to 7 measurement I have in the code and I want to do this for all efficiency levels with their respective measurement columns. Can someone please help me achieve this goal.
mydf<-structure(list(sample_A = structure(c(1L, 2L, 2L, 2L, 3L, 4L), .Label = c("2568",
"2669", "2670", "2671", "2946", "LPH-001-10_AK1", "LPH-001-12_AK2",
"LPH-001-9"), class = "factor"), sample_B = structure(c(1L, 2L,
3L, 4L, 3L, 4L), .Label = c("2568", "2669", "2670", "2671", "2946",
"LPH-001-10_AK1", "LPH-001-12_AK2", "LPH-001-9"), class = "factor"),
efficiency1 = c(1.02, 0.964, 0.415, 0.422, 0.98, 0.986),
efficiency2 = c(1, 0.944, 0.395, 0.402, 0.96, 0.966), efficiency3 = c(0.9,
0.844, 0.295, 0.302, 0.86, 0.866), efficiency4 = c(0.32,
0.264, -0.285, -0.278, 0.28, 0.286), efficiency5 = c(0.02,
-0.0360000000000001, -0.585, -0.578, -0.0200000000000001,
-0.0140000000000001), efficiency6 = c(0.12, 0.0639999999999999,
-0.485, -0.478, 0.08, 0.086), efficiency7 = c(0.02, -0.036,
-0.585, -0.578, -0.02, -0.014), measurement1 = c(1, 1.2,
1, 1.3, 1.3, 1), measurement2 = c(2, 2.1, 2, 2.2, 2.3, 2),
measurement3 = c(3, 3.1, 3, 3.2, 3.3, 3), measurement4 = c(4,
4.1, 4, 4.2, 4.3, 4.1), measurement5 = c(5.1, 5.1, 4, 4.2,
4.3, 4.1), measurement6 = c(5.1, 6.1, 6, 6.2, 6.3, 6.1),
measurement7 = c(7.1, 7.1, 7, 7.2, 6.3, 7.1)), .Names = c("sample_A",
"sample_B", "efficiency1", "efficiency2", "efficiency3", "efficiency4",
"efficiency5", "efficiency6", "efficiency7", "measurement1",
"measurement2", "measurement3", "measurement4", "measurement5",
"measurement6", "measurement7"), row.names = c(NA, 6L), class = "data.frame")
Code I have:
# Draw one efficiency curve per row of `mydf`: the efficiency columns are
# plotted against their position 1..k, with a spline through the points.

# Column indices of the efficiency columns.
effCis <- grep("^efficiency", names(mydf))

# Axis limits: x spans the efficiency columns; y is the observed range widened
# outwards to multiples of 0.1.
xlim <- c(1, length(effCis))
ylim <- range(mydf[, effCis], na.rm = TRUE)
ylim[1L] <- floor(ylim[1L] / 0.1) * 0.1
ylim[2L] <- ceiling(ylim[2L] / 0.1) * 0.1
xticks <- seq_along(effCis)
yticks <- seq(ylim[1L], ylim[2L], 0.1)

# Empty canvas with a light grid, a bold zero line and hand-made axes.
plot(NA, xlim = xlim, ylim = ylim, xlab = "measurement", ylab = "efficiency",
     xaxs = "i", yaxs = "i", axes = FALSE)
abline(v = xticks, col = "lightgrey")
abline(h = yticks, col = "lightgrey")
abline(h = 0, lwd = 2)
axis(1L, xticks, xticks, font = 2L, cex.axis = 0.7)
axis(2L, yticks, sprintf("%.1f", yticks), las = 1L, font = 2L, cex.axis = 0.7)

# One colour per row of mydf (six colours for the six rows of the sample data).
hybrid.col <- data.frame(
  hybrid = seq_len(nrow(mydf)),
  col = c("red", "green", "blue", "gold", "cyan", "magenta"),
  stringsAsFactors = FALSE
)
splineN <- 200L
for (ri in seq_len(nrow(hybrid.col))) {
  hybrid <- hybrid.col$hybrid[ri]
  col <- hybrid.col$col[ri]
  x <- xticks
  y <- c(as.matrix(mydf[hybrid, effCis]))
  points(x, y, pch = 16L, col = col, xpd = NA)
  # Inside with(), x and y refer to the interpolated spline coordinates.
  with(spline(x, y, splineN), {
    lines(x, y, col = col, lwd = 2, xpd = NA)
    # Mark the spline point with the smallest absolute change for 2 < x < 3.
    localwin <- which(x > 2 & x < 3)
    tp <- which.min(abs(diff(y[localwin])))
    if (length(tp) > 0L) {
      points(x[localwin[tp]], y[localwin[tp]], col = col, pch = 4L)
    }
    # Dashed line through the spline step with the most negative change for
    # 2 < x < 5.
    localwin <- which(x > 2 & x < 5)
    tp <- which.min(diff(y[localwin]))
    if (length(tp) > 0L) {
      # The two neighbouring spline points that define the slope.
      step <- localwin[seq(tp, length.out = 2L)]
      m <- diff(y[step]) / diff(x[step])
      if (is.finite(m)) {
        abline(y[localwin[tp]] - m * x[localwin[tp]], m, col = col, lty = 2L)
      }
    }
  })
}
Here's how I would do it, you can play around with pretty labels (see function ?pretty). The parts I changed have spaces around them. Consider that this is not C so ; are not necessary. Putting some spaces and naming arguments makes the code perhaps more readable.
# Variant of the efficiency plot whose x axis covers the range of the
# measurement columns instead of the column positions 1..k.

# Column indices of the efficiency and measurement columns.
effCis <- grep("^efficiency", names(mydf))
find.measurements <- grep("^measurement", names(mydf))

# y limits: observed efficiency range widened outwards to multiples of 0.1.
ylim <- range(mydf[, effCis], na.rm = TRUE)
ylim[1L] <- floor(ylim[1L] / 0.1) * 0.1
ylim[2L] <- ceiling(ylim[2L] / 0.1) * 0.1
yticks <- seq(ylim[1L], ylim[2L], 0.1)

# x positions: one evenly spaced tick per efficiency column, from the smallest
# to the largest measurement. The count follows the number of efficiency
# columns because the ticks are also used as the x coordinates of the points.
xrange <- range(mydf[, find.measurements], na.rm = TRUE)
xticks <- seq(from = xrange[1L], to = xrange[2L], length.out = length(effCis))
xlim <- range(xticks)

# Empty canvas with a light grid, a bold zero line and hand-made axes.
plot(NA, xlim = xlim, ylim = ylim, xlab = "measurement", ylab = "efficiency",
     xaxs = "i", yaxs = "i", axes = FALSE)
abline(v = xticks, col = "lightgrey")
abline(h = yticks, col = "lightgrey")
abline(h = 0, lwd = 2)
axis(side = 1, at = xticks)
axis(2L, yticks, sprintf("%.1f", yticks), las = 1L, font = 2L, cex.axis = 0.7)

# One colour per row of mydf (six colours for the six rows of the sample data).
hybrid.col <- data.frame(
  hybrid = seq_len(nrow(mydf)),
  col = c("red", "green", "blue", "gold", "cyan", "magenta"),
  stringsAsFactors = FALSE
)
splineN <- 200L
for (ri in seq_len(nrow(hybrid.col))) {
  hybrid <- hybrid.col$hybrid[ri]
  col <- hybrid.col$col[ri]
  # NOTE(review): every row is drawn at the same evenly spaced x positions;
  # the row's own measurement values (mydf[hybrid, find.measurements]) are not
  # used as x coordinates - confirm this is what is wanted.
  x <- xticks
  y <- c(as.matrix(mydf[hybrid, effCis]))
  points(x, y, pch = 16L, col = col, xpd = NA)
  # Inside with(), x and y refer to the interpolated spline coordinates.
  with(spline(x, y, splineN), {
    lines(x, y, col = col, lwd = 2, xpd = NA)
    # Mark the spline point with the smallest absolute change for 2 < x < 3.
    localwin <- which(x > 2 & x < 3)
    tp <- which.min(abs(diff(y[localwin])))
    if (length(tp) > 0L) {
      points(x[localwin[tp]], y[localwin[tp]], col = col, pch = 4L)
    }
    # Dashed line through the spline step with the most negative change for
    # 2 < x < 5.
    localwin <- which(x > 2 & x < 5)
    tp <- which.min(diff(y[localwin]))
    if (length(tp) > 0L) {
      # The two neighbouring spline points that define the slope.
      step <- localwin[seq(tp, length.out = 2L)]
      m <- diff(y[step]) / diff(x[step])
      if (is.finite(m)) {
        abline(y[localwin[tp]] - m * x[localwin[tp]], m, col = col, lty = 2L)
      }
    }
  })
}

Resources