removing unused factors from facet_grid with row and column specified - r

What is the Problem?
Each facet in ggplot2 may not contain all factor levels in the 'name' column, in which case I want the facet to ignore those factor levels to avoid the gaps in the tiles.
What have I tried
I have tried adding and removing scales = "free", drop = TRUE, and space = "free" from facet_grid as recommended in a stackexchange question here, and a stackoverflow question here.
Any help would be much appreciated!
Example slice of data
test_data <- structure(list(sample = c("1", "1", "1", "1", "1", "2", "2",
"2", "2", "2", "3", "3", "3", "3", "3", "1", "1", "1", "1", "1",
"2", "2", "2", "2", "2", "3", "3", "3", "3", "3"), name = c("IV_1385127_1385127_1_+_3_1",
"IV_78222_78222_1_-_3_1", "XV_978130_978130_1_-_3_1", "XIV_574351_574351_1_+_3_1",
"XV_357215_357215_1_-_3_1", "XII_456601_456601_1_-_3_1", "V_423552_423552_1_+_3_1",
"XI_200191_200191_1_-_3_1", "XII_465717_465717_1_-_4_2", "XII_455342_455342_1_-_3_1",
"VII_84298_84298_1_-_3_1", "IV_229884_229884_1_+_4_2", "XII_633371_633371_1_-_4_2",
"XIII_9888_9888_1_-_4_2", "X_703096_703096_1_-_3_2", "IV_1385127_1385127_1_+_3_1",
"IV_78222_78222_1_-_3_1", "XV_978130_978130_1_-_3_1", "XIV_574351_574351_1_+_3_1",
"XV_357215_357215_1_-_3_1", "XII_456601_456601_1_-_3_1", "V_423552_423552_1_+_3_1",
"XI_200191_200191_1_-_3_1", "XII_465717_465717_1_-_4_2", "XII_455342_455342_1_-_3_1",
"VII_84298_84298_1_-_3_1", "IV_229884_229884_1_+_4_2", "XII_633371_633371_1_-_4_2",
"XIII_9888_9888_1_-_4_2", "X_703096_703096_1_-_3_2"), ntile = c("1",
"1", "1", "1", "1", "1", "1", "1", "2", "1", "1", "2", "2", "2",
"2", "1", "1", "1", "1", "1", "1", "1", "1", "2", "1", "1", "2",
"2", "2", "2"), position = c("-1", "-1", "-1", "-1", "-1", "-1",
"-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"
), base = c("T", "T", "T", "A", "C", "A", "T", "T", "A", "T",
"T", "G", "C", "C", "A", "A", "G", "A", "G", "G", "A", "T", "A",
"G", "T", "A", "A", "A", "A", "A")), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -30L))
Code used for plotting
# Tile plot: one tile per (position, name) pair, filled by base call.
# The y-axis title/labels, the legend title and the panel grid are hidden.
p <- ggplot(data = test_data, mapping = aes(x = position, y = factor(name))) +
  geom_tile(mapping = aes(fill = base)) +
  scale_fill_viridis_d() +
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    legend.title = element_blank(),
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    axis.title.x = element_text(margin = margin(t = 15))
  ) +
  # Rows are ntile groups, columns are samples; free scales, free space and
  # drop = TRUE are the options tried so far.
  facet_grid(ntile ~ sample, scales = "free", space = "free", drop = TRUE)
Example plot output
ggplot2 version
ggplot2 3.1.1

There are no unused factor levels in your sample data, which is why those arguments are not giving you what you want. A possible workaround to start from:
# Base plot, unchanged from the question (without the facet_grid() call)
p <- ggplot(data = test_data, mapping = aes(x = position, y = factor(name))) +
  geom_tile(mapping = aes(fill = base)) +
  scale_fill_viridis_d() +
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    legend.title = element_blank(),
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    axis.title.x = element_text(margin = margin(t = 15))
  )

# Workaround: flip the coordinates and facet with facet_wrap(), whose
# scales = "free" lets the axes vary from panel to panel.
p +
  coord_flip() +
  facet_wrap(ntile ~ sample, scales = "free")
output

Related

How to convert a range of columns from Character to Number/Integer in R

I am trying to convert a range of columns from character to integer. I don't want to write as.integer for each column.
I am trying to find a more effective way where I can pass the column names that I want to convert and then convert them to integer.
Is this doable in R? Or Should I do it one column after the other.
The Expected output:
Convert a range of data which is in char to Integer.
Convert a few columns by passing them individually rather than as a range.
The code I wrote is given below:
library(readxl)
# Load the workbook into a data frame.
Final <- read_excel("C:/X/X/X- X/Desktop/Final.xlsx")
# Name of the first column to convert (the 4th column of the sheet).
First_Date <- colnames(Final)[4]
# Name of the last column to convert (the last column of the sheet).
Last_Date <- tail(colnames(Final),1)
# Inspect the column types before converting.
str(Final)
# Attempted conversion of the First_Date..Last_Date columns to numeric.
# NOTE(review): `%>%` and mutate_if() are dplyr functions, but dplyr is not
# loaded in this snippet. mutate_if() also expects a predicate, and `:`
# applied to two character strings is not a column range — this is the call
# the question is about.
Final <- Final %>%
mutate_if(c(First_Date:Last_Date),as.numeric)
The data I am working with is given below:
structure(list(UniqueID = c("3F-FA|807905", "3F-FA|808005", "3F-FA|808006",
"3F-FA|808007", "Py_AuAriFa|761403", "3F-FA|761502", "AutoTheta|761602",
"3F-FA|318901", "3F-FA|339401"), Xreg = c("3F-FA", "3F-FA", "3F-FA",
"3F-FA", "Py_AuAriFa", "3F-FA", "AutoTheta", "3F-FA", "3F-FA"
), Row = c("807905", "808005", "808006", "808007", "761403",
"761502", "761602", "318901", "339401"), `2023-02-01` = c("0",
"0", "0", "0", "50", "1", "7", "0", "0"), `2023-03-01` = c("0",
"0", "0", "0", "32", "1", "7", "0", "0"), `2023-04-01` = c("0",
"0", "0", "0", "36", "1", "7", "0", "0"), `2023-05-01` = c("0",
"0", "0", "0", "41", "1", "7", "0", "0"), `2023-06-01` = c("0",
"0", "0", "0", "31", "1", "6", "0", "0"), `2023-07-01` = c("0",
"0", "0", "0", "38", "1", "6", "0", "0"), `2023-08-01` = c("0",
"0", "0", "0", "34", "1", "6", "0", "0"), `2023-09-01` = c("0",
"0", "0", "0", "32", "1", "6", "0", "0"), `2023-10-01` = c("0",
"0", "0", "0", "35", "1", "5", "0", "0")), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -9L))
The columns I am trying to convert are those from 2023-02-01 to 2023-10-01. I can't use mutate_if on the whole data frame, because the column Row holds character data that could be converted to integer but should not be. Hence the need to select only a few columns.
We can match the patterns in the column names to loop over those column and modify the class
library(dplyr)
# Convert to integer every column whose whole name has the form
# dddd-dd-dd (four digits, two digits, two digits).
Final <- mutate(
  Final,
  across(matches("^\\d{4}-\\d{2}-\\d{2}$"), as.integer)
)
Or use the `:` operator to select the range of columns:
# Convert to integer the contiguous run of columns that starts at
# "2023-02-01" and ends at "2023-10-01".
Final <- mutate(Final, across("2023-02-01":"2023-10-01", as.integer))

Create (many) columns conditional on similarly named columns

I want to create a new column that take the value of one of two similarly named columns, depending on a third column. There are many such columns to create. Here's my data.
dt <- structure(list(malvol_left_1_w1 = c("1", "1", "4", "3", "4",
"4", "1", "4", "4", "3", "1", "4", "4", "3", "4", "4", "5", "2",
"4", "2"), malvol_left_2_w1 = c("1", "1", "4", "3", "4", "4",
"1", "3", "4", "2", "2", "2", "4", "1", "5", "4", "5", "2", "4",
"2"), malvol_right_1_w1 = c("1", "1", "4", "3", "4", "4", "1",
"3", "4", "2", "1", "4", "4", "5", "5", "4", "2", "6", "4", "1"
), malvol_right_2_w1 = c("1", "1", "4", "3", "4", "4", "1", "3",
"4", "2", "1", "2", "4", "5", "5", "4", "5", "5", "4", "5"),
malvol_left_1_w2 = c("1", "1", "3", "3", "4", "4", "1", "5",
"4", "4", "4", "2", "1", "4", "5", "4", "3", "2", "4", "4"
), malvol_left_2_w2 = c("1", "1", "3", "3", "4", "4", "7",
"5", "4", "2", "3", "1", "1", "4", "4", "4", "3", "4", "4",
"4"), malvol_right_1_w2 = c("1", "3", "3", "3", "4", "4",
"1", "4", "4", "3", "2", "2", "4", "1", "4", "4", "5", "5",
"4", "4"), malvol_right_2_w2 = c("1", "2", "3", "3", "4",
"4", "1", "2", "4", "2", "3", "2", "4", "1", "4", "4", "5",
"4", "4", "3"), leftright_w1 = c("right", "right", "left",
"right", "right", "right", "left", "right", "right", "left",
"left", "left", "left", "right", "left", "left", "right",
"right", "right", "left"), leftright_w2 = c("right", "right",
"left", "left", "right", "left", "left", "right", "right",
"left", "left", "left", "left", "right", "left", "left",
"right", "right", "left", "left")), class = "data.frame", row.names = c("12",
"15", "69", "77", "95", "96", "112", "122", "150", "163", "184",
"216", "221", "226", "240", "298", "305", "354", "370", "379"
))
Now I can do this in dplyr like:
# For every (item, wave) pair, take the value from the side opposite to the
# one recorded in leftright_w<wave>: "right" picks the *_left_* column and
# "left" picks the *_right_* column. Any other value yields NA.
dt <- dt %>%
  mutate(
    malvol_1_w1 = case_when(
      leftright_w1 == "right" ~ malvol_left_1_w1,
      leftright_w1 == "left" ~ malvol_right_1_w1
    ),
    malvol_2_w1 = case_when(
      leftright_w1 == "right" ~ malvol_left_2_w1,
      leftright_w1 == "left" ~ malvol_right_2_w1
    ),
    malvol_1_w2 = case_when(
      leftright_w2 == "right" ~ malvol_left_1_w2,
      leftright_w2 == "left" ~ malvol_right_1_w2
    ),
    malvol_2_w2 = case_when(
      leftright_w2 == "right" ~ malvol_left_2_w2,
      leftright_w2 == "left" ~ malvol_right_2_w2
    )
  )
However, it's not really a feasible solution, because there will be more of both numbers defining a variable (e.g. both malvol_3_w1 and malvol_1_w3 will need to be created).
One solution is to do this with a loop:
# Build malvol_<var>_w<wave> for every wave/var combination: start from the
# "right" source column, then overwrite the rows whose leftright_w<wave>
# value is "right" with the "left" source column.
for (wave in 1:2) {
  side <- dt[[paste0("leftright_w", wave)]]
  for (var in 1:2) {
    suffix <- paste0(var, "_w", wave)
    target <- paste0("malvol_", suffix)
    dt[, target] <- dt[, paste0("malvol_right_", suffix)]
    use_left <- side == "right"
    dt[use_left, target] <- dt[use_left, paste0("malvol_left_", suffix)]
  }
}
However, what is a tidyverse solution?
UPDATE:
I came up with a tidyverse solution myself; however, it is not very elegant. I am still looking for more canonical solutions.
# Step 1: create empty (NA) placeholder columns, one per item/wave pair, so
# that across() below has target columns to fill.
dt <- dt %>%
mutate(
malvol_1_w1 = NA, malvol_2_w1 = NA,
malvol_1_w2 = NA, malvol_2_w2 = NA) %>%
# Step 2: fill each placeholder. matches("malvol_\\d") selects only names
# where "malvol_" is directly followed by a digit, i.e. the placeholders.
mutate(
across(matches("malvol_\\d"),
~ case_when(
# Condition: build the name "leftright_w<k>" from the wave suffix of the
# current column and evaluate it as a column; compare with "left".
eval(parse(text = paste0("leftright_", str_extract(cur_column(), "w.")))) == "left" ~
# Value: rebuild the source name as <text before "_<digit>"> + "_right" +
# <text after "malvol">, e.g. "malvol_1_w1" -> "malvol_right_1_w1".
eval(parse(text = paste0(str_split(cur_column(), "_\\d", simplify = T)[1],
"_right", str_split(cur_column(), "malvol", simplify = T)[2]))),
# Same construction for the "right" case, which reads the "_left" column.
eval(parse(text = paste0("leftright_", str_extract(cur_column(), "w.")))) == "right" ~
eval(parse(text = paste0(str_split(cur_column(), "_\\d", simplify = T)[1],
"_left", str_split(cur_column(), "malvol", simplify = T)[2]))))))
What makes your problem difficult is that a lot of information is hidden in variable names rather than data cells. Hence, you need some steps to transform your data into "tidy" format. In the code below, the crucial part is (1) to turn the variables [malvol]_[lr]_[num]_[w] into four separate columns malvol, lr, num, w (all prefixed with m_), and (2) from the variables leftright_[w] extract variable w (prefixed with l_) using the functions pivot_longer and then separate.
# Add a row id so the reshaped result can be joined back onto the original rows
dt <- mutate(dt, id = row_number())

df <- dt %>%
  # Tidy the "malvol" columns: one row per original cell, with the pieces of
  # the column name split into separate columns
  pivot_longer(
    cols = starts_with('malvol'),
    names_to = "m_var",
    values_to = "m_val"
  ) %>%
  separate(m_var, into = c("m_malvol", "m_lr", "m_num", "m_w")) %>%
  # Tidy the "leftright" columns the same way, keeping only the wave part
  pivot_longer(
    cols = starts_with('leftright'),
    names_to = 'l_var',
    values_to = 'l_lr'
  ) %>%
  separate(l_var, into = c(NA, "l_w")) %>%
  # Implement the logic: same wave, and the malvol side must be the one
  # opposite to the recorded leftright value
  filter(l_w == m_w, l_lr != m_lr) %>%
  # Pivot back into the original wide format
  select(-l_w, -l_lr, -m_lr) %>%
  pivot_wider(names_from = c(m_malvol, m_num, m_w), values_from = m_val)

# Merge the results back onto the original data (dt already carries id)
dt <- inner_join(dt, df, by = "id")
Although I pivoted the data back into your desired format in the end (to check whether results are in line with your desired results), I would suggest you leave the data in the long format, which is "tidy" and more easy to work with, compared to your "wide" format. So maybe skip the last pivot_wider operation.

Lapply to execute command for a list of variables

I intend to change the order of levels of some factors.
The intention is to apply this command
Df$X1 <- factor(Df$X1, levels = c("5", "4", "3", "2", "1"), ordered = TRUE)
to a list of variables (X1 to X2)
# Example data: X1 and X2 are ordered factors, X3 and Y are plain factors,
# each holding 30 random draws from 1:5 (`r=T` partially matches `replace`).
Df <- data.frame(
X1 = ordered(sample(1:5,30,r=T)),
X2 = ordered(sample(1:5,30,r=T)),
X3 = as.factor(sample(1:5,30,r=T)),
Y = as.factor(sample(1:5,30,r=T))
)
# Builds a list of the strings "Df$X1" and "Df$X2". These are character
# values, not references to the columns themselves.
tmplistporadove <- as.list(paste("Df$",names(Df)[1:2],sep=""))
# substitute() returns the unevaluated expression with x replaced by the
# argument expression, so each element is an unevaluated call rather than a
# performed assignment (see the printed output below).
zmena <- lapply(tmplistporadove, function(x) substitute(x <- ordered(x, levels = c("5", "4", "3", "2", "1"))) )
# NOTE(review): zmena is a list of calls, not a single call; eval() hands the
# list back unchanged, so nothing is assigned to Df.
eval(zmena)
But R just prints this:
X[[i]] <- ordered(X[[i]], levels = c("5", "4", "3", "2", "1"))

R code challenge: retrieving the values in matching columns and sum them up with matching rows

I have a problem solving this in R. I have this data frame called testa (dput included). I need to match all the letters in column ALT with the colnames (A,C,G,T,N) and get the corresponding values in those column along with the value for REF letters and get the result ad.new (my code does this job).
However, I need to expand this code to solve an issue with the line where the TYPE column has flat at the end. For the row with the flat, I need to match its start id (chr10:102053031) with other ids in start column. If they match, I need to sum up the corresponding value for ALT from A,C,G,T,N column and replace it with ad.new column for the flat line along with the REF value.
If you run the dput and my code you will be able to understand it. So basically, I want to match the letters in REF and ALT columns and get the corresponding values from the columns (A,C,G,T,N) and separate those values by comma for REF and ALT. However (in this example), for flat line I want to sum up the value in column A with matching start id with the start id of flat line (the value in this case is 6) and the value with another match (the value in this case is 7 from G column) and sum them together to give 13. So for flat line my result should be 0,13.
The expected result is also shown below.
my incomplete code:
# Treat missing entries as zero.
testa[is.na(testa)] <- 0

# Count for each row's REF letter: indexing the columns by the REF letters
# gives a square matrix whose [i, i] entry is row i's value in its own REF
# column, so the diagonal holds the wanted counts.
# (R is case-sensitive: the object is `ref.counts` throughout.)
ref.counts <- testa[, testa[, "REF"]]
ref.counts <- as.matrix(ref.counts)
ref.counts[is.na(ref.counts)] <- 0
ref.counts <- diag(ref.counts)

# Same construction for each row's ALT letter.
alt.counts <- testa[, testa[, "ALT"]]
alt.counts <- as.matrix(alt.counts)
alt.counts[is.na(alt.counts)] <- 0
alt.counts <- diag(alt.counts)
#############
##need to extend this code here
#############
# "REF,ALT" count pair per row.
ad.new <- paste(ref.counts, alt.counts, sep = ",")
dput for testa:
structure(c("chr10:101544447", "chr10:102053031", "chr10:102778767",
"chr10:102789831", "chr10:102989480", "chr10:102053031", "chr10:102053031",
"0", "6", "0", "0", "0", "0", "0", "0", "34", "24", "0", "0",
"34", "34", "0", "0", "0", "0", "0", "0", "7", "53", "0", "0",
"30", "12", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"chr10", "chr10", "chr10", "chr10", "chr10", "chr10", "chr10",
"101544447", "102053031", "102778767", "102789831", "102989480",
"102053031", "102053031", "A", "C", "C", "C", "C", "C", "C",
"T", "A", "T", "T", "T", "G", "G", "snp", "snp", "snp", "snp",
"snp", "snp:102053031:flat", "snp", "nonsynonymous SNV",
"intronic", "nonsynonymous SNV", "nonsynonymous SNV", "ncRNA_exonic",
"intronic", "intronic", "ABCC2:NM_000392:exon2:c.A116T:p.Y39F,",
"PKD2L1", "PDZD7:NM_024895:exon8:c.G1136A:p.R379Q,PDZD7:NM_001195263:exon8:c.G1136A:p.R379Q,",
"PDZD7:NM_024895:exon2:c.G146A:p.R49Q,PDZD7:NM_001195263:exon2:c.G146A:p.R49Q,",
"LBX1-AS1", "PKD2L1", "PKD2L1"), .Dim = c(7L, 15L), .Dimnames = list(
c("1", "2", "3", "4", "5", "6", "7"), c("start", "A", "C",
"G", "T", "N", "=", "-", "chr", "end", "REF", "ALT", "TYPE",
"refGene::location", "refGene::type")))
Expected result
ad.new
"0,53"
"34,6"
"24,0"
"0,30"
"0,12"
"0,13"
"34,7"
Something like this should work :
# Apply the "normal" rule (not considering the flat exceptions): for every
# row take the count found in the column named by its ALT / REF letter.
alts <- as.numeric(diag(testa[, testa[, "ALT"]]))
refs <- as.numeric(diag(testa[, testa[, "REF"]]))
res <- paste(refs, alts, sep = ",")

# Replace the rows whose TYPE ends with "flat".
flats <- grep('.*flat$', testa[, "TYPE"])
# vapply() always returns a character vector, so the assignment also works
# when no row is flat.
res[flats] <- vapply(flats, function(x) {
  startId <- testa[x, "start"]
  # All other rows sharing this start id; the flat row itself (x) is excluded.
  selection <- setdiff(which(testa[, "start"] == startId), x)
  # REF part is fixed to 0; ALT part is the summed ALT counts of the matches.
  paste0("0,", sum(alts[selection]))
}, character(1))

ad.new <- as.matrix(res)
> ad.new
[,1]
[1,] "0,53"
[2,] "34,6"
[3,] "24,0"
[4,] "0,30"
[5,] "0,12"
[6,] "0,13"
[7,] "34,7"

Two Color Scales for geom_line in ggplot2

I have a chart (code to replicate will be below) that has two lines (and points) of data that need to be color coded, then three sets of confidence intervals (lines) which need to have their own color coding.
Unfortunately, ggplot sees the two calls to geom_line() and fits them all in the same scale.
Is there a way to have the central lines and dots have one scale (and legend entry) while the outer lines have a separate scale (and legend entry)?
I've seen (complex) answers like ggplot2: Multiple color scales or shift colors systematically on different layers? but that relies on the old proto system which I believe has been phased out by now(?).
Thanks for any help.
Code to produce data and graphs. Sorry for the length:
exShapedMayGroup <- structure(list(Date = structure(c(14730, 14730, 14730, 14731,
14731, 14731, 14734, 14734, 14734, 14735, 14735, 14735, 14736,
14736, 14736, 14737, 14737, 14737, 14740, 14740, 14740, 14741,
14741, 14741, 14742, 14742, 14742, 14743, 14743, 14743, 14744,
14744, 14744, 14745, 14745, 14745, 14746, 14746, 14746, 14748,
14748, 14748, 14749, 14749, 14749, 14750, 14750, 14750, 14750,
14750, 14750, 14751, 14751, 14751, 14752, 14752, 14752, 14752,
14752, 14752, 14754, 14754, 14754, 14756, 14756, 14756, 14757,
14757, 14757, 14758, 14758, 14758, 14758, 14758, 14758, 14759,
14759, 14759, 14760, 14760, 14760), class = "Date"), Score = c(0.028,
0.028, 0.028, 0.03289, 0.03289, 0.03289, 0.034512, 0.034512,
0.034512, 0.0373496, 0.0373496, 0.0373496, 0.03201968, 0.03201968,
0.03201968, 0.040805744, 0.040805744, 0.040805744, 0.0344045952,
0.0344045952, 0.0344045952, 0.04017367616, 0.04017367616, 0.04017367616,
0.035998940928, 0.035998940928, 0.035998940928, 0.0342191527424,
0.0342191527424, 0.0342191527424, 0.09799532219392, 0.09799532219392,
0.09799532219392, 0.122746257755136, 0.122746257755136, 0.122746257755136,
0.0999570062041088, 0.0999570062041088, 0.0999570062041088, 0.0950656049632871,
0.0950656049632871, 0.0950656049632871, 0.0837224839706296, 0.0837224839706296,
0.0837224839706296, 0.00418, 0.00418, 0.00418, 0.0806379871765037,
0.0806379871765037, 0.0806379871765037, 0.009624, 0.009624, 0.009624,
0.0099792, 0.0099792, 0.0099792, 0.090740389741203, 0.090740389741203,
0.090740389741203, 0.0905523117929624, 0.0905523117929624, 0.0905523117929624,
0.0761218494343699, 0.0761218494343699, 0.0761218494343699, 0.0707874795474959,
0.0707874795474959, 0.0707874795474959, 0.02132336, 0.02132336,
0.02132336, 0.0636099836379967, 0.0636099836379967, 0.0636099836379967,
0.0550479869103974, 0.0550479869103974, 0.0550479869103974, 0.0466883895283179,
0.0466883895283179, 0.0466883895283179), Right = c("1", "2",
"3", "1", "2", "3", "1", "2", "3", "1", "2", "3", "1", "2", "3",
"1", "2", "3", "1", "2", "3", "1", "2", "3", "1", "2", "3", "1",
"2", "3", "1", "2", "3", "1", "2", "3", "1", "2", "3", "1", "2",
"3", "1", "2", "3", "1", "2", "3", "1", "2", "3", "1", "2", "3",
"1", "2", "3", "1", "2", "3", "1", "2", "3", "1", "2", "3", "1",
"2", "3", "1", "2", "3", "1", "2", "3", "1", "2", "3", "1", "2",
"3"), .id = c("0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "1", "0",
"0", "0", "1", "1", "1", "1", "1", "1", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "1", "1", "1", "0", "0", "0",
"0", "0", "0", "0", "0", "0"), Lower = c(0.0452301816389807,
0.0299531343622987, 0.0146760870856168, 0.0409430625769167, 0.0213788962381707,
0.00181472989942479, 0.0386359600820249, 0.0167646912483872,
-0.00510657758525054, 0.037279363974053, 0.0140514990324434,
-0.00917636590916623, 0.0364512577706185, 0.0123952866255743,
-0.0116606845194698, 0.0359359120595814, 0.0113645952035002,
-0.0132067216525811, 0.0356116886483614, 0.0107161483810601,
-0.0141793918862411, 0.035406383399575, 0.0103055378834873, -0.0147953076326005,
0.0352758647295475, 0.0100445005434323, -0.0151868636426829,
0.0351926859362388, 0.00987814295681498, -0.0154364000226088,
0.035139594640892, 0.00977196036612139, -0.0155956739086492,
0.0351056744462797, 0.00970411997689682, -0.0156974344924861,
0.0350839892725913, 0.00966074962952, -0.0157624900135513, 0.0350701204632195,
0.00963301201077625, -0.0158040964416669, 0.035061248392137,
0.00961526786861143, -0.0158307126549142, NA, NA, NA, 0.0350555718896789,
0.00960391486369513, -0.0158477421622886, NA, NA, NA, NA, NA,
NA, 0.0350519395924259, 0.00959665026918906, -0.0158586390540477,
0.0350496151941651, 0.00959200147266757, -0.01586561224883, 0.0350481276906492,
0.00958902646563569, -0.0158700747593778, 0.035047175734008,
0.00958712255235328, -0.0158729306293014, NA, NA, NA, 0.0350465665004368,
0.00958590408521094, -0.0158747583300149, 0.0350461765986017,
0.00958512428154069, -0.0158759280355203, 0.0350459270645606,
0.00958462521345864, -0.0158766766376434), Upper = c(0.0757842761923446,
0.0910613234690266, 0.106338370745709, 0.0800713952544086, 0.0996355615931546,
0.119199727931901, 0.0823784977493004, 0.104249766582938, 0.126121035416576,
0.0837350938572723, 0.106962958798882, 0.130190823740492, 0.0845632000607068,
0.108619171205751, 0.132675142350795, 0.0850785457717439, 0.109649862627825,
0.134221179483906, 0.0854027691829639, 0.110298309450265, 0.135193849717566,
0.0856080744317504, 0.110708919947838, 0.135809765463926, 0.0857385931017778,
0.110969957287893, 0.136201321474008, 0.0858217718950865, 0.11113631487451,
0.136450857853934, 0.0858748631904333, 0.111242497465204, 0.136610131739975,
0.0859087833850456, 0.111310337854428, 0.136711892323811, 0.085930468558734,
0.111353708201805, 0.136776947844877, 0.0859443373681059, 0.111381445820549,
0.136818554272992, 0.0859532094391883, 0.111399189962714, 0.136845170486239,
NA, NA, NA, 0.0859588859416464, 0.11141054296763, 0.136862199993614,
NA, NA, NA, NA, NA, NA, 0.0859625182388994, 0.111417807562136,
0.136873096885373, 0.0859648426371602, 0.111422456358658, 0.136880070080155,
0.0859663301406761, 0.11142543136569, 0.136884532590703, 0.0859672820973173,
0.111427335278972, 0.136887388460627, NA, NA, NA, 0.0859678913308885,
0.111428553746114, 0.13688921616134, 0.0859682812327236, 0.111429333549785,
0.136890385866846, 0.0859685307667647, 0.111429832617867, 0.136891134468969
)), .Names = c("Date", "Score", "Right", ".id", "Lower", "Upper"
), row.names = c(NA, 81L), class = "data.frame")
# Central score line and points coloured by .id; lower and upper limit lines
# coloured by Right. Every colour mapping here shares the single colour scale.
ggplot(data = exShapedMayGroup, mapping = aes_string(x = "Date", y = "Score")) +
  geom_line(mapping = aes_string(group = ".id", colour = ".id")) +
  geom_point(mapping = aes_string(colour = ".id")) +
  geom_line(mapping = aes_string(y = "Lower", colour = "Right")) +
  geom_line(mapping = aes_string(y = "Upper", colour = "Right")) +
  scale_color_discrete(name = "Limits")
P.S. Only using aes_string because this is called in a function which allows the user to input columns as a character.
The ggplot2 way to do this is to not use geom_line for your confidence bands. There's a geom built specifically for that: geom_ribbon.
# Confidence bands drawn as ribbons, filled by limit level (Right). The
# central score line and its points are both coloured by group (.id), so the
# limits live on the fill scale and the groups on the colour scale.
ggplot(exShapedMayGroup, aes(x = Date, y = Score)) +
  geom_ribbon(
    aes(ymin = Lower, ymax = Upper, fill = Right, group = Right),
    alpha = 0.25
  ) +
  geom_line(aes(group = .id, colour = .id)) +
  # Points follow the line they sit on, so they use the same .id colouring.
  geom_point(aes(colour = .id)) +
  scale_fill_discrete(name = "Limits") +
  scale_colour_discrete(name = "Group")
Obviously, I can't be sure that you'll be happy with how this looks, but this is generally how you approach this kind of graph in ggplot2. Note that I removed the aes_string uses in your code.
Unfortunately Hadley confirmed this is not possible. So I decided to use linetype instead (and not have anything distinguishing the points). This made everyone happy.
# Groups (.id) are told apart by line type, limit levels (Right) by colour;
# the points carry no group distinction.
ggplot(data = exShapedMayGroup, mapping = aes_string(x = "Date", y = "Score")) +
  geom_line(mapping = aes_string(group = ".id", linetype = ".id")) +
  geom_point() +
  geom_line(mapping = aes_string(y = "Lower", colour = "Right")) +
  geom_line(mapping = aes_string(y = "Upper", colour = "Right")) +
  scale_color_discrete(name = "Limits") +
  scale_linetype_discrete(name = "Group")
This is much simpler now, thanks to Elio Campitelli's ggnewscale package:
#install.packages("ggnewscale")
library(ggnewscale)
ggplot(exShapedMayGroup, aes(x = Date, y = Score)) +
  # First colour scale: central line and points, coloured by group (.id).
  geom_line(aes(color = .id)) +
  geom_point(aes(color = .id)) +
  scale_color_brewer(name = "Group", palette = "Dark2") +
  # Start a new colour scale; the layers added below use it instead.
  ggnewscale::new_scale_color() +
  # Second colour scale: lower and upper limit lines, coloured by Right.
  geom_line(aes(y = Lower, colour = Right)) +
  geom_line(aes(y = Upper, colour = Right)) +
  scale_color_discrete(name = "Limits")
Created on 2021-06-25 by the reprex package (v1.0.0)

Resources