Scatter plot with small pie charts with R - r

I have this data below called test1.melted. I also have the code to plot my data using package scatterpie, but due to inherent problem of scatterpie (if coordinates are not cartesian,i.e. equal horizontal and vertical distances), you would not get properly formatted plot. Is there a better way to plot this data without using scatterpie?
Data:
test1.melted<-structure(list(Wet_lab_dilution_A = structure(c(1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L), .Label = c("A", "B", "C", "D", "E", "F",
"G", "H", "I", "J", "K", "L"), class = "factor"), TypeA = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("I", "II"), class = "factor"),
NA12878 = c(100L, 50L, 25L, 20L, 10L, 0L, 100L, 50L, 25L,
20L, 10L, 0L, 100L, 50L, 25L, 20L, 10L, 0L, 100L, 50L, 25L,
20L, 10L, 0L), NA12877 = c(0L, 50L, 75L, 80L, 90L, 100L,
0L, 50L, 75L, 80L, 90L, 100L, 0L, 50L, 75L, 80L, 90L, 100L,
0L, 50L, 75L, 80L, 90L, 100L), IBD = c(1.02, 0.619, 0.294,
0.244, 0.134, 0.003, 0.003, 0.697, 0.964, 0.978, 1, 1, 1.02,
0.619, 0.294, 0.244, 0.134, 0.003, 0.003, 0.697, 0.964, 0.978,
1, 1), variableA = c("tEst", "tEst", "tEst", "tEst", "tEst",
"tEst", "tEst", "tEst", "tEst", "tEst", "tEst", "tEst", "pair",
"pair", "pair", "pair", "pair", "pair", "pair", "pair", "pair",
"pair", "pair", "pair"), valueA = c(0.1, 59.8, 84.6, 89.2,
97.4, 100, 99.6, 56.4, 29.9, 24, 12.1, 0.1, 0.1, 51.08, 75.28,
80.09, 90.16, 100, 100, 48.09, 23.97, 18.81, 9.24, 0.08)), row.names = c(NA,
-24L), .Names = c("Wet_lab_dilution_A", "TypeA", "NA12878", "NA12877",
"IBD", "variableA", "valueA"), class = "data.frame")
code:
p<- ggplot() + geom_scatterpie(aes(x=valueA, y=IBD, group=TypeA), data=test1.melted,
cols=c("NA12878", "NA12877")) + coord_equal()+
facet_grid(TypeA~variableA)
p

Do you have to use a pie chart? (And you might; there's nothing wrong with them.)
Cause something like this could illustrate literally every variable in the dataset:
library(ggplot2)
test1.melted$NA12877 <- as.factor(test1.melted$NA12877)
test1.melted$NA12878 <- as.factor(test1.melted$NA12878)
p <- ggplot(data = test1.melted, aes(x=valueA, y=IBD, group=TypeA))
p <- p + geom_point(aes(colour=NA12877, fill = NA12878), stroke=3, size = 3, shape = 21)
p <- p + geom_text(aes(label = Wet_lab_dilution_A), size = 2)
p + facet_grid(TypeA ~ variableA) + theme_minimal()

Related

Ordering facets in a ggplot graph fails

I'm experiencing behaviour in ggplot2 I cannot understand.
I am trying to sort facets in a graph according to values of the y-axes. This works if I try to order the facets according to the value of the y-axes at x = "0", however ordering according to the value of the y-axes at x = "4" does not give the desired result.
All help would be much appreciated!
Here's my data:
my_df <- structure(list(days_incubated = c(0L, 0L, 0L, 0L, 4L, 4L, 4L,
4L, 10L, 10L, 10L, 10L, 17L, 17L, 17L, 17L, 24L, 24L, 24L, 24L,
66L, 66L, 66L, 66L, 81L, 81L, 81L, 81L, 94L, 94L, 94L, 94L, 116L,
116L, 116L, 116L), jar = c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 2L, 3L, 4L, 1L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), co2 = c(15,
5, 50, 30, 2, 89, 14.0,
39.3, 1.2, 25.0, 10.27,
29.5, 5.54, 38.13, 27.33,
49.0, 28.62, 21.1, 24.0,
0.6, 1.04, 7.94, 1.2,
0.67, 1.9, 3.59, 4.92,
0.02, 13.2, 20.3, 21.52,
4.9, 6.6, 21.8, 12.28,
2.9)), row.names = c(NA, -36L), class = "data.frame")
And here's the code for the graph:
my_df %>%
group_by(days_incubated)%>%
mutate(co2_day0 = ifelse(days_incubated == 0, co2, NA),
jar = fct_reorder(factor(jar),
co2_day0,
mean,
na.rm = TRUE,
.desc = FALSE)) %>%
ggplot(aes(days_incubated, co2)) +
stat_summary(fun = mean) +
facet_wrap(vars(jar), scales = "free_y")
If you want to do all the calculations without breaking the pipe you could do :
library(dplyr)
library(ggplot2)
n <- 0
my_df %>%
mutate(jar = factor(jar,
jar[days_incubated == n][order(co2[days_incubated == n])])) %>%
ggplot(aes(days_incubated, co2)) +
stat_summary(fun = mean) +
facet_wrap(~jar, scales = "free_y")
With n = 4
n <- 4
my_df %>%
mutate(jar = factor(jar,
jar[days_incubated == n][order(co2[days_incubated == n])])) %>%
ggplot(aes(days_incubated, co2)) +
stat_summary(fun = mean) +
facet_wrap(~jar, scales = "free_y")

Trouble with GLMM with glmer in R: Error in pwrssUpdate...halvings failed to reduce deviance in pwrssUpdate

Here's a snipped of randomly selected data from my full dataframe:
canopy<-structure(list(Stage = structure(c(6L, 5L, 3L, 6L, 7L, 5L, 4L,
7L, 2L, 7L, 5L, 1L, 1L, 4L, 3L, 6L, 5L, 7L, 4L, 4L), .Label = c("milpa",
"robir", "jurup che", "pak che kor", "mehen che", "nu kux che",
"tam che"), class = c("ordered", "factor")), ID = c(44L, 34L,
18L, 64L, 54L, 59L, 28L, 51L, 11L, 56L, 33L, 1L, 7L, 25L, 58L,
48L, 36L, 51L, 27L, 66L), Sample = c(4L, 2L, 2L, 10L, 6L, 9L,
4L, 3L, 3L, 8L, 1L, 1L, 7L, 1L, 10L, 8L, 4L, 3L, 3L, 10L), Subsample = c(2L,
3L, 4L, 3L, 2L, 1L, 3L, 2L, 4L, 3L, 1L, 3L, 2L, 4L, 1L, 1L, 3L,
1L, 1L, 4L), Size..ha. = c(0.5, 0.5, 0.5, 0.5, 6, 0.5, 0.5, 0.25,
0.5, 6, 1, 1, 0.5, 2, 1, 0.5, 1, 0.25, 0.5, 2), Avg.Subsample.Canopy = c(94.8,
94.8, 97.92, 96.88, 97.14, 92.46, 93.24, 97.4, 25.64, 97.4, 94.8,
33.7, 13.42, 98.18, 85.44, 96.36, 97.4, 95.58, 85.7, 92.2), dec = c(0.948,
0.948, 0.9792, 0.9688, 0.9714, 0.9246, 0.9324, 0.974, 0.2564,
0.974, 0.948, 0.337, 0.1342, 0.9818, 0.8544, 0.9636, 0.974, 0.9558,
0.857, 0.922)), .Names = c("Stage", "ID", "Sample", "Subsample",
"Size..ha.", "Avg.Subsample.Canopy", "dec"), row.names = c(693L,
537L, 285L, 1017L, 853L, 929L, 441L, 805L, 173L, 889L, 513L,
9L, 101L, 397L, 913L, 753L, 569L, 801L, 417L, 1053L), class = "data.frame")
I am trying to code a GLMM of dec as a function of Stage and Size..ha.
The GLMM is necessary because each row represents a point Subsample measured within a larger Sample area. I am also using a binomial distribution given dec are proportional data.
I tried the model:
canopy.binomial.mod<-glmer(dec~Stage*Size..ha.+(1|Sample),family="binomial",data=canopy)
summary(canopy.binomial.mod)
but get the error:
Error in pwrssUpdate(pp, resp, tol = tolPwrss, GQmat = GQmat, compDev
= compDev, : (maxstephalfit) PIRLS step-halvings failed to reduce deviance in pwrssUpdate
I've seen online that this can be a result of needing to scale a predictor variable, so I tried:
cs. <- function(x) scale(x,scale=TRUE,center=TRUE)
canopy.binomial.mod<-glmer(dec~Stage*cs.(Size..ha.)+(1|Sample),family="binomial",data=canopy.rmna)
summary(canopy.binomial.mod)
Which doesn't seem to help. I also thought that maybe I'm asking too much of the model and it's not converging due to too many predictor variables, so let's remove the Size variable, which is of less interest to me.
canopy.binomial.mod<-glmer(dec~Stage+(1|Sample),family="binomial",data=canopy.rmna)
summary(canopy.binomial.mod)
Still no luck. Any ideas how to address this?

Matching colums and rows in a special condition

output1 <- output1 <- structure(list(row = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 214L, 214L,214L), col = c(17L, 17L, 17L, 17L, 17L, 17L, 16L, 110L, 111L,111L), cell = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 27244L, 27245L, 27245L), xcoord = c(783750L, 783750L, 783750L, 783750L, 783750L, 783750L,783725L, 786075L, 786100L, 786100L), ycoord = c(187050L, 187050L,187050L, 187050L, 187050L, 187050L, 187025L, 181725L, 181725L,181725L), species = structure(c(1L, 1L, 1L, 8L, 9L, 11L, 1L,3L, 3L, 3L), .Label = c("abiealba", "alnuinca", "alnuviri", "betupend","betupube", "fagusilv", "larideci", "piceabie", "pinucemb", "pinusilv","popunigr", "poputrem", "salicapr", "sorbaucu"), class = "factor"),age = c(100L, 20L, 10L, 100L, 100L, 100L, 100L, 30L, 70L,30L), biomass = c(0.1015, 0.0152, 0.0127, 0.5391, 0.02, 0.1584,0.1019, 0.0114, 0.0115, 0.0114), stems = c(1L, 10L, 10L,20L, 5L, 3L, 4L, 15L, 2L, 10L), slowGrowth = c(0L, 0L, 0L,0L, 14L, 0L, 0L, 0L, 0L, 0L), DBH = c(17.9273, 8.831, 8.2681,34.9717, 9.7366, 18.9254, 17.9523, 6.6486, 6.6793, 6.6486), height = c(14.0924, 8.0258, 7.625, 23.4468, 8.0478, 13.6345,14.1081, 3.6519, 3.6552, 3.6519), availableLight = c(0.0934,0.0807, 0.071, 0.4742, 0.0887, 0.101, 0.0985, 0.958, 0.9952,0.9624), light_rf = c(0.2619, 0.2067, 0.1708, 0.6971, 0.063,0.1049, 0.2896, 0.9768, 0.9972, 0.9793), LeafArea = c(5.4506,5.4506, 5.4506, 5.4506, 5.4506, 5.4506, 5.2884, 0.2307, 0.1732,0.1732), nitorgen_rf = c(0, 0, 0, 0, 0.1328, 0, 0, 0, 0,0), droughtIndex = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), moisture_rf = c(1,1, 1, 1, 1, 1, 1, 1, 1, 1), degreeDay_rf = c(0.258, 0.258,0.258, 0.4726, 0.5144, 0.237, 0.258, 0.1125, 0.1125, 0.1125), foliageWght = c(0.0093, 0.0031, 0.0028, 0.0265, 0.0036,0.0023, 0.0094, 5e-04, 5e-04, 5e-04), twigWght = c(0.0537,0.0115, 0.0096, 0.0513, 0.0149, 0.0847, 0.0538, 0.0109, 0.011,0.0109), boleWght = c(0.0384, 6e-04, 3e-04, 0.4613, 0.0015,0.0713, 0.0387, 0, 0, 0), deadFoliage = c(0.405, 0.405, 0.405,0.405, 0.405, 0.405, 0.3664, 0.0627, 0.0534, 0.0534), deadTwig = c(0.9887,0.9887, 0.9887, 0.9887, 0.9887, 0.9887, 0.9537, 0.7391, 0.8132,0.8132), deadbole = c(2.3166, 2.3166, 2.3166, 2.3166, 2.3166,2.3166, 2.3947, 0, 0, 0)), .Names = c("row", "col", "cell","xcoord", "ycoord", "species", "age", "biomass", "stems", "slowGrowth","DBH", "height", "availableLight", "light_rf", "LeafArea", "nitorgen_rf","droughtIndex", "moisture_rf", "degreeDay_rf", "foliageWght","twigWght", "boleWght", "deadFoliage", "deadTwig", "deadbole"), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 131023L, 131024L,131025L), class = "data.frame")
and
Details <- structure(list(fireID = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 1052L,1052L, 1052L), decade = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 100L, 100L,100L), cell = c(14150L, 14321L, 14320L, 14489L, 14323L, 13977L,14492L, 14461L, 14122L, 14123L), row = c(128L, 129L, 129L, 130L,129L, 127L, 130L, 130L, 128L, 128L), column = c(137L, 137L, 136L,135L, 139L, 136L, 138L, 107L, 109L, 110L), biomass = c(0.724241,0.652821, 0.776811, 0.860563, 0.649643, 0.751143, 0.760428, 20.5968,33.6653, 15.1725)), .Names = c("fireID", "decade", "cell", "row","column", "biomass"), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L,12896L, 12897L, 12898L), class = "data.frame")
I want to match these two dataset by rows and cols. Actually, I did it with
aa <- merge.data.frame(Details, output1, by=c("cell","row"))
but the problem is I have many rows in output1 which has same coordinates. However I only want to get one coordinates for each row in my details output.
Any suggestions?
Thanks in advance.
If I understand the question correctly you need something like this:
aa <- aa[!duplicated(aa[c("row", "cell")]), ]
I am removing not unique combinations of row and cell because I would imagine that cell plays a role in your analysis since you use it in the merge. Otherwise:
aa <- aa[!duplicated(aa["row"]), ]

LPsolve Hybrid constraints

I use the following R code to optimize Soccer lineups for my fantasy sports league. It has been working great up until now, but a new wrinkle has been added into the list of constraints that I would like to resolve.
A lineup consists of 8 players. 1GK, 2D, 2M, 2F, & 1 Util.
When creating the model Matrix, I now have to account for hybrid player positions such as M/F or D/M
In R what is the correct way to add a 1 in the column for M and a 1 in the column for F if a players position is M/F? Is this the correct approach to resolve this or should I be looking at other ideas.
Working Solver code with GK D M F positions accounted for but not D/M or M/F
df <- read.csv("players.csv",encoding = "UTF-8")
mm <- cbind(model.matrix(as.formula("FP~Pos+0"), df))
mm <- cbind(mm, mm, 1, df$Salary, df$Salary, df$FP)
colnames(mm) <- c("D", "F", "GK", "M", "D", "F", "GK", "M", "tot", "salary", "minSal", "FP")
mm <- t(mm)
obj <- df$FP
dir <- c("<=", "<=", "<=", "<=", ">=", ">=", ">=", ">=", "==", "<=", ">=", "<=")
x <- 20000
vals <- c()
ptm <- proc.time()
for(i in 1:5){
rhs <- c(3, 3, 1, 3, 2, 2, 1, 2, 8, 50000, 49500, x)
lp <- lp(direction = 'max',
objective.in = obj,
all.bin = T,
const.rhs = rhs,
const.dir = dir,
const.mat = mm)
vals <- c(vals, lp$objval)
x <- lp$objval - 0.00001
df$selected <- lp$solution
lineup <- df[df$selected == 1, ]
lineup = subset(lineup, select = -c(selected))
lineup <- lineup %>%
arrange(Pos)
print("---- Start ----")
print(i)
print(lineup)
print(sum(lineup$FP))
print(mean(lineup$own, na.rm = TRUE))
print(sum(lineup$Salary))
print(sum(lineup$S))
print("---- END ----")
}
proc.time() - ptm
Here is a sample pool of approx 100 players with a few hybrid players included.
structure(list(Name = structure(c(104L, 105L, 92L, 16L, 84L,
53L, 85L, 37L, 21L, 34L, 100L, 101L, 83L, 31L, 14L, 35L, 98L,
59L, 60L, 5L, 6L, 78L, 57L, 89L, 26L, 17L, 74L, 63L, 33L, 71L,
75L, 41L, 9L, 39L, 12L, 1L, 29L, 7L, 2L, 68L, 73L, 90L, 46L,
72L, 79L, 50L, 88L, 20L, 97L, 64L, 67L, 3L, 94L, 4L, 22L, 103L,
52L, 47L, 30L, 58L, 10L, 44L, 28L, 38L, 23L, 15L, 49L, 69L, 81L,
43L, 99L, 93L, 32L, 56L, 82L, 91L, 62L, 36L, 70L, 48L, 11L, 77L,
27L, 51L, 25L, 24L, 65L, 96L, 42L, 18L, 102L, 86L, 76L, 87L,
45L, 61L, 40L, 95L, 8L, 55L, 13L, 66L, 80L, 19L, 54L), .Label = c(" Bojan",
" Oscar", " Willian", "Aaron Ramsey", "Abel Hernandez", "Adam Smith",
"Adama Diomande", "Adlene Guedioura", "Adnan Januzaj", "Ahmed Elmohamady",
"Alex Iwobi", "Alex Oxlade-Chamberlain", "Alexis Sanchez", "Andre Gray",
"Andrew Robertson", "Andros Townsend", "Anthony Martial", "Antonio Valencia",
"Ben Mee", "Branislav Ivanovic", "Calum Chambers", "Cedric Soares",
"Cesc Fabregas", "Charlie Daniels", "Christian Fuchs", "Curtis Davies",
"Daley Blind", "Daniel Drinkwater", "David de Gea", "Demarai Gray",
"Diego Costa", "Donald Love", "Dusan Tadic", "Eden Hazard", "Eldin Jakupovic",
"Erik Pieters", "Etienne Capoue", "Fernando Llorente", "Gareth Barry",
"Glenn Whelan", "Gylfi Sigurdsson", "Hector Bellerin", "Idrissa Gueye",
"Jack Cork", "Jack Rodwell", "Jason Puncheon", "Jefferson Montero",
"Jeremain Lens", "Jeremy Pied", "Jermain Defoe", "Joe Allen",
"Joel Ward", "John Obi Mikel", "Jordi Amat", "Jordon Ibe", "Joshua King",
"Juan Mata", "Kasper Schmeichel", "Kevin Mirallas", "Kyle Naughton",
"Laurent Koscielny", "Leighton Baines", "Leroy Fer", "Lukasz Fabianski",
"Maarten Stekelenburg", "Marc Albrighton", "Mason Holgate", "Matt Targett",
"Matthew Lowton", "Max Gradel", "Michy Batshuayi", "Modou Barrow",
"Nacho Monreal", "Nathan Redmond", "Nordin Amrabat", "Pape Souare",
"Papy Djilobodji", "Patrick van Aanholt", "Paul Pogba", "Phil Bardsley",
"Pierre-Emile HĂžjbjerg", "Ramiro Funes Mori", "Riyad Mahrez",
"Robert Snodgrass", "Ross Barkley", "Ryan Fraser", "Sam Clucas",
"Sam Vokes", "Santiago Cazorla", "Serge Gnabry", "Shane Long",
"Shaun Maloney", "Simon Francis", "Stephen Kingsley", "Stephen Ward",
"Steven Davis", "Steven Defour", "Theo Walcott", "Thibaut Courtois",
"Tom Heaton", "Wayne Rooney", "Wayne Routledge", "Wilfried Zaha",
"Xherdan Shaqiri", "Zlatan Ibrahimovic"), class = "factor"),
Salary = c(7000L, 9600L, 5700L, 7100L, 6500L, 3200L, 7800L,
4200L, 3300L, 8600L, 4200L, 7900L, 9900L, 8700L, 7700L, 4300L,
6700L, 5600L, 3700L, 6600L, 4700L, 5700L, 6600L, 7200L, 3500L,
7300L, 5900L, 4300L, 7700L, 7100L, 4000L, 9100L, 7400L, 4000L,
5800L, 5700L, 5600L, 6300L, 6800L, 4500L, 5100L, 3400L, 5700L,
5100L, 8000L, 7800L, 7000L, 5100L, 4900L, 4500L, 3300L, 8300L,
3200L, 6600L, 4900L, 6300L, 4400L, 4200L, 4800L, 5200L, 5200L,
4500L, 4300L, 7100L, 6500L, 4100L, 3000L, 3800L, 4700L, 4600L,
5800L, 4600L, 4200L, 6100L, 3500L, 6800L, 5800L, 4800L, 7300L,
5000L, 5000L, 3300L, 4200L, 3900L, 6100L, 5500L, 5400L, 4700L,
4700L, 4600L, 4400L, 3400L, 4300L, 4900L, 4600L, 4000L, 3500L,
3600L, 3300L, 4800L, 9300L, 7900L, 3700L, 3400L, 2800L),
Position = structure(c(5L, 3L, 2L, 5L, 5L, 5L, 5L, 5L, 1L,
6L, 4L, 3L, 6L, 3L, 3L, 4L, 6L, 6L, 1L, 3L, 1L, 1L, 5L, 5L,
1L, 6L, 6L, 5L, 5L, 3L, 6L, 5L, 5L, 5L, 6L, 6L, 4L, 3L, 5L,
1L, 2L, 5L, 5L, 6L, 5L, 3L, 3L, 2L, 5L, 4L, 1L, 5L, 1L, 5L,
1L, 6L, 1L, 6L, 6L, 4L, 1L, 5L, 5L, 3L, 5L, 1L, 1L, 1L, 5L,
5L, 4L, 1L, 1L, 3L, 1L, 3L, 2L, 1L, 6L, 3L, 6L, 1L, 1L, 5L,
1L, 2L, 4L, 5L, 1L, 1L, 5L, 5L, 1L, 5L, 5L, 1L, 5L, 1L, 5L,
6L, 6L, 5L, 1L, 1L, 1L), .Label = c("D", "D/M", "F", "GK",
"M", "M/F"), class = "factor"), FP = c(23.5, 21.75, 21, 19.75,
17.5, 17.333, 16.625, 16.5, 16.5, 16.25, 16, 15.25, 14.875,
14.25, 13.75, 13.5, 13.375, 13.25, 12.875, 12.75, 12.75,
12.5, 12.375, 12, 11.75, 11.625, 11.375, 11, 10.875, 10.625,
10.5, 10.375, 10.125, 10, 9.625, 9.625, 9.5, 9.25, 9.125,
9.125, 9, 9, 8.875, 8.875, 8.75, 8.75, 8.5, 8.5, 8.5, 8.5,
8.5, 8.25, 8.25, 8, 8, 7.875, 7.875, 7.875, 7.75, 7.5, 7.5,
7.5, 7.5, 7.25, 7.25, 7.125, 7, 6.875, 6.625, 6.625, 6.5,
6.5, 6.5, 6.25, 6.25, 6.125, 6.125, 6.125, 6, 6, 6, 6, 5.875,
5.875, 5.75, 5.75, 5.75, 5.75, 5.75, 5.75, 5.75, 5.75, 5.625,
5.5, 5.5, 5.5, 5.5, 5.375, 5.375, 5.25, 5.125, 5, 5, 5, 5
), teamAbbrev = structure(c(11L, 9L, 7L, 5L, 7L, 4L, 6L,
14L, 1L, 4L, 3L, 9L, 8L, 4L, 3L, 7L, 1L, 6L, 13L, 7L, 2L,
12L, 9L, 1L, 7L, 9L, 10L, 13L, 10L, 4L, 14L, 13L, 12L, 6L,
1L, 11L, 9L, 7L, 4L, 10L, 1L, 1L, 5L, 13L, 9L, 12L, 3L, 4L,
3L, 13L, 6L, 4L, 13L, 1L, 10L, 5L, 5L, 13L, 8L, 8L, 7L, 13L,
8L, 13L, 4L, 7L, 10L, 3L, 10L, 6L, 4L, 2L, 12L, 2L, 6L, 10L,
6L, 11L, 2L, 12L, 1L, 12L, 9L, 11L, 8L, 2L, 6L, 10L, 1L,
9L, 13L, 2L, 5L, 7L, 12L, 1L, 11L, 3L, 14L, 2L, 1L, 8L, 11L,
3L, 13L), .Label = c("ARS", "BOU", "BUR", "CHE", "CRY", "EVE",
"HUL", "LEI", "MU", "SOU", "STK", "SUN", "SWA", "WAT"), class = "factor")), .Names = c("Name",
"Salary", "Position", "FP", "teamAbbrev"), class = "data.frame", row.names = c(NA,
-105L))
By using an empty matrix and filling the rows with the correct values for each position I was able to get this to work.
#### SOLVER ##### ----
mm <- matrix(0, nrow = 8, ncol = nrow(df))
# Goal Keeper
j<-1
i<-1
for (i in 1:nrow(df)){
if (df$Pos[i]=="GK")
mm[j,i]<-1
}
# Defender
j<-2
i<-1
for (i in 1:nrow(df)){
if (df$Pos[i]=="D")
mm[j,i]<-1
}
# Midfielder
j<-3
i<-1
for (i in 1:nrow(df)){
if (df$Pos[i]=="M" ||
df$Pos[i]=="M/F")
mm[j,i]<-1
}
# Forward
j<-4
i<-1
for (i in 1:nrow(df)){
if (df$Pos[i]=="F" ||
df$Pos[i]=="M/F")
mm[j,i]<-1
}
# Utility
j<-5
i<-1
for (i in 1:nrow(df)){
if (!df$Pos[i]=="GK")
mm[j,i]<-1
}
# Salary
mm[6, ] <- df$Salary
mm[7, ] <- df$FP
mm[8, ] <- 1
# rbind existing matrix to itself to set minimum constraints
mm <- rbind(mm, mm[1:5,])
i<-1
objective.in <- df$FP
const.mat <- mm
const.dir <- c("<=", "<=", "<=", "<=", "<=", "<=", "<=", "==",
">=", ">=", ">=", ">=", ">=")
x <- 20000
vals <- c()
for(i in 1:5){
const.rhs <- c(1, 4, 4, 4, 7, 50000, x, 8, # max for each contraint
1, 2, 2, 2, 7) # min for each constraint
sol <- lp(direction = "max", objective.in, # maximize objective function
const.mat, const.dir, const.rhs, # constraints
all.bin = TRUE)
vals <- c(vals, sol$objval)
x <- sol$objval - 0.00001
inds <- which(sol$solution == 1)
sum(df$salary[inds])
solution<-df[inds, ]
solution <- solution[,-c(8)]
solution <- solution %>%
arrange(Pos)
print("---- Start ----")
print(i)
print(solution)
print(sum(solution$FP))
print(sum(solution$Salary))
print(sum(solution$S))
print("---- END ----")
}

R - Lattice xyplot - How do you add error bars to groups and summary lines?

I'm posting this question because the very similar question here has not been answered until now.
I have been asked to plot the mean +/- SEM of my whole cohort of patients over the xyplot() that depicts the values of all patients. The data used represents intraoperative cardiovascular findings from patients undergoing surgery.
This is my data.frame called df
dput(df)
structure(list(Name = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L), .Label = c("DE", "JS", "KG", "MK", "TG", "WT"), class = "factor"),
Time = structure(c(1L, 2L, 3L, 4L, 5L, 7L, 8L, 1L, 2L, 3L,
4L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 7L, 8L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 2L, 3L, 4L, 5L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L), .Label = c("T1", "T2", "T3", "T4", "T5", "T6", "T7",
"T8"), class = "factor"), Dobut = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L), .Label = c("No", "Yes"
), class = "factor"), DobutDose = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
4L, 6L, 8L, 8L, 8L, 8L, 8L, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 5L, 5L, NA), CI = c(1.4, 2.3, 1.3, 1.8, 2.1,
2, 2.1, 2.1, 2.3, 1.9, 1.6, 2, 2.4, 2.7, 2.6, 2.7, 2.6, 2.3,
2.4, 2.6, 0.9, 2.5, 2.1, 1.6, 1.5, 1.8, 2, 2, 1.9, 2.1, 2.3,
2, 2.4, 2.3, 2.6, 2.4, 2, 2.2, 1.6, 2.1, 2.5, 2.8), SvO2 = c(57L,
65L, 47L, 45L, 51L, 60L, 56L, 70L, 85L, 75L, 79L, 82L, 73L,
77L, 78L, 73L, 71L, 73L, 80L, 74L, 41L, 66L, 51L, 51L, 49L,
54L, 68L, 48L, 80L, 70L, 71L, 69L, 74L, 79L, 77L, 77L, 75L,
74L, 70L, 79L, 80L, 79L), SVRI = c(4000L, 1983L, 4000L, 2444L,
1981L, 2120L, 2514L, 2971L, 2157L, 3747L, 4300L, 3200L, 2867L,
1778L, 1169L, 1215L, 1262L, 1461L, 1600L, 1692L, 4978L, 1760L,
2019L, 2650L, 2827L, 2356L, 1800L, 2840L, 2063L, 2248L, 1948L,
2160L, 1733L, 2296L, 2677L, 2100L, 2640L, 2655L, 3950L, 2210L,
2848L, 2543L), MAP = c(80L, 65L, 86L, 74L, 67L, 65L, 74L,
90L, 70L, 90L, 96L, 94L, 100L, 82L, 60L, 61L, 62L, 62L, 69L,
71L, 70L, 71L, 77L, 73L, 75L, 77L, 61L, 85L, 65L, 74L, 70L,
67L, 69L, 74L, 92L, 71L, 88L, 93L, 89L, 79L, 97L, 97L), CVP = c(10L,
8L, 21L, 19L, 15L, 12L, 8L, 12L, 8L, 11L, 10L, 14L, 14L,
22L, 22L, 20L, 21L, 20L, 21L, 16L, 14L, 16L, 24L, 20L, 22L,
24L, 16L, 14L, 16L, 15L, 14L, 13L, 17L, 8L, 5L, 8L, 22L,
20L, 20L, 21L, 8L, 8L), PAP = c(23L, 22L, 36L, 36L, 34L,
32L, 22L, 33L, 28L, 36L, 36L, 40L, 37L, 37L, 40L, 35L, 35L,
34L, 38L, 36L, 45L, 43L, 55L, 49L, 52L, 54L, 43L, 47L, 27L,
25L, 23L, 22L, 28L, 21L, 20L, 25L, 33L, 33L, 38L, 35L, 33L,
29L), PCWP = c(15L, 11L, 28L, 26L, 23L, 21L, 11L, 26L, NA,
NA, 25L, 25L, NA, 27L, NA, NA, NA, NA, NA, NA, 30L, NA, NA,
NA, NA, NA, NA, NA, 19L, NA, NA, NA, NA, NA, 16L, NA, NA,
NA, NA, NA, NA, NA)), .Names = c("Name", "Time", "Dobut",
"DobutDose", "CI", "SvO2", "SVRI", "MAP", "CVP", "PAP", "PCWP"
), class = "data.frame", row.names = c(NA, -42L))
Now the first xyplot I made for the variable CI looks like this
require(lattice)
xyplot(CI~Time, groups=Name, data=df, ty=c("l", "p"),
+ ,xlab="Measurement Time Point",
ylab=expression("CI"~(l/min/m^"2")), main="Cardiac Index")
Now I was able to add the mean (black line) of the whole cohort, by doing the following
xyplot(CI~Time, groups=Name, data=df, ty=c("l", "p"),
panel = function(x, y, ...) {
panel.xyplot(x, y, ...)
panel.linejoin(x, y, horizontal = FALSE,..., col="black", lty=1, lwd=4)
}
,xlab="Measurement Time Point",
ylab=expression("CI"~(l/min/m^"2")), main="Cardiac Index")
Now I'd like to add +/- SE to the mean as a line above/below the mean, but nowhere can I find how to do this.
What I can do is using the latticeExtra package is add the loess line +/- SE, as below, but that's not the correct mathematical function I'm looking for. I've left the mean line in there to illustrate the difference between the two.
require(latticeExtra)
xyplot(CI~Time, groups=Name, data=df, ty=c("l", "p"),
+ panel = function(x, y, ...) {
+ panel.xyplot(x, y, ...)
+ panel.linejoin(x, y, horizontal = FALSE,..., col="black", lty=1, lwd=4)
+ panel.smoother(x,y,se=TRUE, col.se="grey")
+ }
+ ,xlab="Measurement Time Point",
ylab=expression("CI"~(l/min/m^"2")), main="Cardiac Index")
I have performed an extensive search through SO and the internet, but I haven't been able to find the right function to do this.
Help is very much appreciated! Thanks.
You could create your own panel function to plot a +/- SD region. For example
#new panel function
panel.se <- function(x, y, col.se=plot.line$col, alpha.se=.25, ...) {
plot.line <- trellis.par.get("plot.line")
xs <- if(is.factor(x)) {
factor(c(levels(x) , rev(levels(x))), levels=levels(x))
} else {
xx <- sort(unique(x))
c(xx, rev(xx))
}
means <- tapply(y,x, mean, na.rm=T)
vars <- tapply(y,x, var, na.rm=T)
Ns <- tapply(!is.na(y),x, sum)
ses <- sqrt(vars/Ns)
panel.polygon(xs, c(means+ses, rev(means-ses)), col=col.se, alpha=alpha.se)
}
and then you can use it like
#include new panel function
xyplot(CI~Time, groups=Name, data=df, ty=c("l", "p"),
panel = function(x, y, ...) {
panel.se(x,y, col.se="grey")
panel.xyplot(x, y, ...)
panel.linejoin(x, y, horizontal = FALSE,..., col="black", lty=1, lwd=4)
}
,xlab="Measurement Time Point",
ylab=expression("CI"~(l/min/m^"2")), main="Cardiac Index")
which results in

Resources