Related
I am trying to run a sem with a random effect in piecewiseSEM. My model runs with no error, and sem.fit() also runs with no error or warnings. However, when I run sem.coefs() I get the following warning:
1: In if (grepl("cbind", deparse(formula(x)))) all.vars(formula(x))[-c(1:2)] else all.vars(formula(x)) :
the condition has length > 1 and only the first element will be used
Any ideas what this warning is about or what it means? Given it's a warning and not an error, the code still runs and give me estimates, but can I trust the estimates?
Thanks!
EDIT
#code:
library(piecewiseSEM)
library(nlme)
avg.forb<-list( lme(nitrogen_variation~nat+impervious+precip.variation,random=~1|site/species,control = lmeControl(opt = "optim"),forb), lme(po4_variation~nat+impervious+precip.variaton,random=~1|site/species,control = lmeControl(opt = "optim"),forb),
lme(nitrogen~nat +impervious+precip.variation,random=~1|site/species,control = lmeControl(opt = "optim"), forb),
lme(po4 ~nat +impervious+precip.variation,random=~1|site/species,control = lmeControl(opt = "optim"),forb), lme(avg.height~nat+impervious+po4+po4_variation+nitrogen+nitrogen_variation+precip.variation+n_i, random=~1|site/species,control =lmeControl(opt="optim"),forb), lme(avg.culms~nat+impervious+po4+po4_variation+nitrogen+nitrogen_variation+precip.variation+n_i,random=~1|site/species,control = lmeControl(opt = "optim"), forb), lme(avg.chloro~nat+impervious+po4+po4_variation+nitrogen+nitrogen_variation+precip.variation+n_i,random=~1|site/species, control =lmeControl(opt="optim"),forb), lme(avg.sla~nat+impervious+po4+po4_variation+nitrogen+nitrogen_variation+precip.variation+n_i,random=~1|site/species, control = lmeControl(opt = "optim"),forb))
sem.fit(avg.forb, conditional=T, forb) #this code gives the above error message
#data subset:
structure(list(site = structure(c(1L, 1L, 1L, 2L, 2L, 3L), .Label = c("Baker", "Cronkelton", "Delaware"), class = "factor"), species = structure(c(1L, 4L, 6L, 2L, 3L, 5L), .Label = c("apocynum cannabinum", "aster ericoides", "aster lanceolatus var. interior", "cirsium arvense", "impatiens capensis", "typha angustifolia"), class = "factor"), n_i = structure(c(2L,
1L, 1L, 2L, 2L, 2L), .Label = c("i", "n"), class = "factor"),nat=structure(c(1L, 1L, 1L, 1L, 1L, 2L), .Label = c("1", "2"), class = "factor"), impervious = structure(c(2L, 2L, 2L, 1L, 1L, 1L), .Label = c("1", "2"), class = "factor"), precip_variation = c(70.24882178, 70.24882178, 70.24882178, 21.92460821, 21.92460821, 18.90115299), po4 = c(-2.203425667,
-2.204119983, -2.20481541, -1.845271793, -1.844967771, -2.417936637), po4_variation = c(0.8011, 0.801, 0.8009, 0.4839, 0.484, 0.5229), nitrogen = c(0.00627, 0.00626, 0.00625, 0.00432, 0.00433, 0.01018), nitrogen_variation = c(0.7739, 0.7738, 0.7737, 0.5435, 0.5436, -0.1251), avg.height = c(99.1, 113.5559506, 191.4111012, 73.72222025, 35.42222025, 59.52222025), avg.culms = c(0.492915384, 0.78612011, 0.884606749, 0.96483549, 0.819543936, 0.831087338), avg.sla = c(179.3510333, 149.0332471, 68.77888941, 334.2177912, 798.7581389, 443.2005556), avg.chloro = c(0.900670513, 0.790832282, 0.965532685, 0.565585484, 1.106203493, 0.970209082)), .Names = c("site", "species", "n_i", "nat", "impervious", "precip_variation", "po4", "po4_variation", "nitrogen", "nitrogen_variation", "avg.height", "avg.culms", "avg.sla", "avg.chloro"), row.names = c(NA, 6L), class = "data.frame")
I have a data looks like the following df
df<- structure(list(V1 = structure(c(5L, 1L, 2L, 3L, 4L), .Label = c("DNAJC11;FGOTG",
"MAPK14", "PPIB", "RBX1", "USP14"), class = "factor"), V2 = structure(c(4L,
3L, 2L, 1L, 1L), .Label = c("", "DNAJC9", "MAPK14", "USP14"), class = "factor"),
V3 = structure(c(3L, 2L, 4L, 5L, 1L), .Label = c("", "DNAJC11;FGOTG",
"GCLC", "GSR", "STIP1"), class = "factor")), .Names = c("V1",
"V2", "V3"), class = "data.frame", row.names = c(NA, -5L))
I want to merge all columns into one and then keep the unique ones
for example the output should look like this
USP14
DNAJC11;FGOTG
MAPK14
PPIB
RBX1
DNAJC9
GCLC
GSR
STIP1
I tried to use meltfunction but I could not figure out how to do this, any comment is appreciated. Thanks
unique(as.vector(as.matrix(df)))
To remove the entries with no characters:
vec<-unique(as.vector(as.matrix(df)))
vec[-which(vec=="")]
or, courtesy #rawr
Filter(nzchar, unique(as.vector(as.matrix(df))))
I have this data frame. It is a very small subset of my actual data frame but the column names everything else is the same:
This is the data frame that has the data:
dput(p)
structure(list(Hostname = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("server101",
"server102", "server103", "server104", "test101", "app101d",
"web25", "web26", "web111", "web11", "web123", "tomcat101", "tomcat103",
"tomcat104"), class = "factor"), Date = structure(c(1441373431,
1441372531, 1441737938, 1441337431, 1441374331, 1441367131), class = c("POSIXct",
"POSIXt"), tzone = ""), Cpubusy = c(22, 21, 20, 28, 22, 20),
UsedPercentMemory = c(3L, 3L, 21L, 3L, 3L, 4L), App = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("WEB", "DB"), class = "factor"),
HA = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("server3456",
"backup101", "ha123", "No HA Host", "No HA Host", "server120",
"server234", "server666", "No HA Host"), class = "factor"),
DR = structure(c(5L, 5L, 5L, 5L, 5L, 5L), .Label = c("Cannot login to DR Host",
"dr101", "dr345", "dr444", "dr5678", "No DR Host", "drserver11",
"dr666", "No HA Host", "No HA Host", "No HA Host"), class = "factor")), .Names = c("Hostname",
"Date", "Cpubusy", "UsedPercentMemory", "App", "HA", "DR"), row.names = c(NA,
6L), class = "data.frame")
This is my applition server mapping data frame:
dput(c)
structure(list(App = structure(c(1L, 2L, 2L, 2L, 2L, 1L), .Label = c("WEB",
"APP"), class = "factor"), Prod = structure(c(1L, 5L, 6L, 4L,
3L, 2L), .Label = c("server101", "server102", "server104", "server106",
"server107", "server109"), class = "factor"), HA = structure(c(1L,
3L, 3L, 3L, 3L, 2L), .Label = c("server3456", "server3456", "No HA Host"
), class = "factor"), DR = structure(c(1L, 2L, 2L, 2L, 2L, 3L
), .Label = c("dr5678", "No DR Host", "No DR Host"), class = "factor")), .Names = c("App",
"Prod", "HA", "DR"), row.names = c(NA, 6L), class = "data.frame")
This is just a small subset of data. I am trying to run knitr to export my analysis to pdf file as follows:
application<-unique(c$App, drop=TRUE)
application<-droplevels(application)
metrics<-unique(colnames(pp[,c(3:4)]))
env<-unique(colnames(c[,c(2:4)]))
I have this chunk in knitr to create charts in each section:
```{r qplot,fig.width=10, fig.height=8, message=FALSE, results = 'asis', echo=FALSE, warning=FALSE, fig.cap="long caption", fig.scap="short"}
library(ggplot2)
library(knitr)
for (product in application){
product<-gsub("\\(", "_",product)
product<-gsub("\\)", "_",product)
for(en in env){
tryCatch({
mergedData <- merge(pp, c, by.x=c("Hostname"),by.y=en)
cat(paste(product, en, " - Environment"))
cat("\n")
for(m in metrics){
p<-ggplot(mergedData,aes(Date, m, group=Hostname, colour=Hostname))+geom_line()+
geom_smooth(method="lm", se=T, colour="blue")+
facet_wrap(~Hostname)+theme_bw()+
ggtitle(m)+
theme(strip.background = element_rect(colour="blue", fill="#b2d8ff"),axis.text.x = element_text(angle = 30,hjust = 1),plot.background = element_rect(size = 1, colour="black",linetype = "solid"))+
scale_fill_brewer(palette="RdYlGn")
print(p)
cat("\n")
}
},error=function(e) {
print(paste(product, "does not have - ", en, "- Environment"))
print("\n")
})
}
}
```
It is taking extremely long time to run this, any ideas how I could optimize it to run in a short time?
Hopefully someone here will be able to help me with a problem that I'm having with a ggplot script I'm trying to get right. The script will be used many times with different data, so it needs to be relatively flexible. I've got it almost where I want it, but I've come across a problem I haven't been able to solve.
The script is for a line graph with labels for each line in the right hand margin. Sometimes the graph is faceted, other times it is not.
The piece I'm having trouble with is that I would like to color code the labels in the right margin as black if there was no significant change over time, green if there was positive change, and red if there was negative change. I've got a script that works to carry this out when I only have a single facet, but as soon as I have multiple facets in the graph, the color coding of the labels gives the following error
Error: Incompatible lengths for set aesthetics:
Below is the script with data with multiple facets. The problem seems to be in the way that I'm specifying color in the geom_text line. If I delete the color call in the geom_text line in the script, then I get the attributes printed in the correct place, just not colored. I'm really at a loss on this one. This is my first post here, so let me know if I've done anything wrong with my post.
WITH MULTIPLE FACETS (DOES NOT WORK)
require(ggplot2)
require(grid)
require(zoo)
require(reshape)
require(reshape2)
require(directlabels)
time.data<-structure(list(Attribute = structure(c(1L, 1L, 2L, 2L, 3L, 3L,
4L, 4L, 5L, 5L, 6L, 6L), .Label = c("Taste 1", "Taste 2", "Taste 3",
"Use 1", "Use 2", "Use 3"), class = "factor"), Attribute.Category = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Nutritional/Usage",
"Taste/Quality"), class = "factor"), Attribute.Order = c(1L,
1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L), Category.Order = c(1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), Color = structure(c(1L,
1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L), .Label = c("#084594",
"#2171B5", "#4292C6", "#6A51A3", "#807DBA", "#9E9AC8"), class = "factor"),
value = c(75L, 78L, 90L, 95L, 82L, 80L, 43L, 40L, 25L, 31L,
84L, 84L), Date2 = structure(c(2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L), .Label = c("1/1/2013", "9/1/2012"), class = "factor")), .Names = c("Attribute",
"Attribute.Category", "Attribute.Order", "Category.Order", "Color",
"value", "Date2"), class = "data.frame", row.names = c(NA, -12L
))
label.data<-structure(list(7:12, Attribute = structure(1:6, .Label = c("Taste 1",
"Taste 2", "Taste 3", "Use 1", "Use 2", "Use 3"), class = "factor"),
Attribute.Category = structure(c(2L, 2L, 2L, 1L, 1L, 1L), .Label = c("Nutritional/Usage",
"Taste/Quality"), class = "factor"), Attribute.Order = 1:6,
Category.Order = c(1L, 1L, 1L, 2L, 2L, 2L), Color = structure(1:6, .Label = c("#084594",
"#2171B5", "#4292C6", "#6A51A3", "#807DBA", "#9E9AC8"), class = "factor"),
Significance = structure(c(2L, 3L, 1L, 1L, 3L, 2L), .Label = c("neg",
"neu", "pos"), class = "factor"), variable = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "1/1/2013", class = "factor"),
value = c(78L, 95L, 80L, 40L, 31L, 84L), Date2 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "2013-01-01", class = "factor"),
label.color = structure(c(1L, 2L, 3L, 3L, 2L, 1L), .Label = c("black",
"forestgreen", "red"), class = "factor")), .Names = c("",
"Attribute", "Attribute.Category", "Attribute.Order", "Category.Order",
"Color", "Significance", "variable", "value", "Date2", "label.color"
), class = "data.frame", row.names = c(NA, -6L))
color.palette<-as.character(unique(time.data$Color))
time.data$Date2<-as.Date(time.data$Date2,format="%m/%d/%Y")
plot<-ggplot()+
geom_line(data=time.data,aes(as.numeric(time.data$Date2),time.data$value,group=time.data$Attribute,color=time.data$Color),size=1)+
geom_text(data=label.data,aes(x=Inf, y=label.data$value, label=paste(" ",label.data$Attribute)),
color=label.data$label.color,
size=4,vjust=0, hjust=0,na.rm=T)+
facet_grid(Attribute.Category~.,space="free")+
theme_bw()+
scale_x_continuous(breaks=as.numeric(unique(time.data$Date2)),labels=format(unique(time.data$Date2),format = "%b %Y"))+
theme(strip.background=element_blank(),
strip.text.y=element_blank(),
legend.text=element_blank(),
legend.title=element_blank(),
plot.margin=unit(c(1,5,1,1),"cm"),
legend.position="none")+
scale_colour_manual(values=color.palette)
gt3 <- ggplot_gtable(ggplot_build(plot))
gt3$layout$clip[gt3$layout$name == "panel"] <- "off"
grid.draw(gt3)
Some problems:
Inside your aesthetic declarations, you should not be referencing the data columns as time.data$Date2, but just as Date2. The data argument specifies where to look for that information (which needs to all be in the same data.frame for a given layer, but, as you take advantage of, can vary layer to layer).
In the geom_text call, color was not inside the aes call; if you are mapping it to data which is in the data.frame, you have to have it inside the aes call. This would throw a different error after fixing the first part because then it would not be able to find label.color anywhere because it would not know to look inside label.data.
Fixing those, then the scale_colour_manual complains that there are 9 colors and you have only supplied 6. That is because there are 6 colors from the lines and 3 from the text. Since you specified these as actual color names, you can just use scale_colour_identity.
Putting this all together:
plot <- ggplot()+
geom_line(data=time.data, aes(as.numeric(Date2), value,
group=Attribute, color=Color),
size=1)+
geom_text(data=label.data, aes(x=Inf, y=value,
label=paste(" ",Attribute),
color=label.color),
size=4,vjust=0, hjust=0)+
facet_grid(Attribute.Category~.,space="free") +
scale_x_continuous(breaks=as.numeric(unique(time.data$Date2)),
labels=format(unique(time.data$Date2),format = "%b %Y")) +
scale_colour_identity() +
theme_bw()+
theme(strip.background=element_blank(),
strip.text.y=element_blank(),
legend.text=element_blank(),
legend.title=element_blank(),
plot.margin=unit(c(1,5,1,1),"cm"),
legend.position="none")
gt3 <- ggplot_gtable(ggplot_build(plot))
gt3$layout$clip[gt3$layout$name == "panel"] <- "off"
grid.draw(gt3)
To get an idea how much you can strip down your example, this is much closer to minimal:
time.data <-
structure(list(Attribute = structure(c(1L, 1L, 2L, 2L, 3L, 3L,
4L, 4L), .Label = c("Taste 1", "Taste 2", "Use 1", "Use 2"), class = "factor"),
Attribute.Category = structure(c(2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L), .Label = c("Nutritional/Usage", "Taste/Quality"), class = "factor"),
Color = c("#084594", "#084594", "#2171B5", "#2171B5", "#6A51A3",
"#6A51A3", "#807DBA", "#807DBA"), value = c(75L, 78L, 90L,
95L, 43L, 40L, 25L, 31L), Date2 = structure(c(15584, 15706,
15584, 15706, 15584, 15706, 15584, 15706), class = "Date")), .Names = c("Attribute",
"Attribute.Category", "Color", "value", "Date2"), row.names = c(NA,
-8L), class = "data.frame")
label.data <-
structure(list(value = c(78L, 95L, 40L, 31L), Attribute = structure(1:4, .Label = c("Taste 1",
"Taste 2", "Use 1", "Use 2"), class = "factor"), label.color = c("black",
"forestgreen", "red", "forestgreen"), Attribute.Category = structure(c(2L,
2L, 1L, 1L), .Label = c("Nutritional/Usage", "Taste/Quality"), class = "factor"),
Date2 = structure(c(15706, 15706, 15706, 15706), class = "Date")), .Names = c("value",
"Attribute", "label.color", "Attribute.Category", "Date2"), row.names = c(NA,
-4L), class = "data.frame")
ggplot() +
geom_line(data = time.data,
aes(x=Date2, y=value, group=Attribute, colour=Color)) +
geom_text(data = label.data,
aes(x=Date2, y=value, label=Attribute, colour=label.color),
hjust = 1) +
facet_grid(Attribute.Category~.) +
scale_colour_identity()
The theme stuff (and the making the labels visible outside the plot) isn't relevant to the question, nor is the x-axis conversions from Date to numeric to handle having Inf. I also trimmed the data to just the needed columns, and reduced categorical variable to only two categories.
I have the following barchart to which I want to add error bars.
library(lattice)
barchart(Change~fTreat,groups=Process,change,
auto.key=list(points=FALSE,rectangles=TRUE),
panel=function(x, y,...){
panel.barchart(x,y,origin = 0,...);
panel.abline(h=0,col="black",...);
}
)
I have tried using the panel.errbars from the memisc package which works great for xyplots, but when I add it to my code it does not respect the groups.
library(memisc)
barchart(cbind(Change,lower,upper)~fTreat,groups=Process,change,
ylab="Pocertage change",
ylim=-115:50,
scales=list(alternating=FALSE,
tick.number=7,
tck=c(-1,0)),
panel=function(x, y,groups,...){
panel.barchart(x,y=change$Change,groups=change$Process,origin = 0,...);
panel.abline(h=0,col="black",...);
panel.errbars(x,y,make.grid="none",ewidth=0.2,type="n",...)
}
)
Any ideas of how to add error bars to my plot either using the panel.errbars or any other function?
The data:
structure(list(Treat = structure(c(3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L), .Label = c("12-380", "12-750", "8-380", "8-750"), class = "factor"),
Process = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("Resp",
"Cal"), class = c("ordered", "factor")), Change = c(-33.05,
-34.74, 20.94, 18.06, 6.85, -28.57, -8.1, -78.72), upper = c(-13.22896628,
-28.61149669, 31.29930461, 27.30173776, 39.73271282, 9.458372948,
13.11035572, -47.03745704), lower = c(-52.86120694, -40.87446411,
10.57421563, 8.822042178, -26.03144161, -66.60447035, -29.30563327,
-110.3973761), fTreat = structure(c(1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L), .Label = c("8-380", "8-750", "12-380", "12-750"), class = c("ordered",
"factor"))), .Names = c("Treat", "Process", "Change", "upper",
"lower", "fTreat"), row.names = c(NA, -8L), class = "data.frame")
Cheers
Here is another answer I was given using lattice.
prepanel=function(y, stderr, subscripts=subscripts, ...){
uy <- as.numeric(y+stderr[subscripts])
ly <- as.numeric(y-stderr[subscripts])
list(ylim=range(y,uy,ly, finite=TRUE))
}
panel.err=function(x, y, subscripts, groups, stderr, box.ratio, ...){
d <- 1/(nlevels(groups)+nlevels(groups)/box.ratio)
g <- (as.numeric(groups[subscripts])-1); g <- (g-median(g))*d
panel.arrows(as.numeric(x)+g,y-stderr[subscripts], as.numeric(x)+g, y+stderr[subscripts],
code=3,angle=90, length=0.025)
}
barchart(Change~fTreat,groups=Process,change,
stderr=change$stderr,
ylab="Pocertage change",
xlab="Treatment",
ylim=-115:50,
auto.key=list(points=FALSE,rectangles=TRUE,columns=2),
scales=list(alternating=FALSE,
tick.number=7,
tck=c(-1,0)),
prepanel=prepanel,
panel=function(x, y, subscripts, groups, stderr, box.ratio, ...){
panel.barchart(x, y, subscripts=subscripts,
groups=groups, box.ratio=box.ratio,origin=0, ...)
panel.abline(h=0,col="black",...)
panel.err(x, y, subscripts=subscripts,
groups=groups, box.ratio=box.ratio,stderr=change$stderr)
}
)
A big thank you to Walmes Marques Zeviani for providing the code
Here is the modified data:
change <- structure(list(Treat = structure(c(3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L), .Label = c("12-380", "12-750", "8-380", "8-750"), class = "factor"),
Process = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("Respiration",
"Calcification"), class = c("ordered", "factor")), Change = c(-33L,
-35L, 21L, 18L, 7L, -29L, -8L, -79L), stderr = c(20L, 6L,
10L, 9L, 33L, 38L, 21L, 32L), fTreat = structure(c(1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L), .Label = c("8-380", "8-750", "12-380",
"12-750"), class = c("ordered", "factor"))), .Names = c("Treat",
"Process", "Change", "stderr", "fTreat"), row.names = c(NA, -8L
), class = "data.frame")
This is not what you're asking for, but the plot is rather easy to make with ggplot2 (in a case that this is an option)
dt <- structure(list(Treat = structure(c(3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L), .Label = c("12-380", "12-750", "8-380", "8-750"), class = "factor"),
Process = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("Resp",
"Cal"), class = c("ordered", "factor")), Change = c(-33.05,
-34.74, 20.94, 18.06, 6.85, -28.57, -8.1, -78.72), upper = c(-13.22896628,
-28.61149669, 31.29930461, 27.30173776, 39.73271282, 9.458372948,
13.11035572, -47.03745704), lower = c(-52.86120694, -40.87446411,
10.57421563, 8.822042178, -26.03144161, -66.60447035, -29.30563327,
-110.3973761), fTreat = structure(c(1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L), .Label = c("8-380", "8-750", "12-380", "12-750"), class = c("ordered",
"factor"))), .Names = c("Treat", "Process", "Change", "upper",
"lower", "fTreat"), row.names = c(NA, -8L), class = "data.frame")
a <- ggplot(dt, aes(y = Change, x = Treat, ymax = upper, ymin = lower))
dodge <- position_dodge(width=0.9)
a + geom_bar(aes(fill = Process), position = dodge) +
geom_errorbar(aes(fill = Process), position = dodge, width = 0.2)