change color data points plotLearnerPrediction (MLR package) - r

I have produced some nice plots with the plotLearnerPrediction function of the MLR package. I was able to make some adjustments to the returned ggplot (see my code below). But I am not sure how to make the last adjustment. Namely, I want to change the coloring of the data points based on labels (groups in example plot).
My last plot (with black data points)
Another produced plot (overlapping data points)
This is the last version of my code (normally part of a for loop):
plot <- plotLearnerPrediction(learner = learner_name, task = tasks[[i]], cv = 0,
pointsize = 1.5, gridsize = 500) +
ggtitle(trimws(sprintf("Predictions %s %s", meta$name[i], meta$nr[i])),
subtitle = sprintf("DR = %s, ML = %s, CV = LOO, ACC = %.2f", meta$type[i],
toupper(strsplit(learner_name, "classif.")[[1]][2]), acc[[i]])) +
xlab(sprintf("%s 1", lab)) +
ylab(sprintf("%s 2", lab)) +
scale_fill_manual(values = colors) +
theme(plot.title = element_text(size = 18, face = "bold"),
plot.subtitle = element_text(size = 12, face = "bold", colour = "grey40"),
axis.text.x = element_text(vjust = 0.5, hjust = 1),
axis.text = element_text(size = 14, face = "bold"),
axis.title.x = element_text(vjust = 0.5),
axis.title = element_text(size = 16, face = "bold"),
#panel.grid.minor = element_line(colour = "grey80"),
axis.line.x = element_line(color = "black", size = 1),
axis.line.y = element_line(color = "black", size = 1),
panel.grid.major = element_line(colour = "grey80"),
panel.background = element_rect(fill = "white"),
legend.justification = "top",
legend.margin = margin(l = 0),
legend.title = element_blank(),
legend.text = element_text(size = 14))
Below is a part of the source code of the plotLearnerPrediction function. I want to overrule geom_point(colour = "black"). Adding simply geom_point(colour = "pink") to my code will not color data points, but the whole plot. Is there a solution to overrule that code with a vector of colors? Possibly a change in the aes() is also needed to change colors based on groups.
else if (taskdim == 2L) {
p = ggplot(mapping = aes_string(x = x1n, y = x2n))
p = p + geom_tile(data = grid, mapping = aes_string(fill = target))
p = p + scale_fill_gradient2(low = bg.cols[1L], mid = bg.cols[2L],
high = bg.cols[3L], space = "Lab")
p = p + geom_point(data = data, mapping = aes_string(x = x1n,
y = x2n, colour = target), size = pointsize)
p = p + geom_point(data = data, mapping = aes_string(x = x1n,
y = x2n), size = pointsize, colour = "black",
shape = 1)
p = p + scale_colour_gradient2(low = bg.cols[1L],
mid = bg.cols[2L], high = bg.cols[3L], space = "Lab")
p = p + guides(colour = FALSE)
}

You can always hack into gg objects. The following works for ggplot2 2.2.1 and adds a manual alpha value to all geom_point layers.
library(mlr)
library(ggplot2)
g = plotLearnerPrediction(makeLearner("classif.qda"), iris.task)
ids.geom.point = which(sapply(g$layers, function(z) class(z$geom)[[1]]) == "GeomPoint")
for(i in ids.geom.point) {
g$layers[[i]]$aes_params$alpha = 0.1
}
g

The plotLearnerPrediction() function returns the ggplot plot object, which allows for some level of customization without having to modify the source code. In your particular case, you can use scale_fill_manual() to set custom fill colors:
library(mlr)
g = plotLearnerPrediction(makeLearner("classif.randomForest"), iris.task)
g + scale_fill_manual(values = c("yellow", "orange", "red"))

Related

Change tile size for ggplot2 heatmaps

I'm making a heatmap in R using ggplot2 and I want to change the size of my tiles because the text in my heatmap doesn't fit in the cells. Below is my data and attempted plots.
library("ggplot2")
s = sprintf("dataset number %s", 1:9)
vars = sprintf("many many words here, here and here %s", 1:6)
data = data.frame(s = rep(s, 6), stringsAsFactors = FALSE)
data$variable = rep(vars, rep.int(9, 6))
data$variable = as.factor(data$variable)
data$value = rep.int(-1000, 54)
data$font = "plain"
for(v in (unique(data$s))) {
ids = which(data$s %in% "dataset number 1")
vals = data[ids, ]
row = rownames(vals[vals$value %in% max(vals$value, na.rm = TRUE), ])
data[row, ]$font = "bold"
}
I'm making a heatmap like this
title = "Heatmap"
cbbPalette = list(grey = "#999999", black = "#000000", orange = "#E69F00", sky = "#56B4E9", green = "#009E73", yellow = "#F0E442", blue = "#0072B2", darko = "#D55E00",
pink = "#CC79A7")
pdf(sprintf("./heatmap.pdf"))
heatmap = ggplot(data = data, aes(x = variable, y = s, fill = value)) +
geom_tile(color = "black") +
scale_fill_gradient2(low = cbbPalette$pink, high = cbbPalette$green, mid = cbbPalette$grey,
midpoint = 0, limit = c(-100,100), space = "Lab",
name=title) +
scale_x_discrete(limits = levels(data$variable)) +
geom_vline(xintercept = 6 - 0.5, color = "white", size = 1) +
geom_vline(xintercept = 2 + 0.5, color = "white", size = 1) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 16, hjust = 1), legend.title = element_text(size = 18),
axis.text.y = element_text(size = 16)) +
coord_fixed()
#add numbers to cells
heatmap = heatmap + geom_text(aes(x = variable, y = s, label = value, fontface = font), color = cbbPalette$black, size = 3) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
legend.justification = c(0.5, 0),
legend.direction = "horizontal",
legend.position = "top") +
guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
title.position = "top", title.hjust = 0.5))
# Print the heatmap
print(heatmap)
dev.off()
And I get the following result -- heatmap
Other SO posts suggested to change width and height within geom_tile, but I get the following result:
heatmap = ggplot(data = data, aes(x = variable, y = s, fill = value)) +
geom_tile(color = "black", aes(width = 2L)) + ...
wrong_heatmap
I also tried changing height in geom_tile as well as coord_fixed(ratio = 1L) but without any success.
Also, I know that I can simply change the size of the text and numbers will fit, but I need to keep the font size the same. I can also shorten the labels but assume this is not an option.
I would appreciate any help with this.

R : object in custom function is not found for specific usage

I face a problem with R (and its well known rigor in dealing with objects...) : I wrote a custom function to plot dotplots from ggplots and add a custom p value with brackets. The problem is not the plot (it works well...) but dealing with objects and functions : I cannot use an object created inside the function. Some code to explain :
pval.label <- function(p)
{
if (p < 0.001) { return("p<0.001") }
else { return(paste0("p = ", round(p, digits = 2))) }
}
dot_plot <- function(data, var, endpoint, title, ylab)
{
endpoint = deparse(substitute(endpoint))
var = deparse(substitute(var))
df = data.frame(var = data[[var]], endpoint = data[[endpoint]])
pvalue = compare_means(data = df, formula = var ~ endpoint)
pvalue <- pvalue %>% mutate(y.position = 120*(max(df$var)/100))
plot =
ggplot(df, aes(x=endpoint, y=var)) +
labs(title = title, y = ylab, x="") +
geom_dotplot(binaxis='y', stackdir='center', stackratio=1.7, binwidth = .8, show.legend = F,
aes(fill = endpoint, color = endpoint)) +
stat_summary(fun=median, geom="crossbar", lwd=.5, width=.7, col="black", show.legend=F) +
scale_fill_manual(values=c("#2d419b", "#ee2025")) +
scale_color_manual(values=c("#2d419b", "#ee2025")) +
theme_classic() +
theme(text = element_text(family = "sans", face = "bold"),
plot.title = element_text(size = 14, hjust = .5, lineheight = 1.5),
axis.text = element_text(size = 12, face = "bold", colour = "black"),
axis.title = element_text(size = 14, face = "bold", colour = "black"),
axis.line = element_line(size = 1),
axis.ticks = element_line(size = 1), axis.ticks.length = unit(5, "pt")) +
stat_pvalue_manual(pvalue, label = "{pval.label(pvalue$p)}", bracket.size = 1, size = 4)
plot
}
The problem is with this line :
stat_pvalue_manual(pvalue, label = "{pval.label(pvalue$p)}", bracket.size = 1, size = 4)
It says the object "pvalue" is not found. But, if I create this object outside the function and have it in R environment, it works well !!
How to explain that ? And how to deal with it ?
Thanks,
Olivier

ggMarginal ignores choord_cartesian. How to change marginal scales?

I'm trying to plot a 2D density plot with ggplot, with added marginal histograms. Problem is that the polygon rendering is stupid and needs to be given extra padding to render values outside your axis limits (e.g. in this case I set limits between 0 and 1, because values outside this range have no physical meaning). I still want the density estimate though, because often it's much cleaner than a blocky 2D heatmap.
Is there a way around this problem, besides scrapping ggMarginal entirely and spending another 50 lines of code trying to align histograms?
Unsightly lines:
Now rendering works, but ggMarginal ignores choord_cartesian(), which demolishes the plot:
Data here:
http://pasted.co/b581605a
dataset <- read.csv("~/Desktop/dataset.csv")
library(ggplot2)
library(ggthemes)
library(ggExtra)
plot_center <- ggplot(data = dataset, aes(x = E,
y = S)) +
stat_density2d(aes(fill=..level..),
bins= 8,
geom="polygon",
col = "black",
alpha = 0.5) +
scale_fill_continuous(low = "yellow",
high = "red") +
scale_x_continuous(limits = c(-1,2)) + # Render padding for polygon
scale_y_continuous(limits = c(-1,2)) + #
coord_cartesian(ylim = c(0, 1),
xlim = c(0, 1)) +
theme_tufte(base_size = 15, base_family = "Roboto") +
theme(axis.text = element_text(color = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
legend.text = element_text(size = 12, family = "Roboto"),
legend.title = element_blank(),
legend.position = "none")
ggMarginal(plot_center,
type = "histogram",
col = "black",
fill = "orange",
margins = "both")
You can solve this problem by using xlim() and ylim() instead of coord_cartesian.
dataset <- read.csv("~/Desktop/dataset.csv")
library(ggplot2)
library(ggthemes)
library(ggExtra)
plot_center <- ggplot(data = dataset, aes(x = E,
y = S)) +
stat_density2d(aes(fill=..level..),
bins= 8,
geom="polygon",
col = "black",
alpha = 0.5) +
scale_fill_continuous(low = "yellow",
high = "red") +
scale_x_continuous(limits = c(-1,2)) + # Render padding for polygon
scale_y_continuous(limits = c(-1,2)) + #
xlim(c(0,1)) +
ylim(c(0,1)) +
theme_tufte(base_size = 15, base_family = "Roboto") +
theme(axis.text = element_text(color = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
legend.text = element_text(size = 12, family = "Roboto"),
legend.title = element_blank(),
legend.position = "none")
ggMarginal(plot_center,
type = "histogram",
col = "black",
fill = "orange",
margins = "both")

How to highlight certain portion of an image?

I am trying to process a tissue image in R and I have been able to highlight certain cells with black color and rest background is white in color. My question is very similar to this (being a newbie I am unable to post picture)
How to circle/highlight/rectangle certain regions of an image?
Just that I need to know how will this be possible in R. I have tried many functions and searched for many techniques but no result yet. Here is the code and the original image
img<-readJPEG("img.jpg")
img<-array(img,c(480,640,3))
imgDm <- dim(img)
imgRGB <- data.frame(
x = rep(1:imgDm[2], each = imgDm[1]),
y = rep(imgDm[1]:1, imgDm[2]),
R = as.vector(img[,,1]),
G = as.vector(img[,,2]),
B = as.vector(img[,,3])
)
library(ggplot2)
plotTheme <- function() {
theme(
panel.background = element_rect(
size = 3,
colour = "black",
fill = "white"),
axis.ticks = element_line(
size = 2),
panel.grid.major = element_line(
colour = "gray80",
linetype = "dotted"),
panel.grid.minor = element_line(
colour = "gray90",
linetype = "dashed"),
axis.title.x = element_text(
size = rel(1.2),
face = "bold"),
axis.title.y = element_text(
size = rel(1.2),
face = "bold"),
plot.title = element_text(
size = 20,
face = "bold",
vjust = 1.5)
)
}
ggplot(data = imgRGB, aes(x = x, y = y)) + geom_point(colour = rgb(imgRGB[c("R", "G", "B")])) +labs(title = "Original Image: Tissue") +xlab("x") +ylab("y") +plotTheme()
kClusters <- 3
kMeans <- kmeans(imgRGB[, c("R", "G", "B")], centers = kClusters)
kColours <- rgb(kMeans$centers[kMeans$cluster,])
kColours[kColours=="#8B8B8B"]<-"#1874CD"
kColours[kColours=="#565656"]<-"#FFFFFF"
kColours[kColours=="#CCCCCC"]<-"#FFFFFF"
ggplot(data = imgRGB, aes(x = x, y = y)) + geom_point(colour = kColours) +labs(title = paste("k-Means Clustering of", kClusters, "Colours")) +xlab("x") +ylab("y") + plotTheme()
Image:
This is the image I have processed until now:

how to get and modify size legend in ggplot2

I am having some trouble displaying the size legend in my plot and changing the name of my size legend.
My data is corp already has a size column which is either of the values 5, 10, 20
I am using ggplot2 I already have a legend for the color
I want to add one for the size and manually change the size labels..
How do I increase the the font of the legend ? It's super tiny (FIN, IND UTIL)
also the 15 for the size shouldnt be there i want to just omit it and display both legends side by side.
p <- ggplot(corp, aes(x=annRisk, y=annRet, color = corp$subsector1, face = "bold"))
p<- p + geom_point(aes(size = corp$Colsize), alpha = 0.55)
p<-p + scale_size(range = c(8, 20))
p<-p + scale_colour_manual("", values = c("UTIL" = "#fdcc8b", "IND" = "#fc8d59", "FIN" = "#d7301f",
"ABS" = "#74a9cf", "CMBS" = "#0570b0", "LA" = "#8c96c6", "SOV"= "#88419d", "SUPRA" = "#b3cde3"))
p<-p+labs(title = "SOME TITLE")
print(p)
p<-p+theme(plot.title = element_text(face = "bold", size = 20))
p<-p+theme(axis.title.x = element_text(size = 20), axis.text.x = element_text(size = 13))
p<-p+theme(axis.title.y = element_text(size = 20), axis.text.y = element_text(size = 13))
p<-p+geom_text(aes(label=ifelse(Colsize>=10,subsector2,"")), size=5,color = "black", face = "bold", hjust=-0.1, vjust = 0.1)
p<-p+scale_x_continuous(labels = percent, name = "Annualized Risk", limits = c(0.05, 0.09))
p<-p+scale_y_continuous(labels = percent, name = "Annualized Return", limits = c(0.04, 0.08))
p<-p+ theme(legend.position = "bottom")
print(p)
Although I can't use your data yet, you can try adding the following code:
p <- p + theme(legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size=14),
legend.box = "horizontal")
p <- p + scale_size_manual(values=c(5,10,20), labels = c("5","10","20"))

Resources