I want create 10 b1's for each value of x1 and x2's in xp and yp lists by optimizing res formula below. However my values are somehow not added to b1.created. I get b1.created = 0 when I check after I run the code.How can I make the code work?
y <- matrix(c(1,2,3,4,2,6,7,8,9,10),ncol = 1)
x1 <- matrix(c(2,4,6,5,10,12,14,16,18,20),ncol =1)
x2 <- matrix(c(1,4,9,16,25,25,48,64,81,99),ncol = 1)
x <- cbind(x1,x2)
created.b1 = 0
normal <- function(b0,y,xp,yp,x1,x2){for (i in xp){
res <- sum((y- (b0 + x1[i]*xp[i] + x2[i]*yp[i]))^2)
optobj <- optimize(normal,c(-10,10),y =y ,xp = xp,yp =yp, x1 = x1,x2 = x2)
created.b1[i] = obtobj$minimum[i]
}
}
I think this does what you want, but please cross-check.
created.b1 <- numeric(length = 10)
for (i in 1:10)
{
opt_obj <- optimise(f = function(b0, y, xp, yp, x1, x2) sum((y - (b0 + (x1 * xp) + (x2 * yp))) ^ 2),
interval = c(-10, 10),
y = y,
xp = xp[i],
yp = yp[i],
x1 = x1,
x2 = x2)
created.b1[i] <- opt_obj$minimum
}
created.b1
I am trying to plot a heatmap with the library pheatmap in R.
I think that by default the branch length is proportional to the "dissimilarity" of the clusters that got merged at this step. I would like to chance that, so it is a fixed value because for my purpose it looks very weird!
If anyone has an idea how I can fix this, I would be very happy.
Here is a sample code
library(pheatmap)
test = matrix(rnorm(6000), 100, 60)
pheatmap(test)
Cheers!
Here is an example of two column groups with high dissimilarity:
library(pheatmap)
test = cbind(matrix(rnorm(3000), 100, 30),
matrix(rnorm(3000)+10, 100, 30))
pheatmap(test)
TIn pheatmapthe dendrogram is plotted by the pheatmap:::draw_dendrogram function
and branch lengths are stored in the h object.
Below I define equal-length branches adding the command
hc$height <- cumsum(rep(1/length(hc$height), length(hc$height)))
as follows:
draw_dendrogram <- function(hc, gaps, horizontal = T) {
# Define equal-length branches
hc$height <- cumsum(rep(1/length(hc$height), length(hc$height)))
h = hc$height/max(hc$height)/1.05
m = hc$merge
o = hc$order
n = length(o)
m[m > 0] = n + m[m > 0]
m[m < 0] = abs(m[m < 0])
dist = matrix(0, nrow = 2 * n - 1, ncol = 2, dimnames = list(NULL,
c("x", "y")))
dist[1:n, 1] = 1/n/2 + (1/n) * (match(1:n, o) - 1)
for (i in 1:nrow(m)) {
dist[n + i, 1] = (dist[m[i, 1], 1] + dist[m[i, 2], 1])/2
dist[n + i, 2] = h[i]
}
draw_connection = function(x1, x2, y1, y2, y) {
res = list(x = c(x1, x1, x2, x2), y = c(y1, y, y, y2))
return(res)
}
x = rep(NA, nrow(m) * 4)
y = rep(NA, nrow(m) * 4)
id = rep(1:nrow(m), rep(4, nrow(m)))
for (i in 1:nrow(m)) {
c = draw_connection(dist[m[i, 1], 1], dist[m[i, 2], 1],
dist[m[i, 1], 2], dist[m[i, 2], 2], h[i])
k = (i - 1) * 4 + 1
x[k:(k + 3)] = c$x
y[k:(k + 3)] = c$y
}
x = pheatmap:::find_coordinates(n, gaps, x * n)$coord
y = unit(y, "npc")
if (!horizontal) {
a = x
x = unit(1, "npc") - y
y = unit(1, "npc") - a
}
res = polylineGrob(x = x, y = y, id = id)
return(res)
}
# Replace the non-exported function `draw_dendrogram` in `pheatmap`:
assignInNamespace(x="draw_dendrogram", value=draw_dendrogram, ns="pheatmap")
pheatmap(test)
The result is:
I have the following function:
fx <- function(x) {
if(x >= 0 && x < 3) {
res <- 0.2;
} else if(x >=3 && x < 5) {
res <- 0.05;
} else if(x >= 5 && x < 6) {
res <- 0.15;
} else if(x >= 7 && x < 10) {
res <- 0.05;
} else {
res <- 0;
}
return(res);
}
How can I plot it's CDF function on the interval [0,10]?
Try
fx <- Vectorize(fx)
grid <- 0:10
p <- fx(grid)
cdf <- cumsum(p)
plot(grid, cdf, type = 'p', ylim = c(0, 1), col = 'steelblue',
xlab = 'x', ylab = expression(F(x)), pch = 19, las = 1)
segments(x0 = grid, x1 = grid + 1, y0 = cdf)
segments(x0 = grid + 1, y0 = c(cdf[-1], 1), y1 = cdf, lty = 2)
To add a bit accuracy to #Martin Schmelzer's answer. A cummulative distribution function(CDF)
evaluated at x, is the probability that X will take a value less than
or equal to x
So to get CDF from Probability Density Function(PDF), you need to integrate on PDF:
fx <- Vectorize(fx)
dx <- 0.01
x <- seq(0, 10, by = dx)
plot(x, cumsum(fx(x) * dx), type = "l", ylab = "cummulative probability", main = "My CDF")
Just adding up on the previous answers and using ggplot
# cdf
Fx <- function(x, dx) {
cumsum(fx(x)*dx)
}
fx <- Vectorize(fx)
dx <- 0.01
x <- seq(0, 10, dx)
df <- rbind(data.frame(x, value=fx(x), func='pdf'),
data.frame(x, value=Fx(x, dx), func='cdf'))
library(ggplot2)
ggplot(df, aes(x, value, col=func)) +
geom_point() + geom_line() + ylim(0, 1)
http://www.texample.net/tikz/examples/lindenmayer-systems/
My sample code shown below, I don't know how to colored with hue color.
plot.koch <- function(k,col="blue"){
plot(0,0,xlim=c(0,1), ylim=c(-sqrt(3)/6,sqrt(3)/2), asp = 1,type="n",xlab="", ylab="")
plotkoch <- function(x1,y1,x2,y2,n){
if (n > 1){
plotkoch(x1,y1,(2*x1+x2)/3,(2*y1+y2)/3,n-1);
plotkoch((2*x1+x2)/3,(2*y1+y2)/3,(x1+x2)/2-(y1-y2)*sqrt(3)/6,(y1+y2)/2-(x2-x1) *sqrt(3)/6,n-1);
plotkoch((x1+x2)/2-(y1-y2)*sqrt(3)/6,(y1+y2)/2-(x2-x1)*sqrt(3)/6,(2*x2+x1)/3,(2 *y2+y1)/3,n-1);
plotkoch((2*x2+x1)/3,(2*y2+y1)/3,x2,y2,n-1)
}
else {
x=c(x1,(2*x1+x2)/3,(x1+x2)/2-(y1-y2)*sqrt(3)/6,(2*x2+x1)/3,x2);
y=c(y1,(2*y1+y2)/3,(y1+y2)/2-(x2-x1)*sqrt(3)/6,(2*y2+y1)/3,y2);
polygon(x,y,type="l",col=col)
}
}
plotkoch(0,0,1,0,k)
plotkoch(0.5,sqrt(3)/2,0,0,k)
plotkoch(1,0,0.5,sqrt(3)/2,k)
}
plot.koch(3, col=3)
Here's a method using spatial objects in R, with sp, rgeos and raster packages in the mix.
Slight modifications to the function to return the x,y coordinates to the user (and in the correct order):
koch <- function(k) {
yy <- xx <- numeric(0)
Koch <- function(x1, y1, x2, y2, n) {
if (n > 1){
Koch(x1, y1, (2*x1+x2)/3, (2*y1+y2)/3, n-1);
Koch((2*x1+x2)/3, (2*y1+y2)/3, (x1+x2)/2-(y1-y2)*sqrt(3)/6,
(y1+y2)/2-(x2-x1) *sqrt(3)/6, n-1);
Koch((x1+x2)/2-(y1-y2)*sqrt(3)/6, (y1+y2)/2-(x2-x1)*sqrt(3)/6,
(2*x2+x1)/3, (2 *y2+y1)/3, n-1);
Koch((2*x2+x1)/3, (2*y2+y1)/3, x2, y2, n-1)
}
else {
x <- c(x1, (2*x1+x2)/3, (x1+x2)/2-(y1-y2)*sqrt(3)/6, (2*x2+x1)/3, x2);
xx <<- c(xx, x)
y <- c(y1, (2*y1+y2)/3, (y1+y2)/2-(x2-x1)*sqrt(3)/6, (2*y2+y1)/3, y2);
yy <<- c(yy, y)
}
}
Koch(0, 0, 1, 0, k)
Koch(1, 0, 0.5, sqrt(3)/2, k)
Koch(0.5, sqrt(3)/2, 0, 0, k)
xy <- data.frame(x=xx, y=yy)
rbind(unique(xy), xy[1, ])
}
Create a colour ramp:
colr <- colorRampPalette(hcl(h=seq(0, 360, len=100), c=100))
Use koch function to get vertices:
xy <- koch(4)
Load spatial packages and create SpatialPolygons object from fractal and plot it once to set up the plot area.
library(sp)
library(rgeos)
library(raster)
poly <- SpatialPolygons(list(Polygons(list(Polygon(xy)), 1)))
plot(poly)
Plot a series of segments with desired origin and large enough radius to cover the fractal polygon (here we use radius r <- 1).
r <- 1
mapply(function(theta, col) {
segments(0.5, 0.3, 0.5 + r*cos(theta), 0.3 + r*sin(theta), lwd=3, col=col)
}, seq(0, 360, length=1000)*pi/180, colr(1000))
Create a second polygon of the difference between the plot area and the fractal polygon, and plot this (with col='white') to mask out the unwanted gradient area.
plot(gDifference(as(extent(par('usr')), 'SpatialPolygons'), poly),
col='white', border='white', add=TRUE)
Plot the polygon once more.
plot(poly, add=TRUE)
Here's my attempt at solving your question. Currently it draws the color also outside of the snowflake. If you can figure out if points are inside or outside the snowflake, you should be able to just remove outside points in the df_fill.
Here I'm first creating the data.frame used for plotting the polygon. Then I'm creating the data.frame for the background color. And finally I'm using ggplot2 to plot the data.
# creating relevant data
data.koch <- function(k){
df <- data.frame(x = 0,
y = 0,
grp = 0)
plotkoch <- function(x1, y1, x2, y2, n, data){
if (n==1) {
x=c(x1,(2*x1+x2)/3,(x1+x2)/2-(y1-y2)*sqrt(3)/6,(2*x2+x1)/3,x2)
y=c(y1,(2*y1+y2)/3,(y1+y2)/2-(x2-x1)*sqrt(3)/6,(2*y2+y1)/3,y2)
df <- rbind(data, data.frame(x, y, grp=max(data$grp)+1))
}
if (n > 1){
df <- plotkoch(x1,y1,(2*x1+x2)/3,(2*y1+y2)/3,n-1, data = data)
df <- plotkoch((2*x1+x2)/3,(2*y1+y2)/3,(x1+x2)/2-(y1-y2)*sqrt(3)/6,(y1+y2)/2-(x2-x1) *sqrt(3)/6,n-1, data=df)
df <- plotkoch((x1+x2)/2-(y1-y2)*sqrt(3)/6,(y1+y2)/2-(x2-x1)*sqrt(3)/6,(2*x2+x1)/3,(2 *y2+y1)/3,n-1, data=df)
df <- plotkoch((2*x2+x1)/3,(2*y2+y1)/3,x2,y2,n-1, data=df)
}
return(df)
}
df <- plotkoch(0,0,1,0,k, data = df)
df <- plotkoch(0.5,sqrt(3)/2,0,0,k, data = df)
df <- plotkoch(1,0,0.5,sqrt(3)/2,k, data = df)
return(df)
}
# plotting functon
plot.koch <- function(k){
stopifnot(require(ggplot2))
if (is.data.frame(k)) df <- k
else df <- data.koch(k)
# filling data (CHANGE HERE TO GET ONLY INSIDE POINTS)
l <- 500
df_fill <- expand.grid(x=seq(0, 1, length=l),
y=seq(-sqrt(3)/6, sqrt(3)/2, length=l))
df_fill[, "z"] <- atan2(-df_fill[, "y"] + sqrt(3)/6, df_fill[, "x"] - 0.5) + pi/2
df_fill[df_fill[, "z"] < 0, "z"] <- df_fill[df_fill[, "z"] < 0, "z"] + 2*pi
# plotting
ggplot(df, aes(x, y, group=grp)) +
geom_raster(data = df_fill,
aes(fill=z, group=NULL),
hjust = 0,
vjust = 0,
linetype='blank') +
geom_path(data=df, size=1) +
scale_fill_gradientn(colours = rainbow(30), guide = 'none') +
scale_x_continuous(name = '', limits = c(0, 1), expand=c(0, 0)) +
scale_y_continuous(name = '', limits = c(-sqrt(3)/6,sqrt(3)/2), expand=c(0, 0)) +
coord_fixed() +
theme_bw() +
theme(axis.line = element_blank(),
panel.grid = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank())
}
#
p <- plot.koch(4)
print(p)
I would do it like this:
for any drawed pixel obtain its position x,y
compute the angle=atan2(y-y0,x-x0)
where x0,y0 is the koch's snowflake mid position
compute the color based on angle
if you use HSV then hue=angle and compute the target color value (I assume RGB). If you want the visible spectra colors you can try mine:
RGB values of visible spectrum
Just convert the angle range angle=<0,2*Pi> [rad] to wavelength l=<400,700> [nm] so:
l = 400.0 + (700.0-400.0)*angle/(2.0*M_PI)
render the pixel
[Notes]
not using R nor Matlab so you need to code it yourself. The angle may need some shifting to match your coordinate system for example:
const angle0=???; // some shift constant [rad]
angle+=angle0; // or angle=angle0-angle; if the direction is oposite
if (angle>=2.0*M_PI) angle-=2.0*M_PI;
if (angle< 0.0) angle+=2.0*M_PI;
If you drawing this as polygon then you need to compute color per vertex not per pixel but then you can get to problems because this is not convex polygon. So how to ensure the mid point color ??? I am afraid you will need to use some sort of triangulation because simple triangle fan will fail ...
The only thing that is obvious is to fill the color for whole space and then draw the outline with black color then flood fill all non black pixels from outside with white color
It's my solution with grid package.
##data
koch <- function(k) {
yy <- xx <- numeric(0)
Koch <- function(x1, y1, x2, y2, n) {
if (n > 1) {
Koch(x1, y1, (2 * x1 + x2)/3, (2 * y1 + y2)/3, n - 1)
Koch((2 * x1 + x2)/3, (2 * y1 + y2)/3, (x1 + x2)/2 - (y1 -
y2) * sqrt(3)/6, (y1 + y2)/2 - (x2 - x1) * sqrt(3)/6,
n - 1)
Koch((x1 + x2)/2 - (y1 - y2) * sqrt(3)/6, (y1 + y2)/2 -
(x2 - x1) * sqrt(3)/6, (2 * x2 + x1)/3, (2 * y2 + y1)/3,
n - 1)
Koch((2 * x2 + x1)/3, (2 * y2 + y1)/3, x2, y2, n - 1)
} else {
x <- c(x1, (2 * x1 + x2)/3, (x1 + x2)/2 - (y1 - y2) * sqrt(3)/6,
(2 * x2 + x1)/3, x2)
xx <<- c(xx, x)
y <- c(y1, (2 * y1 + y2)/3, (y1 + y2)/2 - (x2 - x1) * sqrt(3)/6,
(2 * y2 + y1)/3, y2)
yy <<- c(yy, y)
}
}
Koch(0, 0, 1, 0, k)
Koch(1, 0, 0.5, sqrt(3)/2, k)
Koch(0.5, sqrt(3)/2, 0, 0, k)
xy <- data.frame(x = (xx - min(xx))/(max(xx) - min(xx)), y = (yy -
min(yy))/(max(yy) - min(yy)))
rbind(unique(xy), xy[1, ])
}
xy <- koch(5)
##Plot
library(grid)
grid.newpage()
pushViewport(dataViewport(xy$x, xy$y), plotViewport(c(1, 1, 1, 1)))
for (i in 1:nrow(xy)) {
grid.path(x = c(xy[i, 1], xy[i + 1, 1], mean(xy$x)),
y = c(xy[i, 2], xy[i + 1, 2], mean(xy$y)),
gp = gpar(col = rainbow(nrow(xy))[i],
fill = rainbow(nrow(xy))[i]))
}
For very heavy-tailed data of both positive and negative sign, I sometimes like to see all the data on a plot without hiding structure in the unit interval.
When plotting with Matplotlib in Python, I can achieve this by selecting a symlog scale, which uses a logarithmic transform outside some interval, and linear plotting inside it.
Previously in R I have constructed similar behavior by transforming the data with an arcsinh on a one-off basis. However, tick labels and the like are very tricky to do right (see below).
Now, I am faced with a bunch of data where the subsetting in lattice or ggplot would be highly convenient. I don't want to use Matplotlib because of the subsetting, but I sure am missing symlog!
Edit:
I see that ggplot uses a package called scales, which solves a lot of this problem (if it works). Automatically choosing tick mark and label placing still looks pretty hard to do nicely though. Some combination of log_breaks and cbreaks perhaps?
Edit 2:
The following code is not too bad
sinh.scaled <- function(x,scale=1){ sinh(x)*scale }
asinh.scaled <- function(x,scale=1) { asinh(x/scale) }
asinh_breaks <- function (n = 5, scale = 1, base=10)
{
function(x) {
log_breaks.callable <- log_breaks(n=n,base=base)
rng <- rng <- range(x, na.rm = TRUE)
minx <- floor(rng[1])
maxx <- ceiling(rng[2])
if (maxx == minx)
return(sinh.scaled(minx, scale=scale))
big.vals <- 0
if (minx < (-scale)) {
big.vals = big.vals + 1
}
if (maxx>scale) {
big.vals = big.vals + 1
}
brk <- c()
if (minx < (-scale)) {
rbrk <- log_breaks.callable( c(-min(maxx,-scale), -minx ) )
rbrk <- -rev(rbrk)
brk <- c(brk,rbrk)
}
if ( !(minx>scale | maxx<(-scale)) ) {
rng <- c(max(minx,-scale), min(maxx,scale))
minc <- floor(rng[1])
maxc <- ceiling(rng[2])
by <- floor((maxc - minc)/(n-big.vals)) + 1
cb <- seq(minc, maxc, by = by)
brk <- c(brk,cb)
}
if (maxx>scale) {
brk <- c(brk,log_breaks.callable( c(max(minx,scale), maxx )))
}
brk
}
}
asinh_trans <- function(scale = 1) {
trans <- function(x) asinh.scaled(x, scale)
inv <- function(x) sinh.scaled(x, scale)
trans_new(paste0("asinh-", format(scale)), trans, inv,
asinh_breaks(scale = scale),
domain = c(-Inf, Inf))
}
A solution based on the package scales and inspired by Brian Diggs' post mentioned by #Dennis:
symlog_trans <- function(base = 10, thr = 1, scale = 1){
trans <- function(x)
ifelse(abs(x) < thr, x, sign(x) *
(thr + scale * suppressWarnings(log(sign(x) * x / thr, base))))
inv <- function(x)
ifelse(abs(x) < thr, x, sign(x) *
base^((sign(x) * x - thr) / scale) * thr)
breaks <- function(x){
sgn <- sign(x[which.max(abs(x))])
if(all(abs(x) < thr))
pretty_breaks()(x)
else if(prod(x) >= 0){
if(min(abs(x)) < thr)
sgn * unique(c(pretty_breaks()(c(min(abs(x)), thr)),
log_breaks(base)(c(max(abs(x)), thr))))
else
sgn * log_breaks(base)(sgn * x)
} else {
if(min(abs(x)) < thr)
unique(c(sgn * log_breaks()(c(max(abs(x)), thr)),
pretty_breaks()(c(sgn * thr, x[which.min(abs(x))]))))
else
unique(c(-log_breaks(base)(c(thr, -x[1])),
pretty_breaks()(c(-thr, thr)),
log_breaks(base)(c(thr, x[2]))))
}
}
trans_new(paste("symlog", thr, base, scale, sep = "-"), trans, inv, breaks)
}
I am not sure whether the impact of a parameter scale is the same as in Python, but here are a couple of comparisons (see Python version here):
data <- data.frame(x = seq(-50, 50, 0.01), y = seq(0, 100, 0.01))
data$y2 <- sin(data$x / 3)
# symlogx
ggplot(data, aes(x, y)) + geom_line() + theme_bw() +
scale_x_continuous(trans = symlog_trans())
# symlogy
ggplot(data, aes(y, x)) + geom_line() + theme_bw()
scale_y_continuous(trans="symlog")
# symlog both, threshold = 0.015 for y
# not too pretty because of too many breaks in short interval
ggplot(data, aes(x, y2)) + geom_line() + theme_bw()
scale_y_continuous(trans=symlog_trans(thr = 0.015)) +
scale_x_continuous(trans = "symlog")
# Again symlog both, threshold = 0.15 for y
ggplot(data, aes(x, y2)) + geom_line() + theme_bw()
scale_y_continuous(trans=symlog_trans(thr = 0.15)) +
scale_x_continuous(trans = "symlog")