I have 20 points (X1, Y1, …, Xn, Yn) on a pyramid and a random base point (Xbase, Ybase). I wish to calculate the area of the triangle (Xi, Yi; Xi+1, Yi+1; Xbase, Ybase). I therefore wrote a loop that calculates the area, but I cannot store the result in the data.frame (myDF). Furthermore, is there a more elegant way to calculate the area?
Script:
library(ggplot2)
myDF <- data.frame(area=double())
nElem <- 100
xData <- as.data.frame(seq(1,nElem,5))
yData1 <- seq(5,nElem/2,5)
yData2 <- rev(yData1-4)
yData<- as.data.frame((c(yData1, yData2)))
xyDATA<- cbind(xData,yData)
colnames(xyDATA) <- c("xCoord","yCoord")
Xbase <-runif(1, 90, 91)
Ybase <-runif(1, 1.0, 1.5)
for(i in 1:19)
{
x1 <- Xbase
y1 <- Ybase
x2 <- xyDATA[i,1]
y2 <- xyDATA[i,2]
x3 <- xyDATA[i+1,1]
y3 <- xyDATA[i+1,2]
s <- 0.5*sqrt((x2*x3-x3*y2)^2+(x3*y1-x1*y3)^2+(x1*y2-x2*y1)^2)
myDF[i] <-s
}
P1 <- ggplot(xyDATA) + geom_point(aes(x = xCoord, y = yCoord))
P2 <- P1 + geom_point(aes(x = x1, y = y1),colour="red",size=4)
P2
Thanks a lot.
As written, you are assigning the value of s to an entire column of the data frame. You probably want an area column with one row per triangle, and then assign into a row of that column.
# before the loop, create the column with the right number of rows:
myDF <- data.frame(area = rep(NA_real_, 19))
# Inside the loop
....
myDF[i, "area"] <- s
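For completeness, here is a minimal runnable sketch of that pattern (a sketch only, keeping the same area expression as in the question and assuming xyDATA, Xbase and Ybase have been created as above):
nTri <- nrow(xyDATA) - 1                        # 19 triangles from 20 points
myDF <- data.frame(area = rep(NA_real_, nTri))  # pre-allocate one row per triangle
for (i in seq_len(nTri)) {
  x1 <- Xbase;            y1 <- Ybase
  x2 <- xyDATA[i, 1];     y2 <- xyDATA[i, 2]
  x3 <- xyDATA[i + 1, 1]; y3 <- xyDATA[i + 1, 2]
  s <- 0.5*sqrt((x2*x3-x3*y2)^2+(x3*y1-x1*y3)^2+(x1*y2-x2*y1)^2)
  myDF[i, "area"] <- s                          # assign into the row, not the whole column
}
head(myDF)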
Here is a solution using the dplyr package:
nElem <- 100
xData <- as.data.frame(seq(1,nElem,5))
yData1 <- seq(5,nElem/2,5)
yData2 <- rev(yData1-4)
yData<- as.data.frame((c(yData1, yData2)))
xyDATA<- cbind(xData,yData)
colnames(xyDATA) <- c("xCoord","yCoord")
Xbase <-runif(1, 90, 91)
Ybase <-runif(1, 1.0, 1.5)
library(dplyr)
myDF <- xyDATA %>%
mutate("s" = 0.5*sqrt(
(xCoord*lead(xCoord)-lead(xCoord)*yCoord)^2+
(lead(xCoord)*Ybase-Xbase*lead(yCoord))^2+
(Xbase*yCoord-xCoord*Ybase)^2
))
head(myDF)
xCoord yCoord s
1 1 5 502.2731
2 6 10 807.6995
3 11 15 1118.5987
4 16 20 1431.4092
5 21 25 1745.1034
6 26 30 2059.2776
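For a base-R alternative: the textbook shoelace formula gives the area of a triangle with vertices (x1,y1), (x2,y2), (x3,y3) as 0.5*|x1*(y2-y3) + x2*(y3-y1) + x3*(y1-y2)|. A vectorized sketch, assuming xyDATA, Xbase and Ybase as above (note this standard 2-D formula may give different numbers than the expression used in the question, which was kept as posted):
x2 <- head(xyDATA$xCoord, -1); y2 <- head(xyDATA$yCoord, -1)
x3 <- tail(xyDATA$xCoord, -1); y3 <- tail(xyDATA$yCoord, -1)
area <- 0.5 * abs(Xbase*(y2 - y3) + x2*(y3 - Ybase) + x3*(Ybase - y2))
head(data.frame(area))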
I'm trying to use ggplot to make a contour plot of some weather data, graphing the contents of the following dataframe:
dt
-18.25 -17.75 -17.25 -16.75 -16.25
-67.25 -1.042116 -0.2610118 3.527539 1.698116 5.8457116
-66.75 10.888635 10.7380880 5.282561 10.558529 12.0000000
-66.25 11.793157 -0.1322045 12.000000 12.000000 12.0000000
-65.75 9.407542 11.4077278 12.000000 12.000000 -0.6859424
-65.25 6.355656 9.4592526 12.000000 -2.770040 -4.0922426
-64.75 3.103860 12.0000000 3.928329 -5.728103 -5.2666501
-64.25 6.720593 10.5359569 -2.267352 -7.223494 -6.5408307
Here, column names are the latitude and the rownames are the longitude.
> lon
[1] -67.75 -67.25 -66.75 -66.25 -65.75 -65.25 -64.75 -64.25 -63.75
> lat
[1] -18.75 -18.25 -17.75 -17.25 -16.75 -16.25 -15.75
And the values in the dataframe are the ones I want to contour at each coordinate.
In order to graph this, I'm using the following code:
ggplot(dt) +
geom_density_2d_filled(aes(x=rownames(dt),y=names(dt)))
But whenever I try to do this, I get the following error:
Error in check_aesthetics():
! Aesthetics must be either length 1 or the same as the data (7): y
I would really appreciate any insights to solve this problem!
You have vectors of different lengths. Therefore bring them to the same length and plot:
Update with the original data:
library(tidyverse)
lon <- c(-67.75, -67.25, -66.75, -66.25, -65.75, -65.25, -64.75, -64.25, -63.75)
lat <- c(-18.75, -18.25, -17.75, -17.25, -16.75, -16.25, -15.75)
x <- lon
y <- lat
max.len = max(length(x), length(y))
x = c(x, rep(NA, max.len - length(x)))
y = c(y, rep(NA, max.len - length(y)))
tibble(x,y) %>%
ggplot(aes(x, y)) +
geom_density_2d()
First answer:
library(tidyverse)
x <- colnames(dt)
y <- rownames(dt)
max.len = max(length(x), length(y))
x = c(x, rep(NA, max.len - length(x)))
y = c(y, rep(NA, max.len - length(y)))
tibble(x,y) %>%
mutate(x = parse_number(x)) %>%
mutate(y = as.numeric(y)) %>%
ggplot(aes(x, y)) +
geom_density_2d()
data:
x <- c("18.25", "17.75", "17.25", "16.75", "16.25")
y <- c("-67.25", "-66.75", "-66.25", "-65.75", "-65.25", "-64.75", "-64.25")
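For the original goal of contouring the values themselves rather than a 2-D point density, one option is to reshape dt into long format and use geom_contour_filled(). A hedged sketch, assuming dt is the data frame from the question with longitudes as row names and latitudes as column names:
library(tidyverse)
dt_long <- dt %>%
  rownames_to_column("lon") %>%
  pivot_longer(-lon, names_to = "lat", values_to = "value") %>%
  mutate(lon = as.numeric(lon), lat = parse_number(lat))
ggplot(dt_long, aes(x = lon, y = lat, z = value)) +
  geom_contour_filled()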
I am looking to generate a ternary plot with binned polygons (either triangle or hex, preferably in a ggplot framework) where the color of the polygon is a binned mean or median of selected values.
This script gets very close, but the triangle cell color represents the number of observations, rather than the mean value of the observations contained within the triangle cell.
So rather than solely providing X, Y, and Z, I would provide a fourth fill/value variable from which binned means or medians are calculated and represented as a color on a gradient.
Akin to the image below, though in a ternary framework with an additional axis.
[Image: stat_summary_hex() plot with color as binned mean value]
I appreciate the help. Thank you.
Dummy data to begin with:
#load libraries
devtools::install_git('https://bitbucket.org/nicholasehamilton/ggtern')
library(ggtern)
library(ggplot2)
library(MASS)  # for mvrnorm() used below
# example data
sig <- matrix(c(3,0,0,2),2,2)
data <- data.frame(mvrnorm(n=10000, rep(2, 2), sig))
data$X1 <- data$X1/max(data$X1)
data$X2 <- data$X2/max(data$X2)
data$X1[which(data$X1<0)] <- runif(length(data$X1[which(data$X1<0)]))
data$X2[which(data$X2<0)] <- runif(length(data$X2[which(data$X2<0)]))
data$X3 <- with(data, 1-X1-X2)
data <- data[data$X3 >= 0,]
data$X4 <- rnorm(dim(data)[1])
data <- data.frame(X = data$X1, Y = data$X2, Z = data$X3, fill_variable = data$X4)
str(data)
# simple ternary plot where color of point is the fill variable value
ggtern(data,aes(X,Y,Z, color = fill_variable))+geom_point()
# 2D example, not a ternary though. Keep in mind in geom_hex Z is the fill, not the additional axis like ggtern
ggplot(data,aes(X,Y))+stat_summary_hex(aes(z = fill_variable))
This code isn't cleaned up, but it's a good jumping-off point. Credit for the original goes to the OP referenced in the first question.
I made some minor adjustments to the count_bin function so that instead of doing bin counts, it computes bin medians. Use at your own risk and please point out any bugs. For my implementation this reports 0 for NA bins.
Example output: [image of the binned-median ternary heatmap]
Function for binned median (pardon the name, just saves time):
count_bin <- function(data, minT, maxT, minR, maxR, minL, maxL) {
  # keep only the observations whose (X1, X2, X3) fall inside this triangular bin
  ret <- data
  ret <- with(ret, ret[minT <= X1 & X1 < maxT, ])
  ret <- with(ret, ret[minL <= X2 & X2 < maxL, ])
  ret <- with(ret, ret[minR <= X3 & X3 < maxR, ])
  # return the median of VAR within the bin, or 0 for an empty (NA) bin
  if (is.na(median(ret$VAR))) {
    ret <- 0
  } else {
    ret <- median(ret$VAR)
  }
  ret
}
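If you want binned means instead of medians (the question asks for either), only this helper needs to change. A hedged variant with the same filtering, here called mean_bin (a name made up for illustration); pass it in place of count_bin inside the heatmap function below:
mean_bin <- function(data, minT, maxT, minR, maxR, minL, maxL) {
  ret <- data
  ret <- with(ret, ret[minT <= X1 & X1 < maxT, ])
  ret <- with(ret, ret[minL <= X2 & X2 < maxL, ])
  ret <- with(ret, ret[minR <= X3 & X3 < maxR, ])
  # report 0 for an empty bin, mirroring count_bin above
  if (nrow(ret) == 0) 0 else mean(ret$VAR)
}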
Modified heatmap function (it relies on ddply() and join() from the plyr package):
heatmap3d <- function(data, inc, logscale=FALSE, text=FALSE, plot_corner=TRUE) {
# When plot_corner is FALSE, corner_cutoff determines where to stop plotting
corner_cutoff = 1
# When plot_corner is FALSE, corner_numbers toggles display of observations in the corners
# This only has an effect when text==FALSE
corner_numbers = TRUE
count <- 1
points <- data.frame()
for (z in seq(0,1,inc)) {
x <- 1- z
y <- 0
while (x>0) {
points <- rbind(points, c(count, x, y, z))
x <- round(x - inc, digits=2)
y <- round(y + inc, digits=2)
count <- count + 1
}
points <- rbind(points, c(count, x, y, z))
count <- count + 1
}
colnames(points) = c("IDPoint","T","L","R")
#str(points)
#str(count)
# base <- ggtern(data=points,aes(L,T,R)) +
# theme_bw() + theme_hidetitles() + theme_hidearrows() +
# geom_point(shape=21,size=10,color="blue",fill="white") +
# geom_text(aes(label=IDPoint),color="blue")
# print(base)
polygons <- data.frame()
c <- 1
# Normal triangles
for (p in points$IDPoint) {
if (is.element(p, points$IDPoint[points$T==0])) {
next
} else {
pL <- points$L[points$IDPoint==p]
pT <- points$T[points$IDPoint==p]
pR <- points$R[points$IDPoint==p]
polygons <- rbind(polygons,
c(c,p),
c(c,points$IDPoint[abs(points$L-pL) < inc/2 & abs(points$R-pR-inc) < inc/2]),
c(c,points$IDPoint[abs(points$L-pL-inc) < inc/2 & abs(points$R-pR) < inc/2]))
c <- c + 1
}
}
#str(c)
# Upside down triangles
for (p in points$IDPoint) {
if (!is.element(p, points$IDPoint[points$T==0])) {
if (!is.element(p, points$IDPoint[points$L==0])) {
pL <- points$L[points$IDPoint==p]
pT <- points$T[points$IDPoint==p]
pR <- points$R[points$IDPoint==p]
polygons <- rbind(polygons,
c(c,p),
c(c,points$IDPoint[abs(points$T-pT) < inc/2 & abs(points$R-pR-inc) < inc/2]),
c(c,points$IDPoint[abs(points$L-pL) < inc/2 & abs(points$R-pR-inc) < inc/2]))
c <- c + 1
}
}
}
#str(c)
# IMPORTANT FOR CORRECT ORDERING.
polygons$PointOrder <- 1:nrow(polygons)
colnames(polygons) = c("IDLabel","IDPoint","PointOrder")
df.tr <- merge(polygons,points)
Labs = ddply(df.tr,"IDLabel",function(x){c(c(mean(x$T),mean(x$L),mean(x$R)))})
colnames(Labs) = c("Label","T","L","R")
#str(Labs)
#triangles <- ggtern(data=df.tr,aes(L,T,R)) +
# geom_polygon(aes(group=IDLabel),color="black",alpha=0.25) +
# geom_text(data=Labs,aes(label=Label),size=4,color="black") +
# theme_bw()
# print(triangles)
bins <- ddply(df.tr, .(IDLabel), summarize,
maxT=max(T),
maxL=max(L),
maxR=max(R),
minT=min(T),
minL=min(L),
minR=min(R))
#str(bins)
count <- ddply(bins, .(IDLabel), summarize,
N=count_bin(data, minT, maxT, minR, maxR, minL, maxL)
#N=mean(data)
)
df <- join(df.tr, count, by="IDLabel")
str(count)
Labs = ddply(df,.(IDLabel,N),function(x){c(c(mean(x$T),mean(x$L),mean(x$R)))})
colnames(Labs) = c("Label","N","T","L","R")
if (plot_corner==FALSE){
corner <- ddply(df, .(IDPoint, IDLabel), summarize, maxperc=max(T,L,R))
corner <- corner$IDLabel[corner$maxperc>=corner_cutoff]
df$N[is.element(df$IDLabel, corner)] <- 0
if (text==FALSE & corner_numbers==TRUE) {
Labs$N[!is.element(Labs$Label, corner)] <- ""
text=TRUE
}
}
heat <- ggtern(data=df,aes(L,T,R)) +
geom_polygon(aes(fill=N,group=IDLabel),color="black",alpha=1, size = 0.1,show.legend = F)
if (logscale == TRUE) {
heat <- heat + scale_fill_gradient(name="Observations", trans = "log",
low=palette[2], high=palette[4]) # note: 'palette' is not defined in this function; supply it when using logscale=TRUE
} else {
heat <- heat + scale_fill_distiller(name="Median Value",
palette = "Spectral")
}
heat <<- heat +
Tlab("x") +
Rlab("y") +
Llab("z") +
theme_bw() +
theme(axis.tern.arrowsep=unit(0.02,"npc"), #0.01npc away from ticks ticklength
axis.tern.arrowstart=0.25,axis.tern.arrowfinish=0.75,
axis.tern.text=element_text(size=12),
axis.tern.arrow.text.T=element_text(vjust=-1),validate = F,
axis.tern.arrow.text.R=element_text(vjust=2),
axis.tern.arrow.text.L=element_text(vjust=-1),
#axis.tern.arrow.text=element_text(size=12),
axis.tern.title=element_text(size=15),
axis.tern.text=element_blank(),
axis.tern.arrow.text=element_blank())
if (text==FALSE) {
print(heat)
} else {
print(heat + geom_text(data=Labs,aes(label=N),size=3,color="white"))
}
}
Dummy example:
# dummy example (mvrnorm() comes from MASS; heatmap3d() relies on plyr)
library(MASS)
library(plyr)
sig <- matrix(c(3,0,0,2),2,2)
data <- data.frame(mvrnorm(n=10000, rep(2, 2), sig))
data$X1[which(data$X1<0)] <- runif(length(data$X1[which(data$X1<0)]))
data$X2[which(data$X2<0)] <- runif(length(data$X2[which(data$X2<0)]))
data$X3 <- with(data, 1-X1-X2)
data <- data[data$X3 >= 0,]
data$VAR <- rnorm(dim(data)[1])
str(data)
# simple ternary plot where the color of each point is the VAR value
ggtern(data, aes(X1, X2, X3, color = VAR)) +
  geom_point(size = 5) +
  scale_color_distiller(palette = "Spectral")
heatmap3d(data,.05)
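As a quick sanity check of count_bin() on this dummy data, a single-bin call with arbitrary (hypothetical) bounds should return the median of VAR for the observations falling in that bin, or 0 if the bin is empty:
count_bin(data, minT = 0, maxT = 0.5, minR = 0, maxR = 0.5, minL = 0, maxL = 0.5)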
Anyone know of a non-raster method to achieve the following?
require(raster)
d = data.frame(rasterToPoints(raster(volcano)))
head(d)
x y layer
1 0.008196721 0.9942529 100
2 0.024590164 0.9942529 100
3 0.040983607 0.9942529 101
4 0.057377049 0.9942529 101
5 0.073770492 0.9942529 101
6 0.090163934 0.9942529 101
Cheers.
One way would be to use the row and col commands:
library(raster)
data(volcano)
df <- data.frame(
x = as.vector(col(volcano)),
y = (yy <- as.vector(row(volcano)))[length(yy):1],
val = as.vector(volcano)
)
raster rescales the range to 0 - 1 if not specified differently, so we have to do this too:
## rescale
df$x <- with(df, (x - min(x)) / (max(x) - min(x)))
df$y <- with(df, (y - min(y)) / (max(y) - min(y)))
Finally, let's check that the results are the same:
## Using raster
df1 <- data.frame(rasterToPoints(raster(volcano)))
cols <- colorRampPalette(c('white', "blue", 'red'))
df$col <- cols(20)[as.numeric(cut(df$val, breaks = 20))]
df1$col <- cols(20)[as.numeric(cut(df1$layer, breaks = 20))]
par(mfrow = c(1, 2))
plot(df[, 1:2], col = df$col, pch = 20, main = "matrix")
plot(df1[, 1:2], col = df1$col, pch = 20, main = "raster")
Note:
While the results appear the same visually, they are not. The resolution of the raster command is most likely different, and hence there are different nrows for df and df1.
Faster for large matrices (where m is the input matrix, e.g. volcano):
data.frame(
x = rep(1:ncol(m), each=nrow(m)),
y = rep(nrow(m):1, ncol(m)),
val = as.vector(m)
)
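A quick usage check of that construction, including the same rescaling as above (a sketch only; m is just the volcano matrix here):
m <- volcano
df2 <- data.frame(
  x = rep(1:ncol(m), each = nrow(m)),
  y = rep(nrow(m):1, ncol(m)),
  val = as.vector(m)
)
## rescale to [0, 1], as raster does
df2$x <- (df2$x - min(df2$x)) / (max(df2$x) - min(df2$x))
df2$y <- (df2$y - min(df2$y)) / (max(df2$y) - min(df2$y))
head(df2)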
I need to calculate the area of the ellipse (a=6, b=3) with the Monte Carlo method.
I also have to make a plot (a diagram) of the result with the inside points red and the outside ones black. At the end I have to compare the "Monte Carlo result" with the "regular result".
The equation is (x^2)/36+(y^2)/9=1
The method must use 100000 replications.
This is what I did. Obviously it doesn't work.
set.seed(157619)
n <- 100000
xmin <- (-6)
xmax <- (+6)
ymin <- (-3)
ymax <- (+3)
rx <- (xmax-xmin)/2
ry <- (ymax-ymin)/2
outa <- runif(n,min=xmin,max=xmax)
outb <- runif(n,min=ymin,max=ymax)
dx <- outa*2
dy <- outb*2
ly <- dy<=(ry^2); my <- dy>(ry^2)
lx <- dx<=(ry^2); mx <- dx>(rx^2)
Here is example code that works for the circle:
n <- 200
xmin <- -1; xmax <- 1
r <- (xmax-xmin)/2
out <- runif(n,min=xmin,max=xmax)
x <- matrix(out,ncol=2)
d <- x[,1]^2 + x[,2]^2
l <- d<=(r^2); m <- d>(r^2)
win.graph(7,7.8) # this way it is square
plot(c(xmin,xmax),c(xmin,xmax),type="n")
plot(x[l,1],x[l,2])
points(x[m,1],x[m,2],col="red",pch=19)
(p <- sum(l)/length(l))
p*4
I suspect this is homework, but here we go:
set.seed(42)
n <- 1e5
xmax <- 6
ymax <- 3
x <- runif(n, 0, xmax)
y <- runif(n, 0, ymax)
inside <- (x^2)/36+(y^2)/9 <= 1
plot(x, y, pch=16, cex=0.5, col=inside+1)
mean(inside) * (xmax*ymax) *4
#[1] 56.54376
pi*6*3
#[1] 56.54867
set.seed(1)
n = 1000
a = 6
b = 3
x.samp = runif(n, -a, a)
y.samp = runif(n, -b, b)
p.in = (x.samp/a)^2 + (y.samp/b)^2 <= 1
S = 4*a*b*sum(p.in)/n
print(S)
plot(x.samp, y.samp, col = p.in + 1)
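To compare against the exact area pi*a*b with a feel for the Monte Carlo error: the hit fraction is a binomial proportion, so a rough standard error of the estimate follows directly (a hedged sketch using the variables from the answer above):
p.hat <- sum(p.in)/n                      # fraction of points inside the ellipse
se.S <- 4*a*b*sqrt(p.hat*(1 - p.hat)/n)   # approximate standard error of the area estimate
c(estimate = S, std.error = se.S, exact = pi*a*b)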
Say I have this example data frame
set.seed(12345)
n1 <- 3
n2 <- 10
n3 <- 60
times <- seq(0, 100, 0.5)
individual <- c(rep(1, n1),
rep(2, n2),
rep(3, n3))
events <- c(sort(sample(times, n1)),
sort(sample(times, n2)),
sort(sample(times, n3)))
df <- data.frame(individual = individual, events = events)
Which gives
> head(df, 10)
individual events
1 1 72.0
2 1 75.5
3 1 87.5
4 2 3.0
5 2 14.5
6 2 16.5
7 2 32.0
8 2 45.5
9 2 50.0
10 2 70.5
I would like to plot a cumulative step graph of the events so that I get one line per individual which goes up by 1 each time an event is "encountered".
So, for instance, individual 1 will be at 0 up to 72.0, then go up to 1 until 75.5, when it becomes 2, and reach 3 at 87.5 through to the end of the graph.
What would be the easiest way to do that?
df$step <- 1
library(plyr)
df <- ddply(df,.(individual),transform,step=cumsum(step))
plot(step~events,data=df[df$individual==1,],type="s",xlim=c(0,max(df$events)),ylim=c(0,max(df$step)),xlab="time",ylab="step")
lines(step~events,data=df[df$individual==2,],type="s",col=2)
lines(step~events,data=df[df$individual==3,],type="s",col=3)
There is also the stepfun function in the stats package. With that, you can use the plot method for the resulting object class:
sdf <- split(df, individual)
plot(1, 1, type = "n", xlim = c(0, max(events)), ylim = c(0, max(table(individual))),
ylab = "step", xlab = "time")
sfun <- lapply(sdf, function(x){
sf <- stepfun(sort(x$events), seq_len(nrow(x) + 1) - 1)
plot(sf, add = TRUE, col = unique(x$individual), do.points = FALSE)
})
Use ggplot2:
library(ggplot2)
# Add step height information with sequence and rle
df$step <- sequence(rle(df$individual)$lengths)
# plot
df$individual <- factor(df$individual)
ggplot(df, aes(x=events, group=individual, colour=individual, y=step)) +
geom_step()
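If you already use dplyr (as in the first question above), an equivalent hedged sketch computes the step with row_number() within each individual and draws it with geom_step():
library(dplyr)
library(ggplot2)
df %>%
  group_by(individual) %>%
  mutate(step = row_number()) %>%   # events are already sorted within each individual
  ungroup() %>%
  ggplot(aes(events, step, colour = factor(individual))) +
  geom_step()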