Draw discrete CDF in R

Draw discrete CDF in R - r

I want to draw a CDF in R, but I am having some problems. I want it to look like this:
But I get lines between the open and closed points by using the command plot(x,y,type="s")
So how do I get rid of those lines?

This isn't a general purpose example, but it will show you how to build the plot you desire in a couple of steps.
First, let's create some data (notice the zeros at the beginning):
x <- 0:6
fx <- c(0, 0.19, 0.21, 0.4, 0.12, 0.05, 0.03)
Fx <- cumsum(fx)
n <- length(x)
Then let's make an empty plot
plot(x = NA, y = NA, pch = NA,
xlim = c(0, max(x)),
ylim = c(0, 1),
xlab = "X label",
ylab = "Y label",
main = "Title")
Add closed circles, open circles, and finally the horizontal lines
points(x = x[-n], y = Fx[-1], pch=19)
points(x = x[-1], y = Fx[-1], pch=1)
for(i in 1:(n-1)) points(x=x[i+0:1], y=Fx[c(i,i)+1], type="l")
Viola!
If you insist on not seeing the line "inside" of the white points, do this instead:
points(x = x[-n], y = Fx[-1], pch=19)
for(i in 1:(n-1)) points(x=x[i+0:1], y=Fx[c(i,i)+1], type="l")
points(x = x[-1], y = Fx[-1], pch=19, col="white")
points(x = x[-1], y = Fx[-1], pch=1)

You can construct this plot using:
plot(x, y, pch = 16, ylim = c(-0.03, 1.03), ylab = "CDF") # solid points/graphic settings
points(x[-1], y[-length(y)]) # open points
abline(h = c(0, 1), col = "grey", lty = 2) # horizontal lines
Note: plot(x,y, type = "s) does not produce a plot like your original question, but rather a step function with both treads (horizontal lines) and risers (vertical lines):
Data
library(dplyr)
set.seed(1)
df <- data.frame(x = rpois(30, 3)) %>%
dplyr::arrange(x) %>%
dplyr::add_count(x) %>%
dplyr::distinct(x, .keep_all = T) %>%
mutate(y = cumsum(n) / sum(n))
x <- df$x
y <- df$y

Related

Side-by-Side plots lined up in R

I am trying to place two plots side-by-side in R and have the below example.
library(vioplot)
x <- rnorm(100)
y <- rpois(100,1)
plot(x, y, xlim=c(-5,5), ylim=c(-5,5),type='n')
vioplot(x, col="tomato", horizontal=TRUE, at=-4, add=TRUE,lty=2, rectCol="gray")
vioplot(y, col="cyan", horizontal=TRUE, at=-3, add=TRUE,lty=2)
vioplot(y, col="cyan", horizontal=TRUE, at=-2, add=TRUE,lty=2)
With this data, I'm able to make a vioplot of my x and y variables. Now, for example, I want to develop bar plots of separate count data that relates to each vioplot on the left-hand side.
counts <- c(10, 20, 30)
barplot(counts, main="Car Distribution", horiz=TRUE)
I've used the mtcars example but it could be any count data. I'm wondering if it is possible to generate these plots side-by-side so that the count plot lines up with the vioplot correctly. I do not need any y-axis labels for the count plot.

According your specifications ggplot is my recommendation
library(tidyverse)
p1 <- lst(x, y, y1=y) %>%
bind_cols() %>%
pivot_longer(1:3) %>%
ggplot(aes(name, value)) +
geom_violin(trim = FALSE)+
geom_boxplot(width=0.15) +
coord_flip()
p2 <- mtcars %>%
count(gear) %>%
ggplot(aes(gear, n)) +
geom_col()+
coord_flip()
cowplot::plot_grid(p1, p2)
In base R you can do (please note, I used boxplot, but should work with viopülot either)
par(mfrow=c(1,2))
counts <- table(mtcars$gear)
boxplot(cbind(x,y,y), col="tomato", horizontal=TRUE,lty=2, rectCol="gray")
barplot(counts, main="Car Distribution", horiz=TRUE,
names.arg=c("3 Gears", "4 Gears", "5 Gears"))

Another option if you want to use ggplot is function ggarrange() from ggpubr.
library(dplyr)
library(ggplot2)
library(ggpubr)
# Create a sample dataset
dt <- tibble(group = rep(c("x", "y"), each = 100)) %>%
mutate(value = if_else(group == "x", rnorm(200),
as.double(rpois(200, 1))))
# Combined violin/Box plot
violins <- dt %>%
ggplot(aes(value, group)) +
geom_violin(width = 0.5) +
geom_boxplot(width = 0.1)
# Bar chart
bars <- dt %>%
ggplot(aes(group)) +
geom_bar(width = 0.1) +
coord_flip()
# Combine
ggpubr::ggarrange(violins, bars + rremove("ylab") + rremove("y.text"), ncol = 2)
Output:

You can use this code:
library(vioplot)
x <- rnorm(100)
y <- rpois(100,1)
par(mfrow=c(1,2))
plot(x, y, xlim=c(-5,5), ylim=c(-5,-1),type='n')
vioplot(x, col="tomato", horizontal=TRUE, at=-4, add=TRUE,lty=2, rectCol="gray")
vioplot(y, col="cyan", horizontal=TRUE, at=-3, add=TRUE,lty=2)
vioplot(y, col="cyan", horizontal=TRUE, at=-2, add=TRUE,lty=2)
counts <- table(mtcars$gear)
barplot(counts, main="Car Distribution", horiz=TRUE,
names.arg=c("3 Gears", "4 Gears", "5 Gears"))
Output:

Thank you for your interesting question, which has motivates me to explore base R graphics features. I have tried to find a case where the side-by-side configuration between the violin plot and the barplot provides a meaningful relationship. The case is that I have a subset of iris data with various counts of the species. I want to show three statistics:
the counts of sampled each species, by showing barplots;
the spread of sepal lengths in each sampled species, by showing violin plots; and
the median petal width of each sampled species, by positioning the violin plots.
I follow #GW5's idea here to create barplots of which the positions on the axes can be controlled. I follow #IRTFM's idea here to adjust the origins of the axes.
Here is the full code:
library(vioplot)
some_iris <- iris[c(1:90, 110:139), ]
ir_counts <- some_iris |> with(Species) |> table()
ir_counts
# setosa versicolor virginica
# 50 40 30
ir_names <- names(ir_counts)
ir_colors <- c("cyan", "green", "pink")
x_vio1 <- some_iris |> subset(Species == ir_names[1]) |> with(Sepal.Length)
x_vio2 <- some_iris |> subset(Species == ir_names[2]) |> with(Sepal.Length)
x_vio3 <- some_iris |> subset(Species == ir_names[3]) |> with(Sepal.Length)
y_vio1 <- some_iris |> subset(Species == ir_names[1]) |> with(Petal.Length) |> median()
y_vio2 <- some_iris |> subset(Species == ir_names[2]) |> with(Petal.Length) |> median()
y_vio3 <- some_iris |> subset(Species == ir_names[3]) |> with(Petal.Length) |> median()
# `xpd = FALSE` to keep the grid inside the plotting boxes.
par(mfrow = c(1, 2), xpd = FALSE)
# The violin plots, put on the left side.
plot(NULL,
xlim = c(0, 10), ylim = c(0, 10), type = "n", las = 1, xaxs = "i", yaxs = "i",
xlab = "Sepal Length (cm)", ylab = " Median Petal Width (cm)")
vioplot(x_vio1, col = ir_colors[1], horizontal = TRUE, at = y_vio1, add = TRUE, lty = 2)
vioplot(x_vio2, col = ir_colors[2], horizontal = TRUE, at = y_vio2, add = TRUE, lty = 2)
vioplot(x_vio3, col = ir_colors[3], horizontal = TRUE, at = y_vio3, add = TRUE, lty = 2)
grid()
# The texts that informs the names of the species
text(labels = ir_names, y = c(y_vio1, y_vio2, y_vio3),
x = c (min(x_vio1), min(x_vio2), min(x_vio3)) - 1)
# The barplots, put on the right side.
plot(NULL,
xlim = c(0, 60), ylim = c(0, 10), yaxt = "n", type = "n",
las = 1, xlab = "Counts", ylab = "", xaxs = "i", yaxs = "i"
)
rect(xleft = 0, xright = ir_counts[1],
ybottom = y_vio1 - 0.3, ytop = y_vio1 + 0.3, col = ir_colors[1])
rect(xleft = 0, xright = ir_counts[2],
ybottom = y_vio2 - 0.3, ytop = y_vio2 + 0.3, col = ir_colors[2])
rect(xleft = 0, xright = ir_counts[3],
ybottom = y_vio3 - 0.3, ytop = y_vio3 + 0.3, col = ir_colors[3])
grid()
Here is the result:
In case you want to put labels on the barplots (on the right side), you can use mtext as follows:
# ... (The same code above)
mtext(text = ir_names, side = 2, at = c(y_vio1, y_vio2, y_vio3),
line = 0.2, las = 1 )
The resulted labels:

How to specify breaks for y axis in R plot

I have created the following fanchart using the fanplot package. I'm trying to add axis ticks and labels to the y axis, however it's only giving me the decimals and not the full number. Looking for a solution to display the full number (e.g 4.59 and 4.61) on the y axis
I am also unsure of how to specify the breaks and number of decimal points for the labels on the y-axis using plot(). I know doing all of this in ggplot2 it would look something like this scale_y_continuous(breaks = seq(min(data.ts$Index),max(data.ts$Index),by=0.02)) . Any ideas on how to specify the breaks in the y axis as well as the number of decimal points using the base plot() feature in R?
Here is a reproductible of my dataset data.ts
structure(c(4.6049904235401, 4.60711076016453, 4.60980084146652,
4.61025389170935, 4.60544515681515, 4.60889021700954, 4.60983993107244,
4.61091608826696, 4.61138799159174, 4.61294431148318, 4.61167545843765,
4.61208284263432, 4.61421991328081, 4.61530485425155, 4.61471465043043,
4.6155992084451, 4.61195799200607, 4.61178486640435, 4.61037927954796,
4.60744590947049, 4.59979957741728, 4.59948551500254, 4.60078678080182,
4.60556092645471, 4.60934962087565, 4.60981147563749, 4.61060477704678,
4.61158365084251, 4.60963435263623, 4.61018215733317, 4.61209710959768,
4.61231368335184, 4.61071363571141, 4.61019496497916, 4.60948652606191,
4.61068813487859, 4.6084092003352, 4.60972706132393, 4.60866915174087,
4.61192565195909, 4.60878767339377, 4.61341471281265, 4.61015272152397,
4.6093479714315, 4.60750965935653, 4.60768790690338, 4.60676463096309,
4.60746490411374, 4.60885670935448, 4.60686846708382, 4.60688947889575,
4.60867708110485, 4.60448791268212, 4.60387348166032, 4.60569806689426,
4.6069320880709, 4.6087143894128, 4.61059688801283, 4.61065399116698,
4.61071421014339), .Tsp = c(2004, 2018.75, 4), class = "ts")
and here is a reproductible of the code I'm using
# # Install and Load Packages
## pacman::p_load(forecast,fanplot,tidyverse,tsbox,lubridate,readxl)
# Create an ARIMA Model using the auto.arima function
model <- auto.arima(data.ts)
# Simulate forecasts for 4 quarters (1 year) ahead
forecasts <- simulate(model, n=4)
# Create a data frame with the parameters needed for the uncertainty forecast
table <- ts_df(forecasts) %>%
rename(mode=value) %>%
mutate(time0 = rep(2019,4)) %>%
mutate(uncertainty = sd(mode)) %>%
mutate(skew = rep(0,4))
y0 <- 2019
k <- nrow(table)
# Set Percentiles
p <- seq(0.05, 0.95, 0.05)
p <- c(0.01, p, 0.99)
# Simulate a qsplitnorm distribution
fsval <- matrix(NA, nrow = length(p), ncol = k)
for (i in 1:k)
fsval[, i] <- qsplitnorm(p, mode = table$mode[i],
sd = table$uncertainty[i],
skew = table$skew[i])
# Create Plot
plot(data.ts, type = "l", col = "#75002B", lwd = 4,
xlim = c(y0 - 2,y0 + 0.75), ylim = range(fsval, data.ts),
xaxt = "n", yaxt = "n", ylab = "",xlab='',
main = '')
title(ylab = 'Log AFSI',main = 'Four-Quarter Ahead Forecast Fan - AFSI',
xlab = 'Date')
rect(y0 - 0.25, par("usr")[3] - 1, y0 + 2, par("usr")[4] + 1,
border = "gray90", col = "gray90")
fan(data = fsval, data.type = "values", probs = p,
start = y0, frequency = 4,
anchor = data.ts[time(data.ts) == y0 - .25],
fan.col = colorRampPalette(c("#75002B", "pink")),
ln = NULL, rlab = NULL)
# Add axis labels and ticks
axis(1, at = y0-2:y0 + 2, tcl = 0.5)
axis(1, at = seq(y0-2, y0 + 2, 0.25), labels = FALSE, tcl = 0.25)
abline(v = y0 - 0.25, lty = 1)
abline(v = y0 + 0.75, lty = 2)
axis(2, at = range(fsval, data.ts), las = 2, tcl = 0.5)

range(blah) will only return two values (the minimum and maximum). The at parameter of axis() requires a sequence of points at which you require axis labels. Hence, these are the only two y values you have on your plot. Take a look at using pretty(blah) or seq(min(blah), max(blah), length.out = 10).

The suggestions of #Feakster are worth looking at, but the problem here is that the y-axis margin isn't wide enough. You could do either of two things. You could round the labels so they fit within the margins, for example you could replace this
axis(2, at = range(fsval, data.ts), las = 2, tcl = 0.5)
with this
axis(2, at = range(fsval, data.ts),
labels = sprintf("%.3f", range(fsval, data.ts)), las = 2, tcl = 0.5)
Or, alternatively you could increase the y-axis margin before you make the plot by specifying:
par(mar=c(5,5,4,2)+.1)
plot(data.ts, type = "l", col = "#75002B", lwd = 4,
xlim = c(y0 - 2,y0 + 0.75), ylim = range(fsval, data.ts),
xaxt = "n", yaxt = "n", ylab = "",xlab='',
main = '')
Then everything below that should work. The mar element of par sets the number of lines printed in the margin of each axis. The default is c(5,4,4,2).

R. How to avoid lines connecting dots in dotplot

I made a plot using plot() using RStudio.
x = X$pos
y = X$anc
z = data.frame(x,y)
#cut in segments
my_segments = c(52660, 106784, 151429, 192098, 233666,
273857, 307933, 343048, 373099, 408960,
441545, 472813, 497822, 518561, 537471,
556747, 571683, 591232, 599519, 616567,
625727, 633744)
my_cuts = cut(x,my_segments, labels = FALSE)
my_cuts[is.na(my_cuts)] = 0
This is the code:
#create subset of segments
z_alt = z
z_alt[my_cuts %% 2 == 0,] = NA
#plot green, then alternating segments in blue
plot(z, type="p", cex = 0.3,pch = 16,
col="black",
lwd=0.2,
frame.plot = F,
xaxt = 'n', # removes x labels,
ylim = c(0.3, 0.7),
las = 2,
xlim = c(0, 633744),
cex.lab=1.5, # size of axis labels
ann = FALSE, # remove axis titles
mgp = c(3, 0.7, 0))
lines(z_alt,col="red", lwd=0.2)
# adjust y axis label size
par(cex.axis= 1.2, tck=-0.03)
If you see, some black dots are separated, but other black dots have red connecting lines. Does anyone know how to remove these annoying lines?. I just want black and red dots. Many thanks

there is no need to call the points in a second function. you can try to directly set the color in the plot function using a color vector.
# create some data as you have not provided some
set.seed(123)
df <- data.frame(x=1:100,y=runif(100))
# some sgment breaks
my_segments <- c(0,10,20,50,60)
gr <- cut(df$x, my_segments,labels = FALSE, right = T)
gr[is.na(gr)] <- 0
# create color vector with 1 == black, and 2 == red
df$color <- ifelse(gr %% 2 == 0, 1, 2)
# and the plot
plot(df$x, df$y, col = df$color, pch = 16)

The problem here is that you are using lines to add your z_alt. As the name of the function suggests, you will be adding lines. Use points instead.
z <- runif(20,0,1)
z_alt <- runif(20,0.8,1.2)
plot(z, type="p", col="black", pch = 16, lwd=0.2, ylim = c(0,1.4))
points(z_alt, col = "red", pch = 16, lwd = 0.2)

Overlapping stacked density plots

I'm trying to achieve a similar plot to this one, using R's native plot command.
I was able to get something similar with the code below, however, I'd like the density polygons to overlap. Can anyone suggest a way to do this?
data = lapply(1:5, function(x) density(rnorm(100, mean = x)))
par(mfrow=c(5,1))
for(i in 1:length(data)){
plot(data[[i]], xaxt='n', yaxt='n', main='', xlim=c(-2, 8), xlab='', ylab='', bty='n', lwd=1)
polygon(data[[i]], col=rgb(0,0,0,.4), border=NA)
abline(h=0, lwd=0.5)
}
Outputs:

I would do it something like the following. I plot the densities in the same plot but add an integer to the y values. To make them overlapping i multiply by a constant factor fac.
# Create your toy data
data <- lapply(1:5, function(x) density(rnorm(100, mean = x)))
fac <- 5 # A factor to make the densities overlap
# We make a empty plot
plot(1, type = "n", xlim = c(-3, 10), ylim = c(1, length(data) + 2),
axes = FALSE, xlab = "", ylab = "")
# Add each density, shifted by i and scaled by fac
for(i in 1:length(data)){
lines( data[[i]]$x, fac*data[[i]]$y + i)
polygon(data[[i]]$x, fac*data[[i]]$y + i, col = rgb(0, 0, 0, 0.4), border = NA)
abline(h = i, lwd = 0.5)
}

(Note: This content was previously edited into the Question and was written by #by0.)
Thanks to #AEBilgrau, I quickly put together this function which works really nicely. Note: you need to play around with the factor fac depending on your data.
stacked.density <- function(data, fac = 3, xlim, col = 'black',
alpha = 0.4, show.xaxis = T,
xlab = '', ylab = ''){
xvals = unlist(lapply(data, function(d) d$x))
if(missing(xlim)) xlim=c(min(xvals), max(xvals))
col = sapply(col, col2alpha, alpha)
if(length(col) == 1) col = rep(col, length(data))
plot(1, type = "n", xlim = xlim, ylim = c(1,length(data) + 2),
yaxt='n', bty='n', xaxt=ifelse(show.xaxis, 'l', 'n'), xlab = xlab, ylab = ylab)
z = length(data):1
for(i in 1:length(data)){
d = data[[ z[i] ]]
lines(d$x, fac*d$y + i, lwd=1)
polygon(d$x, fac*d$y+ i, col=col[i], border=NA)
abline(h = i, lwd=0.5)
}
}
data <- lapply(1:5, function(x) density(rnorm(100, mean = x)))
stacked.density(data, col=c('red', 'purple', 'blue', 'green', 'yellow'), alpha=0.3, show.xaxis=T)
outputs:

R - color scatterplot points by z value with legend

I have a scatterplot and wish to color the points by a z value assigned to each point. Then I want to get the legend on the right hand side of the plot to show what colors correspond to what z values using a nice smooth color spectrum.
Here are some x,y,z values you can use so that this is a reproducible example.
x = runif(50)
y = runif(50)
z = runif(50) #determines color of the (x,y) point
I suppose the best answer would be one that is generalized for any color function, but I do anticipate using rainbow()

Translated from this previous question:
library(ggplot2)
d = data.frame(x=runif(50),y=runif(50),z=runif(50))
ggplot(data = d, mapping = aes(x = x, y = y)) + geom_point(aes(colour = z), shape = 19)

If you don't want to use ggplot2 I modified a solution to this provided by someone else, I don't remember who.
scatter_fill <- function (x, y, z,xlim=c(min(x),max(x)),ylim=c(min(y),max(y)),zlim=c(min(z),max(z)),
nlevels = 20, plot.title, plot.axes,
key.title, key.axes, asp = NA, xaxs = "i",
yaxs = "i", las = 1,
axes = TRUE, frame.plot = axes, ...)
{
mar.orig <- (par.orig <- par(c("mar", "las", "mfrow")))$mar
on.exit(par(par.orig))
w <- (3 + mar.orig[2L]) * par("csi") * 2.54
layout(matrix(c(2, 1), ncol = 2L), widths = c(1, lcm(w)))
par(las = las)
mar <- mar.orig
mar[4L] <- mar[2L]
mar[2L] <- 1
par(mar = mar)
# choose colors to interpolate
levels <- seq(zlim[1],zlim[2],length.out = nlevels)
col <- colorRampPalette(c("red","yellow","dark green"))(nlevels)
colz <- col[cut(z,nlevels)]
#
plot.new()
plot.window(xlim = c(0, 1), ylim = range(levels), xaxs = "i", yaxs = "i")
rect(0, levels[-length(levels)], 1, levels[-1L],col=col,border=col)
if (missing(key.axes)) {if (axes){axis(4)}}
else key.axes
box()
if (!missing(key.title))
key.title
mar <- mar.orig
mar[4L] <- 1
par(mar = mar)
# points
plot(x,y,type = "n",xaxt='n',yaxt='n',xlab="",ylab="",xlim=xlim,ylim=ylim,bty="n")
points(x,y,col = colz,xaxt='n',yaxt='n',xlab="",ylab="",bty="n",...)
## options to make mapping more customizable
if (missing(plot.axes)) {
if (axes) {
title(main = "", xlab = "", ylab = "")
Axis(x, side = 1)
Axis(y, side = 2)
}
}
else plot.axes
if (frame.plot)
box()
if (missing(plot.title))
title(...)
else plot.title
invisible()
}
Just run the function first and it is ready to be used. It is quite handy.
# random vectors
vx <- rnorm(40,0,1)
vy <- rnorm(40,0,1)
vz <- rnorm(40,10,10)
scatter_fill(vx,vy,vz,nlevels=15,xlim=c(-1,1),ylim=c(-1,5),zlim=c(-10,10),main="TEST",pch=".",cex=8)
As you can notice, it inherits the usual plot function capabilities.

Another alternative using levelplot in package latticeExtra, with three different colour palettes.
library(latticeExtra)
levelplot(z ~ x + y, panel = panel.levelplot.points, col.regions = heat.colors(50))
levelplot(z ~ x + y, panel = panel.levelplot.points,
col.regions =colorRampPalette(brewer.pal(11,"RdYlGn"))(50))
levelplot(z ~ x + y, panel = panel.levelplot.points, col.regions = rainbow(50))

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Draw discrete CDF in R - r

I want to draw a CDF in R, but I am having some problems. I want it to look like this: But I get lines between the open and closed points by using the command plot(x,y,type="s") So how do I get rid of those lines?

Related

Side-by-Side plots lined up in R

How to specify breaks for y axis in R plot

R. How to avoid lines connecting dots in dotplot

Overlapping stacked density plots

R - color scatterplot points by z value with legend

Categories

Resources