I am trying to plot two or more lines on the same graph using a loop. My plot is a population dynamic in which I want to repeatedly change the value of the starting population but keep all other parameters the same. I want to plot the different outcomes on one graph. Can anyone help?
Try the following:
library(ggplot2)
MAX.Y<-30
# year<-0:30
year<-1:30
rlp<-0.1
lp<-rep(0,MAX.Y)
lp[1]<-4000
K<-4000000
for(n in 1: (MAX.Y-1)) {lp[n+1]<-lp[n]+(rlp)*(1-lp[n]/K)*lp[n]}
# plot(lp~year, xlab="Time (years)", ylab="Population size", main=c(paste("B) Anchovy population growth"), paste ("in less productive environment")), col="darkorchid", type="l", cex.main=1.0)
sp<-rep(0,MAX.Y)
sp[1]<-100000
for(n in 1: (MAX.Y-1)) {sp[n+1]<-sp[n]+(rlp)*(1-sp[n]/K)*sp[n]}
# lines(sp~year, type="l", col="black")
data = data.frame(year=year,lp=lp, sp=sp)
data = reshape2::melt(data, id.vars = 'year')
ggplot(data, aes(year, value, colour = variable))+
geom_line()+
labs(x = "Time (years)", y = "Population size",
title = "B) Anchovy population growth \n in less productive environment")+
theme_minimal()
Here is what I would do.
First, since the computations for lp and sp are the same, only the initial values change, create a function to do it.
f <- function(initial, MAX, rlp, K){
x <- numeric(MAX)
x[1] <- initial
for(i in seq_len(MAX - 1)) {
x[i + 1] <- x[i] + rlp*(1 - x[i]/K)*x[i]
}
x
}
Now sapply the function to a vector of initial values.
MAX.Y <- 30
rlp <- 0.1
year <- seq_len(MAX.Y)
K <- 4000000
InitialValues <- setNames(c(4000, 100000), c("lp", "sp"))
x <- sapply(InitialValues, f, MAX.Y, rlp, K)
And plot it with matlines. But for matlines to work the plot must be created with the custom title, axis limits, etc.
plot(1, type = "n",
xlim = range(year), ylim = range(x),
main = c(paste("B) Anchovy population growth"), paste ("in less productive environment")),
xlab = "Time (years)",
ylab = "Population size",
cex.main = 1.0,
col = c("darkorchid", "black"))
matlines(x, lty = "solid")
Related
enter image description here
For this question, I have been able to split the data into two histograms with one being income above the median and the other being income below median. The following code is what I've done so far:
library(openintro)
data("countyComplete")
attach("countyComplete")
median(median_household_income, na.rm = FALSE)
x<-subset(countyComplete,median_household_income > 42445)
y<-subset(countyComplete,median_household_income < 42445)
par(mfrow=c(1,2))
hist(x$median_household_income, main="Income Above Median" )
hist(y$median_household_income,main = "Income Below Median")
However, I am a bit confused about how do I force histograms to use same limits on y axis, as well as breaks. Could someone point me in the right direction. I tried to do this:
par(mfrow=c(1,2))
hist(x$median_household_income,
breaks=seq(0,100,by=5),
freq = FALSE,
ylim=c(0,.15),
xlim = range(breaks),
main="Income Above Median")
hist(y$median_household_income, main = "Income Below Median")
But I only get one histogram showing up on my plot screen and the console says
"Error in hist.default(x$median_household_income, breaks = seq(0, 100, :
some 'x' not counted; maybe 'breaks' do not span range of 'x'".
What do I do?
I would forget the breaks argument. It doesn't make sense, you are plotting values below and above the median, they do not intersect.
As for the histograms, I have precomputed the median, and the maximum value of the density.
library(openintro)
data("countyComplete")
med <- median(countyComplete$median_household_income, na.rm = FALSE)
x <- subset(countyComplete, median_household_income > med)
y <- subset(countyComplete, median_household_income < med)
hx <- hist(x$median_household_income, plot = FALSE)
hy <- hist(y$median_household_income, plot = FALSE)
MaxY <- max(c(hx$density, hy$density))
op <- par(mfrow = c(1, 2))
hist(x$median_household_income, main = "Income Above Median",
freq = FALSE, ylim = c(0, MaxY))
hist(y$median_household_income, main = "Income Below Median",
freq = FALSE, ylim = c(0, MaxY))
par(op)
I am looking to plot the following:
L<-((2*pi*h*c^2)/l^5)*((1/(exp((h*c)/(l*k*T)-1))))
all variables except l are constant:
T<-6000
h<-6.626070040*10^-34
c<-2.99792458*10^8
k<-1.38064852*10^-23
l has a range of 20*10^-9 to 2000*10^-9.
I have tried l<-seq(20*10^-9,2000*10^-9,by=1*10^-9), however this does not give me the results I expect.
Is there a simple solution for this in R, or do I have to try in another language?
Thank you.
Looking at the spectral radiance equation wikipedia page, it seems that your formula is a bit off. Your formula multiplies an additional pi (not sure if intended) and the -1 is inside the exp instead of outside:
L <- ((2*pi*h*c^2)/l^5)*((1/(exp((h*c)/(l*k*T)-1))))
Below is the corrected formula. Also notice I have converted it into a function with parameter l since this is a variable:
T <- 6000 # Absolute temperature
h <- 6.626070040*10^-34 # Plank's constant
c <- 2.99792458*10^8 # Speed of light in the medium
k <- 1.38064852*10^-23 # Boltzmann constant
L <- function(l){((2*h*c^2)/l^5)*((1/(exp((h*c)/(l*k*T))-1)))}
# Plotting
plot(L, xlim = c(20*10^-9,2000*10^-9),
xlab = "Wavelength (nm)",
ylab = bquote("Spectral Radiance" ~(KW*sr^-1*m^-2*nm^-1)),
main = "Plank's Law",
xaxt = "n", yaxt = "n")
xtick <- seq(20*10^-9, 2000*10^-9,by=220*10^-9)
ytick <- seq(0, 4*10^13,by=5*10^12)
axis(side=1, at=xtick, labels = (1*10^9)*seq(20*10^-9,2000*10^-9,by=220*10^-9))
axis(side=2, at=ytick, labels = (1*10^-12)*seq(0, 4*10^13,by=5*10^12))
The plot above is not bad, but I think we can do better with ggplot2:
h <- 6.626070040*10^-34 # Plank's constant
c <- 2.99792458*10^8 # Speed of light in the medium
k <- 1.38064852*10^-23 # Boltzmann constant
L2 <- function(l, T){((2*h*c^2)/l^5)*((1/(exp((h*c)/(l*k*T))-1)))} # Plank's Law
classical_L <- function(l, T){(2*c*k*T)/l^4} # Rayleigh-Jeans Law
library(ggplot2)
ggplot(data.frame(l = c(20*10^-9,2000*10^-9)), aes(l)) +
geom_rect(aes(xmin=390*10^-9, xmax=700*10^-9, ymin=0, ymax=Inf),
alpha = 0.3, fill = "lightblue") +
stat_function(fun=L2, color = "red", size = 1, args = list(T = 3000)) +
stat_function(fun=L2, color = "green", size = 1, args = list(T = 4000)) +
stat_function(fun=L2, color = "blue", size = 1, args = list(T = 5000)) +
stat_function(fun=L2, color = "purple", size = 1, args = list(T = 6000)) +
stat_function(fun=classical_L, color = "black", size = 1, args = list(T = 5000)) +
theme_bw() +
scale_x_continuous(breaks = seq(20*10^-9, 2000*10^-9,by=220*10^-9),
labels = (1*10^9)*seq(20*10^-9,2000*10^-9,by=220*10^-9),
sec.axis = dup_axis(labels = (1*10^6)*seq(20*10^-9,2000*10^-9,by=220*10^-9),
name = "Wavelength (\U003BCm)")) +
scale_y_continuous(breaks = seq(0, 4*10^13,by=5*10^12),
labels = (1*10^-12)*seq(0, 4*10^13,by=5*10^12),
limits = c(0, 3.5*10^13)) +
labs(title = "Black Body Radiation described by Plank's Law",
x = "Wavelength (nm)",
y = expression("Spectral Radiance" ~(kWsr^-1*m^-2*nm^-1)),
caption = expression(''^'\U02020' ~'Spectral Radiance described by Rayleigh-Jeans Law, which demonstrates the ultraviolet catastrophe.')) +
annotate("text",
x = c(640*10^-9, 640*10^-9, 640*10^-9, 640*10^-9,
150*10^-9, (((700-390)/2)+390)*10^-9, 1340*10^-9),
y = c(2*10^12, 5*10^12, 14*10^12, 31*10^12,
35*10^12, 35*10^12, 35*10^12),
label = c("3000 K", "4000 K", "5000 K", "6000 K",
"UV", "VISIBLE", "INFRARED"),
color = c(rep("black", 4), "purple", "blue", "red"),
alpha = c(rep(1, 4), rep(0.6, 3)),
size = 4.5) +
annotate("text", x = 1350*10^-9, y = 23*10^12,
label = deparse(bquote("Classical theory (5000 K)"^"\U02020")),
color = "black", parse = TRUE)
Notes:
I created L2 by also making absolute temperature T a variable
For each T, I plot the function L2 using different colors for representation. I've also added a classical_L function to demonstrate classical theory of spectral radiance
geom_rect creates the light blue shaded area for "VISIBLE" light wavelength range
scale_x_continuous sets the breaks of the x axis, while labels sets the axis tick labels. Notice I have multiplied the seq by (1*10^9) to convert the units to nanometer (nm). A second x-axis is added to display the micrometer scale
Analogously, scale_y_continuous sets the breaks and tick labels for y axis. Here I multiplied by (1*10^-12) or (1*10^(-3-9)) to convert from watts (W) to kilowatts (kW), and from inverse meter (m^-1) to inverse nanometer (nm^-1)
bquote displays superscripts correctly in the y axis label
annotate sets the coordinates and text for curve labels. I've also added the labels for "UV", "VISIBLE" and "INFRARED" light wavelengths
ggplot2
Plot from wikipedia:
Image source: https://upload.wikimedia.org/wikipedia/commons/thumb/1/19/Black_body.svg/600px-Black_body.svg.png
in R, with ecdf I can plot a empirical cumulative distribution function
plot(ecdf(mydata))
and with hist I can plot a histogram of my data
hist(mydata)
How I can plot the histogram and the ecdf in the same plot?
EDIT
I try make something like that
https://mathematica.stackexchange.com/questions/18723/how-do-i-overlay-a-histogram-with-a-plot-of-cdf
Also a bit late, here's another solution that extends #Christoph 's Solution with a second y-Axis.
par(mar = c(5,5,2,5))
set.seed(15)
dt <- rnorm(500, 50, 10)
h <- hist(
dt,
breaks = seq(0, 100, 1),
xlim = c(0,100))
par(new = T)
ec <- ecdf(dt)
plot(x = h$mids, y=ec(h$mids)*max(h$counts), col = rgb(0,0,0,alpha=0), axes=F, xlab=NA, ylab=NA)
lines(x = h$mids, y=ec(h$mids)*max(h$counts), col ='red')
axis(4, at=seq(from = 0, to = max(h$counts), length.out = 11), labels=seq(0, 1, 0.1), col = 'red', col.axis = 'red')
mtext(side = 4, line = 3, 'Cumulative Density', col = 'red')
The trick is the following: You don't add a line to your plot, but plot another plot on top, that's why we need par(new = T). Then you have to add the y-axis later on (otherwise it will be plotted over the y-axis on the left).
Credits go here (#tim_yates Answer) and there.
There are two ways to go about this. One is to ignore the different scales and use relative frequency in your histogram. This results in a harder to read histogram. The second way is to alter the scale of one or the other element.
I suspect this question will soon become interesting to you, particularly #hadley 's answer.
ggplot2 single scale
Here is a solution in ggplot2. I am not sure you will be satisfied with the outcome though because the CDF and histograms (count or relative) are on quite different visual scales. Note this solution has the data in a dataframe called mydata with the desired variable in x.
library(ggplot2)
set.seed(27272)
mydata <- data.frame(x= rexp(333, rate=4) + rnorm(333))
ggplot(mydata, aes(x)) +
stat_ecdf(color="red") +
geom_bar(aes(y = (..count..)/sum(..count..)))
base R multi scale
Here I will rescale the empirical CDF so that instead of a max value of 1, its maximum value is whatever bin has the highest relative frequency.
h <- hist(mydata$x, freq=F)
ec <- ecdf(mydata$x)
lines(x = knots(ec),
y=(1:length(mydata$x))/length(mydata$x) * max(h$density),
col ='red')
you can try a ggplot approach with a second axis
set.seed(15)
a <- rnorm(500, 50, 10)
# calculate ecdf with binsize 30
binsize=30
df <- tibble(x=seq(min(a), max(a), diff(range(a))/binsize)) %>%
bind_cols(Ecdf=with(.,ecdf(a)(x))) %>%
mutate(Ecdf_scaled=Ecdf*max(a))
# plot
ggplot() +
geom_histogram(aes(a), bins = binsize) +
geom_line(data = df, aes(x=x, y=Ecdf_scaled), color=2, size = 2) +
scale_y_continuous(name = "Density",sec.axis = sec_axis(trans = ~./max(a), name = "Ecdf"))
Edit
Since the scaling was wrong I added a second solution, calculatin everything in advance:
binsize=30
a_range= floor(range(a)) +c(0,1)
b <- seq(a_range[1], a_range[2], round(diff(a_range)/binsize)) %>% floor()
df_hist <- tibble(a) %>%
mutate(gr = cut(a,b, labels = floor(b[-1]), include.lowest = T, right = T)) %>%
count(gr) %>%
mutate(gr = as.character(gr) %>% as.numeric())
# calculate ecdf with binsize 30
df <- tibble(x=b) %>%
bind_cols(Ecdf=with(.,ecdf(a)(x))) %>%
mutate(Ecdf_scaled=Ecdf*max(df_hist$n))
ggplot(df_hist, aes(gr, n)) +
geom_col(width = 2, color = "white") +
geom_line(data = df, aes(x=x, y=Ecdf*max(df_hist$n)), color=2, size = 2) +
scale_y_continuous(name = "Density",sec.axis = sec_axis(trans = ~./max(df_hist$n), name = "Ecdf"))
As already pointed out, this is problematic because the plots you want to merge have such different y-scales. You can try
set.seed(15)
mydata<-runif(50)
hist(mydata, freq=F)
lines(ecdf(mydata))
to get
Although a bit late... Another version which is working with preset bins:
set.seed(15)
dt <- rnorm(500, 50, 10)
h <- hist(
dt,
breaks = seq(0, 100, 1),
xlim = c(0,100))
ec <- ecdf(dt)
lines(x = h$mids, y=ec(h$mids)*max(h$counts), col ='red')
lines(x = c(0,100), y=c(1,1)*max(h$counts), col ='red', lty = 3) # indicates 100%
lines(x = c(which.min(abs(ec(h$mids) - 0.9)), which.min(abs(ec(h$mids) - 0.9))), # indicates where 90% is reached
y = c(0, max(h$counts)), col ='black', lty = 3)
(Only the second y-axis is not working yet...)
In addition to previous answers, I wanted to have ggplot do the tedious calculation (in contrast to #Roman's solution, which was kindly enough updated upon my request), i.e., calculate and draw the histogram and calculate and overlay the ECDF. I came up with the following (pseudo code):
# 1. Prepare the plot
plot <- ggplot() + geom_hist(...)
# 2. Get the max value of Y axis as calculated in the previous step
maxPlotY <- max(ggplot_build(plot)$data[[1]]$y)
# 3. Overlay scaled ECDF and add secondary axis
plot +
stat_ecdf(aes(y=..y..*maxPlotY)) +
scale_y_continuous(name = "Density", sec.axis = sec_axis(trans = ~./maxPlotY, name = "ECDF"))
This way you don't need to calculate everything beforehand and feed the results to ggpplot. Just lay back and let it do everything for you!
So I am trying to add some graphs to my notes. I have created a simple interest function that will plot several simple interest functions using different rates and I would like to add a legend that would simple say...
"i =: 0%, x%, y%, z%" on one single line, where each 0,x,y,z is in the different color of the representative function using that interest rate.
I looked into the paste() function and attempted to make it one string but I am not sure exactly how to loop it into the int_seq and pull out each individual index and make it a different color then put it into a single string.
# indexs to be used
t = 0:50
int_seq = seq(0.025,0.10,by=0.025) # intere rate sequence
colors = c("red","blue","green","orange") #colors of interest rate seq
index = 1:length(int_seq)
# AV Simple Interest (all good)
avSimple = function(i,t){
av = (1 + (i * t))
return(av)}
# Plot range for y-axis (all good)
yrange = c(avSimple(min(int_seq),min(t)) * 0.95,
avSimple(max(int_seq),max(t)) * 1.05)
# Plots Simple Interest with different interest rates (all good)
plot(t,avSimple(0,t), type="l", main = "AV Simple Interest", xlab = "Time",
ylab = "AV", ylim = yrange)
# loops through the int_seq and plots line based on interest rate
# and specified color (all good)
for (i in index)
lines(t,avSimple(int_seq[i],t), col = colors[i])
# Adds legend to plot for different interest rates
# !!This is where I need the help, not sure best way to approach!!
legend(0,avSimple(0.075,50), c("i =: 0%", for (i in index) int_seq[i]),
col = colors)
Not sure what kind of legend you want. Since you say in one line, you might want to add horiz = TRUE, but here are some other options:
You can pass full vectors to legend so there is no need for a loop in this case. Just create a vector of labels but also use a vector of colors corresponding to each label (which you have already done).
# indexs to be used
t = 0:50
int_seq = seq(0.025,0.10,by=0.025) # intere rate sequence
colors = c("red","blue","green","orange") #colors of interest rate seq
index = 1:length(int_seq)
# AV Simple Interest (all good)
avSimple = function(i,t){
av = (1 + (i * t))
return(av)}
# Plot range for y-axis (all good)
yrange = c(avSimple(min(int_seq),min(t)) * 0.95,
avSimple(max(int_seq),max(t)) * 1.05)
plot(t, type="n", main = "AV Simple Interest", xlab = "Time",
ylab = "AV", ylim = yrange)
# for (i in index)
# lines(t,avSimple(int_seq[i],t), col = colors[i])
# Adds legend to plot for different interest rates
# !!This is where I need the help, not sure best way to approach!!
labs <- sprintf('i =: %s%%', c(0, int_seq))
labs2 <- paste0(c(0, int_seq), '%')
legend('topleft', legend = labs, col = colors, lty = 1, title = 'normal')
l <- legend('top', legend = rep('i =:', length(labs)), lty = 1,
col = colors, text.width = max(strwidth(labs)) + 1,
title = 'right-justified')
text(l$rect$left + l$rect$w, l$text$y, labs2, pos = 2)
legend('topright', legend = labs, text.col = colors, title = 'colored')
legend('bottom', legend = labs, col = colors, lty = 1, horiz = TRUE,
cex = .7, title = 'horizontal')
I am plotting the standard normal distribution.
curve(dnorm(x), from=-4, to=4,
main = "The Standard Normal Distibution",
ylab = "Probability Density",
xlab = "X")
For pedagogical reasons, I want to shade the area below a certain quantile of my choice. How can I do this?
If you want to use curve and base plot, then you can write a little function yourself with polygon:
colorArea <- function(from, to, density, ..., col="blue", dens=NULL){
y_seq <- seq(from, to, length.out=500)
d <- c(0, density(y_seq, ...), 0)
polygon(c(from, y_seq, to), d, col=col, density=dens)
}
A little example follows:
curve(dnorm(x), from=-4, to=4,
main = "The Standard Normal Distibution",
ylab = "Probability Density",
xlab = "X")
colorArea(from=-4, to=qnorm(0.025), dnorm)
colorArea(from=qnorm(0.975), to=4, dnorm, mean=0, sd=1, col=2, dens=20)
We could use the following R code too, in order to shade the regions under the standard normal curve below a certain (given) quantile:
library(ggplot2)
z <- seq(-4,4,0.01)
fz <- dnorm(z)
q <- qnorm(0.1) # the quantile
x <- seq(-4, q, 0.01)
y <- c(dnorm(x), 0, 0)
x <- c(x, q, -4)
ggplot() + geom_line(aes(z, fz)) +
geom_polygon(data = data.frame(x=x, y=y), aes(x, y), fill='blue')