Plot a curve of data frame - r

# Read the recording. The path is assembled from two pieces so that no line
# break ends up inside the file name.
csv_path <- paste0(
  "C:\\Users\\Unique\\Desktop\\Data Science\\",
  "R Scripts\\LimeBison_ch1.csv"
)
df1 <- read.csv(csv_path)

# Keep the rows whose third column equals the target value. The column is
# compared with a small tolerance rather than `==` (it is assumed to be
# numeric - confirm), and which() drops rows where the comparison is NA.
target_value <- 73.608125
df2 <- df1[which(abs(df1[, 3] - target_value) < 1e-9), ]

# Scatter plot of column 2 against column 1 on fixed axis limits.
plot(
  df2[, 1], df2[, 2],
  xlab = "Milliseconds", ylab = "Amplitude",
  main = "Amplitude vs Time Graph",
  type = "p", pch = 16, col = "red",
  xlim = c(-200, 1200), ylim = c(-1.5, 1.5)
)

# Mean amplitude for each distinct time value.
x <- tapply(df2$Amplitude, df2$Time, mean)

# names(x) is a character vector; convert it with as.numeric() before using
# df3$Time as plot coordinates.
df3 <- data.frame(Time = names(x), Average_Amplitude = x)
How can I plot a curve of the data frame df3 over the scatter plot of df2?

I'm not sure about your data, but if you are using base plotting, then you can plot a line on top of a scatter plot by using the lines function
# Toy data: y rises from 1 to 20 and then falls back to 1.
df <- data.frame(
  x = seq_len(40),
  y = c(seq_len(20), rev(seq_len(20)))
)

# Draw the points first, then add a line through the same coordinates on the
# already-open plot.
plot(df$x, df$y, cex = 2)
lines(df$x, df$y)

Related

Calculate and plot multiple densities?

I have a matrix with multiple columns and I'd like to calculate the density of each column, and then plot those densities in a single base R plot. It would also help if the axis limits were chosen automatically so that every curve fits in the plot.
# 5 x 10 matrix of standard-normal draws: 50 values, one per cell, so nothing
# is recycled and no two columns are identical.
m <- matrix(rnorm(50), 5, 10)
Create a list of densities d, compute the xlim and ylim values and use those to create an empty plot. Finally draw each of the densities on that plot and optionally draw a legend. As requested, this uses only base R.
set.seed(123)
m <- matrix(rnorm(50), 5, 10) # test data

# One kernel density estimate per column of m (d is a list of density objects).
d <- apply(m, 2, density)

# Axis limits wide enough for every curve: the overall range of the x and y
# values across all densities. vapply() pins the per-density result to a
# length-2 numeric vector.
xlim <- range(vapply(d, function(dens) range(dens$x), numeric(2)))
ylim <- range(vapply(d, function(dens) range(dens$y), numeric(2)))

# Empty canvas with those limits, then one coloured line per density.
plot(NA, xlim = xlim, ylim = ylim, ylab = "density")
nc <- ncol(m)
cols <- rainbow(nc)
for (i in seq_len(nc)) {
  lines(d[[i]], col = cols[i])
}
legend("topright", legend = seq_len(nc), lty = 1, col = cols, cex = 0.7)
It can also be done with ggplot2:
library(reshape2)
library(ggplot2)

# Example data: 50 normal draws arranged as a 5 x 10 data frame
set.seed(123)
m <- as.data.frame(matrix(rnorm(50), 5, 10))

# Long format: one row per (variable, value) pair
meltdata <- melt(m)

# Plot 1: one density outline per variable, distinguished by line colour
ggplot(meltdata, aes(value, color = variable)) +
  geom_density() +
  ggtitle("Plot 1")

# Plot 2: one semi-transparent filled density per variable
ggplot(meltdata, aes(value, fill = variable)) +
  geom_density(alpha = 0.6) +
  ggtitle("Plot 2")

labeling nmds plot by column values in r

I ran metaMDS and want to plot and color code by a grouping based on certain data frame characters. In my original data frame, df$yr are years and df$2 are sites. I want to color by the years.
# NMDS in three dimensions on columns 3 to 12 of df.
caltmds <- metaMDS(df[, 3:12], k = 3)

# Empty ordination frame; the points are added below.
plot(caltmds, type = "n")

# Index the palette by the group code of each row (1, 2, ...), not by the raw
# yr value: indexing a two-element vector with a number such as a calendar
# year returns NA, and points with an NA colour are not drawn.
# NOTE(review): two colours assume yr has two distinct values - confirm.
cols <- c("red2", "mediumblue")
yr_code <- as.integer(factor(df$yr))
points(caltmds, col = cols[yr_code])
I also tried from this post:
scl <- 3
colvec <- c("red2", "mediumblue")

# Treat yr as a factor so that its integer codes select a colour and its
# levels label the legend; levels() of a non-factor is NULL, and indexing
# colvec with raw year values gives NA colours.
# NOTE(review): two colours assume yr has two distinct values - confirm.
yr_f <- factor(df$yr)
yr_code <- as.integer(yr_f)

# Empty frame, then sites coloured by year and species as text labels.
plot(caltmds, type = "n", scaling = scl)
points(caltmds, display = "sites", col = colvec[yr_code], pch = 21,
       bg = colvec[yr_code])
text(caltmds, display = "species", cex = 0.8, col = "darkcyan")
legend("topright", legend = levels(yr_f), bty = "n", col = colvec, pch = 21,
       pt.bg = colvec)
Nothing plots
# DATA
library(vegan)

df1 <- mtcars
mycolors <- df1$cyl # grouping vector used to colour the points

m <- metaMDS(df1)

# Extract the site coordinates. display = "sites" is requested explicitly so
# that x is a plain coordinate matrix whichever score types scores() returns
# by default.
x <- scores(m, display = "sites")

# One colour index (1, 2, ...) per distinct group value.
plot(x, col = as.integer(factor(mycolors)))

R Bubble chart: labels overlapping

Attempting to code a function that returns a bubble chart from aggregated data.
I'm passing it a column of a data.frame in "agg".
# Summarise `agg` by deprivation quintile and urban/rural status and, when
# requested, draw a bubble chart of the group means.
#
# agg      values to aggregate; grouped by NoNA$IMD_NATIONAL_QUINTILE and
#          NoNA$UR, which are read from the calling environment.
# deporur  set to 1 to draw the bubble chart; any other value skips it.
# all      when FALSE, groups with a count of 9 or fewer are dropped.
#
# Returns the summary data frame ordered by decreasing mean.
aggs2 <- function(agg, deporur = 0, all = TRUE) {
  # Mean and number of observations within each group.
  group_keys <- list(NoNA$IMD_NATIONAL_QUINTILE, NoNA$UR)
  mean_and_count <- function(x) c(mn = mean(x), n = length(x))
  cells <- aggregate(agg, by = group_keys, FUN = mean_and_count)

  # The two statistics come back as one matrix column; spread them out into
  # ordinary columns next to the two grouping columns.
  cells <- cbind(cells[, 1:2], cells[, 3])

  # Text label combining the two grouping values.
  cells$NewCol <- paste(
    "Deprivation Quantile", cells[["Group.1"]], cells[["Group.2"]],
    sep = " "
  )

  colnames(cells) <- c(
    "Deprivation", "Urban and Rural", "Mean", "Count", "DepUR"
  )

  # Optionally discard groups with low counts.
  if (all == FALSE) {
    cells <- subset(cells, Count > 9)
  }

  # Largest mean first.
  cells <- cells[order(cells$Mean, decreasing = TRUE), ]

  # Bubble chart: one circle per group, radius derived from the group count,
  # with the group label written just below the circle centre.
  if (deporur == 1) {
    x_pos <- factor(cells$DepUR)
    radius3 <- sqrt(cells$Count / pi)
    symbols(
      x_pos, cells$Mean,
      circles = radius3, inches = 0.35,
      xlim = c(0, 10.0), ylim = c(min(cells$Mean - 0.25), 10.0),
      fg = "white", bg = "purple",
      xlab = "Deprivation Quantile and Urban/Rural Status",
      ylab = "Mean Response"
    )
    text(x_pos, cells$Mean - .1, cells$DepUR, cex = 0.7)
  }

  # Hand back the ordered summary.
  cells
}
This returns a sorted data.frame by mean, and the following chart:
Because this function will need to create graphs from a variety of different documents and columns, I would like to code it so that the labels do not overlap the bubbles, or other labels.
I have looked at the directlabels library, but I have been unable to work out how to code it properly.
Would greatly appreciate any assistance.
I'm not aware of any solution for non-overlapping labels with regards to other labels AND other circles. Nevertheless, wordcloud::textplot might be a starting point:
library(wordcloud)

# Ten random points, each with a bubble size and a text label.
set.seed(8)
df <- data.frame(
  x = runif(10),
  y = runif(10),
  size = sample(10:20, 10),
  lab = paste0("label", 1:10)
)

# Two panels side by side, drawn with the same translucent bubbles.
par(mfrow = c(1, 2))
bubble_col <- adjustcolor("violet", alpha.f = .4)

with(df, {
  # Left panel: labels placed by textplot()
  plot(x, y, cex = size, pch = 19, col = bubble_col, main = "non-overlapping")
  textplot(x, y, lab, new = FALSE, show.lines = FALSE, cex = 2)

  # Right panel: labels drawn by text() exactly at the point coordinates
  plot(x, y, cex = size, pch = 19, col = bubble_col, main = "overlapping")
  text(x, y, lab, cex = 2)
})

Dates on x-axis; big dataset

I would like to plot a time series. I created an example to show what the graph should look like:
# Reproducible example: 20 standard-normal draws and a vector of +1/-1
# multipliers.
set.seed(1)
r <- rnorm(20, mean = 0, sd = 1)
z <- c(
  1, 1, 1, 1, 1, -1, -1, -1, 1, -1,
  1, 1, 1, -1, 1, 1, -1, -1, 1, -1
)

# Two-column data frame (z, r) with incomplete rows removed.
data <- as.data.frame(na.omit(cbind(z, r)))

# Running totals that start at 1: series1 accumulates the draws multiplied by
# z, series2 accumulates the draws themselves.
signed_draws <- data$r * data$z
series1 <- ts(cumsum(c(1, signed_draws)))
series2 <- ts(cumsum(c(1, data$r)))

# 21 evenly spaced dates from 1991-01-01 to 2015-01-01.
d1y <- seq(
  as.Date("1991-01-01"), as.Date("2015-01-01"),
  length.out = 21
)
# Plot two series in one line chart with year labels on the x-axis and copy
# the figure to a PDF file.
#
# series1, series2  the two series to draw (bound column-wise for matplot()).
# currency          file name of the PDF that receives a copy of the plot.
# dates             Date vector from which the tick labels are taken;
#                   defaults to the variable d1y visible to this function.
# at                x positions of the ticks, also used to index `dates`;
#                   defaults to every second position from 2 to 20.
#
# Returns the value of dev.copy2pdf().
plot_strategy <- function(series1, series2, currency,
                          dates = d1y, at = seq(2, 20, 2)) {
  # A position beyond the end of `dates` yields an NA label, so flag that
  # instead of leaving it unnoticed.
  if (any(at > length(dates))) {
    warning(
      "some tick positions in 'at' exceed length(dates); ",
      "their labels will be NA",
      call. = FALSE
    )
  }

  x11()

  # One colour per series, shared by the lines and the legend.
  series_cols <- 1:2
  matplot(
    cbind(series1, series2),
    xaxt = "n", xlab = "Time", ylab = "Value",
    col = series_cols, ann = TRUE, type = "l", lty = 1
  )

  # The default x-axis is suppressed above; draw it with year labels.
  axis(1, at = at, labels = format(dates[at], "%Y"))
  legend(x = "topleft", legend = c("TR", "BA"), lty = 1, col = series_cols)

  dev.copy2pdf(file = currency, width = 11.69, height = 8.27)
}
# Draw both example series and save a copy of the figure as "all.pdf".
plot_strategy(series1, series2, currency = "all.pdf")
The actual dataset contains 6334 values. I therefore change the code to this:
# NOTE(review): d1y is created with length.out = 21 in the example above, so
# d1y[seq(2,6334,365)] is NA for every index greater than 21 and those tick
# labels are NA. d1y presumably needs one date per observation - confirm.
axis(1, at=seq(2,6334,365), labels=format(d1y[seq(2,6334,365)],"%Y"))
But now there are no values on the x-axis. Any suggestions?

QQ plot: More than two data

I wanted to graph a QQ plot similar to this picture:
I managed to get a QQ plot using two samples, but I do not know how to add a third one to the plot.
Here is my result:
Here is the code I used:
# Same limits on both axes. The lower y limit is -0.25; it was typed as -025,
# which R reads as -25.
qqplot(table$Bedouin, table$Tunisia,
       xlim = c(-0.25, 0.25), ylim = c(-0.25, 0.25))
In my table data frame I have other populations I would like to add. But I can't.
Thank you in advance.
I suppose you're looking for a scatterplot of sorted values since all variables are stored in the same data frame.
An example dataset:
# Three columns of 20 standard-normal draws each, generated in the order
# A, B, C from a fixed seed.
set.seed(10)
n_obs <- 20
dat <- data.frame(
  A = rnorm(n_obs),
  B = rnorm(n_obs),
  C = rnorm(n_obs)
)
This is a way to create the plot with basic R functions:
# Use the overall range of the data for both axes.
axis_limits <- range(dat)

# QQ-plot of B against A.
qqplot(
  dat$A, dat$B,
  xlim = axis_limits, ylim = axis_limits,
  xlab = "A", ylab = "B/C"
)

# Reference line y = x.
abline(0, 1)

# Overlay C against A by pairing the sorted values of the two columns.
points(sort(dat$A), sort(dat$C), col = "red")

# Legend identifying the two point sets.
legend(
  "bottomright",
  legend = c("B", "C"), pch = 1, col = c("black", "red")
)
You can add the line
# Make the next high-level plotting call draw on top of the current figure
# instead of starting a fresh one.
par(new=TRUE)
Then use the qqplot() again to over-plot the first plot as follows:
set.seed(10)
dat <- data.frame(A = rnorm(20), B = rnorm(20), C = rnorm(20))

# Both layers use the same limits and axis titles so that they line up.
axis_limits <- range(dat)
x_title <- "Distribution A"
y_title <- "Other distributions"

# First layer: QQ-plot of B against A.
qqplot(
  dat$A, dat$B,
  xlim = axis_limits, ylim = axis_limits,
  xlab = x_title, ylab = y_title
)

# Keep the current figure so the next qqplot() call draws over it.
par(new = TRUE)

# Second layer: QQ-plot of C against A, in red.
qqplot(
  dat$A, dat$C,
  xlim = axis_limits, ylim = axis_limits,
  xlab = x_title, ylab = y_title,
  col = "red"
)

# Reference line y = x.
abline(0, 1)

# Legend identifying the two layers.
legend(
  "bottomright",
  legend = c("B", "C"), pch = 1, col = c("black", "red")
)

Resources