I have the following df + code example: (my df is from 01/2016 - 04/2020)
Date A01_Price A02_Price A03_Price A04_Price A05_Price A06_Price A07_Price A08_Price A09_Price A10_Price A11_Price A12_Price A13_Price A14_Price A15_Price
1 2016-01-04 49.5010 21.6400 90.0100 93.676 81.6110 27.6450 28.4600 44.1930 33.0140 216.5460 36.201 41.7360 25.495 16.200 69.197
2 2016-01-05 49.7855 21.9870 88.5695 92.329 82.4590 28.2235 28.2790 44.8180 34.0180 218.5370 35.929 40.8520 25.431 16.157 69.828
3 2016-01-06 49.0595 21.5060 87.4735 88.601 81.4320 28.1725 27.4720 43.5065 36.2670 207.5960 35.256 39.9220 25.076 16.061 66.947
4 2016-01-07 47.7785 20.8415 82.8735 83.725 78.9340 26.7820 26.3485 39.0500 34.3570 203.4775 33.967 38.2115 24.062 15.738 64.135
5 2016-01-08 47.7435 20.2600 82.9275 82.609 79.0000 26.4495 26.1980 41.0055 33.1400 199.9250 33.361 37.3630 23.993 15.641 63.434
6 2016-01-09 47.8160 20.3800 83.0530 83.503 79.3925 26.4900 26.2460 41.0680 33.1910 200.9835 33.447 37.4530 24.110 15.734 63.510
7 2016-01-10 47.7770 20.4750 82.9860 83.325 79.6450 26.4680 26.2200 41.0340 33.1640 201.3680 33.401 37.4050 23.998 15.713 63.469
8 2016-01-11 48.8095 20.8440 83.0320 83.513 78.6720 26.1275 26.5150 40.3455 33.9770 202.6630 33.516 37.5030 23.947 15.753 65.583
9 2016-01-12 48.9545 21.0340 83.7325 85.732 81.0900 27.0205 27.5920 41.3570 35.6450 205.9610 34.443 38.0980 24.004 15.939 66.032
10 2016-01-13 48.0195 20.4640 82.6305 81.151 81.1780 27.1925 26.8050 39.2130 35.3825 197.9070 33.023 37.5945 23.423 15.737 64.682
I would like to have the long and short exponential moving average for each "Axy_Price" displayed in the plot for my function. Like in this graphic. It would be great if I could somehow implement it in ggplot, because I want to show more variables in the plot. Anybody got an idea? I've tried this so far:
library(plyr)
library(tidyverse)
library(quantmod)
library(TTR)
library(zoo)
library(PerformanceAnalytics)
library(xts)
#### EMA function ####
EMAf <- function (price,n){
ema <- c()
ema[1:(n-1)] <- NA
ema[n]<- mean(price[1:n])
beta <- 2/(n+1)
for (i in (n+1):length(price)){
ema[i]<-beta * price[i] +
(1-beta) * ema[i-1]
}
ema <- reclass(ema,price)
return(ema)
}
#
df_A01 <- df %>%
dplyr::select(Date, A01_Price)
#
EMAf_A01 <- ddply(df_A01, "A01", f)
#
library(ggplot2)
ggplot(df_A01, aes(x=Year, y=Value)) +
geom_line(mapping=aes(shape=Type), size=0.5) +
theme_bw() +
geom_line(data = madf, mapping=aes(x = Date, y = EMAf_A01, linetype=Type,
color = A01), size = 1) +
ylab(expression(paste("mean",mu, "g",C~L^{-1}, day^{-1}))) +
theme(legend.key = element_blank())
I also tried it with ChartSeries but i get a zoo error:
#
xts_stock01L <- as.xts(df_A01[, c(2)], order.by=df_A01[[1]])
#
chartSeries(xts_stock01L,
subset="2016-01::2020-04",
theme=chartTheme("white"))
addEMA(n=30,on=1,col = "blue")
addEMA(n=200,on=1,col = "red")
Maybe I need to approach this whole thing differently
You can add lines one by one and use TTR::EMA function.
Code:
library(TTR)
library(quantmod)
getSymbols("AAPL")
df = as.data.frame(AAPL)
library(ggplot2)
df$EMA_short = EMA(df$AAPL.Close,1000)
df$EMA_long = EMA(df$AAPL.Close,100)
df$time = rownames(df) %>% as.POSIXct()
df = na.omit(df)
ggplot(df) + geom_line(aes(y=EMA_short,x =time),col="green") + geom_line(aes(y=EMA_long,x=time),col="red") + geom_line(aes(y=AAPL.Close,x=time))
If you want to plot lines by one command you should melt df, there some nice posts how to do that.
I think the problem might be that are using multiple data frame. Let all the variables be in one file.
Related
I have been trying to plot a 3d plot of my data but I cannot figure out how to overcome some errors. Any help is highly appreciated.
>head(d1) #produced through the melt function as seen below
Date variable value
1 2007 Q2 0.890 1.1358560
2 2007 Q3 0.890 1.1560433
3 2007 Q4 0.890 0.3747925
4 2008 Q1 0.890 0.3866533
5 2008 Q2 0.890 0.3872620
6 2008 Q3 0.890 0.3844887
I have successfully managed to plot a heatmap using this:
d1<-melt(mydata,id.vars = "Date")
P1 <- ggplot(data=d1, aes(x=Date, y=variable, fill=value)) +
geom_tile() +
ggtitle("My heatmap") +scale_fill_gradientn(colors=colorRampPalette(c("lightgray","royalblue","seagreen","orange","red","brown"))(500),name="Variable") +
labs(x = "Quarter",y="Alpha") +
theme_bw()
ggplotly(P1)
*Don't know how to automatically pick scale for object of type yearqtr. Defaulting to continuous.*
However, I want to create a 3d plot.
open3d()
rgl.surface(x=d1$variable, y=d1$Date,
coords=c(1,3,2),z=d1$value,
color=colorzjet[ findInterval(d1$value, seq(min(d1$value), max(d1$value), length=100))] )
axes3d()
Error in rgl.surface(x = d1$variable, y = d1$Date, coords = c(1, 3, 2), :
'y' length != 'x' rows * 'z' cols
plot_ly(x=d1$Date,y=d1$variable,z=d1$value,type="surface",colors=colors)
Error: `z` must be a numeric matrix
I have tried to use as.matrix(apply(d1,2,as.numeric)), but this returns NAs to the date argument.
Could it be the nature of the Quarterly dates that messes up the graph? (because even the heat map doesn't show the dates as Quarterly. Any tips?
dput(d1) output here: dput(d1) output
The file you uploaded is a CSV file, not dput output. But you can read it and plot it like this:
d1csv <- read.csv("dput_output.csv")
year <- as.numeric(sub(" .*", "", d1csv$Date))
quarter <- as.numeric(sub(".*Q", "", d1csv$Date))
Date <- matrix(year + (quarter - 1)/4, 55)
variable <- matrix(d1csv$variable, 55)
value <- matrix(d1csv$value, 55)
persp3d(Date, variable, value, col = "red")
This gives the following plot:
I am trying to construct Renko Chart using the obtained from Yahoo finance and was wondering if there is any package to do so. I had a look at the most financial packages but was only able to find Candlestick charts.
For more information on Renko charts use the link given here
Really cool question! Apparently, there is really nothing of that sort available for R. There were some attempts to do similar things (e.g., waterfall charts) on various sites, but they all don't quite hit the spot. Soooo... I made a little weekend project out of it with data.table and ggplot.
rrenko
There are still bugs, instabilities, and visual things that I would love to optimize (and the code is full of commented out debug notes), but the main idea should be there. Open for feedback and points for improvement.
Caveats: There are still case where the data transformation screws up, especially if the size is very small or very large. This should be fixable in the near future. Also, the renko() function at the moment expects a dataframe with two columns: date (x-axis) and close (y-axis).
Installation
devtools::install_github("RomanAbashin/rrenko")
library(rrenko)
Code
renko(df, size = 5, style = "modern") +
scale_y_continuous(breaks = seq(0, 150, 10)) +
labs(x = "", y = "")
renko(df, size = 5, style = "classic") +
scale_y_continuous(breaks = seq(0, 150, 10)) +
labs(x = "", y = "")
Data
set.seed(1702)
df <- data.frame(date = seq.Date(as.Date("2014-05-02"), as.Date("2018-05-04"), by = "week"),
close = abs(100 + cumsum(sample(seq(-4.9, 4.9, 0.1), 210, replace = TRUE))))
> head(df)
date close
1: 2014-05-02 104.0
2: 2014-05-09 108.7
3: 2014-05-16 111.5
4: 2014-05-23 110.3
5: 2014-05-30 108.9
6: 2014-06-06 106.5
I'm R investment developer, I used some parts of Roman's code to optimize some lines of my Renko code. Roman's ggplot skills are awesome. The plot function was just possible because of Roman's code.
If someone is interesting:
https://github.com/Kinzel/k_rrenko
It will need the packages: xts, ggplot2 and data.table
"Ativo" need to be a xts, with one of columns named "close" to work.
EDIT:
After TeeKea request, how to use it is simple:
"Ativo" is a EURUSD xts 15-min of 2015-01-01 to 2015-06-01. If the "close" column is not found, it will be used the last one.
> head(Ativo)
Open High Low Close
2015-01-01 20:00:00 1.20965 1.21022 1.20959 1.21006
2015-01-01 20:15:00 1.21004 1.21004 1.20979 1.21003
2015-01-01 20:30:00 1.21033 1.21041 1.20982 1.21007
2015-01-01 20:45:00 1.21006 1.21007 1.20978 1.21002
2015-01-01 21:00:00 1.21000 1.21002 1.20983 1.21002
2015-01-02 00:00:00 1.21037 1.21063 1.21024 1.21037
How to use krenko_plot:
krenko_plot(Ativo, 0.01,withDates = F)
Link to image krenko_plot
Compared to plot.xts
plot.xts(Ativo, type='candles')
Link to image plot.xts
There are two main variables: size and threshold.
"size" is the size of the bricks. Is needed to run.
"threshold" is the threshold of new a brick. Default is 1.
The first brick is removed to ensure reliability.
Here's a quick and dirty solution, adapted from a python script here.
# Get some test data
library(rvest)
url <- read_html("https://coinmarketcap.com/currencies/bitcoin/historical-data/?start=20170602&end=20181126")
df <- url %>% html_table() %>% as.data.frame()
# Make sure to have your time sequence the right way up
data <- apply(df[nrow(df):1, 3:4], 1, mean)
# Build the renko function
renko <- function(data, delta){
pre <- data[1]
xpos <- NULL
ypos <- NULL
xneg <- NULL
yneg <- NULL
for(i in 1:length(data)){
increment <- data[i] - pre
incrementPerc <- increment / pre
pre <- data[i]
if(incrementPerc > delta){
xpos <- c(xpos, i)
ypos <- c(ypos, data[i])
}
if(incrementPerc < -delta){
xneg <- c(xneg, i)
yneg <- c(yneg, data[i])
}
}
signal <- list(xpos = xpos,
ypos = unname(ypos),
xneg = xneg,
yneg = unname(yneg))
return(signal)
}
# Apply the renko function and plot the outcome
signals <- renko(data = data, delta = 0.05)
plot(1:length(data), data, type = "l")
points(signals$xneg, signals$yneg, col = "red", pch = 19)
points(signals$xpos, signals$ypos, col = "yellowgreen", pch = 19)
NOTE: This is not a renko chart (thanks to #Roman). Buy and sell signals are displayed only. See reference mentioned above...
I have following data in csv format
0.828666667 0.100333333
0.725666667 0.153666667
0.364333333 0.036666667
0.475666667 0.051
0.522333333 0.052333333
0.457 0.041666667
0.644666667 0.093333333
0.404333333 0.039333333
0.497 0.042333333
0.155666667 0.031666667
0.160666667 0.081333333
0.145666667 0.026666667
0.138666667 0.033666667
0.094333333 0.03
0.141 0.023666667
0.148666667 0.052
0.195666667 0.039
0.196333333 0.039333333
......
I am using following code
library(ggplot2)
data<-read.csv("sample.csv",header=TRUE,sep=",")
ggplot(data,aes(x=A,y=B,))+ geom_line() + scale_x_continuous(breaks=seq(0,9,0.5)) +
scale_y_continuous(breaks=seq(0,9,0.5))
I want have same scale in x and y axis thats y i am using breaks..but this doesnt give what i want it does over plotting
But i want to image as follows see example second image
Thanks for help
I think you need to manipulate the data a little bit...
library(reshape2)
library(ggplot2)
dat <- YOUR CSV
names(dat) <- c('a', 'b')
# need an x for the plot
dat$Num <- as.numeric(row.names(dat))
meltDat <- melt(dat, id.vars = 'Num')
ggplot(meltDat,
aes(x = Num, y = value, group = variable, color = variable)) +
geom_line()
I'm not sure I understand exactly what you want to do. But here's my attempt to get from your data to something similar to your second plot.
data <- read.table(text="0.828666667 0.100333333
0.725666667 0.153666667
0.364333333 0.036666667
0.475666667 0.051
0.522333333 0.052333333
0.457 0.041666667
0.644666667 0.093333333
0.404333333 0.039333333
0.497 0.042333333
0.155666667 0.031666667
0.160666667 0.081333333
0.145666667 0.026666667
0.138666667 0.033666667
0.094333333 0.03
0.141 0.023666667
0.148666667 0.052
0.195666667 0.039
0.196333333 0.039333333")
names(data) <- c("A", "B")
# prepare data for plotting
require(reshape2)
data$id <- 1:nrow(data)
df <- melt(data, id.var="id")
# plot
library(ggplot2)
ggplot(df, aes(x=id, y=value, color=variable)) + geom_line()
If this did not answer your question, please try to be more specific.
Suppose I have following data for a student's score on a test.
set.seed(1)
df <- data.frame(question = 0:10,
resp = c(NA,sample(c("Correct","Incorrect"),10,replace=TRUE)),
score.after.resp=50)
for (i in 1:10) {
ifelse(df$resp[i+1] == "Correct",
df$score.after.resp[i+1] <- df$score.after.resp[i] + 5,
df$score.after.resp[i+1] <- df$score.after.resp[i] - 5)
}
df
.
question resp score.after.resp
1 0 <NA> 50
2 1 Correct 55
3 2 Correct 60
4 3 Incorrect 55
5 4 Incorrect 50
6 5 Correct 55
7 6 Incorrect 50
8 7 Incorrect 45
9 8 Incorrect 40
10 9 Incorrect 35
11 10 Correct 40
I want to get following graph:
library(ggplot2)
ggplot(df,aes(x = question, y = score.after.resp)) + geom_line() + geom_point()
My problem is: I want to color segments of this line according to student response. If correct (increasing) line segment will be green and if incorrect response (decreasing) line should be red.
I tried following code but did not work:
ggplot(df,aes(x = question, y = score.after.resp, color=factor(resp))) +
geom_line() + geom_point()
Any ideas?
I would probably approach this a little differently, and use geom_segment instead:
df1 <- as.data.frame(with(df,cbind(embed(score.after.resp,2),embed(question,2))))
colnames(df1) <- c('yend','y','xend','x')
df1$col <- ifelse(df1$y - df1$yend >= 0,'Decrease','Increase')
ggplot(df1) +
geom_segment(aes(x = x,y = y,xend = xend,yend = yend,colour = col)) +
geom_point(data = df,aes(x = question,y = score.after.resp))
A brief explanation:
I'm using embed to transform the x and y variables into starting and ending points for each line segment, and then simply adding a variable that indicates whether each segment went up or down. Then I used the previous data frame to add the original points themselves.
Alternatively, I suppose you could use geom_line something like this:
df$resp1 <- c(as.character(df$resp[-1]),NA)
ggplot(df,aes(x = question, y = score.after.resp, color=factor(resp1),group = 1)) +
geom_line() + geom_point(color = "black")
By default ggplot2 groups the data according to the aesthetics that are mapped to factors. You can override this default by setting group explicitly,
last_plot() + aes(group=NA)
I have a dataset with multiple response variables and three treatments. Treatment2 is nested within treatment1 and treatment3 is nested within treatment 2. I have shown only three response variables for the sake of simplicity. I would like to run this over 22 response variable of which 3 are shown in the demo table.
My objective:
To visualize how the response variable(s) change based on the treatment combination. I have created a script to perform this on one response variable. I am copy pasting this code to run through other columns which to me is an extremely crude way to do it. Which leads to my second objective.
Automate or modify the following script so that it can automatically loops through the column and produce desired table and graphs.
Demo data:
demo.table
Here is my script:
library(doBy)
length2 <- function (x, na.rm=FALSE) {
if (na.rm) sum(!is.na(x))
else length(x)
}
attach (demo)
cdataNA <- summaryBy(tyr ~ spp + wat + ins, data=demo, FUN=c(length2,mean,sd), na.rm=TRUE)
# Rename column change.length to just N
names(cdataNA)[names(cdataNA)=="tyr.length2"] <- "N"
# Calculate standard error of the mean
cdataNA$tyr.SE <- cdataNA$tyr.sd / sqrt(cdataNA$N)
cdataNA
# Now create a barplot using ggplot2
library(ggplot2)
a <- ggplot(cdataNA, aes(x = wat, y = tyr.mean, fill = ins))
b <- a + geom_bar(stat = "identity", position = "dodge") + facet_grid (~ spp)
# Now put errorbars.
c <- b + geom_errorbar(aes(ymin=tyr.mean-tyr.SE, ymax=tyr.mean+tyr.SE),
width=.2, # Width of the error bars
position=position_dodge(.9)) +
xlab ("wat") +
ylab ("tyr (PA/PA std)")
c
## esc
library(doBy)
length2 <- function (x, na.rm=FALSE) {
if (na.rm) sum(!is.na(x))
else length(x)
}
cdataNA1 <- summaryBy(esc ~ spp + wat + ins, data=demo, FUN=c(length2,mean,sd), na.rm=TRUE)
# Rename column change.length to just N
names(cdataNA1)[names(cdataNA1)=="esc.length2"] <- "N"
# Calculate standard error of the mean
cdataNA1$esc.SE <- cdataNA1$esc.sd / sqrt(cdataNA1$N)
cdataNA1
# Now create a barplot using ggplot2
library(ggplot2)
a1 <- ggplot(cdataNA1, aes(x = wat, y = esc.mean, fill = ins))
b1 <- a1 + geom_bar(stat = "identity", position = "dodge") + facet_grid (~ spp)
# Now put errorbars.
c1 <- b1 + geom_errorbar(aes(ymin=esc.mean-esc.SE, ymax=esc.mean+esc.SE),
width=.2, # Width of the error bars
position=position_dodge(.9)) +
xlab ("wat") +
ylab ("esc (PA/PA std)")
c1
Resulting table for tyr:
spp wat ins N tyr.mean tyr.sd tyr.SE
1 Bl High No 4 0.305325 0.034102041 0.017051020
2 Bl High Yes 5 0.186140 0.045165894 0.020198802
3 Bl Low No 5 0.310540 0.061810096 0.027642315
4 Bl Low Yes 5 0.202840 0.029034944 0.012984822
5 Man High No 4 0.122725 0.075867005 0.037933503
6 Man High Yes 5 0.081800 0.013463469 0.006021046
7 Man Low No 5 0.079880 0.009569587 0.004279650
8 Man Low Yes 4 0.083550 0.018431947 0.009215973
Resulting graph for esc:
demo figure for esc
So the whole thing works but still requires considerable manual labor which impedes the work-flow. it would be great to achieve automation.
Thanks in advance.
You can organize the data in just two lines:
melt.dta <- melt(dta, id.vars = c("spp", "wat", "ins"), measure.vars = "tyr")
cast(melt.dta, spp + wat + ins ~ .,
function (x) c("N" = sum(!is.na(x)),
"mean" = mean(x, na.rm = TRUE),
"sd" = sd(x, na.rm = TRUE),
"se" = sd(x, na.rm = TRUE)/sqrt(sum(!is.na(x)))))
It returns:
spp wat ins N mean sd se
1 Bl High No 4 0.3053 0.03410 0.01705
2 Bl High Yes 5 0.1861 0.04517 0.02020
3 Bl Low No 5 0.3105 0.06181 0.02764
4 Bl Low Yes 5 0.2028 0.02903 0.01298
5 Man High No 4 0.1227 0.07587 0.03793
6 Man High Yes 5 0.0818 0.01346 0.00602
7 Man Low No 5 0.0799 0.00957 0.00428
8 Man Low Yes 4 0.0835 0.01843 0.00922