I have made a graph but I don't know how to view the exact values of the bars on the graph. Here is my code in case it is needed. I also have a picture of my graph.
Step 1: Load the tidyverse and tidyquant:
install.packages("tidyverse")
install.packages("tidyquant")
library("tidyverse")
library("tidyquant")
#STEP 2: Getting stocks data:
stocks <- c("TSLA", "UPST", "PLTR", "SPOT", "SHOP", "SPY", "BND")
stocks_df <- tq_get(stocks, from = '2017-01-01')
#Step 3: Group data:
port <- tq_get(c("TSLA", "UPST", "PLTR", "SPOT", "SHOP", "SPY", "BND"),
from = '2017-01-01')%>%
group_by(symbol) %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
period = "daily",
col_rename = "ret")
#Step 4: Computing portfolio returns:
myport <- port %>% tq_portfolio(symbol,ret, c(0.2, 0.2, 0.2, 0.2, 0.2, 0, 0))
benchmark <- port %>% tq_portfolio(symbol, ret, c(0, 0, 0, 0, 0, 0.6, 0.4))
#Step 5: Computing portfolio measure:
mVaR <- myport %>% tq_performance(portfolio.returns,
performance_fun = VaR,
p = 0.95,
method = "historical",
portfolio_method = "single") %>%
add_column(symbol = "MyPort", .before = 1)
bVaR <- benchmark %>% tq_performance(portfolio.returns,
performance_fun = VaR,
p = 0.95,
method = "gaussian",
portfolio_method = "single") %>%
add_column(symbol = "Benchmark", .before = 1)
#Step 6: Computing portfolio measure: Expected Shortfall (ES):
mES <- myport %>% tq_performance(portfolio.returns,
performance_fun = ES,
p = 0.95,
method = "historical",
portfolio_method = "single") %>%
add_column(symbol = "MyPort", .before = 1)
bES <- benchmark %>% tq_performance(portfolio.returns,
performance_fun = ES,
p = 0.95,
method = "gaussian",
portfolio_method = "single") %>%
add_column(symbol = "Benchmark", .before = 1)
#Step 7: Combining the results into a single table using rbind (row bind):
bothVaR <- rbind(mVaR, bVaR)
bothES <- rbind(mES, bES)
results <- inner_join(bothVaR, bothES)
#Step 8: Re-shaping the table into a data frame suitable for plotting:
results <- results %>%
pivot_longer(!symbol, names_to = "measure", values_to = "value")
#Step 9: Plot the results:
results %>% ggplot(aes(x = measure, y = abs(value), fill = symbol)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Value at Risk Approach to Measure a Diversified Portfolio",
x = "Risk Measure", y = " ", fill = " ") + theme_minimal() +
theme(plot.title = element_text(hjust = 0.5), legend.position = "top")
I tried looking up on Google but the examples they give is for a specific set of data with different names and values. I don't know to implement it into my code for my specific script and graph.
If you want to plot the values on the plot, this could work:
library(ggrepel)
results %>% ggplot(aes(x = measure, y = abs(value), fill = symbol)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Value at Risk Approach to Measure a Diversified Portfolio",
x = "Risk Measure", y = " ", fill = " ") + theme_minimal() +
theme(plot.title = element_text(hjust = 0.5), legend.position = "top") +
geom_text_repel(aes(label = round(abs(value), digits = 3)),
position = position_dodge(width = 1), direction = "y", size = 2.5)
One way to visualize the values when you hover over them is to use ggplotly as follow:
install.packages("plotly")
library(plotly)
Once you load the library, you store then your plot in a variable that I name
p
and do following:
plotly(p)
Related
I want to create a visualization thatchampion_radar_plot <- funct
ion(df, champion_name) {
slam_win_cnt_chp = df %>% filter(WINNER == champion_name)
chp_num_wins <- slam_win_cnt_chp$NUM_WINS
l <- length(chp_num_wins)
max_v <- 10 # choosing the same maximum value for all champions
chp_df <- data.frame(rbind(max = rep(max_v, l), min = rep(0, l), chp_num_wins))
colnames(chp_df) <- slam_win_cnt_chp$TOURNAMENT
seg_n <- max_v
radarchart(chp_df, axistype = 1, caxislabels = seq(0, max_v, 1), seg = seg_n,
centerzero = TRUE, pcol = rgb(0.2, 0.5, 0.5, 0.9) , pfcol = rgb(0.2, 0.5, 0.5, 0.3),
plwd = 1, cglcol = "grey", cglty = 1, axislabcol = "blue",
vlcex = 0.8, calcex = 0.7, title = champion_name)
}
champion_radar_plot(slam_win_cnt, "Roger Federer")sv
maybe this is of help for you.
library("tidyverse")
slam_win <- read.csv("grand_slam_data.csv")
slam_win$tournament <- factor(slam_win$tournament)
slam_win %>%
filter(!tournament %in% c("Australian Open (Dec)", "Australian Open (Jan)")) %>%
group_by(tournament, winner) %>%
summarise(wins = n()) %>%
arrange(desc(tournament, wins)) %>%
slice_max(order_by = wins, n=3)
Turning tournament in a factor will you allow to group by it in either dplyr or Ggplot2.
The slice function will give you the n (you define it in the arguments) number of highest values of each group.
The next step is to plot
plot_slam <- slam_win %>%
filter(!tournament %in% c("Australian Open (Dec)", "Australian Open (Jan)")) %>%
group_by(tournament, winner) %>%
summarise(wins = n()) %>%
arrange(desc(tournament, wins)) %>%
slice_max(order_by = wins, n=3)
ggplot(plot_slam, aes(wins, reorder(tournament, wins), fill = reorder(winner, wins))) +
geom_col(position = position_dodge()) +
geom_text(aes(label = winner), position = position_dodge(0.9), hjust = 1.1)
You can add title and axis names by adding labs(title = "Grand Slam Tournaments", x = "Number of wins", y = "Tournament") and remove the legend via theme(legend.position = 'none'
This is the resulting:
I am working with the Growthrates package to generate parameter estimates of growth rate curves for my data. I have made the regressions and looked at the produced plots and I am happy with the data, but I would like to reproduce the following plots in ggplot2.
Figure 1: Multiplot of a regression for each group:treatment combo
I would like a multiplot of the regression lines for each group:Treatment combination, but with all the regressions I performed on it in ((i.e. logistic, gompertz, gompertz2, etc..). So far I have:
library(growthrates)
####Using logistic regression to fit the data across mutliple groups
p <- c(y0 = 1, mumax = 0.5, K = 200)
lower <- c(y0 = 0, mumax = 0, K = 20)
upper <- c(y0 = 100, mumax = 5, K = 400)
many_logistics <- all_growthmodels(y_data ~
grow_logistic(total_time_days, parms) | sample + treatment,
data = Alldata,
p = p,
lower = lower,
upper = upper,
log = "y")
pp <- coef(many_logistics)
par(mfrow = c(5, 3))
par(mar = c(2.5, 4, 2, 1))
plot(many_logistics)
many_logistics_results <- results(many_logistics)
xyplot(mumax ~ treatment | sample, data = many_logistics_results, layout = c(3, 1))
xyplot(r2 ~ treatment | sample, data = many_logistics_results, layout = c(3, 1))
xyplot(K ~ treatment | sample, data = many_logistics_results, layout = c(3, 1))
curve_logistics <- predict(many_logistics) #Prediction for given data (data for curve)
est_logistics <- predict(many_logistics, newdata=data.frame(time=seq(0, 1, 0.1))) #Extrapolation/Interpolation from curve
####Using Gompertz regression to fit the data across mutliple groups
p <- c(y0 = 1, mumax = 0.5, K = 200)
lower <- c(y0 = 0, mumax = 0, K = 20)
upper <- c(y0 = 100, mumax = 5, K = 400)
many_gompertz <- all_growthmodels(y_datay_data ~
grow_gompertz(total_time_days, parms) | sample + treatment,
data = Alldata,
p = p,
lower = lower,
upper = upper)
pp <- coef(many_gompertz)
par(mfrow = c(5, 3))
par(mar = c(2.5, 4, 2, 1))
plot(many_gompertz)
many_gompertz_results <- results(many_gompertz)
xyplot(mumax ~ treatment | sample, data = many_gompertz_results, layout = c(3, 1))
xyplot(r2 ~ treatment | sample, data = many_gompertz_results, layout = c(3, 1))
xyplot(K ~ treatment | sample, data = many_gompertz_results, layout = c(3, 1))
curve_gompertz <- predict(many_gompertz) #Prediction for given data (data for curve)
est_gompertz <- predict(many_gompertz, newdata=data.frame(time=seq(0, 1, 0.1))) #Extrapolation/Interpolation from curve
#Prepare the data frames
curve_logistics2 <- curve_logistics %>%
map_df(as_tibble, .id = "src") %>%
separate(src, c("sample", "treatment"), ":") %>%
mutate(regression = "logistic")
curve_gompertz2 <- curve_gompertz %>%
map_df(as_tibble, .id = "src") %>%
separate(src, c("sample", "treatment"), ":") %>%
mutate(regression = "gompertz")
alldata2<- Alldata %>%
select("sample", "treatment","total_time_days", "y_data") %>%
rename(time = "total_time_days") %>%
rename(y = "y_data") %>%
mutate(regression = "none")
comp_reg <- bind_rows(curve_logistics2, curve_gompertz2, alldata2)
#define the function to automatically generate plots#define the function to automatically generate plots
REGRESSION_LINE_PLOT <-function(x) {ggplot(data = x, aes(x=time, y=y, colour = regression, linetype = regression)) +
geom_point(size = 2.5, data = subset(x, regression %in% c("none"))) +
stat_smooth(data = subset(x, regression %in% c("gompertz", "logistic"))) +
theme_bw() +
theme(panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
panel.spacing = unit(0,"cm"),
axis.line=element_line(colour="black"),
# axis.title.x = element_text(size=14, colour = "black"),
axis.title.x = element_blank(),
# axis.title.y = element_text(size=14, colour = "black"),
axis.title.y = element_blank(),
# axis.text.y = element_text(size=14, colour = "black"),
# axis.text.x = element_text(size=14, colour = "black"),
strip.background = element_blank(),
strip.text = element_text(size = 12, colour="black", face = "bold"),
legend.text= element_text(size = 12, colour = "black"),
legend.title=element_blank(),
text = element_text(size=12, family="Arial")) +
# plot.margin=unit(c(0.1,0.1,0.1,0.1),"cm")) +
#scale_colour_manual(values = cbbPalette) + ### here I tell R to use my custom colour palette
#scale_x_continuous(limits = c(-1,14)) + # set time range from -1 to 70 since we started sampling on day -1
#scale_y_continuous(limits = c(-1,350), breaks = seq(0, 360, 90)) + # For comparison purposes, i want all my panels to have the same y axis scale
ylab("") +
xlab("")
}
comp_reg_nested<- comp_reg %>%
group_by(sample, treatment) %>%
nest() %>%
mutate(plots=map(.x=data, ~REGRESSION_LINE_PLOT(.x)))
fo_ad_line <- comp_reg_nested[[1,"plots"]]
However, I do not think the regression lines are properly represented in ggplot22. Is there a better way to do this?
I created a data example more or less similar to your data structure from the builtin data of the package and simplified the code a little bit, omitting the default plot functions. I very much enjoyed your data frame construction method with map_df, thank you. Then I added a simple ggplot, that can of course be extended and adapted to your needs.
library(growthrates)
library(dplyr)
library(purrr)
library(tidyr)
library(ggplot2)
## use subset of built-in example data of the package
## and adapt it to the existing part of the script
data(bactgrowth)
Alldata <- bactgrowth[(bactgrowth$conc < 1) & bactgrowth$replicate == 1, ]
names(Alldata) <- c("sample", "replicate", "treatment", "total_time_days", "y_data")
Alldata$y_data <- Alldata$y_data * 1000
Alldata$treatment <- as.character(Alldata$treatment)
####Using logistic regression to fit the data across mutliple groups
p <- c(y0 = 1, mumax = 0.5, K = 200)
lower <- c(y0 = 0, mumax = 0, K = 20)
upper <- c(y0 = 100, mumax = 5, K = 400)
many_logistics <- all_growthmodels(y_data ~
grow_logistic(total_time_days, parms) | sample + treatment,
data = Alldata,
p = p,
lower = lower,
upper = upper)
many_logistics_results <- results(many_logistics)
curve_logistics <- predict(many_logistics)
####Using Gompertz regression to fit the data across mutliple groups
many_gompertz <- all_growthmodels(y_data ~
grow_gompertz(total_time_days, parms) | sample + treatment,
data = Alldata,
p = p,
lower = lower,
upper = upper)
many_gompertz_results <- results(many_gompertz)
curve_gompertz <- predict(many_gompertz)
#Prepare the data frames
curve_logistics2 <- curve_logistics %>%
map_df(as_tibble, .id = "src") %>%
separate(src, c("sample", "treatment"), ":") %>%
mutate(regression = "logistic")
curve_gompertz2 <- curve_gompertz %>%
map_df(as_tibble, .id = "src") %>%
separate(src, c("sample", "treatment"), ":") %>%
mutate(regression = "gompertz")
alldata2<- Alldata %>%
rename(time = "total_time_days", y = "y_data")
## combine the two curves to a joint data frame
comp_reg <- bind_rows(curve_logistics2, curve_gompertz2)
## plot it
ggplot(comp_reg, aes(time, y)) +
geom_point(data = alldata2) +
geom_line(aes(color = regression)) +
facet_grid(treatment ~ sample)
I am trying to do some exploratory data analysis using tidyverse. I have a large and complex dataset, but the important parts boil down to something similar to this:
my_df <- data.frame(Expt = rep(c("Expt1", "Expt2", "Expt3", "Expt4"), each = 96),
ExpType = rep(c("A", "B"), each = 192),
Treatment = c(rep("T1", 192), rep("T2", 144), rep("T1", 48)),
Subject = c(rep(c("S01", "S02", "S03", "S04", "S05", "S06", "S07", "S08"), 24), rep("S01", 96), rep("S06", 96)),
xvar = as.factor(rep(rep(c(10, 5, 2.5, 1.25, 0.6, 0.3, 0.16, 0.08, 0.04, 0.02, 0, "NA"), each = 8), 4)),
yvar = runif(384))
(Expt is a unique but uninformative identifier for each individual experiment. Each Expt always has only one ExpType, but may include one or multiple levels of Treatment and Subject.)
I'm grouping my data by ExpType, Treatment, Subject, and Expt, then making graphs. Thus, I'm making a ton of graphs, and it would make my life much easier if the graphs had informative titles.
I can group the data and make graphs of everything, like so:
my_df2 <- my_df %>%
group_by(ExpType, Treatment, Expt) %>%
nest() %>%
mutate(plots1 = map(
.x = data,
~ggplot(data = .x, aes(x=as.factor(xvar), y = yvar)) + #
theme_classic() + theme(legend.key.width = unit(2, "lines"), legend.justification = c(1, 1), legend.position = c(1, 1)) +
geom_smooth(method = "loess", se = FALSE, aes(group=Subject, color=Subject, linetype = Subject))+
geom_point(aes(fill = Subject), size = 2.5)
))
walk(.x = my_df2$plots1, ~print(.x))
What I can't figure out how to do is add a title to each plot to tell me what it is. I've tried making a unique identifier that includes all the relevant information:
my_df3 <- my_df %>%
mutate(FullID = paste0(my_df$ExpType, "_", my_df$Treatment, "_", my_df$Expt)) %>%
group_by(ExpType, Treatment, Expt) %>%
nest() %>%
arrange(ExpType, Treatment)
And I can get the FullIDs back out again:
# Either of these will successfully extract a list of FullIDs
map(my_df3$data, "FullID")
my_df3$data %>%
map("FullID")
What I can't figure out how to do is to get down that extra level of nesting in the map(~ggplot call to use FullID as a plot title, using something like:
my_df3 <- my_df3 %>%
mutate(plots2 = map2(
.x = data,
.y = map_chr(data$FullID),
~ggplot(.x, aes(x=xvar, y = yvar)) + #
theme_classic() + theme(legend.key.width = unit(2, "lines"), legend.justification = c(1, 1), legend.position = c(1, 1)) +
geom_smooth(method = "loess", se = FALSE, aes(group=Subject, color=Subject, linetype = Subject))+
geom_point(aes(fill=Subject, shape = Subject), size = 2.5) +
labs(title = unique(.y))
))
I know there must be a way to do it, and I just don't get the syntax. Any suggestions?
The FullID can also be created with unite (Note that we don't need .$ inside the dplyr functions). After the nest/arrange, in the OP's code, map2 was used with one of the input arguments as map_chr(data$FullID). For map to works, it needs a function (.f) to be applied, which is not present. Also, as we are extracting the info from one of the columns in the list column 'data'. we don't need a map2, but a single map and later can extract the column info in labs
my_df2 <- my_df %>%
unite(FullID, ExpType, Treatment, Expt, sep="_", remove = FALSE) %>%
group_by(ExpType, Treatment, Expt) %>%
nest %>%
arrange(ExpType, Treatment) %>%
mutate(plots = map(data, ~
ggplot(.x, aes(x=xvar, y = yvar)) +
theme_classic() +
theme(legend.key.width = unit(2, "lines"),
legend.justification = c(1, 1), legend.position = c(1, 1)) +
geom_smooth(method = "loess", se = FALSE,
aes(group=Subject, color=Subject, linetype = Subject))+
geom_point(aes(fill=Subject, shape = Subject), size = 2.5) +
labs(title = first(.x$FullID))))
-checking
my_df2$plots[[1]]
I am working with a dataset called HappyDB for a class presentation and analyzing demographic differences in word frequency. I'm using tidytext for most of the analyses, and using their online guide to create most of my visuals. However, I'm running into a problem with the code to create the frequency plot of words with labels. My dataset is structured differently from theirs, and I thought I was accounting for it but I evidently was not. This is their sample code to generate the graph (comparing Jane Austen with the Bronte sisters and H.G. Wells)
library(tidyr)
frequency <- bind_rows(mutate(tidy_bronte, author = "Brontë Sisters"),
mutate(tidy_hgwells, author = "H.G. Wells"),
mutate(tidy_books, author = "Jane Austen")) %>%
mutate(word = str_extract(word, "[a-z']+")) %>%
count(author, word) %>%
group_by(author) %>%
mutate(proportion = n / sum(n)) %>%
select(-n) %>%
spread(author, proportion) %>%
gather(author, proportion, `Brontë Sisters`:`H.G. Wells`)
library(scales)
# expect a warning about rows with missing values being removed
ggplot(frequency, aes(x = proportion, y = `Jane Austen`, color = abs(`Jane Austen` - proportion))) +
geom_abline(color = "gray40", lty = 2) +
geom_jitter(alpha = 0.1, size = 2.5, width = 0.3, height = 0.3) +
geom_text(aes(label = word), check_overlap = TRUE, vjust = 1.5) +
scale_x_log10(labels = percent_format()) +
scale_y_log10(labels = percent_format()) +
scale_color_gradient(limits = c(0, 0.001), low = "darkslategray4", high = "gray75") +
facet_wrap(~author, ncol = 2) +
theme(legend.position="none") +
labs(y = "Jane Austen", x = NULL)
And that code generates this plot:
I'm hoping to emulate this with demographics in my dataset, but keep getting errors. Here is my code, which uses a dataset that I have already tidied:
library(dplyr)
library(tidyr)
library(ggplot2)
library(tidytext)
library(stringr)
windowsFonts(Franklin=windowsFont("Franklin Gothic Demi"))
marriedmen <- tidy_hm[which(tidy_hm$marital =="married" &
tidy_hm$gender == "m"),]
marriedwomen <- tidy_hm[which(tidy_hm$marital =="married" &
tidy_hm$gender == "f"),]
singlemen <- tidy_hm[which(tidy_hm$marital =="single" &
tidy_hm$gender == "m"),]
frequency <- bind_rows(mutate(marriedmen, status = "Married men"),
mutate(marriedwomen, status = "Married women"),
mutate(singlemen, status = "Single men")) %>%
count(status, word) %>%
group_by(status) %>%
mutate(proportion = n / sum(n)) %>%
select(-n) %>%
spread(status, proportion) %>%
gather(status, proportion, `Married women`:`Single men`)
library(scales)
# expect a warning about rows with missing values being removed
ggplot(frequency, aes(x = proportion, y = 'Married men', color = abs(`Married men` - proportion)) +
geom_abline(color = "gray40", lty = 2) +
geom_jitter(alpha = 0.1, size = 2.5, width = 0.3, height = 0.3) +
geom_text(aes(label = word), check_overlap = TRUE, vjust = 1.5) +
scale_x_log10(labels = percent_format()) +
scale_y_log10(labels = percent_format()) +
scale_color_gradient(limits = c(0, 0.001), low = "darkslategray4", high = "gray75") +
facet_wrap(~status, ncol = 2) +
theme(legend.position="none") +
labs(y = NULL, x = NULL)
But I keep getting this error:
Error in log(x, base) : non-numeric argument to mathematical function
I tried removing the scale rows, but that caused a bunch of data to get eliminated and the plot didn't look anything like it was supposed to, and had no line, labels, or colors. I'm pretty new to r and coding in general so any help is appreciated.
I have a plot with labels on the y axis for the groups within the area plot. I added an overlay and want to name these.
Reproducible data at the bottom. For context I'm showing website session data and want to overlay when TV Campaigns are running.
Here's my ggplot and what it looks like. Below that is the commands to generate random data that I am using.
timeline <- ggplot(dataset, aes(x = Month, y = Sessions,fill = Channel, group = Channel)) +
geom_area(alpha = 0.2) +
stat_summary(aes(group = 2), fun.y = sum, geom = 'line', size = 2, alpha = 0.5) +
theme(axis.text.x=element_text(angle=90, hjust=1),
axis.title.x = element_blank()) +
geom_rect(data = tv_overlay, inherit.aes = FALSE,
aes(xmin = start, xmax = end,
ymin = -Inf, ymax = Inf),
fill = "black", alpha = 0.1)
This produces the following plot. Note the rectangle overlays which are meant to denote a TV campaign. How can I add a label to say "TV Campaign" to these:
Reproducible data which will allow the above commands for timeline <- to run
# dimensions
channels <- c("Facebook", "Youtube", "SEM", "Organic", "Direct", "Email")
last_month <- Sys.Date() %m+% months(-1) %>% floor_date("month")
mts <- seq(from = last_month %m+% months(-23), to = last_month, by = "1 month")
yr_month <- format(mts, "%b-%Y")
dimvars <- expand.grid(Month = yr_month, Channel = channels)
# metrics
rws <- nrow(dimvars)
set.seed(42)
# generates variablility in the random data
randwalk <- function(initial_val, ...){
initial_val + cumsum(rnorm(...))
}
Sessions <- ceiling(randwalk(3000, n = rws, mean = 8, sd = 1500)) %>% abs()
Transactions <- ceiling(randwalk(200, n = rws, mean = 0, sd = 75)) %>% abs()
Revenue <- ceiling(randwalk(10000, n = rws, mean = 0, sd = 3500)) %>% abs()
# make primary df
dataset <- cbind(dimvars, Sessions, Transactions, Revenue)
# make TV and Mass df for overlays
tv_begin <- sample(mts, 4)
tv_end <- tv_begin %m+% months(1)
tv_overlay <- data.frame(start = format(tv_begin, "%b-%Y"), end = format(tv_end, "%b-%Y"))
Map alpha to a character values to get an extra legend entry:
ggplot(dataset, aes(x = Month, y = Sessions,fill = Channel, group = Channel)) +
geom_area(alpha = 0.2) +
stat_summary(aes(group = 2), fun.y = sum, geom = 'line', size = 2, alpha = 0.5) +
geom_rect(aes(xmin = start, xmax = end, ymin = -Inf, ymax = Inf, alpha = "TV Campaign"),
tv_overlay, inherit.aes = FALSE, fill = "black") +
scale_alpha_manual(name = '', values = c("TV Campaign" = 0.1)) +
theme(axis.text.x=element_text(angle=90, hjust=1),
axis.title.x = element_blank())