I am trying to create a line plot for 2 stocks AAPL and FB. Instead of adding a separate legend, I would like to print the stock symbols along with the lines. How can I add geom_text to the following code? I appreciate any help you could provide.
# Load the plotting and market-data packages.
library(ggplot2)
library(quantmod)

# Fetch both price series; each call leaves an object named after its ticker.
getSymbols("AAPL")
getSymbols("FB")

# Convert to plain data frames. The dates are read back from the row names
# below.
AAPL <- data.frame(AAPL)
FB <- data.frame(FB)

# One line layer per stock. The constant colour strings go through the colour
# scale, so each stock receives its own colour.
p1 <- ggplot(AAPL) +
  geom_line(
    data = AAPL,
    mapping = aes(as.Date(rownames(AAPL)), AAPL.Adjusted, color = "AAPL")
  )
p2 <- p1 +
  geom_line(
    data = FB,
    mapping = aes(as.Date(rownames(FB)), FB.Adjusted, color = "FB")
  )

# Axis titles, black-and-white theme, and no legend.
p2 +
  labs(x = "Year", y = "Price") +
  theme_bw() +
  theme(legend.position = "none")
This is the sort of plot that is perfect for the directlabels package. And it is easier to plot if the data is available in one dataframe.
# Data
library(quantmod)
getSymbols("AAPL")
getSymbols("FB")
AAPL <- data.frame(AAPL)
FB <- data.frame(FB)

# Store the date as a real column before binding. rbind() must keep row names
# unique, so dates present in both stocks get a suffix appended to the row
# name; reading dates back from rownames(df) afterwards is therefore fragile.
AAPL$date <- as.Date(rownames(AAPL))
FB$date <- as.Date(rownames(FB))

# rbind into one dataframe
AAPL$label <- "AAPL"
FB$label <- "FB"

# Drop the ticker prefix ("FB.Adjusted" -> "Adjusted") so that both data
# frames share the same column names; "date" and "label" are left untouched.
col_names <- sub("^FB\\.", "", names(FB))
names(AAPL) <- col_names
names(FB) <- col_names
df <- rbind(AAPL, FB)

# Packages
library(ggplot2)
library(directlabels)

# The plot - labels at the beginning and the ends of the lines.
ggplot(df, aes(date, Adjusted, group = label, colour = label)) +
  geom_line() +
  scale_colour_discrete(guide = "none") +
  geom_dl(
    aes(label = label),
    method = list(dl.combine("first.points", "last.points"))
  )
A better plot: Increase the space between the end points of the lines and the labels. See here for other options.
# Same chart, with the x axis expanded on both sides and the labels shifted
# away from the first and last points of each line.
ggplot(
  data = df,
  mapping = aes(
    x = as.Date(rownames(df)),
    y = Adjusted,
    group = label,
    colour = label
  )
) +
  geom_line() +
  scale_x_date(expand = c(0.1, 0)) +
  scale_colour_discrete(guide = "none") +
  geom_dl(
    mapping = aes(label = label),
    method = list(dl.trans(x = x + .2), "last.points")
  ) +
  geom_dl(
    mapping = aes(label = label),
    method = list(dl.trans(x = x - .2), "first.points")
  )
Question is possibly a duplicate of this one.
You simply have to add geom_text, as you said:
Define the x, y positions, the label you want to appear (and the color):
library(ggplot2)
library(quantmod)
getSymbols("AAPL")
getSymbols("FB")
AAPL <- data.frame(AAPL)
FB <- data.frame(FB)

p1 <- ggplot(AAPL) +
  geom_line(
    data = AAPL,
    aes(as.Date(rownames(AAPL)), AAPL.Adjusted, color = "AAPL")
  )
p2 <- p1 +
  geom_line(
    data = FB,
    aes(as.Date(rownames(FB)), FB.Adjusted, color = "FB")
  )

# One row per label. Giving geom_text() its own data frame draws each label
# exactly once; with constants inside aes() the layer would inherit the AAPL
# data from ggplot(AAPL) and print the same text once for every row.
stock_labels <- data.frame(
  x = as.Date(c("2011-06-07", "2014-10-01")),
  y = c(60, 45),
  label = c("AAPL", "FB")
)

# The label text is also mapped to colour, so each label shares the colour of
# the line it names.
p2 +
  xlab("Year") +
  ylab("Price") +
  theme_bw() +
  theme(legend.position = "none") +
  geom_text(
    data = stock_labels,
    aes(x = x, y = y, label = label, color = label)
  )
EDIT
If you want to automatically find positions for x and y in geom_text, you will face new problems with overlapping labels if you increase the number of variables.
Here is the beginning of a solution; you might adapt the method used to define `x` and `y`:
# Copy the dates out of the row names, tag every row with its ticker, and
# strip the ticker prefix so that both data frames share column names.
AAPL$date <- rownames(AAPL)
AAPL$var1 <- "AAPL"
names(AAPL) <- sub("^AAPL\\.", "", names(AAPL))
FB$date <- rownames(FB)
FB$var1 <- "FB"
names(FB) <- sub("^FB\\.", "", names(FB))

# bind the 2 data frames
df <- rbind(AAPL, FB)
df$date <- as.POSIXct(df$date)

# where do you want the legend to appear
# One row per ticker: x_pos is the midpoint of that ticker's date range and
# y_pos is the first Adjusted value recorded after that midpoint.
legend <- do.call(rbind, lapply(unique(df$var1), function(sym) {
  rows <- df[df$var1 == sym, ]
  first_day <- min(rows$date)
  half_span <- as.numeric(
    difftime(max(rows$date), first_day, units = "secs")
  ) / 2
  x_mid <- first_day + half_span
  data.frame(
    x_pos = x_mid,
    y_pos = rows$Adjusted[rows$date > x_mid][1],
    label = sym,
    stringsAsFactors = FALSE
  )
}))

# Plot
# Every `+` ends a line: a line that *starts* with `+` is parsed as a separate
# expression, so the guides() call would never be added to the plot.
# hjust/vjust are fixed settings, so they sit outside aes().
ggplot(df, aes(x = date, y = Adjusted, color = var1)) +
  geom_line() +
  xlab("Year") +
  ylab("Price") +
  geom_text(
    data = legend,
    aes(x = x_pos, y = y_pos, label = label, color = label),
    hjust = -1,
    vjust = 1
  ) +
  guides(color = "none")
Related
I've got a plot that looks like the output of the following code using the iris data
# library() stops with an error when a package is missing; require() would
# only warn and let the script fail later with a less helpful message.
library(tidyverse)
library(purrr)
library(forcats) # fct_cross() and fct_reorder() are used below

# Make some long data (one row per measurement) and set a custom sorting
# order: plot_fct crosses Species with the measurement name and is reordered
# by Vls through fct_reorder().
tbl <- iris %>%
  pivot_longer(cols = 1:4, names_to = "Msr", values_to = "Vls") %>%
  mutate(
    Msr = factor(Msr),
    plot_fct = fct_reorder(fct_cross(Species, Msr), Vls)
  )
# Function factory for minor breaks on a log axis.
#
# base: the logarithm base of the axis.
# Returns a function of `limits` that passes the floor/ceiling of the limits'
# logs (in `base`) to ggplot2's internal calc_logticks() and returns the
# `value` column of its result.
minor_breaks_log <- function(base) {
  # Evaluate the argument now so the returned closure captures its value
  # rather than an unevaluated promise.
  force(base)

  function(limits) {
    low_pow <- floor(log(limits[1], base = base))
    high_pow <- ceiling(log(limits[2], base = base))
    ticks <- ggplot2:::calc_logticks(
      base = base,
      minpow = low_pow,
      maxpow = high_pow
    )
    ticks$value
  }
}
# Draw one violin per Species x measurement combination, with a log10 value
# axis, flipped coordinates, and one free-scaled facet per species.
ggplot(tbl, mapping = aes(x = plot_fct, y = Vls, fill = Species)) +
  geom_violin() +
  annotation_logticks(base = 10, sides = "b") +
  scale_y_log10(
    minor_breaks = minor_breaks_log(10),
    labels = function(x) sprintf("%g", x) # plain numbers as axis labels
  ) +
  coord_flip() + # swap coords
  facet_wrap(~Species, nrow = 1, scales = "free") +
  theme_bw(base_size = 12)
I would now like to list the number of observations per violin on the right side of each facet just inside the maximum border, which I'm sure is possible but cannot seem to find an example that does this sort of labeling, with violins and facets.
# Horizontal violins (values on x) plus a text layer whose label is the row
# tally produced by stat = "count" for each plot_fct level.
ggplot(tbl, mapping = aes(y = plot_fct, fill = Species)) +
  geom_violin(mapping = aes(x = Vls)) +
  geom_text(
    mapping = aes(label = after_stat(count)),
    stat = "count",
    position = "fill",
    hjust = 1
  ) +
  annotation_logticks(base = 10, sides = "b") +
  scale_x_log10(
    minor_breaks = minor_breaks_log(10),
    labels = function(x) sprintf("%g", x) # plain numbers as axis labels
  ) +
  facet_wrap(~Species, nrow = 1, scales = "free") +
  theme_bw(base_size = 12)
I'm attempting to create a ggplot2 object in power bi that will render any number of horizontal lines depending on measures dropped in and out of the "Values" bin. I thought I'd do this with a for loop that adds an additional geom_hline to the object depending on the length of the dataframe. I also want each line to have a different color, and the value of the hline to render with the label in the legend.
The dataframe I am using has 3 static columns titled - Year, Escalation, and Type. Any additional columns beyond the first 3 would be considered data to be used for the horizontal lines.
This is what I have so far...
library(ggplot2)
library(RColorBrewer)

# Pool the colours of every qualitative ColorBrewer palette into one vector.
n <- 60
qual_col_pals <- brewer.pal.info[brewer.pal.info$category == "qual", ]
col_vector <- unlist(
  mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals))
)

# x axis breaks: every year below 30 rows, every 2nd year up to 60 rows,
# every 5th year beyond that.
# NOTE(review): `dataset` is not defined in this script; presumably it is
# supplied by Power BI - confirm.
scale <- scale_x_continuous(
  breaks = if (length(dataset$Year) < 30) {
    seq(min(dataset$Year), max(dataset$Year))
  } else if (length(dataset$Year) <= 60) {
    seq(min(dataset$Year), max(dataset$Year), by = 2)
  } else {
    seq(min(dataset$Year), max(dataset$Year), by = 5)
  }
)

# Base chart: the points plus dashed mean and median lines. Colours are given
# as constant strings inside aes() so that each layer gets a legend entry.
plot <- ggplot(dataset, aes(x = Year, y = Escalation)) +
  geom_point(aes(color = "#094780"), size = 3) +
  geom_hline(
    aes(yintercept = mean(dataset$Escalation), color = col_vector[1]),
    linetype = "dashed"
  ) +
  geom_hline(
    aes(yintercept = median(dataset$Escalation), color = col_vector[2]),
    linetype = "dashed"
  ) +
  scale +
  scale_y_continuous(
    breaks = round(
      seq(min(dataset$Escalation), max(dataset$Escalation), by = 0.02),
      2
    )
  ) +
  theme(
    axis.text.x = element_text(colour = "#942832"),
    axis.text.y = element_text(colour = "#942832")
  )
# add horizontal comparison lines
#
# Adds one horizontal line for each comparison column of `dataset` (columns
# 4, 5, ...) to the global `plot`, then attaches a manual colour scale whose
# legend labels show the value of every line.
#
# data: number of comparison columns, i.e. NCOL(dataset) - 3.
# Returns the completed ggplot object.
#
# Why not add geom_hline() inside a loop: aes() stores its expressions
# unevaluated and ggplot2 only evaluates them when the plot is built. Every
# layer created in a loop therefore sees the final value of the loop index,
# so all added lines end up with the last column's value and colour. Passing
# the values through a data frame fixes them at the time the layer is made.
addline <- function(data) {
  # seq_len() yields an empty index for 0 columns, where 1:0 would not.
  idx <- seq_len(max(0L, as.integer(data)))

  colours <- c("#094780", col_vector[1], col_vector[2], col_vector[2 + idx])
  labels <- c(
    paste(unlist(unique(dataset[3])), collapse = ", "),
    paste("Mean ", round(mean(dataset$Escalation), 3)),
    paste("Median", round(median(dataset$Escalation), 3))
  )

  if (length(idx) > 0) {
    line_values <- as.numeric(unlist(dataset[1, 3 + idx]))
    hlines <- data.frame(
      yintercept = line_values,
      colour = col_vector[2 + idx],
      stringsAsFactors = FALSE
    )
    plot <- plot +
      geom_hline(data = hlines, aes(yintercept = yintercept, color = colour))
    labels <- c(
      labels,
      paste(names(dataset)[3 + idx], " Escalation :", round(line_values, 3))
    )
  }

  # Unnamed `values` are matched to the scale's sorted limits, which breaks
  # the pairing of colour and label. Naming each value after its own key and
  # passing the keys as `breaks` keeps colour, legend order and label aligned.
  plot +
    scale_color_manual(
      name = "",
      values = stats::setNames(colours, colours),
      breaks = colours,
      labels = labels
    )
}
addline(NCOL(dataset) - 3)
It's rendering, but it's not giving me what I'm expecting when I add data for more than 1 horizontal line (it's shifting the line with the data but not properly naming it or coloring it). For reference, if there were 2 horizontal lines and it were hardcoded, I would want the code to look like this (this renders correctly in Power BI).
library(ggplot2)
library(RColorBrewer)

# Pool the colours of every qualitative ColorBrewer palette into one vector.
n <- 60
qual_col_pals <- brewer.pal.info[brewer.pal.info$category == "qual", ]
col_vector <- unlist(
  mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals))
)

# x axis breaks: every year below 30 rows, every 2nd year up to 60 rows,
# every 5th year beyond that.
scale <- scale_x_continuous(
  breaks = if (length(dataset$Year) < 30) {
    seq(min(dataset$Year), max(dataset$Year))
  } else if (length(dataset$Year) <= 60) {
    seq(min(dataset$Year), max(dataset$Year), by = 2)
  } else {
    seq(min(dataset$Year), max(dataset$Year), by = 5)
  }
)

# Hard-coded reference version with exactly two comparison lines, taken from
# the first row of columns 4 and 5 of `dataset`.
plot <- ggplot(dataset, aes(x = Year, y = Escalation)) +
  geom_point(aes(color = "#094780"), size = 3) +
  geom_hline(
    aes(yintercept = mean(dataset$Escalation), color = col_vector[1]),
    linetype = "dashed"
  ) +
  geom_hline(
    aes(yintercept = median(dataset$Escalation), color = col_vector[2]),
    linetype = "dashed"
  ) +
  geom_hline(aes(yintercept = dataset[1, 4], color = col_vector[3])) +
  geom_hline(aes(yintercept = dataset[1, 5], color = col_vector[4])) +
  scale +
  scale_y_continuous(
    breaks = round(
      seq(min(dataset$Escalation), max(dataset$Escalation), by = 0.02),
      2
    )
  ) +
  theme(
    axis.text.x = element_text(colour = "#942832"),
    axis.text.y = element_text(colour = "#942832")
  ) +
  scale_color_manual(
    name = "",
    values = list(
      "#094780",
      col_vector[1],
      col_vector[2],
      col_vector[3],
      col_vector[4]
    ),
    labels = list(
      unlist(unique(dataset[3])),
      paste("Mean ", round(mean(dataset$Escalation), 3)),
      paste("Median", round(median(dataset$Escalation), 3)),
      paste(unlist(names(dataset)[4]), " Escalation :", round(dataset[1, 4], 3)),
      paste(unlist(names(dataset)[5]), " Escalation :", round(dataset[1, 5], 3))
    )
  )
plot
I'm still a novice when it comes to coding, so I'm pretty sure I'm just not understanding something basic about how the loop works.
I know this is a bit to sift through, but I'm having a hard time really debugging because I'd have to export the data set from Power BI into RStudio. Any help is appreciated!
I have two very similar plots, which have two y-axis - a bar plot and a line plot:
code:
# Bars for frequency and a line for severity on a shared x axis.
# NOTE(review): `year` and `frequency` are passed unquoted to aes_string(), so
# they are presumably variables holding column names - confirm.
sec_plot <- ggplot(data, aes_string(x = year, group = 1))
sec_plot <- sec_plot +
  geom_col(aes_string(y = frequency), fill = "orange", alpha = 0.5) +
  geom_line(aes(y = severity))
However, there are no labels. I want to get a label for the barplot as well as a label for the line plot, something like:
How can I add the labels to the plot if there is only one single group? Is there a way to specify this manually? Until now I have only found options where the labels can be added by specifying them in the aes.
EXTENSION (added a posteriori):
# Build a chart with two y axes: dodged bars of `yvar` on the primary axis and
# lines of `yvarsec`, rescaled to the range of `yvar`, read off a secondary
# axis.
#
# data:     table with the columns named below and a `claims` column.
#           NOTE(review): the `[i][, ':=' (...)]` syntax suggests a
#           data.table - confirm.
# xvar:     name of the x-axis column (string).
# yvar:     name of the column shown as bars (string).
# yvarsec:  name of the column shown as lines (string).
# groupvar: name of the grouping column (string); it drives the bar fill and
#           the line colour.
# Returns the ggplot object.
getSecPlot <- function(data, xvar, yvar, yvarsec, groupvar) {
  # NOTE(review): when xvar is "agegroup" it is replaced by the value of a
  # variable called `agegroup` looked up with get(); that variable is not
  # defined in the visible code - confirm this is intended.
  if ("agegroup" %in% xvar) xvar <- get("agegroup")
  # data <- data[, startYear:= as.numeric(startYear)]

  # Drop rows with zero claims, then store the secondary series rescaled to
  # the primary axis (`scaled`) together with the scaling factor (`param`).
  data <- data[!claims == 0][, ':=' (scaled = get(yvarsec) * max(get(yvar))/max(get(yvarsec)),
                                      param = max(get(yvar))/max(get(yvarsec)))]
  param <- data[1, param] # important, otherwise not found in ggplot

  sec_plot <- ggplot(data, aes_string(x = xvar, group = groupvar)) +
    geom_col(aes_string(y = yvar, fill = groupvar, alpha = 0.5), position = "dodge") +
    # Colour the lines by `groupvar`, like the bars, instead of a hard-coded
    # `gender` column.
    geom_line(aes_string(y = "scaled", color = groupvar)) +
    # The secondary axis divides by `param` to undo the rescaling above.
    scale_y_continuous(
      sec.axis = sec_axis(
        ~./(param),
        name = paste0("average ", yvarsec),
        labels = function(x) format(x, big.mark = " ", scientific = FALSE)
      )
    ) +
    labs(y = paste0("total ", yvar)) +
    scale_alpha(guide = 'none') +
    theme_pubclean() +
    theme(legend.title = element_blank(), legend.background = element_rect(fill = "white"))

  # Return the plot itself, not the invisible result of the assignment above.
  sec_plot
}
plot.ExposureYearly <- getSecPlot(freqSevDataAge, xvar = "agegroup", yvar = "exposure", yvarsec = "frequency", groupvar = "gender")
plot.ExposureYearly
How can the same be done on a plot where both the line plot as well as the bar plot are separated by gender?
Here is a possible solution. The method I used was to move the color and fill inside the aes and then use scale_*_identity to create and format the legends.
Also, I needed to add a scaling factor for severity axis since ggplot does not handle the secondary axis well.
library(ggplot2)
library(scales)

# Example data: six years with a claim frequency and a random severity.
data <- data.frame(
  year = 2000:2005,
  frequency = 3:8,
  severity = as.integer(runif(6, 4000, 8000))
)

# fill and colour are placed inside aes() as literal colour names;
# scale_*_identity() uses those names as the actual colours and, with
# guide = "legend", gives each layer its own legend entry.
# `labels` is spelled out in full rather than relying on partial matching of
# `label`.
sec_plot <- ggplot(data, aes(x = year)) +
  geom_col(aes(y = frequency, fill = "orange"), alpha = 0.6) +
  # severity is divided by 1000 to share the axis with frequency
  geom_line(aes(y = severity / 1000, color = "black")) +
  scale_fill_identity(
    guide = "legend",
    labels = "Claim frequency (Number of paid claims per 100 Insured exposure)",
    name = NULL
  ) +
  scale_color_identity(
    guide = "legend",
    labels = "Claim Severity (Average insurance payment per claim)",
    name = NULL
  ) +
  theme(legend.position = "bottom") +
  # formats the 2nd axis: label_dollar(scale = 1000) multiplies back by 1000
  scale_y_continuous(
    sec.axis = sec_axis(~ . * 1, labels = label_dollar(scale = 1000), name = "Severity")
  ) +
  # control which scale plots first
  guides(fill = guide_legend(order = 1), color = guide_legend(order = 2))
sec_plot
As you can see on the image, R automatically assigns the values 0, 0.25... 1 for the size of the point. I was wondering if I could replace the 0, 0.25... 1 and make these text values instead while keeping the actual numerical values from the data.
library(ggplot2)
library(scales)

# The data come from a CSV file picked interactively. The earlier
# data(SLC4A1, package = "ggplot2") call was removed: ggplot2 ships no such
# dataset and the object was overwritten by this read anyway.
SLC4A1 <- read.csv(file.choose(), header = TRUE)

# bubble chart showing position of polymorphisms on gene, the frequency of each of these
# polymorphisms, where they are prominent on earth, and p-value
SLC4A1ggplot <- ggplot(SLC4A1, aes(Position, log10(Frequency))) +
  geom_jitter(aes(col = Geographical.Location, size = p.value)) +
  labs(
    title = "SLC4A1 Gene",
    subtitle = "Frequency of Various Polymorphisms",
    x = "Position of Polymorphism on SLC4A1 Gene",
    color = "Geographical Location",
    size = "p-value"
  ) +
  # Reversed size transform, with the legend order reversed to match.
  scale_size_continuous(range = c(1, 4.5), trans = "reverse") +
  guides(size = guide_legend(reverse = TRUE))
# Package name corrected: "tidyver" does not exist, the package is tidyverse.
library(tidyverse)

# Minimal example: point size mapped to z.
df <- data.frame(x = 1:5, y = 1:5, z = 1:5)

# Default size scale.
ggplot(df, aes(x = x, y = y, size = z)) +
  geom_point()

# Same plot with the range of point sizes restricted to 1-2.
ggplot(df, aes(x = x, y = y, size = z)) +
  geom_point() +
  scale_size_continuous(range = c(1, 2)) # control range of circle size
See more here:
https://ggplot2.tidyverse.org/reference/scale_size.html
I have the following data.frame:
# Simulated data: 75 normal draws (30 with mean 1, then 15 and 30 with mean
# 0), each tagged with a `gt` value and a `group` value.
hist.df <- data.frame(
  y = c(rnorm(30, mean = 1, sd = 1), rnorm(15), rnorm(30, mean = 0, sd = 1)),
  gt = rep(c("ht", "hm"), times = c(30, 45)),
  group = rep(c("sc", "am", "sc"), times = c(30, 15, 30))
)
from which I produce the following faceted histogram ggplot:
# Faceted histogram of y, one panel per group, filled by gt.
#
# The fill scale is discrete, so its keys are the gt values themselves:
# - values are named, so each colour is tied to a gt level. Unnamed values
#   and labels are matched to the alphabetically sorted levels ("hm", "ht"),
#   which swapped the legend text.
# - breaks lists the gt keys in legend order. The former numeric
#   limits = c(0, 30) matches no gt value; a numeric range belongs on the y
#   axis, not on this scale.
main.plot <- ggplot(data = hist.df, aes(x = y)) +
  geom_histogram(
    aes(fill = factor(gt)),
    alpha = 0.5,
    position = "identity",
    binwidth = 2.5
  ) +
  facet_wrap(~group) +
  scale_fill_manual(
    name = "gt",
    values = c(ht = "darkgreen", hm = "darkmagenta"),
    breaks = c("ht", "hm")
  )
In addition, I have this data.frame:
# Annotation table: one row per interval, giving its lower and upper bound,
# the group (facet) it belongs to, and the name of the factor it describes.
text.df <- data.frame(
  ci.lo  = c(0.001, 0.005, -10.1),
  ci.hi  = c(1.85, 2.25, 9.1),
  group  = c("am", "sc", "sc"),
  factor = c("nu", "nu", "alpha")
)
Which defines the text annotations I want to add to the faceted histograms, so that the final figure will be:
So text.df$ci.lo and text.df$ci.hi are confidence intervals on the corresponding text.df$factor and they correspond to the faceted histograms through text.df$group
Note that not every histogram has all text.df$factor's.
Ideally, the ylim's of the faceted histograms will leave enough space for the text to be added above the histograms so that they appear only on the background.
Any idea how to achieve this?
Wrapping my comment into an answer:
# Put the confidence intervals into the facet strip titles.

# One text line per interval, e.g. "nu = [0.001, 1.85]".
text.df$ci <- paste0(text.df$factor, " = [", text.df$ci.lo, ", ", text.df$ci.hi, "]")

# Collapse the lines of each group into one string, named by group so that
# they can be matched to the facet levels by name rather than by position.
new_labels <- vapply(
  split(text.df$ci, as.character(text.df$group)),
  paste,
  character(1),
  collapse = "\n"
)

# Relabel the facet variable as "<group>\n<intervals>"; a group without any
# interval keeps its plain name.
hist.df$group <- factor(hist.df$group)
group_levels <- levels(hist.df$group)
group_text <- unname(new_labels[group_levels])
hist.df$group <- factor(
  hist.df$group,
  labels = ifelse(
    is.na(group_text),
    group_levels,
    paste0(group_levels, "\n", group_text)
  )
)

# Named fill values tie each colour to its gt level. Unnamed values and
# labels are matched to the alphabetically sorted levels ("hm", "ht"), which
# swapped the legend text.
main.plot <- ggplot(data = hist.df, aes(x = y)) +
  geom_histogram(
    aes(fill = factor(gt)),
    alpha = 0.5,
    position = "identity",
    binwidth = 2.5
  ) +
  facet_wrap(~group) +
  scale_fill_manual(
    name = "gt",
    values = c(ht = "darkgreen", hm = "darkmagenta"),
    breaks = c("ht", "hm")
  )
main.plot + theme(strip.text = element_text(size = 20))
If you wish to stick to the original idea, this question has an answer that will help.