Add a label or geom_text to scatter plot - R/ggplot2 - r

How can I add a label/geom_text to every point that includes its data frame index and its xvar and yvar values (for example, the label for the first point would be "Point 1: 500,570")?
Code:
# Reproducible example: ten (x, y) points drawn as an unlabelled scatter plot.
library(ggplot2)
xvar <- c(500, 450, 490, 560, 618, 660, 700, 650, 590, 550)
yvar <- c(570, 600, 650, 670, 660, 650, 630, 580, 570, 550)
dat <- data.frame(xvar, yvar)
# One point per row of dat, using point shape 1.
ggplot(dat, aes(x = xvar, y = yvar)) +
  geom_point(shape = 1)

Create a variable label and plot it to geom_text():
library(ggplot2)
# Coordinates of the points to label.
xvar <- c(500, 450, 490, 560, 618, 660, 700, 650, 590, 550)
yvar <- c(570, 600, 650, 670, 660, 650, 630, 580, 570, 550)
dat <- data.frame(xvar, yvar)
# "Point <row index>"; seq_len() also behaves for a zero-row data frame,
# where seq(from = 1, to = 0) would count down and yield two values.
dat$point <- sprintf("Point %s", seq_len(nrow(dat)))
# Full label text, e.g. "Point 1: 500, 570".
dat$labvar <- sprintf("%s: %s, %s", dat$point, dat$xvar, dat$yvar)
> dat
xvar yvar point labvar
1 500 570 Point 1 Point 1: 500, 570
2 450 600 Point 2 Point 2: 450, 600
3 490 650 Point 3 Point 3: 490, 650
4 560 670 Point 4 Point 4: 560, 670
5 618 660 Point 5 Point 5: 618, 660
6 660 650 Point 6 Point 6: 660, 650
7 700 630 Point 7 Point 7: 700, 630
8 650 580 Point 8 Point 8: 650, 580
9 590 570 Point 9 Point 9: 590, 570
10 550 550 Point 10 Point 10: 550, 550
# Draw the labvar text at each point (hjust/vjust set the text justification
# relative to the point), then draw the points themselves on top.
ggplot(dat, aes(x = xvar, y = yvar)) +
  geom_text(aes(label = labvar), hjust = -0.05, vjust = 0.05) +
  geom_point(shape = 1)

library(ggplot2)
library(ggrepel)
library(dplyr)
# Points plus a label column built from the coordinates and the position of
# each value in the input vector.
xvar <- c(500, 450, 490, 560, 618, 660, 700, 650, 590, 550)
yvar <- c(570, 600, 650, 670, 660, 650, 630, 580, 570, 550)
dat <- dplyr::mutate(
  data.frame(xvar, yvar, xindex = seq_along(xvar)),
  label = paste0("x: ", xvar, ", y:", yvar, ", index: ", xindex)
)
# Points first, then the labels via ggrepel's geom_text_repel().
ggplot(dat, aes(x = xvar, y = yvar)) +
  geom_point(shape = 1) +
  geom_text_repel(aes(label = label))

Related

custom color for provinces in specific country using raster package in R

I am trying to label(green, red, blue) each province in a specific country as follows:
library(raster)
library(rgeos)
library(ggplot2)
library(dplyr)
# Download level-1 (province) boundaries for Iran and flatten them for ggplot2.
iran <- getData("GADM", country = "Iran", level = 1)
map <- fortify(iran)
map$id <- as.integer(map$id)
# Attribute table, one row per province. The data slot is accessed with `@`.
# NOTE: id is simply numbered 1..n here, i.e. it assumes the fortified
# polygon ids follow the same sequence.
dat <- data.frame(
  id = seq_along(iran@data$NAME_1),
  state = iran@data$NAME_1,
  pr = c(530, -42, 1673, 75, 206, 544, 1490, 118, 75,
         40, 105, 191, 111, 810, 609, 425, 418, 550, 40, 425, -54, -50,
         16, 18, 133, 425, -30, 241, 63, 191, 100)
)
# Colour class from pr: <= 50 green, > 150 red, everything in between yellow.
dat <- dat %>%
  mutate(color_province = case_when(pr <= 50 ~ 'green',
                                    pr > 150 ~ 'red',
                                    TRUE ~ 'yellow'))
I want to set custom color to each province (using 'dat' data frame) as follows:
> dat
id state pr color_province
1 1 Alborz 530 red
2 2 Ardebil -42 green
3 3 Bushehr 1673 red
4 4 Chahar Mahall and Bakhtiari 75 yellow
5 5 East Azarbaijan 206 red
6 6 Esfahan 544 red
7 7 Fars 1490 red
8 8 Gilan 118 yellow
9 9 Golestan 75 yellow
10 10 Hamadan 40 green
11 11 Hormozgan 105 yellow
12 12 Ilam 191 red
13 13 Kerman 111 yellow
14 14 Kermanshah 810 red
15 15 Khuzestan 609 red
16 16 Kohgiluyeh and Buyer Ahmad 425 red
17 17 Kordestan 418 red
18 18 Lorestan 550 red
19 19 Markazi 40 green
20 20 Mazandaran 425 red
21 21 North Khorasan -54 green
22 22 Qazvin -50 green
23 23 Qom 16 green
24 24 Razavi Khorasan 18 green
25 25 Semnan 133 yellow
26 26 Sistan and Baluchestan 425 red
27 27 South Khorasan -30 green
28 28 Tehran 241 red
29 29 West Azarbaijan 63 yellow
30 30 Yazd 191 red
31 31 Zanjan 100 yellow
For example 'Ardebil' province is green and 'Zanjan' province is yellow. I used 'Fill' argument to set custom color and here is my try:
# Attach the per-province attributes (including color_province) to the
# fortified polygon rows, matching on id.
map_df <- inner_join(map, dat, by = "id")
# One centroid per polygon (byid = TRUE); used as the position of each label.
centers <- data.frame(gCentroid(iran, byid = TRUE))
# Province names are copied over by position, i.e. row i of centers gets row i of dat.
centers$state <- dat$state
# Polygons filled by colour class, province names drawn at the centroids.
ggplot() +
geom_map(data = map_df, map = map_df,
aes(map_id = id, group = group,
x = long, y = lat,
fill = as.factor(color_province))) +
geom_text(data = centers, aes(label = state, x = x, y = y), size = 3) +
coord_map() +
labs(x = "", y = "", title = "Iran Province") +
scale_fill_manual(values = list(yellow = 'yellow', red = 'red', green = 'green'))
But it does not work. According to the 'dat' data frame 'Ardebil' is 'green' but it is red on map. Is there a better way to do this?
Since you asked me, I had a quick look at your code. Your assumption about id was wrong. When you used fortify(), you probably thought id was assigned in a normal sequence (e.g., 1 to n), but this is not the case. Just run unique(mymap$id) and you will see what I mean. So what is the solution? When you create dat, you need to take the ids from rownames(iran@data). Once this is done, you should be fine. See the final graphic.
library(raster)
library(rgeos)
library(ggplot2)
library(dplyr)
iran <- getData("GADM", country = "Iran", level = 1)
# The ids produced by fortify() come from the polygons and need not be the
# sequence 1..n (inspect unique(mymap$id)); this is the hidden cause.
mymap <- fortify(iran)
mymap$id <- as.integer(mymap$id)
# Key the attribute table by the row names of the data slot (accessed with `@`)
# so it lines up with the fortified ids. They are converted to integer so both
# join columns have the same type; this assumes the row names are numeric
# strings, as the as.integer() conversion of mymap$id above already does.
dat <- data.frame(
  id = as.integer(rownames(iran@data)),
  state = iran@data$NAME_1,
  pr = c(530, -42, 1673, 75, 206, 544, 1490, 118, 75,
         40, 105, 191, 111, 810, 609, 425, 418, 550, 40, 425, -54, -50,
         16, 18, 133, 425, -30, 241, 63, 191, 100)
) %>%
  # Colour class from pr: <= 50 green, > 150 red, everything else yellow.
  mutate(color_province = case_when(pr <= 50 ~ "green",
                                    pr > 150 ~ "red",
                                    TRUE ~ "yellow"))
mydf <- inner_join(mymap, dat, by = "id")
# One centroid per polygon, labelled with the province name by position.
centers <- data.frame(gCentroid(iran, byid = TRUE))
centers$state <- dat$state
ggplot() +
  geom_map(data = mydf,
           map = mydf,
           aes(map_id = id, group = group,
               x = long, y = lat,
               fill = as.factor(color_province))) +
  geom_text(data = centers,
            aes(label = state, x = x, y = y), size = 3) +
  coord_map() +
  labs(x = "", y = "", title = "Iran Province") +
  # Named values tie each class to its colour regardless of factor level order.
  scale_fill_manual(values = c(green = "green", red = "red", yellow = "yellow"),
                    name = "Province")

Stretching the x axis and applying a different binwidth to range of values in histogram in ggplot2 R

Here is an example ggplot that I would like to build. In my data I have a problem that I have lots of values in small stretch of the histogram. Thus, I would like to make the x axis disproportionately stretched (here between the values of 80,81,82,83,84,85). So, the tickmarks would be spaced evenly on the graph, and the space between the tickmarks would not be proportionate to the incremental increase in the values on that graph. Consequently, I would also like to apply a different bin size to that part of the histogram (let's say binwidth = 1).
library(ggplot2)
set.seed(42)
# 30 simulated respondents: a normally distributed value, a running
# respondent number and a two-level category.
data <- data.frame(
  vals = rnorm(30, mean = 80, sd = 20),
  respondent = seq(1, 30, 1),
  category = c("A", "B", "B", "A", "A", "B", "B", "A", "A", "A",
               "A", "B", "B", "A", "A", "B", "B", "A", "A", "B",
               "B", "A", "A", "B", "B", "A", "A", "B", "B", "A")
)
# Stacked histogram of vals, coloured by category, in bins of width 5.
ggplot(data, aes(x = vals, fill = category)) +
  geom_histogram(binwidth = 5, position = "stack") +
  labs(title = "plot") +
  # Attempted (disabled) custom axis:
  # scale_x_continuous(c(40,50,60,70,80,81,82,83,84,85,95,105,115)) +
  theme_minimal() +
  labs(y = "Number of respondents") +
  labs(x = "Number of vals")
You can calculate the size (width / height) yourself, as a series of stacked rectangles.
Using the diamonds dataset for illustration, suppose this is our original histogram, and we want to zoom in for the [500, 1000] price range:
# Baseline: price histogram of the diamonds data in bins of 500, filled by color.
ggplot(data = diamonds, mapping = aes(x = price, fill = color)) +
  geom_histogram(binwidth = 500) +
  theme_bw()
Define your preferred axis breaks:
# Bin edges: one 500-wide bin from 0, 100-wide bins across 500..1000,
# then 500-wide bins up to 19000.
x.axis.breaks <- c(
  0,
  seq(from = 500, to = 900, by = 100),
  seq(from = 1000, to = 19000, by = 500)
)
> x.axis.breaks
[1] 0 500 600 700 800 900 1000 1500 2000 2500 3000 3500 4000 4500
[15] 5000 5500 6000 6500 7000 7500 8000 8500 9000 9500 10000 10500 11000 11500
[29] 12000 12500 13000 13500 14000 14500 15000 15500 16000 16500 17000 17500 18000 18500
[43] 19000
Calculate xmin / xmax / ymin / ymax for each interval:
library(dplyr)
# For every (price interval, color) pair compute the rectangle to draw:
# xmin/xmax come from the break vector, ymin/ymax from a running total of the
# counts inside each interval, so the rectangles stack.
diamonds2 <- diamonds %>%
  mutate(price.cut = cut(price, breaks = x.axis.breaks)) %>%
  count(price.cut, color) %>%
  mutate(
    xmin = x.axis.breaks[as.integer(price.cut)],
    xmax = x.axis.breaks[as.integer(price.cut) + 1]
  ) %>%
  group_by(price.cut) %>%
  arrange(desc(color)) %>%
  mutate(
    ymax = cumsum(n),
    # bottom edge = previous top edge; the first row of an interval starts at 0
    ymin = ifelse(is.na(lag(ymax)), 0, lag(ymax))
  ) %>%
  ungroup()
> diamonds2
# A tibble: 294 x 7
price.cut color n xmin xmax ymax ymin
<fct> <ord> <int> <dbl> <dbl> <int> <dbl>
1 0 J 158 0 500 158 0
2 500 J 80 500 600 80 0
3 600 J 84 600 700 84 0
4 700 J 51 700 800 51 0
5 800 J 43 800 900 43 0
6 900 J 47 900 1000 47 0
7 1000 J 145 1000 1500 145 0
8 1500 J 198 1500 2000 198 0
9 2000 J 163 2000 2500 163 0
10 2500 J 72 2500 3000 72 0
# ... with 284 more rows
Plot:
# Draw the precomputed rectangles; every aesthetic is taken from diamonds2.
p <- ggplot(
  data = diamonds2,
  mapping = aes(
    xmin = xmin, xmax = xmax,
    ymin = ymin, ymax = ymax,
    fill = color
  )
) +
  geom_rect() +
  theme_bw()
p
I'm not inclined to "stretch" part of a continuous axis, as it distorts interpretation. But you can zoom in using facet_zoom from the ggforce package:
library(ggforce)
# Zoom panel restricted to rectangles with xmin >= 500 and xmax <= 1000.
p +
  facet_zoom(x = (xmin >= 500) & (xmax <= 1000))
If you don't want the neighbouring bars to be visible in the zoomed facet, set the x-axis range expansion parameters as 0.
# Same zoom, with the x-axis expansion set to zero on both sides.
p +
  facet_zoom(x = (xmin >= 500) & (xmax <= 1000)) +
  scale_x_continuous(expand = c(0, 0))
Edit
To have a different binwidth at the end with customised label, you can make the following changes:
# use even binwidth (500) up to 15000, then jump to the end
x.axis.breaks <- c(0, # binwidth = 500
seq(500, 900, 100), # binwidth = 100
seq(1000, 15000, 500), # binwidth = 500
19000) # everything else
# NOTE(review): only the break vector is redefined here. Presumably diamonds2
# has to be recomputed from these new breaks (and p rebuilt from it) before the
# two steps below have any effect - confirm against the earlier snippets.
# reduce the largest xmax value in order to have the same bar width
diamonds2 <- diamonds2 %>%
mutate(xmax = ifelse(xmax == max(xmax),
xmin + 500,
xmax))
# define breaks & labels for x-axis
# (four breaks at 0, 5000, 10000, 15000; the last one is labelled "15000+")
p <- p +
scale_x_continuous(breaks = seq(0, 15000, 5000),
labels = c(seq(0, 10000, 5000),
"15000+"))

Plot observations in same x-axis point which linked with id variable

I need help. This is a view of my database :
482 940 914 1
507 824 1042 2
514 730 1450 3
477 595 913 4
My aim is to plot in the same point of x-axis each row.
Example:
in 1 (=x) i want to plot 482, 940 and 914
in 2 (=x) I want to plot 507, 824 and 1042.
So three points in vertical for each x axis points.
it's a good idea to share the data in a reproducible way - I'm using readClipboard to read in the copied vector into R. Anyway, here's a quick answer:
# Read the copied text from the clipboard, split it on single spaces and
# convert the pieces to numbers.
x <- as.numeric(
  unlist(
    strsplit(readClipboard(), split = " ")
  )
)
This makes it into a numeric vector. We now need to split into groups based on the description you provided. I'm using matrix to achieve this and will then convert to data.frame for plotting using ggplot2:
# Fill a 4-column matrix row by row, so each group of four values is one row.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
m <- matrix(x, ncol = 4, byrow = TRUE)
> m
[,1] [,2] [,3] [,4]
[1,] 482 940 914 1
[2,] 507 824 1042 2
[3,] 514 730 1450 3
[4,] 477 595 913 4
# Convert the matrix to a data frame and name its four columns a, b, c, d.
df <- setNames(as.data.frame(m), letters[1:4])
> df
a b c d
1 482 940 914 1
2 507 824 1042 2
3 514 730 1450 3
4 477 595 913 4
To get the plot:
library(ggplot2)
# Column d gives the x position; columns a, b and c are each drawn as their own
# point layer with a fixed colour, so three points share every x value.
ggplot(data = df, mapping = aes(x = d)) +
  geom_point(mapping = aes(y = a), color = "red") +
  geom_point(mapping = aes(y = b), color = "green") +
  geom_point(mapping = aes(y = c), color = "blue")
OUTPUT
You can play around with ggtitle and xlab etc. to change the plot labels and add legends.
Hope this is helpful!

Issue with a drawing a vertical line in ggplot for categorical variable x-axis in R

I have the following table. I want to plot a vertical line using the "st_date_wk" column for each county. Please see my code below but it DOES NOT draw the vertical line using the "st_date_wk" column. Cannot figure out what I am doing wrong here.
Any help is appreciated.
Thanks.
dfx1:
YEAR Week Area acc_sum percentage COUNTY st_date_wk
1998 10-1 250 250 12.4 133 10-4
1998 10-2 300 550 29.0 133 10-4
1998 10-3 50 600 58.0 133 10-4
1998 10-4 100 700 75.9 133 10-4
1998 10-5 100 800 100.0 133 10-4
1999 9-3 75 75 22.0 205 10-2
1999 10-1 250 250 12.4 205 10-2
1999 10-2 300 550 29.0 205 10-2
1999 10-3 50 600 58.0 205 10-2
1999 10-4 100 700 75.9 205 10-2
1999 10-5 100 800 100.0 205 10-2
.
.
# Convert the columns to the types used by the plot below.
dfx1$YEAR <- as.factor(dfx1$YEAR)
dfx1$COUNTY <- as.factor(dfx1$COUNTY)
dfx1$percentage <- as.numeric(dfx1$percentage)
dfx1$acc_sum <- as.numeric(dfx1$acc_sum)
dfx1$Week <- factor(dfx1$Week, ordered = T)
dfx1$st_date_wk <- factor(dfx1$st_date_wk,ordered = T)
# Week is rebuilt here with an explicit list of levels; st_date_wk above is
# created without a levels argument, so the two factors are not given the
# same level set.
dfx1$Week <- factor(dfx1$Week, levels=c("6-1","6-2","6-3","6-4","6-5","7-1","7-2","7-3","7-4","7-5","8-1","8-2","8-3","8-4","8-5","9-1","9-2","9-3","9-4","9-5","10-1","10-2","10-3","10-4","10-5","11-1","11-2","11-3","11-4","11-5","12-1","12-2","12-3","12-4","12-5"))
# One line per YEAR of percentage against Week, one panel per COUNTY.
gg <- ggplot(dfx1, aes(Week,percentage, col=YEAR, group = YEAR))
gg <- gg + geom_line()
gg <- gg + facet_wrap(~COUNTY, 2, scales = "fixed")
gg <- gg + theme(text = element_text(size=15), axis.text.x = element_text(angle=90, hjust=1))
# Vertical line layer: this is the part reported as not drawing. It maps the
# ordered factor st_date_wk to xintercept and adds a second facet_wrap().
gg <- gg + geom_vline(data=dfx1, aes(xintercept = dfx1$st_date_wk), color = "blue", linetype = "dashed", size = 1.0)+ facet_wrap(~COUNTY)
plot(gg)
1: In Ops.ordered(x, from[1]) : '-' is not meaningful for ordered factors
It is a very interesting issue, and I haven't quite figured out why it does not work. However, there is a fix for it.
First, This is the data that is used in the answer:
# Six-row sample of the table, parsed from inline text with a header row.
dfx1 <- read.table(
  header = TRUE,
  text = "YEAR Week Area acc_sum percentage COUNTY st_date_wk
1998 10-1 250 250 12.4 133 10-4
1998 10-2 300 550 29.0 133 10-4
1998 10-3 50 600 58.0 133 10-4
1998 10-4 100 700 75.9 133 10-4
1998 10-5 100 800 100.0 133 10-4
1999 9-3 75 75 22.0 133 10-1"
)
Convert types of Year, COUNTY, percentage, and acc_sum:
# YEAR and COUNTY become factors; percentage and acc_sum become numeric.
dfx1[c("YEAR", "COUNTY")] <- lapply(dfx1[c("YEAR", "COUNTY")], as.factor)
dfx1[c("percentage", "acc_sum")] <- lapply(dfx1[c("percentage", "acc_sum")], as.numeric)
Create a vector with the week_levels (more reader-friendly):
# All "<month>-<week>" codes in calendar order: months 6..12, weeks 1..5 each.
week_levels <- paste(
  rep(6:12, each = 5),
  rep(1:5, times = 7),
  sep = "-"
)
Transform Week and st_date_wk to an ordered factor with the same levels:
# Both week columns become ordered factors over the same level set, so a given
# week has the same integer code in either column.
dfx1$Week <- ordered(dfx1$Week, levels = week_levels)
dfx1$st_date_wk <- ordered(dfx1$st_date_wk, levels = week_levels)
Create labels for scale_x_continuous (a named vector where the names correspond to the breaks of the x-axis):
# Week codes named by their position: the names are the numeric axis breaks,
# the values are the text shown at those breaks.
labels <- setNames(week_levels, seq_along(week_levels))
Create the visualisation, but instead of using the factors on the x-axis, use their numeric codes; in geom_vline() use which() to get the number that corresponds to a Week on the x-axis. Then use scale_x_continuous() to label the numeric breaks with the weeks.
library(ggplot2)
# Weeks are plotted by their integer level codes so that a numeric xintercept
# can be used; the axis is then relabelled with the week codes.
ggplot(
  dfx1,
  aes(x = as.numeric(Week), y = percentage, col = YEAR, group = YEAR)
) +
  geom_line() +
  geom_vline(
    # positions of the levels that occur in st_date_wk
    xintercept = which(levels(dfx1$Week) %in% dfx1$st_date_wk),
    color = "blue",
    linetype = "dashed"
  ) +
  scale_x_continuous(breaks = seq_along(labels), labels = labels) +
  theme(
    text = element_text(size = 15),
    axis.text.x = element_text(angle = 90, hjust = 1)
  ) +
  facet_wrap(~COUNTY, 2, scales = "fixed")
This will give you:
EDIT AFTER COMMENT:
library(dplyr)
# Add a column x holding, for every row, the x-axis position (level index) of
# that row's st_date_wk. The summary is already computed per (COUNTY,
# st_date_wk) group, so no further subsetting by COUNTY is needed inside it.
dfx1 <- merge(dfx1,
              (dfx1 %>%
                 group_by(COUNTY, st_date_wk) %>%
                 summarise(x = which(levels(st_date_wk) %in% st_date_wk),
                           .groups = "drop")),
              by = c("COUNTY", "st_date_wk"), all.x = TRUE
)
# Same plot as before, but the vertical line position now comes from the data,
# so each COUNTY panel gets the line(s) for its own start week.
ggplot(dfx1, aes(x = as.numeric(Week), y = percentage, col = YEAR, group = YEAR)) +
  geom_line() +
  geom_vline(data = dfx1, aes(xintercept = x), color = "blue", linetype = "dashed") +
  scale_x_continuous(breaks = seq_along(labels), labels = labels) +
  theme(text = element_text(size = 15), axis.text.x = element_text(angle = 90, hjust = 1)) +
  facet_wrap(~COUNTY, 2, scales = "fixed")
You just have to change the aes in the geom_vline
# Refer to the column by name rather than dfx1$..., so it is looked up in the
# data given to the layer.
aes(xintercept = as.numeric(st_date_wk))

ggplot changing colors of bar plot

I came across this R script that use ggplot:
library(reshape2)
library(ggplot2)
# Four rows of timings for types A..G; the leading number on each data line
# has no header and is therefore read as the row name.
dat <- read.table(
  header = TRUE,
  text = "A B C D E F G
1 480 780 431 295 670 360 190
2 720 350 377 255 340 615 345
3 460 480 179 560 60 735 1260
4 220 240 876 789 820 100 75"
)
# Explicit row id, kept as the identifier when reshaping to long format.
dat[["row"]] <- seq_len(nrow(dat))
dat2 <- melt(dat, id.vars = "row")
# One bar per type, built from the four row values and filled by row id.
ggplot(dat2, aes(x = variable, y = value, fill = row)) +
  geom_bar(stat = "identity") +
  labs(x = "\nType", y = "Time\n") +
  guides(fill = FALSE) +
  theme_bw()
That was what I've been looking for. However, I could not:
change the default colours (for example, I tried to use the "RdYlGn"
palette)
convert the raw values to frequencies.
Any suggestions?
You could try this:
library(reshape2)
library(dplyr)
library(ggplot2)
# Expects dat to already contain the `row` id column used as id.vars below.
dat %>%
  melt(id.vars = "row", variable.name = "grp") %>%
  group_by(grp) %>%
  # share of each value within its type (grp)
  mutate(tot = sum(value), fq = value / tot) %>%
  ungroup() %>%
  ggplot(aes(x = grp, y = fq, fill = row, label = sprintf("%.2f%%", fq * 100))) +
  geom_bar(stat = "identity") +
  # percentage label centred inside each stacked segment
  geom_text(size = 3, position = position_stack(vjust = 0.5)) +
  xlab("\nType") +
  ylab("Time\n") +
  # "none" hides the fill legend; passing FALSE here is deprecated in ggplot2
  guides(fill = "none") +
  scale_fill_distiller(palette = "RdYlGn") +
  theme_bw()

Resources