data <- as.matrix(data.frame("A" = c(18,7),
"B+" = c(3,2),
"B" = c(3,3),
"C+" = c(6,0),
"C" = c(7,0),
"D" = c(0,4),
"E" = c(5,23)))
barplot (data,
col = c("red","blue"),
beside = TRUE,
xlab = "Grade",
ylab = "Frequency")
legend("topleft",
c("IFC6503-A","IFC6510"),
fill = c("red","blue"),
inset = c(.01,0)
)
I plotted two bar series together in one barplot. The bars show grade frequencies from two different data sets, with the grades A, B+, B, C+, C, D and E. In the result, B+ and C+ do not appear; the labels show B. and C. instead. Is my code wrong, and can you correct it?
Do not use the special character + in column names, as Bernhard already pointed out:
Here is a way where you could relabel the x axis of the plot by hand:
Give adequate colnames in your matrix: see here comment of whuber: https://stats.stackexchange.com/questions/163280/naming-convention-for-column-names
use xaxt = "n" in barplot to remove x labels
use axis to insert x labels manually:
# Grade frequencies: one row per course, one column per grade.
# The column names are kept syntactic ("Bplus", "Cplus"); the labels that
# contain "+" are supplied separately when the axis is drawn.
data <- as.matrix(data.frame("A" = c(18, 7),
                             "Bplus" = c(3, 2),
                             "B" = c(3, 3),
                             "Cplus" = c(6, 0),
                             "C" = c(7, 0),
                             "D" = c(0, 4),
                             "E" = c(5, 23)))
grade_labels <- c("A", "B+", "B", "C+", "C", "D", "E")

# xaxt = "n" suppresses the default x labels. barplot() returns the bar
# midpoints (with beside = TRUE: one row per series, one column per grade),
# which are kept so the labels can be placed without hard-coded positions.
bar_mids <- barplot(data,
                    col = c("red", "blue"),
                    beside = TRUE,
                    xlab = "Grade",
                    ylab = "Frequency",
                    xaxt = "n")

# Centre each label under its pair of bars.
axis(1, at = colMeans(bar_mids), labels = grade_labels)

legend("topleft",
       c("IFC6503-A", "IFC6510"),
       fill = c("red", "blue"),
       inset = c(.01, 0))
The name of the columns in the data doesn't have B+ and C+
You can just rename the columns like this after loading the file:
# Put the "+" back into the second and fourth grade labels in one assignment.
colnames(data)[c(2, 4)] <- c("B+", "C+")
Now plot with the same code
barplot (data,
col = c("red","blue"),
beside = TRUE,
xlab = "Grade",
ylab = "Frequency")
legend("topleft",
c("IFC6503-A","IFC6510"),
fill = c("red","blue"),
inset = c(.01,0)
)
A tidyverse approach
library(tidyverse)

# `data` was created with as.matrix(), but pivot_longer() expects a data
# frame, so convert first (this changes nothing if `data` already is one).
as.data.frame(data) %>%
  pivot_longer(cols = everything()) %>%
  group_by(name) %>%
  mutate(
    # the column names arrive as "B." / "C."; turn the dot back into "+"
    name = str_replace(name, fixed("."), "+"),
    # within each grade, the first row is course 1 and the second is course 2
    grp = if_else(row_number() == 1, "IFC6503-A", "IFC6510")
  ) %>%
  ungroup() %>%
  ggplot(aes(x = name, y = value, fill = grp)) +
  geom_col(position = position_dodge()) +
  scale_fill_manual(values = c("IFC6503-A" = "red", "IFC6510" = "blue")) +
  labs(
    x = "Grade",
    y = "Frequency"
  )
Related
We have a data
x <- 1:10
y1 <- x
y2 <- x^2
y3 <- x + 1
z1 <- c("m", "m", "m", "m", "m", "m", "m", "n", "n", "n")
z2 <- c("n", "n", "n", "n", "n", "n", "p", "p", "p", "p")
z3 <- c("m", "m", "m", "m", "m", "n", "n", "n", "p", "p")
df1 <- data.frame(x, y1, z1)
df2 <- data.frame(x, y2, z2)
df3 <- data.frame(x, y3, z3)
I would like to create a plot where df1 is red, df2 is blue and df3 is green and have a legend based on color.
I also would like to change the line type based on vectors z and have a legend for that too.
Here's what I could code
ggplot() +
geom_line(data = df1, aes(x, y1, linetype = z1, color = "#f8766d"), size = 1, show.legend = TRUE) +
geom_line(data = df2, aes(x, y2, linetype = z2, color = "#619cff"), size = 1, show.legend = TRUE) +
geom_line(data = df3, aes(x, y3, linetype = z3, color = "#00ba38"), size = 1, show.legend = TRUE) +
scale_linetype_manual(values = c("m" = "solid", "n" = "dotted", "p" = "dashed"), guide = "legend", labs(title = "Line type legend")) +
scale_color_manual(values = c("#f8766d", "#619cff", "#00ba38"), guide = "legend", labs(title = "color legend"), labels = c("y1", "y2", "y3")) +
xlim(0,10) +
ylim(0,10)
and the output is
The problem is that in the legend, the colors are shown properly but in the plot, the color of y1 and y3 are swapped.
I can kind of fix it by adding
scale_color_identity() +
but adding this line removes the color legend!
Could someone help please?
I think you can fix this with two changes:
set the color= literal to be the label you want displayed in the legend, and
name your manual colors (and remove labels=).
# One layer per data frame. Inside aes(), color is a constant *label*
# ("y1", "y2", "y3"); the actual colours are attached to those labels by name
# in scale_color_manual(), so the legend and the lines cannot get out of sync.
# Legend titles go in the scales' `name` argument.
ggplot() +
  geom_line(data = df1, aes(x, y1, linetype = z1, color = "y1"), size = 1, show.legend = TRUE) +
  geom_line(data = df2, aes(x, y2, linetype = z2, color = "y2"), size = 1, show.legend = TRUE) +
  geom_line(data = df3, aes(x, y3, linetype = z3, color = "y3"), size = 1, show.legend = TRUE) +
  scale_linetype_manual(name = "Line type legend",
                        values = c("m" = "solid", "n" = "dotted", "p" = "dashed"),
                        guide = "legend") +
  scale_color_manual(name = "color legend",
                     values = c(y1 = "#f8766d", y2 = "#619cff", y3 = "#00ba38"),
                     guide = "legend") +
  xlim(0, 10) +
  ylim(0, 10)
Extra credit: you might consider pivoting and combining your data so that you can use one geom:
library(dplyr)
library(tidyr)
dflong <- lapply(
list(df1, df2, df3), pivot_longer,
-x, names_pattern = "(\\D+)(\\d+)", names_to = c(".value", "num")) %>%
bind_rows(.id = "id")
# Single-geom version on the combined long data: `z` drives the line type and
# `num` (the numeric suffix of the original y column) drives the colour.
# Legend titles go in the scales' `name` argument.
ggplot(dflong) +
  geom_line(aes(x, y, linetype = z, color = num), na.rm = TRUE) +
  scale_linetype_manual(name = "Line type legend",
                        values = c("m" = "solid", "n" = "dotted", "p" = "dashed"),
                        guide = "legend") +
  scale_color_manual(name = "color legend",
                     values = c("1" = "#f8766d", "2" = "#619cff", "3" = "#00ba38"),
                     guide = "legend") +
  xlim(0, 10) +
  ylim(0, 10)
Notes:
We aren't using id here since in this case it appears to be redundant with num= from the pivoting. Not sure if it matters, I'll keep it for discussion at least.
The labels within color legend are now just 1, 2, and 3, as determined by the number portion of y1, y2, and y3 in the original data; you can easily change those to whatever you need, perhaps something like
dflong <- lapply(
list(df1, df2, df3), pivot_longer,
-x, names_pattern = "(\\D+)(\\d+)", names_to = c(".value", "num")) %>%
bind_rows(.id = "id") %>%
mutate(num = paste0("y", num))
The impetus for shifting from multiple frames as in the first code block and this single-longer-frame mindset is that if you add groups (e.g., df4 through df13), you need to add multiple calls to geom_line and perhaps whatever other geoms you might need; in this longer version, you include the frames in the list(...) that I iteratively pivot/row-bind, and then the plot mostly works. You will always have a little more work so long as you manually control linetype= and color=.
I have data from a questionnaire given before and after an educational module. I am trying to visualise the data with ggplot in a stacked bar chart, more specifically a stacked butterfly/tornado graph, where I want the two occasions (before = 1, after = 2) to act as horizontal facets. I have made a hand-drawn picture of what I want, and I also show what I currently have together with my ggplot code.
What I have:
What I need:
My code:
likert_viz <- ggplot(all.gg.data, aes(x = number, y = item, fill = opinion)) +
geom_col(width = 5.0) +
geom_vline(xintercept = 50, color = c("white")) +
scale_y_discrete(limits = all.gg.data$item, labels = scales::wrap_format(50)) +
scale_x_continuous(labels = scales::percent) +
labs(title = "Opinion", subtitle = "Subtitle", x = "Percentage", y = "", color = "") +
facet_wrap(~occasion)
My data: (https://pastebin.com/CdgSseKJ)
I have played around with facets hoping to be able to rearrange them the way I want but failed arranging them so that the "before" answers stacked above the "after" answers for one question at a time. Hoping to get a helpful suggestion or solution.
This was fairly tricky. I found two solutions: (1) create one facet for the positive opinions and one for the negative ones, and remove the space between the facets; (2) explicitly define the start and end of each bar and pass those to geom_segment instead of geom_col.
Common part
Independently from which method you prefer, there are lots of common things:
The first thing you asked for was to stack together the bars for the different occasions. That can be done either by adding position = "dodge" or "dodge2" inside geom_..., or by creating a new y axis with paste(item, "- Occasion", occasion). The first option is more elegant, but I couldn't get it to work.
First step: create the new y axis, and transform number into a frequency (so that the x axis goes from -100% to 100%):
all.gg.data2 = df %>%
mutate(item2 = paste(item, "- Occasion", occasion)) %>%
group_by(item, occasion) %>%
mutate(number = number / sum(number))
Second step: as we want N to be centered in zero, divide that opinion in half, the "positive part", and the "negative part"
all.gg.data.N = all.gg.data2 %>%
filter(opinion == "N") %>%
{rbind(mutate(., number = number/2, opinion = "N-"),
mutate(., number = number/2, opinion = "N+"))}
Third step: add the new N rows, create the scale variable, and transform the number column depending on which method we want. Fourth step: plotting. From here on, the explanation is split by method.
Obs: both use the custom function (based on scales::wrap_format) that removes extra text from the y axis:
# Label formatter for the y axis (based on scales::wrap_format).
#
# x: character vector of axis labels of the form "<item> - Occasion <n>".
# Returns a character vector of the same length in which
#   * every second label (positions 2, 4, ...) is reduced to its
#     "- Occasion <n>" suffix, so the item text is not repeated, and
#   * every label is wrapped at 50 characters, with lines joined by "\n".
# Inputs with fewer than two labels are only wrapped.
my_wrap_format <- function(x) {
  # seq(2, length(x), 2) would fail for length(x) < 2, hence the guard.
  if (length(x) >= 2) {
    even <- seq(2, length(x), by = 2)
    x[even] <- gsub(".+(- Occasion [1-2])", "\\1", x[even])
  }
  wrapped <- strwrap(x, width = 50, simplify = FALSE)
  # vapply() always yields a character vector, also for empty input.
  vapply(wrapped, paste0, character(1), collapse = "\n", USE.NAMES = FALSE)
}
Faceted geom_col method
Third step: change the sign of number for the negative part.
all.gg.data = all.gg.data2 %>%
filter(opinion != "N") %>%
rbind(all.gg.data.N) %>%
mutate(opinion = factor(opinion, levels = c("SD", "D", "N-", "SA", "A", "N+")),
scale = ifelse(opinion %in% c("SD", "D", "N-"), "-", "+"),
number = ifelse(scale == "-", -number, number))
Fourth step: create facets based on scale (facet_wrap(vars(scale))), remove extra space in the x axis (expand = c(0, 0) and panel.spacing = unit(0, "cm")), and join "N+" and "N-" with scale_fill_manual.
ggplot(all.gg.data, aes(x = number, y = item2, fill = opinion)) +
geom_col() +
facet_wrap(vars(scale), ncol = 2, scales = "free_x") +
scale_y_discrete(labels = my_wrap_format) +
scale_x_continuous(labels = scales::percent, expand = c(0, 0)) +
labs(title = "Opinion", subtitle = "Subtitle", x = "Percentage", y = "", color = "") +
scale_fill_manual(values = c(SD = "darkblue", D = "blue", `N-` = "grey",
`N+` = "grey", A = "red", SA = "darkred"),
labels = c("SD", "D", "N", "A", "SA"),
breaks = c("SD", "D", "N-", "A", "SA")) +
theme(panel.spacing = unit(0, "cm"),
strip.text = element_blank(),
strip.background = element_blank())
Result:
geom_segment method
Third step: define the x and xend aesthetics for geom_segment. xend was created by summing the number values with purrr::accumulate, but in one direction for the positive scale, and the other for the negative. x was created by the lag of xend and a different starting point for each scale.
# Running totals used as the end point (xend) of each bar segment.
#
# number: numeric vector of bar lengths within one item/occasion/scale group.
# scale:  character vector of the same length, "-" for the negative side and
#         anything else for the positive side.
# Values on the "-" side are negated. If the group is on the "-" side
# (decided by its first element) the totals are accumulated from the last
# element backwards, otherwise from the first element forwards.
my_accumulate <- function(number, scale) {
  signed <- number * ifelse(scale == "-", -1, 1)
  # isTRUE() keeps an empty or NA `scale` from breaking the if().
  if (isTRUE(scale[1] == "-")) {
    rev(cumsum(rev(signed)))
  } else {
    cumsum(signed)
  }
}
# Build the segment data: re-attach the split N rows, order the opinions from
# most negative to most positive, and compute each segment's end (`number`)
# and start (`numberStart`) within every item/occasion/side group.
all.gg.data <- all.gg.data2 %>%
  filter(opinion != "N") %>%
  rbind(all.gg.data.N) %>%
  mutate(opinion = factor(opinion, levels = c("SD", "D", "N-", "N+", "A", "SA")),
         scale = ifelse(opinion %in% c("SD", "D", "N-"), "-", "+")) %>%
  arrange(item, occasion, opinion) %>%
  group_by(item, occasion, scale) %>%
  mutate(number = my_accumulate(number, scale),
         # A segment starts where its neighbour towards zero ends.
         # stats::lag() does not shift a plain vector, so the neighbour is
         # picked by indexing; head(number, -1) works for any group size.
         numberStart = if (scale[1] == "-") {
           c(number[-1], 0)
         } else {
           c(0, head(number, -1))
         })
Obs: with geom_col, the start and end points of each bar are computed automatically; here they have to be defined explicitly, which is why this method needs more code.
Fourth step: join "N+" and "N-" with scale_color_manual.
ggplot(all.gg.data, aes(x = numberStart, xend = number,
y = item2, yend = item2, color = opinion)) +
geom_segment(size = 2) +
scale_y_discrete(labels = my_wrap_format) +
scale_x_continuous(labels = scales::percent) +
labs(title = "Opinion", subtitle = "Subtitle", x = "Percentage", y = "", color = "") +
scale_color_manual(values = c(SD = "darkblue", D = "blue", `N-` = "grey",
`N+` = "grey", A = "red", SA = "darkred"),
labels = c("SD", "D", "N", "A", "SA"),
breaks = c("SD", "D", "N-", "A", "SA"))
Result:
Appendix: full code
geom_segment method:
# Label formatter for the y axis (based on scales::wrap_format).
#
# x: character vector of axis labels of the form "<item> - Occasion <n>".
# Returns a character vector of the same length in which
#   * every second label (positions 2, 4, ...) is reduced to its
#     "- Occasion <n>" suffix, so the item text is not repeated, and
#   * every label is wrapped at 50 characters, with lines joined by "\n".
# Inputs with fewer than two labels are only wrapped.
my_wrap_format <- function(x) {
  # seq(2, length(x), 2) would fail for length(x) < 2, hence the guard.
  if (length(x) >= 2) {
    even <- seq(2, length(x), by = 2)
    x[even] <- gsub(".+(- Occasion [1-2])", "\\1", x[even])
  }
  wrapped <- strwrap(x, width = 50, simplify = FALSE)
  # vapply() always yields a character vector, also for empty input.
  vapply(wrapped, paste0, character(1), collapse = "\n", USE.NAMES = FALSE)
}
# Running totals used as the end point (xend) of each bar segment.
#
# number: numeric vector of bar lengths within one item/occasion/scale group.
# scale:  character vector of the same length, "-" for the negative side and
#         anything else for the positive side.
# Values on the "-" side are negated. If the group is on the "-" side
# (decided by its first element) the totals are accumulated from the last
# element backwards, otherwise from the first element forwards.
my_accumulate <- function(number, scale) {
  signed <- number * ifelse(scale == "-", -1, 1)
  # isTRUE() keeps an empty or NA `scale` from breaking the if().
  if (isTRUE(scale[1] == "-")) {
    rev(cumsum(rev(signed)))
  } else {
    cumsum(signed)
  }
}
all.gg.data2 = df %>%
mutate(item2 = paste(item, "\n- Occasion", occasion)) %>%
group_by(item, occasion) %>%
mutate(number = number / sum(number))
all.gg.data.N = all.gg.data2 %>%
filter(opinion == "N") %>%
{rbind(mutate(., number = number/2, opinion = "N-"),
mutate(., number = number/2, opinion = "N+"))}
# Build the segment data: re-attach the split N rows, order the opinions from
# most negative to most positive, and compute each segment's end (`number`)
# and start (`numberStart`) within every item/occasion/side group.
all.gg.data <- all.gg.data2 %>%
  filter(opinion != "N") %>%
  rbind(all.gg.data.N) %>%
  mutate(opinion = factor(opinion, levels = c("SD", "D", "N-", "N+", "A", "SA")),
         scale = ifelse(opinion %in% c("SD", "D", "N-"), "-", "+")) %>%
  arrange(item, occasion, opinion) %>%
  group_by(item, occasion, scale) %>%
  mutate(number = my_accumulate(number, scale),
         # A segment starts where its neighbour towards zero ends.
         # stats::lag() does not shift a plain vector, so the neighbour is
         # picked by indexing; head(number, -1) works for any group size.
         numberStart = if (scale[1] == "-") {
           c(number[-1], 0)
         } else {
           c(0, head(number, -1))
         })
ggplot(all.gg.data, aes(x = numberStart, xend = number,
y = item2, yend = item2, color = opinion)) +
geom_segment(size = 10) +
scale_y_discrete(labels = my_wrap_format) +
scale_x_continuous(labels = scales::percent) +
labs(title = "Opinion", subtitle = "Subtitle", x = "Percentage", y = "", color = "") +
scale_color_manual(values = c(SD = "darkblue", D = "blue", `N-` = "grey",
`N+` = "grey", A = "red", SA = "darkred"),
labels = c("SD", "D", "N", "A", "SA"),
breaks = c("SD", "D", "N-", "A", "SA"))
Faceted geom_col method:
all.gg.data2 = df %>%
mutate(item2 = paste(item, "\n- Occasion", occasion)) %>%
group_by(item, occasion) %>%
mutate(number = number / sum(number))
all.gg.data.N = all.gg.data2 %>%
filter(opinion == "N") %>%
{rbind(mutate(., number = number/2, opinion = "N-"),
mutate(., number = number/2, opinion = "N+"))}
all.gg.data = all.gg.data2 %>%
filter(opinion != "N") %>%
rbind(all.gg.data.N) %>%
mutate(opinion = factor(opinion, levels = c("SD", "D", "N-", "SA", "A", "N+")),
scale = ifelse(opinion %in% c("SD", "D", "N-"), "-", "+"),
number = ifelse(scale == "-", -number, number))
ggplot(all.gg.data, aes(x = number, y = item2, fill = opinion)) +
geom_col() +
facet_wrap(vars(scale), ncol = 2, scales = "free_x") +
scale_y_discrete(labels = my_wrap_format) +
scale_x_continuous(labels = scales::percent, expand = c(0, 0)) +
labs(title = "Opinion", subtitle = "Subtitle", x = "Percentage", y = "", color = "") +
scale_fill_manual(values = c(SD = "darkblue", D = "blue", `N-` = "grey",
`N+` = "grey", A = "red", SA = "darkred"),
labels = c("SD", "D", "N", "A", "SA"),
breaks = c("SD", "D", "N-", "A", "SA")) +
theme(panel.spacing = unit(0, "cm"), panel.border = element_blank(),
strip.text = element_blank(), strip.background = element_blank())
You can copy the following code for the example which creates a barplot using ggplot2:
set.seed(999)
similarity_context_set1 = matrix(rnorm(10*3,10,1),ncol=3)
similarity_context_set2 = matrix(rnorm(10*3,10,1),ncol=3)
attraction_prop_context_set1 = matrix(rnorm(10*3,10,1),ncol=3)
attraction_prop_context_set2 = matrix(rnorm(10*3,10,1),ncol=3)
compromise_context_set1 = matrix(rnorm(10*3,10,1),ncol=3)
compromise_context_set2 = matrix(rnorm(10*3,10,1),ncol=3)
library(tidyverse)
library(ggthemes)
# add all matrices in a list. I use lst here since the ouptut is
# a named list
df <- data.frame(name1 = rep(rep(c("attraction", "compromise", "similarity"), each = 3), 2),
name2 = rep(c("1", "2"), each = 9),
x = rep(c("Third", "X", "Y"), 6),
y = rep(12, 18),
label = c("Now", "you", "can", "use", "any", "label", "you", "want",
"by", "inserting", "it", "as", "a", "string", "into", "this",
"character", "vector"))
lst(similarity_context_set1,
similarity_context_set2,
attraction_prop_context_set2,
attraction_prop_context_set1,
compromise_context_set1,
compromise_context_set2) %>%
# transform to tibble and add column names
map(as_tibble) %>%
map(set_names, c("X", "Y","Third")) %>%
# bind the list to one dataframe, add list names to column by
# setting .id
bind_rows(.id = "name") %>%
# transform data from wide to long as it is recommended ggplot
#input format here
pivot_longer(-1,names_to = "x", values_to = "y") %>%
# make to columns for facetting
separate(name, into = c("name1", "name2"), sep = "_", extra = "merge") %>%
mutate(name2 = str_extract(name2, "[0-9]")) %>%
# finally the plot
ggplot(aes(x, y, group=x, fill = x)) + theme_hc(base_size = 13)+
geom_bar(stat = "summary", fun = "mean",alpha=0.8 )+
scale_fill_manual(values = c("Y" = "gray1","X" = "gray1","Third" = "gray1"), guide="none" )+
facet_grid(name2~name1)+
stat_summary(fun.data = mean_se, geom = "errorbar", width=0.2)+
ggtitle("Perceptual Domain")+
theme(plot.title = element_text(hjust = 0.5))+
labs(x = "Response", y = "Mean Choice Proportion")+
geom_text(data = df, aes(label = label))
My question is how can I add different lines with labels? On the picture below you can see an example of what I mean:
Does anyone have an idea how I can do something like that?
This is much the same as your last question, and the answer much the same as the last answer. Please take time to read and understand what is happening in the code.
Create this data frame:
df2 <- data.frame(name1 = rep(c("attraction", "compromise", "similarity"), 2),
name2 = rep(c("1", "2"), each = 3),
yintercept = runif(6, 5, 10),
label = c("Now", "use", "whatever",
"label", "you", "like"))
And add this line:
geomtextpath::geom_texthline(data = df2, aes(yintercept = yintercept, label = label),
color = "red", size = 6, hjust = 0.8, vjust = -0.2, fontface = 2)
I'm trying to highlight (change the color) specific lines in a plot.
The input data looks like this:
dt <- data.frame(Marker = paste0('m', rep(seq(1,10), 10)),
Year = rep(1990:1999, each = 10),
Ahat = rnorm(100, 0.5, 0.1)) %>%
mutate(Group = if_else(Marker %in% c("m1", "m2", "m3"), "A",
if_else(Marker %in% c("m4", "m5", "m6"), "B",
if_else(Marker %in% c("m7", "m8"), "C", "D")) ) )
And the general plot can be created by:
ggplot(dt, aes(x = Year, y = Ahat, group = interaction(as.factor(Group), Marker), color = as.factor(Group) ) ) +
geom_line(alpha = 0.5, size = 0.5) +
theme_classic() +
scale_y_continuous(name = "Predicted Value", breaks = pretty_breaks()) +
scale_colour_manual(name = "Groups", values = c("black", "red", "blue", "orange")) +
facet_wrap(~Group)
What I'd like to do is to highlight (e.g. make some lines black) some specific lines in specific groups (e.g. "m1" and "m9").
I've tried using something like this gghighlight(Marker %in% c("m1", "m9")), but it doesn't work.
I'd like to have something like this (sorry for my poor drawing skills):
Any suggestion?
P.S: My real data has 50K markers.
Thank you!
One option would be to first group data in subgroups (nesting in the dataframe) and then build the plots...
library(tidyverse)
library(scales)
library(patchwork)
# 1. Create dataframe ----
dt <- data.frame(Marker = as.factor(paste0('m', rep(seq(1,10), 10))),
Year = rep(1990:1999, each = 10),
Ahat = rnorm(100, 0.5, 0.1)) %>%
mutate(Group = case_when(
Marker %in% c("m1", "m2", "m3") ~ "A",
Marker %in% c("m4", "m5", "m6") ~ "B",
Marker %in% c("m7", "m8") ~ "C",
TRUE ~ "D"))
# 2. Function to choose which Market of sub_df should be Highlight
# Pick the marker to highlight within one sub-data-frame.
#
# sub_dt: data frame with the columns Marker and Ahat.
# Returns a single Marker value: the marker of the first row whose per-marker
# mean of Ahat equals the largest such mean.
# The mean is only a placeholder criterion; any other rule could be used, and
# the function could just as well return several markers instead of one.
getHighlightMarketBasedOntAhatValue <- function(sub_dt) {
  with_means <- sub_dt %>%
    group_by(Marker) %>%
    mutate(mean_Ahat = mean(Ahat)) %>%
    ungroup()
  # Select by the Marker value itself, not by a row or level index.
  is_top <- with_means$mean_Ahat == max(with_means$mean_Ahat)
  # Returned visibly as the last expression.
  first(with_means$Marker[is_top])
}
# 3. Function to build plot for sub_df
# Line plot of Ahat over Year for one sub-data-frame, with every marker drawn
# in grey except the highlighted one(s), which are drawn in blue.
#
# sub_dt:          data frame with the columns Year, Ahat and Marker (factor).
# highlighted_one: marker(s) to highlight, given as a factor, a character
#                  vector of marker names, or integer positions into
#                  levels(sub_dt$Marker).
# Returns a ggplot object.
my_plot <- function(sub_dt, highlighted_one) {
  marker_levels <- levels(sub_dt$Marker)
  custom_pallete <- rep("grey", length(marker_levels))
  names(custom_pallete) <- marker_levels
  # Index by name: subsetting with a factor would use its integer codes,
  # which only match when both factors have identical levels.
  if (is.factor(highlighted_one)) {
    highlighted_one <- as.character(highlighted_one)
  }
  custom_pallete[highlighted_one] <- "blue"

  # Plot the subset passed in, not the global `dt`.
  ggplot(sub_dt, aes(x = Year,
                     y = Ahat,
                     color = as.factor(Marker))) +
    geom_line(alpha = 0.5, size = 0.5) +
    theme_classic() +
    scale_y_continuous(name = "Predicted Value", breaks = pretty_breaks()) +
    scale_colour_manual(name = "Marker", values = custom_pallete)
}
# 4. Main ----
# 4.1 Nesting ----
nested_dt <- dt %>%
group_by(Group) %>%
nest()
# 4.2 Choosing highlight Market for each subgroup ----
nested_dt <- nested_dt %>%
mutate(highlighted_one = getHighlightMarketBasedOntAhatValue(data[[1]]))
# 4.3 Build plots ----
nested_dt <- nested_dt %>%
mutate(plot = map2(.x = data,
.y = highlighted_one,
.f = ~ my_plot(.x, .y)))
# 4.4 Use patchwork ... ----
# to combine plots ... see patchwork help to find out how to
# manage titles, labels, etc.
nested_dt %>% pull(plot) %>% patchwork::wrap_plots()
```
One way could be to set color as Marker.
Then you can change the color of the Marker in this line
scale_colour_manual(name = "Groups", values = c("black", "red", "blue", "orange", "green", "black", "red", "blue", "orange", "green")) +
Change the colors as you like:
ggplot(dt, aes(x = Year, y = Ahat, group = interaction(as.factor(Group), Marker), color = Marker ) ) +
geom_line(alpha = 0.5, size = 0.5) +
theme_classic() +
scale_y_continuous(name = "Predicted Value", breaks = pretty_breaks()) +
scale_colour_manual(name = "Groups", values = c("black", "red", "blue", "orange", "green",
"black", "red", "blue", "orange", "green")) +
facet_wrap(~Group)
I'm interested in visualizing the location of certain words in sentences. Say I have 500 sentences between 3-5 words long and want to visualize the location of word A in each sentence:
Data:
set.seed(123)
w1 <- sample(LETTERS[1:3], 1000, replace = TRUE)
w2 <- sample(LETTERS[1:5], 1000, replace = TRUE)
w3 <- sample(LETTERS[1:6], 1000, replace = TRUE)
w4 <- sample(c(NA,LETTERS[1:7]), 1000, replace = TRUE)
w5 <- sample(c(NA,LETTERS[1:8]), 1000, replace = TRUE)
df <- data.frame(
position = rep(1:5, each = 1000), # position of word in sentence
word = c(w1, w2, w3, w4, w5) # the words in the sentences
)
I can produce the location plot in base R. But the code involves a very slow for loop and does not have the aesthetic qualities of ggplot2. So how can the same type of visualization be produced faster and in ggplot2?
This is the code that produces the location plot in base R:
# Plot dimensions: 5 word positions on x, 500 sentences on y.
x <- rep(1:5, 100)
y <- 1:500
# Plot parameters: set the margins and allow drawing outside the plot region
# (needed for the legend above the plot) in a single par() call.
par(mar = c(2, 1.5, 1, 1.5), xpd = TRUE)
# Empty plot that only sets up the coordinate system:
plot(y ~ x, type = "n", frame.plot = FALSE, axes = FALSE, ylab = "", xlab = "",
     main = "Location of word 'A' in sentences", cex.main = 0.9)
axis(1, at = 1:5, labels = c("w1", "w2", "w3", "w4", "w5"), cex.axis = 0.9)
# Legend:
legend(2.25, 530, c("A", "other", "NA"), fill = c("blue", "orange", "black"),
       horiz = TRUE, cex = 0.7, bty = "n", border = "white")
# One column of bars per word position; the colour encodes the word:
for (i in unique(df$position)) {
  # Only the sentences indexed by `y` are drawn.
  words <- df$word[df$position == i][y]
  # Missing words are drawn in black, as the legend states; a plain
  # words == "A" test would give them no colour at all.
  bar_cols <- ifelse(is.na(words), "black",
                     ifelse(words == "A", "blue", "orange"))
  text(i, y, "__________", cex = 1, col = bar_cols)
}
For example using geom_segment, and then with a conditional aesthetic.
I am using ggh4x for the truncated axis.
library(tidyverse)
library(ggh4x)
df <-
df %>% group_by(position) %>%mutate(index = row_number())
ggplot(df, aes( color = word=="A")) +
geom_segment(aes(x = position-.4, xend = position+.4,
y = index, yend = index),
key_glyph= "rect") +
scale_color_manual(name = NULL,
values=c(`TRUE` = "blue", `FALSE` = "orange"),
labels = c(`TRUE` = "A", `FALSE` = "other"),
na.value="black")+
guides(x = "axis_truncated") +
scale_x_continuous(breaks = 1:5, labels = paste0("w", 1:5))+
theme_classic() +
theme(axis.line.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
plot.title = element_text(hjust = .5),
legend.position = "top") +
labs( y = NULL, x = NULL, title = "Location of A")
Here's an initial attempt. (I'm not quite clear, are you looking to show just the first 500 of the 1000 sentences?)
My approach here is to first summarize the data in terms of contiguous sections that are A / other / NA. This way, the plot area is filled exactly without needing to tweak line thickness, and it should plot more quickly by reducing the number of plotted elements.
library(dplyr)
df_plot <- df %>%
mutate(A_spots = case_when(word == "A" ~ "A",
word != "A" ~ "other",
TRUE ~ "NA")) %>%
group_by(position) %>%
mutate(col_chg = A_spots != lag(A_spots, default = ""),
group_num = cumsum(col_chg)) %>%
ungroup() %>%
count(position, group_num, A_spots)
library(ggplot2)
ggplot(df_plot, aes(position, n, fill = A_spots, group = group_num)) +
geom_col() +
scale_x_continuous(name = NULL, breaks = 1:5, #stolen from #tjebo's answer
labels = paste0("w", 1:5))+
scale_fill_manual(
values = c("A" = "blue","other" = "orange", "NA" = "black")) +
labs(title = "Location of word 'A' in sentences") +
theme_minimal()