Make boxplots of columns in R - r

I am a beginner in R, and have a question about making boxplots of columns in R. I just made a dataframe:
# Ratings table: one column per two-letter code, one row per questionnaire
# item. The item texts are attached as row names; NW has two missing answers.
SUS <- data.frame(
  RD = c(4, 3, 4, 1, 2, 2, 4, 2, 4, 1),
  TK = c(4, 2, 4, 2, 2, 2, 4, 4, 3, 1),
  WK = c(3, 2, 4, 1, 3, 3, 4, 2, 4, 2),
  NW = c(2, 2, 4, 2, NA, NA, 5, 1, 4, 2),
  BW = c(3, 2, 4, 1, 4, 1, 4, 1, 5, 1),
  EK = c(2, 4, 3, 1, 2, 4, 2, 2, 4, 2),
  AN = c(3, 2, 4, 2, 3, 3, 3, 2, 4, 2),
  row.names = c(
    "Pleasant to use",
    "Unnecessary complex",
    "Easy to use",
    "Need help of a technical person",
    "Different functions well integrated",
    "Various function incohorent",
    "Imagine that it is easy to learn",
    "Difficult to use",
    "Confident during use",
    "Long duration untill I could work with it"
  )
)
I tried a number of times, but I did not succeed in making boxplots for all rows. Can someone help me out here?

You can also do this using the tidyverse:
library(tidyverse)

# Draw one box per questionnaire item (i.e. per row of SUS).
SUS %>%
  # keep the row names (the item texts) as a regular column
  rownames_to_column("variable") %>%
  # convert the data from wide to long: every column except the label column
  # is stacked into a name/value pair, so no positional 1:7 range is needed
  pivot_longer(cols = -variable, names_to = "var", values_to = "value") %>%
  # plot it using ggplot2
  ggplot(aes(x = variable, y = value)) +
  # na.rm = TRUE drops the missing ratings without emitting a warning
  geom_boxplot(na.rm = TRUE) +
  # the item texts are long, so tilt them to keep them readable
  theme(axis.text.x = element_text(angle = 35, hjust = 1))

As @blondeclover says in the comment, boxplot() should work fine for drawing a boxplot of each column.
If what you want is a boxplot for each row, then actually your current rows need to be your columns. If you need to do this, you can transpose the data frame before plotting:
# t() swaps rows and columns (and returns a matrix), so convert the result
# back to a data frame: every former row is now a column.
SUS.new <- as.data.frame(x = t(x = SUS))
# boxplot() on a data frame draws one box per column.
boxplot(x = SUS.new)

Related

Is there a way, i can order the axis on a melted ggplot? [duplicate]

This question already has answers here:
Order discrete x scale by frequency/value
(7 answers)
How do you specifically order ggplot2 x axis instead of alphabetical order? [duplicate]
(2 answers)
ggplot2, Ordering y axis
(1 answer)
R ggplot ordering bars within groups
(1 answer)
Closed 6 months ago.
I have a problem with a plot that I want to order, but it seems like it can't be ordered.
# Install the plotting dependencies only when they are missing, instead of
# re-installing them on every run.
for (pkg in c("reshape2", "ggplot2")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(reshape2)
library(ggplot2)

df <- createRegressionTable(data, colname)
# Long format: one row per (colname, variable) pair with its value.
gg <- melt(df, id = "colname")
# ggplot2 orders a character axis alphabetically. Turning colname into a
# factor whose levels follow the order of first appearance keeps the x axis
# in the same order as the table.
gg$colname <- factor(gg$colname, levels = unique(as.character(gg$colname)))
return(
  ggplot(gg, aes(x = colname, y = variable, fill = value)) +
    geom_tile(show.legend = FALSE) +
    # print the value inside each tile
    geom_text(aes(label = value), alpha = 0.6) +
    scale_fill_gradient(low = "#D5E8D4", high = "#F8CECC") +
    labs(
      x = "Regressant",
      y = "Regressor"
    ) +
    theme(legend.key = element_blank())
)
I know the function createRegressionTable is a black box but this is the result:
list(colname = c("zielrichtungU", "zielrichtungO",
"imitationU", "imitationO", "steuerungU", "steuerungO", "neuheitU",
"neuheitO", "netzwerkU", "netzwerkO"), zielrichtungU = c(5, 1,
5, 1, 3, 4, 1, 1, 1, 1), zielrichtungO = c(1, 5, 1, 5, 1, 5,
3, 5, 1, 1), imitationU = c(5, 1, 5, 5, 1, 5, 1, 1, 4, 1), imitationO = c(1,
5, 5, 5, 1, 1, 5, 5, 5, 5), steuerungU = c(3, 1, 1, 1, 5, 5,
1, 2, 1, 1), steuerungO = c(4, 5, 5, 1, 5, 5, 3, 5, 1, 3), neuheitU = c(1,
3, 1, 5, 1, 3, 5, 5, 1, 1), neuheitO = c(1, 5, 1, 5, 2, 5, 5,
5, 1, 1), netzwerkU = c(1, 1, 4, 5, 1, 1, 1, 1, 5, 5), netzwerkO = c(1,
1, 1, 5, 1, 3, 1, 1, 5, 5))
I tested whether the output of melt is scrambled, but it seems to be ordered, as I wished, and now I don't know where the problem lies
And here is the Plot, that I'd love to order:

How can I change size of y-axis text labels on a likert() object in R?

I'm working with the likert package to generate nice-looking diverging stacked bar charts in R. Most of the formatting has come together, but I can't seem to find a way to shrink the text for the y-axis labels (e.g. "You and your family in the UK", "People in your local area..." etc.), which are too large for the plot. Any ideas here? I'm starting to wonder if I need to revert to ggplot, which will require more code but offers more customisability...
# Reproducible example data: a tibble (class tbl_df) with 45 rows and four
# numeric Q25_* columns, one per part of question 25. The values are response
# codes; the code further down treats them as factor levels 1:5.
climate_experience_data <- structure(list(Q25_self_and_family = c(4, 2, 3, 5, 3, 3, 4, 2,
4, 2, 4, 4, 3, 3, 2, 5, 3, 4, 1, 3, 3, 2, 4, 2, 2, 2, 4, 3, 3,
3, 2, 5, 5, 4, 2, 2, 2, 3, 1, 3, 2, 1, 2, 4, 2), Q25_local_area = c(3,
3, 3, 5, 3, 2, 4, 2, 4, 2, 4, 3, 2, 3, 2, 5, 4, 5, 1, 4, 3, 3,
4, 2, 3, 2, 3, 3, 2, 3, 2, 5, 5, 2, 2, 2, 2, 3, 1, 1, 2, 1, 2,
4, 3), Q25_uk = c(4, 3, 3, 5, 2, 3, 5, 2, 4, 2, 4, 3, 3, 3, 3,
5, 4, 5, 2, 3, 3, 2, 4, 2, 4, 3, 4, 3, 2, 4, 4, 5, 5, 4, 3, 3,
2, 4, 2, 5, 2, 2, 2, 3, 3), Q25_outside_uk = c(4, 4, 3, 5, 4,
4, 5, 2, 4, 3, 3, 3, 3, 4, 3, 5, 4, 5, 4, 3, 3, 2, 4, 2, 5, 3,
3, 2, 2, 3, 4, 4, 5, 4, 4, 3, 2, 4, 4, 5, 2, 3, 2, 2, 2)), row.names = c(NA,
-45L), class = c("tbl_df", "tbl", "data.frame"))
# load libraries (library() fails loudly if a package is missing, whereas
# require() only returns FALSE):
library(tidyverse)
library(likert)
library(hrbrthemes) # provides theme_ipsum_rc()

# Q25 - generate diverging stacked bar chart using likert()
q25_data <- select(climate_experience_data, Q25_self_and_family:Q25_outside_uk)
# Replace the column names by the question texts shown on the y axis
names(q25_data) <- c(
  "You and your family in the UK",
  "People in your local area or city",
  "The UK as a whole",
  "Your family and/or friends living outside the UK"
)

# Set up levels text for question responses
q25_levels <- paste(
  c("not at all", "somewhat", "moderately", "very", "extremely"),
  "serious"
)

# likert() needs a plain data.frame whose columns are factors sharing the same
# levels, so recode every column and drop the tibble class first
q25_likert_table <- q25_data %>%
  mutate(across(
    everything(),
    function(x) factor(x, ordered = TRUE, levels = 1:5, labels = q25_levels)
  )) %>%
  as.data.frame() %>%
  likert()

# make plot (the title is set once, through labs()):
plot(
  q25_likert_table,
  wrap = 20, text.size = 3, ordered = FALSE,
  low.color = "#B18839", high.color = "#590048"
) +
  labs(
    title = "How serious a threat do you think \nclimate change poses to the following?",
    y = ""
  ) +
  guides(fill = guide_legend(title = NULL)) +
  theme_ipsum_rc() +
  theme()
Here's a sample of output:
As your plot is still a ggplot object you could adjust the size of the y axis labels via theme(axis.text.y = ...):
library(tidyverse)
library(likert)
library(hrbrthemes)

# Recode every question column as an ordered factor with the shared response
# labels, then build the likert object from a plain data.frame.
q25_likert_table <- q25_data %>%
  mutate(across(
    everything(),
    # explicit function instead of passing extra arguments through across()
    function(x) factor(x, ordered = TRUE, levels = 1:5, labels = q25_levels)
  )) %>%
  as.data.frame() %>%
  likert()

# The result of plot() is a ggplot object, so ordinary ggplot2 layers and
# theme settings can be added to it.
plot(
  q25_likert_table,
  wrap = 20, text.size = 3, ordered = FALSE,
  low.color = "#B18839", high.color = "#590048"
) +
  labs(
    title = "How serious a threat do you think \nclimate change poses to the following?",
    y = ""
  ) +
  guides(fill = guide_legend(title = NULL)) +
  theme_ipsum_rc() +
  # placed after theme_ipsum_rc() so the complete theme does not override it
  theme(axis.text.y = element_text(size = 4))

How to group mean center several columns at the same time?

I have no problem doing group-mean centering for one variable at a time, but how can I do that for multiple variables at the same time?
library(misty)
library(dplyr) # provides %>%, mutate() and across()

x <- c(1, 2, 3, 4, 3, 1)
y <- c(1, NA, 3, 5, 3, 2)
group <- as.factor(c(1, 2, 2, 1, 2, 1))
mydata <- data.frame(x, y, group)

# Centre all listed columns within their group (type = "CWC") in one mutate()
# call. across() applies the same center() call to each column and .names
# appends "_cwc", giving the columns x_cwc and y_cwc.
# NOTE(review): the center() arguments are kept exactly as in the
# one-variable-at-a-time version; confirm them against the installed misty
# release.
mydata <- mydata %>%
  mutate(across(
    c(x, y),
    ~ center(.x, type = "CWC", group = group),
    .names = "{.col}_cwc"
  ))

How to bar plot answers per category in ggplot?

I have a tibble created like this:
# One row per respondent: the district and the housing category.
tibble(
  district = c(1, 5, 3, 5, 2, 7, 8, 1, 1, 2, 2, 4, 5, 6, 8, 6, 3),
  housing = c(1, 1, 2, 1, 2, 2, 2, 1, 1, 2, 3, 2, 1, 1, 1, 3, 2)
)
Now I would like to know how the type of housing is distributed per district. Since the amount of respondents per district is different, I would like to work with percentages. Basically I'm looking for two plots;
1) One barplot in which the percentage of housing categories is visualized in 1 bar per district (since it is percentages all the bars would be of equal height).
2) A pie chart for every district, with the percentage of housing categories for that specific district.
However, I am unable to group the data in the desired way, let alone compute the percentages. How can I make these plots?
Thanks ahead!
Give this a shot:
library(tidyverse)
library(ggplot2)

# original data: one row per respondent
df <- data.frame(
  district = c(1, 5, 3, 5, 2, 7, 8, 1, 1, 2, 2, 4, 5, 6, 8, 6, 3),
  housing = c(1, 1, 2, 1, 2, 2, 2, 1, 1, 2, 3, 2, 1, 1, 1, 3, 2)
)

# housing is a category code, so count the respondents per district and
# category (summing the codes would be meaningless) and turn the counts into
# within-district shares
housing_share <- df %>%
  count(district, housing, name = "n") %>%
  group_by(district) %>%
  mutate(housing_percentage = round(n / sum(n), 2)) %>%
  ungroup() %>%
  # treat both identifiers as discrete labels rather than numbers
  mutate(
    district = as.character(district),
    housing = as.character(housing)
  )

# bar graph: one stacked bar per district; position = "fill" rescales every
# bar to the same height, so the segments show the share of each category
ggplot(data = housing_share, aes(x = district, y = n, fill = housing)) +
  geom_col(position = "fill") +
  labs(y = "housing_percentage")

# pie charts: the same stacked bars in polar coordinates, one panel per district
ggplot(data = housing_share, aes(x = "", y = n, fill = housing)) +
  geom_col(width = 1, color = "white", position = "fill") +
  coord_polar("y", start = 0) +
  facet_wrap(~district) +
  theme_void()
Which yields the following plots:

How to update a list in a for loop(cannot store ggplot object into the list) [duplicate]

My problem is similar to this one; when I generate plot objects (in this case histograms) in a loop, seems that all of them become overwritten by the most recent plot.
To debug, within the loop, I am printing the index and the generated plot, both of which appear correctly. But when I look at the plots stored in the list, they are all identical except for the label.
(I'm using multiplot to make a composite image, but you get same outcome if you print (myplots[[1]])
through print(myplots[[4]]) one at a time.)
Because I already have an attached dataframe (unlike the poster of the similar problem), I am not sure how to solve the problem.
(btw, column classes are factor in the original dataset I am approximating here, but same problem occurs if they are integer)
Here is a reproducible example:
library(ggplot2)
# NOTE(review): this downloads and runs remote code over plain http; prefer a
# reviewed local copy of the multiplot() helper.
source("http://peterhaschke.com/Code/multiplot.R") #load multiplot function
# Sample data: four numeric vectors that become the columns of data2.
col1 <- c(2, 4, 1, 2, 5, 1, 2, 0, 1, 4, 4, 3, 5, 2, 4, 3, 3, 6, 5, 3, 6, 4, 3, 4, 4, 3, 4,
2, 4, 3, 3, 5, 3, 5, 5, 0, 0, 3, 3, 6, 5, 4, 4, 1, 3, 3, 2, 0, 5, 3, 6, 6, 2, 3,
3, 1, 5, 3, 4, 6)
col2 <- c(2, 4, 4, 0, 4, 4, 4, 4, 1, 4, 4, 3, 5, 0, 4, 5, 3, 6, 5, 3, 6, 4, 4, 2, 4, 4, 4,
1, 1, 2, 2, 3, 3, 5, 0, 3, 4, 2, 4, 5, 5, 4, 4, 2, 3, 5, 2, 6, 5, 2, 4, 6, 3, 3,
3, 1, 4, 3, 5, 4)
col3 <- c(2, 5, 4, 1, 4, 2, 3, 0, 1, 3, 4, 2, 5, 1, 4, 3, 4, 6, 3, 4, 6, 4, 1, 3, 5, 4, 3,
2, 1, 3, 2, 2, 2, 4, 0, 1, 4, 4, 3, 5, 3, 2, 5, 2, 3, 3, 4, 2, 4, 2, 4, 5, 1, 3,
3, 3, 4, 3, 5, 4)
col4 <- c(2, 5, 2, 1, 4, 1, 3, 4, 1, 3, 5, 2, 4, 3, 5, 3, 4, 6, 3, 4, 6, 4, 3, 2, 5, 5, 4,
2, 3, 2, 2, 3, 3, 4, 0, 1, 4, 3, 3, 5, 4, 4, 4, 3, 3, 5, 4, 3, 5, 3, 6, 6, 4, 2,
3, 3, 4, 4, 4, 6)
# Combine the vectors into one data frame (one column per vector)
data2 <- data.frame(col1,col2,col3,col4)
# Convert all four columns to factors, then rename them A-D
data2[,1:4] <- lapply(data2[,1:4], as.factor)
colnames(data2)<- c("A","B","C", "D")
# Generate one histogram per column and collect them in a list.
# NOTE: this is the reproduction of the problem, kept as posted. aes() stores
# the expression data2[ ,i] unevaluated (see "mapping: x = data2[, i]" in the
# summary output), so i is only looked up when a plot is drawn.
myplots <- list() # new empty list
for (i in 1:4) {
p1 <- ggplot(data=data.frame(data2),aes(x=data2[ ,i]))+
geom_histogram(fill="lightgreen") +
xlab(colnames(data2)[ i])
# debugging output: index and plot both look right inside the loop, because
# the plot is drawn while i still has this iteration's value
print(i)
print(p1)
myplots[[i]] <- p1 # add each plot into plot list
}
# drawn after the loop has finished, i.e. when i holds its final value
multiplot(plotlist = myplots, cols = 4)
When I look at a summary of a plot object in the plot list, this is what I see
> summary(myplots[[1]])
data: A, B, C, D [60x4]
mapping: x = data2[, i]
faceting: facet_null()
-----------------------------------
geom_histogram: fill = lightgreen
stat_bin:
position_stack: (width = NULL, height = NULL)
I think that mapping: x = data2[, i] is the problem, but I am stumped! I can't post images, so you'll need to run my example and look at the graphs if my explanation of the problem is confusing.
Thanks!
In addition to the other excellent answer, here’s a solution that uses “normal”-looking evaluation rather than eval. Since for loops have no separate variable scope (i.e. they are performed in the current environment), we need to use local to wrap the body of the for loop; in addition, we need to make i a local variable — which we can do by re-assigning it to its own name¹:
# One list slot per column of data2, filled in by the loop below.
myplots <- vector(mode = "list", length = length(data2))
for (i in seq_len(length(data2))) {
  message(i)
  myplots[[i]] <- local({
    # Re-bind i inside local() so that the expression stored by aes() finds
    # this iteration's value rather than the loop variable.
    i <- i
    column_label <- names(data2)[i]
    histogram <- ggplot(data2, aes(x = data2[[i]])) +
      geom_histogram(fill = "lightgreen")
    # the value of print() is the last expression, i.e. the value of local()
    print(histogram + xlab(column_label))
  })
}
However, an altogether cleaner way is to forego the for loop entirely and use list functions to build the result. This works in several possible ways. The following is the easiest in my opinion:
# Plot the distribution of one column of a data frame.
#
# data:   a data frame.
# column: name of the column to plot, as a string.
#
# Returns a ggplot object: a histogram for numeric columns and a bar chart
# otherwise, because the binning stat behind geom_histogram() requires a
# continuous x variable.
plot_data_column <- function(data, column) {
  distribution_layer <- if (is.numeric(data[[column]])) {
    geom_histogram(fill = "lightgreen")
  } else {
    geom_bar(fill = "lightgreen")
  }
  # .data[[column]] looks the column up by name inside the plot's own data,
  # replacing the deprecated aes_string()
  ggplot(data, aes(x = .data[[column]])) +
    distribution_layer +
    xlab(column)
}
# one plot per column; the column name is passed as the `column` argument
myplots <- lapply(colnames(data2), plot_data_column, data = data2)
This has several advantages: it’s simpler, and it won’t clutter the environment (with the loop variable i).
¹ This might seem confusing: why does i <- i have any effect at all? — Because by performing the assignment we create a new, local variable with the same name as the variable in the outer scope. We could equally have used a different name, e.g. local_i <- i.
Because of all the quoting of expressions that get passed around, the i that is evaluated at the end of the loop is whatever i happens to be at that time, which is its final value. You can get around this by using eval(substitute(...)) to substitute in the right value during each iteration.
# Preallocate one slot per column instead of growing the list in the loop.
myplots <- vector("list", ncol(data2))
for (i in seq_along(data2)) {
  # substitute() replaces the symbol i in the quoted call with its current
  # value before eval() builds the plot, so every stored plot refers to a
  # fixed column index instead of the loop variable.
  p1 <- eval(substitute(
    ggplot(data = data2, aes(x = data2[, i])) +
      geom_histogram(fill = "lightgreen") +
      xlab(colnames(data2)[i]),
    list(i = i)
  ))
  print(i)
  print(p1)
  myplots[[i]] <- p1 # store each plot in the plot list
}
# one panel column per plot
multiplot(plotlist = myplots, cols = ncol(data2))
Using lapply works too as x exists within the anonymous function environment (using mtcars as data):
# Build one line chart per column of mtcars (mpg on the x axis). The column
# index x is an argument of the anonymous function, so each plot keeps its own
# value.
plot <- lapply(
  X = seq_len(ncol(mtcars)),
  FUN = function(x) {
    line_layer <- geom_line(
      aes(x = mpg, y = mtcars[, x]),
      size = 1.4, color = "midnightblue", inherit.aes = FALSE
    )
    plot_labels <- labs(
      x = "Date", y = "Value", title = "Revisions 1M",
      subtitle = colnames(mtcars)[x]
    )
    ggplot(data = mtcars) +
      line_layer +
      plot_labels +
      theme_wsj() +
      scale_colour_wsj("colors6")
  }
)
I have run the code in the question and in the answer, changing geom_histogram to geom_bar to avoid the error: Error: StatBin requires a continuous x variable.
Here is the code with the visualizations:
Question
# Generate plots: the loop from the question, with geom_bar() in place of
# geom_histogram(). Kept as posted to reproduce the problem: aes() stores
# data2[ ,i] unevaluated, so i is only looked up when a plot is drawn.
myplots <- list() # new empty list
for (i in 1:4) {
p1 <- ggplot(data=data.frame(data2),aes(x=data2[ ,i]))+
geom_bar(fill="lightgreen") +
xlab(colnames(data2)[ i])
print(i)
print(p1)
myplots[[i]] <- p1 # add each plot into plot list
}
# drawn after the loop has finished, i.e. when i holds its final value
multiplot(plotlist = myplots, cols = 4)
#> Loading required package: grid
Answer
# One list slot per column of data2, filled in by the loop below.
myplots <- vector(mode = "list", length = length(data2))
for (i in seq_len(length(data2))) {
  message(i)
  myplots[[i]] <- local({
    # Re-bind i inside local() so that the expression stored by aes() finds
    # this iteration's value rather than the loop variable.
    i <- i
    column_label <- names(data2)[i]
    bar_chart <- ggplot(data2, aes(x = data2[[i]])) +
      geom_bar(fill = "lightgreen")
    # the value of print() is the last expression, i.e. the value of local()
    print(bar_chart + xlab(column_label))
  })
}
# arrange the stored plots side by side
multiplot(plotlist = myplots, cols = 4)
Same result using lapply:
# Draw a bar chart of the value counts of one column of a data frame.
#
# data:   a data frame.
# column: name of the column to plot, as a string.
#
# Returns a ggplot object.
plot_data_column <- function(data, column) {
  # .data[[column]] looks the column up by name inside the plot's own data,
  # replacing the deprecated aes_string()
  ggplot(data, aes(x = .data[[column]])) +
    geom_bar(fill = "lightgreen") +
    xlab(column)
}
# one plot per column; the column name is passed as the `column` argument
myplots <- lapply(colnames(data2), plot_data_column, data = data2)
multiplot(plotlist = myplots, cols = 4)
#> Loading required package: grid
Created on 2021-04-09 by the reprex package (v0.3.0)

Resources