Likert Package - Plot Percents [duplicate] - r

I've created some charts using the Likert package, however when I create plots by groups the plot.percents = TRUE won't give me the labels for each response category. The plot.percents.high =TRUE and plot.percents.low = TRUE gives me the combined percentage, however I want it for all of the response categories. It works fine with the ungrouped data. The code I`m using is:
Make some data
library(likert)
library (reshape)
Group <- c("Group 1", "Group 1", "Group 1", "Group 1", "Group 1", "Group 1", "Group 1", "Group 2", "Group 2", "Group 2", "Group 2", "Group 2",
"Group 2","Group 2", "Group 3", "Group 3", "Group 3", "Group 3","Group 3","Group 3","Group 3")
Var1 <- c("Agree", "Agree", "Strongly agree", "Agree", "Strongly disagree", "Agree","Strongly agree", "Disagree", "Strongly agree",
"Strongly agree", "Agree", "Disagree", "Agree", "Strongly disagree", "Agree", "Agree", "Agree", "Disagree", "Strongly agree",
"Strongly disagree", "Strongly agree")
df <- as.data.frame (cbind(Group, Var1))
Variable <- c("Var1")
df2 <- (df[Variable])
likert.df <- likert (df2)
likert.df.group <- likert (df2, grouping=df$Group)
likert.df is the responses for all, likert.df.group is the responses for each group. When I run the plot (below) with just likert.df, I get the percentages for each response, when I run it for likert.df.group, they disappear.
likert.bar.plot(likert.df, low.color = "#007CC2",
high.color = "#F7971C", neutral.color = "grey90",
neutral.color.ramp = "white", plot.percent.low = FALSE,
plot.percent.high = FALSE, plot.percent.neutral = FALSE,
plot.percents = TRUE, text.size = 4,
text.color = "black", centered = FALSE,
include.center = FALSE, ordered = FALSE,
wrap.grouping = 50, legend = "Response",
legend.position = "bottom", panel.arrange = "v",
panel.strip.color = "grey90")+
ggtitle("Chart Title") +
theme (panel.background = element_rect(fill="NA")) +
theme (axis.text.y = element_text (colour="black", size="10", hjust=0))+
theme (axis.text.x = element_text (colour="black", size="10")) +
theme (legend.title = element_blank())+
theme (plot.margin = unit (c(0,0,0,0),"mm"))
Am I missing something?

According to the function source, printing of plot.percents is not currently supported for grouped analysis. See https://github.com/jbryer/likert/blob/master/R/plot.likert.bar.r#L174
There's a slight problem with the package code, which is easy to fix (unless I am overlooking something else).
On line 175 https://github.com/jbryer/likert/blob/master/R/plot.likert.bar.r#L175 change:
# lpercentpos <- ddply(results[results$value > 0,], .(Item), transform,
lpercentpos <- ddply(results[results$value > 0,], .(Group, Item), transform,
on line 177 https://github.com/jbryer/likert/blob/master/R/plot.likert.bar.r#L177 change:
# p + geom_text(data=lpercentpos, aes(x=Group, y=pos, label=paste0(round(value), '%'),
p <- p + geom_text(data=lpercentpos, aes(x=Group, y=pos, label=paste0(round(value), '%'),
and on line 184 https://github.com/jbryer/likert/blob/master/R/plot.likert.bar.r#L184 change:
# lpercentneg <- ddply(lpercentneg, .(Item), transform,
lpercentneg <- ddply(lpercentneg, .(Group, Item), transform,
Then uncomment this section and remove FALSE from the if statement
# if(FALSE & plot.percents) { #TODO: implement for grouping
if(plot.percents) {
Here's the snippet which goes inside the if statement:
# if(FALSE & plot.percents) { #TODO: implement for grouping
if(plot.percents) {
# warning('plot.percents is not currenlty supported for grouped analysis.')
lpercentpos <- ddply(results[results$value > 0,], .(Group, Item), transform,
pos = cumsum(value) - 0.5*value)
p <- p + geom_text(data=lpercentpos, aes(x=Group, y=pos, label=paste0(round(value), '%'),
group=Item), size=text.size)
lpercentneg <- results[results$value < 0,]
if(nrow(lpercentneg) > 0) {
lpercentneg <- lpercentneg[nrow(lpercentneg):1,]
lpercentneg$value <- abs(lpercentneg$value)
lpercentneg <- ddply(lpercentneg, .(Group, Item), transform,
pos = cumsum(value) - 0.5*value)
lpercentneg$pos <- lpercentneg$pos * -1
p <- p + geom_text(data=lpercentneg, aes(x=Item, y=pos, label=paste0(round(abs(value)), '%')),
size=text.size)
}
}
I haven't done much testing, but your test data works fine and produces this output:
I fixed this issue and submitted a pull request to Jason. In the meantime you can pull the changes from here: https://github.com/aseidlitz/likert

I wrote a little add-on based off the source code, if you don't want to bother modding the source material. Just takes the answer above and applies it. Shouldn't be too hard to put into a user function if you make a lot of graphs with it. I have been doing some work trying to get the percents added and then figure a way to add the N's somewhere on the graph.
library(likert)
library(reshape)
library(plyr)
#--------------- Works using likert package, problems with the modded source code)
rm(list=ls(all=T))
# ---------------- Example Data -------------------- #
likert.responses <- c("Agree", "Neutral", "Strongly agree", "Disagree", "Strongly disagree", NA)
questions <- c("Q_1","Q_2","Q_3")
groupA <- c("White", "Afr. American", "Hispanic", "Other")
set.seed(12345)
mydata <- data.frame(
race = sample(groupA, 100, replace=T, prob=c(.3,.3,.3,.01)),
Q_1 = sample(likert.responses, 100, replace=T, prob=c(.2,.2,.2,.2,.19,.01)),
Q_2 = sample(likert.responses, 100, replace=T, prob=c(.1,.2,.2,.29,.2, .01)),
Q_3 = sample(likert.responses, 100, replace=T, prob=c(.4,.2,.09,.15,.15,.01))
)
mydata.que <- mydata[questions]
mydata.que[] <- lapply(mydata.que, factor,
levels=c("Strongly disagree", "Disagree", "Neutral", "Agree","Strongly agree"))
mydata.1 <- likert(mydata.que)
mydata.group <- likert(mydata.que, grouping=mydata$race)
p <- plot(mydata.group, centered=F, # This controls stacked versus the "centered" option
ordered=F,
plot.percents = TRUE
) + ggtitle("Likert Test")
# --- Gets the percentages from the likert object -- #
results <- mydata.group$results
results <- reshape::melt(results, id=c('Group', 'Item'))
results$variable <- factor(results$variable, ordered=TRUE)
lpercentpos <- ddply(results[results$value > 0,], .(Group, Item), transform,
pos = cumsum(value) - 0.5*value)
lpercentpos <- subset(lpercentpos, variable != "Neutral" & value != 100 & value != 0)
# -- Double checking percents are right -- #
prop.table(table(mydata$race, mydata$Q_1),1)
pworks <- p + geom_text(data=lpercentpos, aes(x=Group, y=pos, label=paste0(round(value), '%'),
group=Item),
size=3)
pworks
# --- Using the OP's code --- #
p <- plot(likert.df.group, centered=F, # This controls stacked versus the "centered" option
ordered=F,
plot.percents = TRUE
) + ggtitle("Likert Test")
results <- likert.df.group$results
results <- reshape::melt(results, id=c('Group', 'Item'))
results$variable <- factor(results$variable, ordered=TRUE)
lpercentpos <- ddply(results[results$value > 0,], .(Group, Item), transform,
pos = cumsum(value) - 0.5*value)
lpercentpos <- subset(lpercentpos, variable != "Neutral" & value != 100 & value != 0)
prop.table(table(likert.df.group$race, likert.df.group$Q_1),1)
pworks <- p + geom_text(data=lpercentpos, aes(x=Group, y=pos, label=paste0(round(value), '%'),
group=Item),
size=3)
pworks

Even the example script that's included in the likert package documentation using the pisaitems data will not graph correctly the percent labels. It ends up looking like the image below when you run this code.
require(likert)
data(pisaitems)
##### Item 29: How often do you read these materials because you want to?
title <- "How often do you read these materials because you want to?"
items29 <- pisaitems[,substr(names(pisaitems), 1,5) == 'ST25Q']
head(items29); ncol(items29)
names(items29) = c("Magazines", "Comic books", "Fiction", "Non-fiction books", "Newspapers")
l29g <- likert(items29, grouping=pisaitems$CNT)
# Plots
plot(l29g, plot.percents=TRUE, plot.percent.low=FALSE,
plot.percent.high=FALSE, plot.percent.neutral=FALSE) +
ggtitle(title)

Hey I tried it out and it doesn't work for me either using the grouping data. There is no mention of why despite plot.percent.low and plot.percent.high working fine. Unless someone else cracks it all I can do is offer a workaround using plot() instead of likert.bar.plot and text()
Here I label the Agree category only for all three groups.
plot(likert.df.group, type="bar")
text(c(0.35,0.35,0.35), c(0.85,0.6,0.25),
labels = paste0(c(42.8,28.57,42.85),"%") )

Related

100% stacked bar chat

I'm new to R and trying to get this data in a 100% Stacked Bar chart in R to look like this
The data looks like this
ccEFFECT <- data$Q7_1
ccEFFECT [ccEFFECT == -99] <- NA
ccEFFECTS<- factor(ccEFFECT , labels = c("Strongly Disagree", "Disagree", "Neither Agree nor Disagree", "Agree", "Strongly Agree"))
levels(ccEFFECTS )
str(ccEFFECTS )
summary (ccEFFECTS )
length(na.omit(ccEFFECTS ))
length(ccEFFECTS )
ccEFFECTfrequency <- table (ccEFFECTS ) #frequency
ccEFFECTfrequency
#percentages
ccEFFECT_PERCENTAGE=prop.table(table(ccEFFECTS)) * 100
ccEFFECT_PERCENTAGE
barplot(ccEFFECT_PERCENTAGE)
Q2EFFECT<- data$Q7_2
Q2EFFECT [Q2EFFECT == -99] <- NA
Q2EFFECTS<- factor(Q2EFFECT , labels = c("Strongly Disagree", "Disagree", "Neither Agree nor Disagree", "Agree", "Strongly Agree"))
levels(Q2EFFECTS )# how many levels of a categorical variable
str(Q2EFFECTS )
summary (Q2EFFECTS )
length(na.omit(Q2EFFECTS ))
length(Q2EFFECTS )
Q2EFFECTfrequency <- table (Q2EFFECTS ) #frequency
Q2EFFECTfrequency
#percentages
Q2EFFECT_PERCENTAGE=prop.table(table(Q2EFFECTS)) * 100
Q2EFFECT_PERCENTAGE
barplot(Q2EFFECT_PERCENTAGE)
Any suggestions.
Let dummy is data you give as picture. Then,
dummy <- dummy %>% filter(Q71 != -99)
colnames(dummy) <- c("concern about home price", "concern about jobs", "concern about unemployment","concern about importation", "concern about inflation")
dummy %>%
reshape2::melt(value.name = "response",
measure.var = c("concern about home price",
"concern about jobs",
"concern about unemployment",
"concern about importation",
"concern about inflation")) %>%
group_by(variable, response) %>%
summarise(n = n()/ 29) %>%
ungroup %>%
mutate(response = factor(response, labels = c("Strongly Disagree", "Disagree", "Neither Agree nor Disagree", "Agree", "Strongly Agree"), ordered = T)) %>%
ggplot(aes(fill = response, y = n, x = variable)) +
geom_bar(position = "fill", stat = "identity", width = 0.2) +
coord_flip() + scale_fill_manual(values = c("steelblue", "yellow", "grey", "orange", "darkblue")) +
theme_minimal()
result is like

Is it possible to subset facets in a polyfreq in GGplot?

I was wondering if it was possible to use subset on a geom_polyfreq()?
I am running a topic model and in order to report the facets properly i want to remove 4 out of 10 facets.
My code is as follows:
ggplot(data = dat,
aes(x = date,
fill = Topics)) +
geom_freqpoly(binwidth = 3) +
labs(x = "",
y = "Topic Count",
title = "Mentions of Topic On a Monthly Basis")+
scale_x_date(date_breaks = "months", date_labels="%b")+
theme(text = element_text(size=8)) +
theme(axis.text.x = element_text(angle = 45))+
facet_wrap(Topics ~ ., scales = "free")
> ggplot(subset(dat, Topics %in% c(3, 4, 5, 7, 8, 9)),
aes(x = date,
fill = topic)) +
geom_freqpoly(binwidth = 3) +
labs(x = "",
y = "Topic Count",
title = "Mentions of Topic On a Monthly Basis")+
scale_x_date(date_breaks = "months", date_labels="%b")+
theme(text = element_text(size=9)) +
theme(axis.text.x = element_text(angle = 45))+
facet_wrap(Topics ~ ., scales = "free")
However, when I try to subset the data, I get an error that says:
Fejl: Faceting variables must have at least one value
Does anybody know what the issue is?
I hope this makes sense.
The full code is down below.
article.data <- article.data[!is.na(article.data$fulltext), ]
## Get date
article.data$date <- as.Date(article.data$date, "%Y-%m-%d")
#all of 2018
dat <- article.data[article.data$date > as.Date("2018-01-01", "%Y-%m-%d") &
article.data$date < as.Date("2018-12-01", "%Y-%m-%d"), ]
## 'tokenize' fulltext
quanteda_options("language_stemmer" = "danish")
texts <- gsub(":", " ", dat$fulltext, fixed = T)
texts <- tokens(texts, what = "word",
remove_numbers = T,
remove_punct = T,
remove_symbols = T,
remove_separators = T,
remove_hyphens = T,
remove_url = T,
verbose = T)
texts <- tokens_tolower(texts)
texts <- tokens_remove(texts, stopwords("danish"))
texts <- tokens_wordstem(texts)
texts <- tokens_remove(texts, stopwords("danish"))
# get actual dfm from tokens
txt.mat <- dfm(texts)
#remove frequent words with no substance
txt.mat <- txt.mat %>% dfm_remove(c("ad",
"af","aldrig","alene","alle",
"allerede","alligevel","alt",
"altid","anden","andet","andre",
"at","bag","bare", "bedre", "begge","bl.a.",
"blandt", "blev", "blevet", "blive","bliver",
"burde", "bør","ca.", "com", "da",
"dag", "dansk", "danske", "de",
"dem", "den", "denne","dens",
"der","derefter","deres","derfor",
"derfra","deri","dermed","derpå",
"derved","det","dette","dig",
"din","dine","disse","dit",
"dog","du","efter","egen",
"ej","eller","ellers","en",
"end","endnu","ene","eneste","enhver","ens",
"enten","er","et","f.eks.","far","fem",
"fik","fire","flere","flest",
"fleste","for", "foran",
"fordi","forrige","fra", "fx",
"få","får","før","først",
"gennem","gjorde","gjort","god",
"godt","gør","gøre","gørende",
"ham","han","hans","har",
"havde","have","hej","hel",
"heller","helt","hen","hende",
"hendes","henover","her",
"herefter","heri","hermed",
"herpå","hos","hun","hvad",
"hvem","hver","hvilke","hvilken",
"hvilkes","hvis",
"hvor", "hvordan","hvorefter","hvorfor",
"hvorfra","hvorhen","hvori","hvorimod",
"hvornår","hvorved","i", "ifølge", "igen",
"igennem","ikke","imellem","imens",
"imod","ind","indtil","ingen",
"intet","ja","jeg","jer","jeres",
"jo","kan","kom","komme",
"kommer", "kroner", "kun","kunne","lad",
"langs", "lang", "langt", "lav","lave","lavet",
"lidt","lige","ligesom","lille",
"længere","man","mand","mange",
"med","meget","mellem","men", "mener",
"mens","mere","mest","mig",
"min","mindre","mindst","mine",
"mit","mod","må","måske",
"ned","nej","nemlig","ni",
"nogen","nogensinde","noget",
"nogle","nok","nu","ny", "nye",
"nyt","når","nær","næste",
"næsten","og","også","okay",
"om","omkring","op","os",
"otte","over","overalt","pga.", "partier",
"partiets", "partiers", "politiske",
"procent", "på", "ritzau", "samme",
"sammen","se","seks","selv","selvom",
"senere","ser","ses","siden","sig",
"sige", "siger", "sin","sine","sit",
"skal","skulle","som","stadig",
"stor","store","synes","syntes",
"syv","så","sådan","således",
"tag","tage","temmelig","thi",
"ti","tidligere","til","tilbage",
"tit","to","tre","ud","uden",
"udover","under","undtagen","var",
"ved","vi","via","vil","ville", "viser",
"vor","vore","vores","vær","være",
"været","øvrigt","facebook","http", "https",
"www","millioner", "frem", "lars", "lars_løkke",
"rasmussen", "løkke_rasmussen", "statsminister", "politik",
"formand", "partiet", "år", "tid", "and", "fler",
"sid", "regeringen", "giv", "politisk", "folketing", "mer",
"ifølg"))
############################################################
## FEATURE SELECTION
############################################################
# check out top-appearing features in dfm
topfeatures(txt.mat)
# keep features (words) appearing in >2 documents
txt.mat <- dfm_trim(txt.mat, min_termfreq = 4)
# filter out one-character words
txt.mat <- txt.mat[, str_length(colnames(txt.mat)) > 2]
# filter out some html trash features
#txt.mat <- txt.mat[, !grepl("[[:digit:]]+px", colnames(txt.mat))]
#txt.mat <- txt.mat[, !grepl(".", colnames(txt.mat), fixed = T)]
#txt.mat <- txt.mat[, !grepl("_", colnames(txt.mat), fixed = T)]
#txt.mat <- txt.mat[, !grepl("#", colnames(txt.mat), fixed = T)]
#txt.mat <- txt.mat[, !grepl("#", colnames(txt.mat), fixed = T)]
############################################################
## SELECT FEATURES BY TF-IDF
############################################################
# Create tf_idf-weighted dfm
ti <- dfm_tfidf(txt.mat)
# Select from main dfm using its top features
txt.mat <- dfm_keep(txt.mat, names(topfeatures(ti, n = 1000)))
############################################################
## RUN TOPIC MODEL
############################################################
# convert quanteda dfm to tm 'dtm'
dtm <- convert(txt.mat, to = "topicmodels")
# run lda with 8 topics
lda <- LDA(dtm, k = 8)
# review terms by topic
terms(lda, 10)
############################################################
## LOOK FOR 'OPTIMAL' k
############################################################
# randomly sample test data
set.seed(61218)
select <- sample(1:nrow(dtm), size = 100)
test <- dtm[select, ]
train <- dtm[!(1:nrow(dtm) %in% select), ]
n.tops <- 3:14
metrics <- data.frame(topics = n.tops,
perplexity = NA)
for(i in n.tops) { # NB: takes awhile to run
print(i)
est <- LDA(train, k = i)
metrics[(i - 1), "perplexity"] <- perplexity(est, newdata = test)
}
save(metrics, file = "lda_perplexity2018.RData")
qplot(data = metrics, x = topics, y = perplexity, geom = "line",
xlab = "Number of topics",
ylab = "Perplexity on test data") + theme_bw()
#We found that 8 topics was one of those of lowest perplexity but
#also the ones which made the most sense
############################################################
## RERUN WITH BETTER CHOICE OF k
############################################################
# run lda with 10 topics
lda <- LDA(dtm, k = 10)
save(lda, file = "dr_ft_keep2018.RData")
# examine output
terms(lda, 20)
# put topics into original data
dat$topic <- topics(lda)
# add labels
#dat$date <- factor(dat$date,
#levels = 1:12,
#labels = c("januar","februar", "marts","april", "maj", "juni", "juli", "august", "september", "oktober", "november", "decemeber"))
dat$Topics <- factor(dat$topic,
levels = 1:10,
labels = c("Topc 1", "Topic 2", "Integration", "Taxation", "Burka Prohibition",
"Topic 6", "Justice", "Foreign Affairs", "Housing", "Topic 10"))
# frequency
qplot(data = dat, x = Topics,
geom = "bar", xlab = "",
ylab = "Topic Frequency", fill=Topics, main = "Figure 1: Main Topics in 2018 - DR") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90))
#Make visualization showing topics over time
ggplot(data = dat,
aes(x = date,
fill = Topics[1])) +
geom_freqpoly(binwidth = 30) +
facet_wrap(Topics ~ ., scales = "free")+
theme_classic() +
scale_x_date(breaks = as.Date(c( "2018-02-01", "2018-04-01", "2018-06-01", "2018-08-01", "2018-10-01", "2018-12-01", date_labels="%B"))) +
theme(axis.text.x = element_text(angle = 90))
ggplot(data = dat,
aes(x = date,
fill = Topics)) +
geom_freqpoly(binwidth = 3) +
labs(x = "",
y = "Topic Count",
title = "Mentions of Topic On a Monthly Basis")+
scale_x_date(date_breaks = "months", date_labels="%b")+
theme(text = element_text(size=8)) +
theme(axis.text.x = element_text(angle = 45))+
facet_wrap(Topics ~ ., scales = "free")
It's best practice on this forum to make your question reproducible, so that others can try it and test their solutions to confirm they work. It's also good if you can make it minimal, both to respect potential answerers' time and to help clarify your own understanding of the problem.
How to make a great R reproducible example
In this case, the error message suggests that your subsetting is removing all your data, which breaks the faceting. It can't plot any facets if the faceting variable has no values.
It looks like dat$Topics is a factor, but your loop is referring to Topics like they're numeric with Topics %in% c(3, 4, 5, 7, 8, 9). For example, I could define a factor vector with the same levels as your Topics variable:
Topics <- factor(1:10, levels = 1:10,
labels = c("Topc 1", "Topic 2", "Integration", "Taxation", "Burka Prohibition",
"Topic 6", "Justice", "Foreign Affairs", "Housing", "Topic 10"))
Compare the output of these three lines:
Topics %in% c(1, 2)
# [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
as.numeric(Topics) %in% c(1, 2)
# [1] TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
Topics %in% c("Topc 1", "Topic 2")
# [1] TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
In the top case, none of the data matches the test, so using that to subset the data would give you an empty data set and seems like a plausible cause for the error you got.
To refer to the Topics by their underlying level, we can refer to as.numeric(Topics) %in% c(1, 2). If we want to refer to the Topics by their labels, I could use Topics %in% c("Topc 1", "Topic 2").
Since I don't have your data, I can't confirm this exact syntax will work for you, but I hope something along these lines will.
For more on how to work with factors in R, I recommend: https://r4ds.had.co.nz/factors.html

Error when using Unicode characters to display mathematical operators in a mosaic plot

For example, this works:
library("vcd")
library("vcdExtra")
ppp1 <- Arthritis[,-1]
ppp1 <- ppp1 [,-3]
colnames(ppp1)[1] <- "Dose"
colnames(ppp1)[3] <- "Symptoms"
ppp1$Symptoms <- ordered(ppp1$Symptoms,
levels = c("None", "Some", "Marked"),
labels = c(">2", "1-2", "<1"))
ppp1$Dose <- ordered(ppp1$Dose,
levels = c("Placebo","Treated"),
labels = c("<= 1", ">=2"))
tab1 <- xtabs(~Symptoms+Sex+Dose, data=ppp1)
mosaic(tab1)
But this produces error:
ppp2<- Arthritis[,-1]
ppp2 <- ppp2 [,-3]
colnames(ppp2)[1] <- "Dose"
colnames(ppp2)[3] <- "Symptoms"
ppp2$Symptoms <- ordered(ppp2$Symptoms,
levels = c("None", "Some", "Marked"),
labels = c(">2", "1-2", "<1"))
ppp2$Dose <- ordered(ppp2$Dose,
levels = c("Placebo","Treated"),
labels = c("\u2264 1", "\u2265 2"))
tab2 <- xtabs(~Symptoms+Sex+Dose, data=ppp2)
mosaic(tab2)
Error in grid.Call.graphics(C_downviewport, name$name, strict) :
Viewport 'cell:Symptoms=<1,Sex=Male,Dose=≥ 2' was not found
Any ideas would be much appreciated.
Here is a solution based on the package ggmosaic.
Important: ggmosaic works fine with the CRAN version of ggplot2 which can be installed using install.packages("ggplot2")
library(vcd)
library(vcdExtra)
ppp2 <- Arthritis[,-c(1,4)]
names(ppp2) <- c("Dose", "Sex", "Symptoms")
ppp2$Symptoms <- ordered(ppp2$Symptoms, levels=c("None","Some","Marked"),
labels=c(">2", "1-2", "<1"))
ppp2$Dose <- ordered(ppp2$Dose, levels = c("Placebo","Treated"),
labels = c("<= 1", ">= 2"))
# Reverse the order of levels for Dose and Symptoms
ppp2$Symptoms <- factor(ppp2$Symptoms, levels=rev(levels(ppp2$Symptoms)), order=T)
ppp2$Dose <- factor(ppp2$Dose, levels=rev(levels(ppp2$Dose)), order=T)
# Plot mosaic using geom_mosaic from ggmosaic
library(ggplot2)
p <- ggplot(data=ppp2) +
geom_mosaic(aes(x=product(Dose, Sex, Symptoms), fill=Sex), offset=.03) +
coord_flip() + xlab("Symptoms; Dose")
# Get grobs
gt <- ggplot_gtable(ggplot_build(p))
gt$layout$clip[gt$layout$name == "panel"] <- "off"
# Modify ticks labels on y-axis
gt$grobs[[3]]$children[[2]]$grobs[[1]]$children[[1]]$label <-
c("\u2265 2; <1", "\u2264 1; <1",
"\u2265 2; 1-2", "\u2264 1; 1-2",
"\u2265 2; >2", "\u2264 1; >2")
# Plot the mosaic plot
library(grid)
grid.draw(gt)

Function for label variable before plotting in R

I hope, this question is not too easy for this forum (actually, I'm almost a bit embarrassed to ask this question here, but I'm struggeling with this small issue the whole day...)
I have dataframes look like the following:
df <- data.frame(runif(4),
c("po", "pr", "po", "pr"),
c("Control 1","Control 1", "Treatment 1", "Treatment 1"))
names(df) <- list("values", "test_type", "group")
Now, I want easliy re-label the variables "test_type" and "group" for the plot afterwards. (it's nicer to read "pretest" instead of "pr" in a presentation :-) )
I could do it manually with:
df$test_type <- factor(df$test_type,
levels = c("pr", "po"),
labels = c("pretest", "posttest"))
df$group <- factor(df$group,
levels = c("Control 1", "Treatment 1"),
labels = c("control", "EST"))
In this case, I would have to repeat this for a lot more dataframes, which lead me to write a function:
var_label <- function(df, test, groups){
# Create labels
df$test_type <- factor(df$test,
levels = c("pr", "po"),
labels = c("pretest", "posttest"))
df$group <- factor(df$groups,
levels = c("Control 1", "Treatment 1"),
labels = c("control", "EST"))
return(list(df$test_type, df$group))
}
Unfortunately, this doesn't work. I tried a lot slight different versions and also different command from the Hmisc package, but none of these worked. I know, I can solve this problem in another way, but I try to write more efficient and shorter codes and would be really interested, what I have to change to make this function work. Or even better do you have a suggestion for a more efficient way?
Thank you a lot in advance!!
As I mentioned above, I think forcats::fct_relabel() is what you want here, along with dplyr::mutate_at(). Assuming that your relabeling needs are no more complex than what has been outlined in your question, the following should get you what you appear to be looking for.
####BEGIN YOUR DATAFRAME CREATION####
df <- data.frame(runif(4),
c("po", "pr", "po", "pr"),
c("Control 1","Control 1", "Treatment 1", "Treatment 1"))
names(df) <- list("values", "test_type", "group")
#####END YOUR DATAFRAME CREATION#####
# Load dplyr and forcats
library(dplyr)
library(forcats)
# create a map of labels and levels based on your implied logic
# the setup is label = level
label_map <- c("pretest" = "pr"
,"posttest" = "po"
,"control" = "Control 1"
,"EST" = "Treatment 1")
# create a function to exploit the label map
fct_label_select <- function(x, map) {
names(which(map == x))
}
# create a function which is responsive to a character vector
# as required by fct_relabel
fct_relabeler <- function(x, map) {
unlist(lapply(x, fct_label_select, map = map))
}
fct_relabeler(levels(df$test_type), map = label_map)
# function to meet your apparent needs
var_label <- function(df, cols, map){
df %>%
mutate_at(.vars = cols
,.fun = fct_relabeler
,map = map)
}
var_label(df = df, cols = c("test_type", "group"), map = label_map)
# values test_type group
# 1 0.05159681 posttest control
# 2 0.89050323 pretest control
# 3 0.42988881 posttest EST
# 4 0.32012811 pretest EST

Visualization of ranked likert-scale using advanced dot plot

I wish to produce a graph presenting responses to a number of agreement statements. The graph should allow for comparisons across the different groups of respondents and statement items.
I basically draw on a R code provided by Kastellec & Leoni (Figure 5; http://tables2graphs.com/doku.php?id=03_descriptive_statistics#figure_5).
In contrast to them, I would like to have the x-axis ranging from -5 to 5 and the table should be in the format of 2x2.
This code should produce sample data similar to the one I used:
mydata<-expand.grid(
col1=c('item1', 'item2', 'item3', 'item4'),
col2=c('0', '1', '3', '4'),
col3=c('T1', 'T2', 'C1', 'C2'))
mydata$col4=sapply(rnorm(64,0,1), function(x) {round(x,2)})
Note:
- col1: statement item („variable“ in the data set of K&L)
- col2: answer category ("period" in K&L)
- col3: group of respondent ("legislature" in K&L)
- col4: proportion ("proportion" in K&L)
And this is the code:
library(lattice)
library(car)
ltheme <- canonical.theme(color = FALSE)
ltheme$strip.background$col <- "lightgrey"
lattice.options(default.theme = ltheme)
mydata$col2<-factor(mydata$col2,
levels=c(0,1,3,4),
labels=c("strongly disagree", "disagree", "agree", "strongly agree"), ordered=TRUE)
mydata$col3<-factor(mydata$col3,
levels=c("T1", "C1", "T2", "C2"), ordered=TRUE)
levels(mydata$col3)<-c("treatment group 1", "control group 1", "treatment group 2", "control group 2")
mydata$col1<-factor(mydata$col1,
levels=c("item1", "item2", "item3", "item4"),
labels=c("item 4", "item 3", "item 2", "item 1"), ordered=TRUE)
prop.vec<-c(-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5)
plot<-dotplot(mydata$col1~mydata$col4|mydata$col3, xlab="levels of agreement",
data=mydata,
groups=mydata$col2,
layout=c(2,2),
scales=list(cex=0.65,
x=list(at=prop.vec),
alternating=3),
par.strip.text=list(lines=2.5,cex=0.65),
panel=function(...){
panel.abline(v=prop.vec, col="lightgrey")
panel.abline(h=1:11, col="lightgrey", lty=2)
panel.xyplot(...)},
as.table=TRUE,
par.settings=simpleTheme(pch=c(19,1,2,17), cex=0.7),
auto.key=list(space="bottom", column=4, cex=0.65)
)
trellis.device(file="figure.pdf", device="pdf", color=FALSE, width=6, height=8)
print(plot, newpage=FALSE)
dev.off()
Problem solved - I corrected the code so that it produces a nice plot presenting responses to statement agreements, as shown above. Feel free to further improve it, if you wish. Many many thanks for all your help!

Resources