Change each histogram color in chart - r

I have this histogram separated in five categories depending the age. The problem is that I cannot change the color depending the category. I tried to use the marker function with an array filled with the colors I want, but it didn't work as expected:
As you can see, the colors are all bugged.
This is what I tried:
less20 <- subset(dataset, dataset$EDAD <20)
between20n40 <- subset(dataset, dataset$EDAD >=20 & dataset$EDAD <40)
between40n60 <- subset(dataset, dataset$EDAD >=40 & dataset$EDAD <60)
between60n80 <- subset(dataset, dataset$EDAD >=60 & dataset$EDAD <80)
more80 <- subset(dataset, dataset$EDAD >=80)
plot_ly(alpha = 0.7, orientation = 'h', marker = list(color = c('rgba(31,119,180,1)','rgba(105,122,125,1)','rgba(183,124,67,1)', 'rgba(243,127,23,1)','rgba(255,127,14,1)'))) %>%
add_histogram(y = more80$EDAD, name = "More than 80") %>%
add_histogram(y = between60n80$EDAD, name = "Between 60 and 79") %>%
add_histogram(y = between40n60$EDAD, name = "Between 40 and 59") %>%
add_histogram(y = between20n40$EDAD, name = "Between 20 and 39") %>%
add_histogram(y = less20$EDAD, name = "Less than 20") %>%
layout(barmode = "group", title = "",orientation="h")
The correct color order is the next one:
However, I want to change those colors.
Any recomendations? Thanks in advance :)

I think it might be easier if you put the values along with their corresponding ranges inside a dataframe and color the plot using these values and ranges. This is my solution to this using ggplot2. You can define Values with your original dataset for the histogram and should obtain a similar result to yours. This solution uses a 1000 normally distributed sample with SD = 30 and MEAN = 70 in order to produce the plot.
# Import ggplot2
library("ggplot2")
# Obtain sample values for histogram
set.seed(1234)
Values = rnorm(n = 1000, mean = 70, sd = 30)
Range = c()
# Get ranges for each value in data
for(i in 1:length(Values)){
if(Values[i] >= 80){
Range[i] = "More than 80"
} else if (Values[i] < 80 & Values[i] >= 60){
Range[i] = "Between 60 and 79"
} else if (Values[i] < 60 & Values[i] >= 40){
Range[i] = "Between 40 and 59"
} else if (Values[i] < 40 & Values[i] >= 20){
Range[i] = "Between 20 and 39"
} else {
Range[i] = "Less than 20"
}
}
# Put all data inside a data frame
plot_dat = data.frame(Values, Range)
# Order plot labels
plot_dat$Range <- factor(plot_dat$Range, levels = c("More than 80", "Between 60 and 79", "Between 40 and 59", "Between 20 and 39", "Less than 20"))
# Produce plot
ggplot(plot_dat, aes(x=Values, fill=Range)) + geom_histogram(binwidth = 5) + coord_flip() + ggtitle("Sample Histogram")
Output

I just had to add the marker function inside add_histogram. That way, I only change the color of each histogram added.
plot_ly(alpha = 0.7, orientation = 'h') %>%
add_histogram(y = more80$EDAD, name = "More than 80", marker = list(color ='rgba(31,119,180,1)')) %>%
add_histogram(y = between60n80$EDAD, name = "Between 60 and 79", marker = list(color ='rgba(105,122,125,1)')) %>%
Thank you for your answers!

Related

How do I put a reactive subset of data into renderplot?

I am new to Shiny and have been trying to learn in my spare time. I have a dataframe of Fantasy Football statistics that I am trying to plot based on selectinput()'s and sliderbar()'s. I used renderprint() to ensure my inputs and correct when the slider's or selects are changed. I have the sliders and select inputs in a reactive() where I am simply subsetting the data. I am then feeding the reactive function into my ggplot() as the data. When trying to plot these graphs I am getting an "Error: object 'columnName' not found", but for only some columns. Please help me find the source of this issue.
Best, Davis
Here is the code:
######################################################################
#------------------------Load libraries------------------------------#
######################################################################
library(shiny)
library(bslib)
library(shinydashboardPlus)
library(ggplot2)
library(shinyWidgets)
######################################################################
#------------------------Data import and Clean-----------------------#
######################################################################
FantFootDF <- read_excel("~/Desktop/Fantasy/2021 Fantasy Stats.xltx")
FantFootDF <- as.data.frame(FantFootDF)
colnames(FantFootDF) <- paste(FantFootDF[1,])
FantFootDF <- FantFootDF[-1,]
colnames(FantFootDF) <- c("Rk","Player","Team","FantPos","Age",
"G","GS","Cmp","PAtt","PYds","PTD",
"Int","RuAtt","RuYds","RuYA","RuTD",
"Rec","ReYds","ReYA","ReTD","Fmb","FL",
"TTD","2PM","2PP","FantPt","PPR","DKPt",
"FDPt","VBD","PosRank","OvRank")
FantFootDF[!is.na(FantFootDF$FantPos),]
NumColumns <- c("Rk","Age",
"G","GS","Cmp","PAtt","PYds","PTD",
"Int","RuAtt","RuYds","RuYA","RuTD",
"Rec","ReYds","ReYA","ReTD","Fmb","FL",
"TTD","2PM","2PP","FantPt","PPR","DKPt",
"FDPt","VBD","PosRank","OvRank")
FantFootDF[NumColumns] <- lapply(FantFootDF[NumColumns], as.numeric)
FantFootDF[is.na(FantFootDF)] = 0
FinalDF <- FantFootDF
######################################################################
#------------------------User Interface------------------------------#
######################################################################
# Define UI for application that draws a histogram
ui <- fluidPage(
# Application title
titlePanel("Fantasy Football GUI"),
#Sidebar
sidebarLayout(
sidebarPanel(
pickerInput("position",
"Position(s)",
choices = unique(FinalDF$FantPos),
options = list(`actions-box` = TRUE),
multiple = T),
pickerInput("playername",
"Player Name",
choices = unique(FinalDF$Player),
options = list(`actions-box` = TRUE),
multiple = T),
pickerInput("team",
"Team",
choices = unique(FinalDF$Team),
options = list(`actions-box` = TRUE),
multiple = T),
sliderInput("age",
"Age",
min = min(FinalDF$Age),
max = max(FinalDF$Age),
value = c(min(FinalDF$Age), max(FinalDF$Age))),
sliderInput("completions",
"Completions",
min = min(FinalDF$Cmp),
max = max(FinalDF$Cmp),
value = c(min(FinalDF$Cmp), max(FinalDF$Cmp))),
sliderInput("Pattempts",
"Passing Attempts",
min = min(FinalDF$PAtt),
max = max(FinalDF$PAtt),
value = c(min(FinalDF$PAtt), max(FinalDF$PAtt))),
sliderInput("Pyards",
"Passing Yards",
min = min(FinalDF$PYds),
max = max(FinalDF$PYds),
value = c(min(FinalDF$PYds), max(FinalDF$PYds))),
sliderInput("Ptds",
"Passing TD's",
min = min(FinalDF$PTD),
max = max(FinalDF$PTD),
value = c(min(FinalDF$PTD), max(FinalDF$PTD))),
sliderInput("RuAttempts",
"Rushing Attempts",
min = min(FinalDF$RuAtt),
max = max(FinalDF$RuAtt),
value = c(min(FinalDF$RuAtt), max(FinalDF$RuAtt))),
sliderInput("RuYards",
"Rushing Yards",
min = min(FinalDF$RuYds),
max = max(FinalDF$RuYds),
value = c(min(FinalDF$RuYds), max(FinalDF$RuYds))),
sliderInput("RuYperA",
"Yards per Rushing Attempt",
min = min(FinalDF$RuYA),
max = max(FinalDF$RuYA),
value = c(min(FinalDF$RuYA), max(FinalDF$RuYA))),
sliderInput("RuTDs",
"Rushing TD's",
min = min(FinalDF$RuTD),
max = max(FinalDF$RuTD),
value = c(min(FinalDF$RuTD), max(FinalDF$RuTD))),
sliderInput("rec",
"Receptions",
min = min(FinalDF$Rec),
max = max(FinalDF$Rec),
value = c(min(FinalDF$Rec), max(FinalDF$Rec))),
sliderInput("ReYards",
"Receiving Yards",
min = min(FinalDF$ReYds),
max = max(FinalDF$ReYds),
value = c(min(FinalDF$ReYds), max(FinalDF$ReYds))),
sliderInput("ReYperA",
"Yards per Reception",
min = min(FinalDF$ReYA),
max = max(FinalDF$ReYA),
value = c(min(FinalDF$ReYA), max(FinalDF$ReYA))),
sliderInput("ReTDs",
"Receiving TD's",
min = min(FinalDF$ReTD),
max = max(FinalDF$ReTD),
value = c(min(FinalDF$ReTD), max(FinalDF$ReTD))),
sliderInput("fumb",
"Fumbles",
min = min(FinalDF$Fmb),
max = max(FinalDF$Fmb),
value = c(min(FinalDF$Fmb), max(FinalDF$Fmb))),
sliderInput("ppr",
"1 PPR Total Points",
min = min(FinalDF$PPR),
max = max(FinalDF$PPR),
value = c(min(FinalDF$PPR), max(FinalDF$PPR)))
),
#Main Panel
mainPanel(
selectInput("plottype",
"Which Plot",
choices = c("PPR by Player",
"PPR by Team",
"PPR by Age")),
plotOutput("plot1"),
tableOutput("table"),
verbatimTextOutput("minmax")
)
)
)
######################################################################
#--------------------------------Server------------------------------#
######################################################################
server <- function(input, output) {
#Reactive to subset data and reduce size in graps
df <- reactive({
a = subset(FinalDF,
FantPos = input$position,
Player = input$playername,
Team = input$team,
Age >= input$age[1] & Age <= input$age[2],
Cmp >= input$completions[1] & Cmp <= input$completions[2],
PAtt >= input$Pattempts[1] & PAtt <= input$Pattempts[2],
PYds >= input$Pyards[1] & PYds <= input$Pyards[2],
PTD >= input$Ptds[1] & PTD <= input$Ptds[2],
RuYA >= input$RuYperA[1] & RuYA <= input$RuYperA[2],
RuAtt >= input$RuAttempts[1] & RuAtt <= input$RuAttempts[2],
RuYds >= input$RuYards[1] & RuYds <= input$RuYards[2],
RuTD >= input$RuTDs[1] & RuTD <= input$RuTDs[2],
Rec >= input$rec[1] & Rec <= input$rec[2],
ReYds >= input$ReYards[1] & ReYds <= input$ReYards[2],
ReYA >= input$ReYperA[1] & ReYA <= input$ReYperA[2],
ReTD >= input$ReTDs[1] & ReTD <= input$ReTDs[2],
Fmb >= input$fumb[1] & Fmb <= input$fumb[2],
PPR >= input$ppr[1] & PPR <= input$ppr[2]
)
return(a)
})
#Plot
output$plot1 <- renderPlot({
# generate bins based on input$bins from ui.R
if(input$plottype == "PPR by Player"){
ggplot(data = df()) +
geom_point(data = df(),
aes(x = Player,
y = PPR,
color = FantPos)) +
ggtitle("PPR Points") +
xlab("Player") +
ylab("PPR Points")
}
else if(input$plottype == "PPR by Team"){
ggplot(data = df()) +
geom_point(data = df(),
aes(x = Team,
y = PPR,
color = FantPos)) +
ggtitle("PPR Points") +
xlab("Player") +
ylab("PPR Points")
}
else if(input$plottype == "PPR by Age"){
ggplot(data = df()) +
geom_point(data = df(),
aes(x = Age,
y = PPR,
color = FantPos)) +
ggtitle("PPR Points") +
xlab("Player") +
ylab("PPR Points")
}
})
#Checking inputs
output$minmax <- renderText(
paste("age", input$age[1], input$age[2], "\ncompletions =",
input$completions[1],input$completions[2],"\nPattempts =",
input$Pattempts[1],input$Pattempts[2],"\nPyards =",
input$Pyards[1],input$Pyards[2],"\nPtds =",
input$Ptds[1],input$Ptds[2],"\nRuYperA =",
input$RuYperA[1],input$RuYperA[2],"\nRuAttempts =",
input$RuAttempts[1],input$RuAttempts[2],"\nRuYards =",
input$RuYards[1],input$RuYards[2],"\nRuTDs =",
input$RuTDs[1],input$RuTDs[2],"\nrec =",
input$rec[1],input$rec[2],"\nReYards =",
input$ReYards[1],input$ReYards[2],"\nReYperA =",
input$ReYperA[1],input$ReYperA[2],"\nReTDs =",
input$ReTDs[1],input$ReTDs[2],"\nfumb =",
input$fumb[1],input$fumb[2],"\nppr =",
input$ppr[1], input$ppr[2])
)
}
# Run the application
shinyApp(ui = ui, server = server)
My apologies. I will be sure to include a reproducible example next time. I replicated the code by making a smaller DataFrame. The replicated code and it worked, so I had another look at my original data. There was a column that was NA at the end. When renaming the columns I forgot the index at the end. I also changed from subset to filter. Not sure why the last column with no name messed everything up, but the shiny ran how I wanted after those changes.

Not enough Y-observations for t-test with spatial RNA-sequencing data in R?

I am trying to perform differential gene expression using a t-test on spatial RNA-sequencing data. There are a couple of different annotations/groups indicating different structures (ANN2 in code): AML area, Taggregate, immatureTLS, matureTLS, and microcluster. ANN1 relates to one of the 3 different patients.
The error I get:
Error in h(simpleError(msg, call)) :
error in evaluating the argument 'x' in selecting a method for function 'as.data.frame': not enough 'y' observations
I don’t understand how my data has not enough y-observations, and how I could overcome this error. I have searched google and other blogs, but I wasn’t able to resolve it.
The code I use (all the code I used before is shown on this website: https://bioconductor.org/packages/devel/workflows/vignettes/GeoMxWorkflows/inst/doc/GeomxTools_RNA-NGS_Analysis.html )
7.1 Differential Expression
plots<-list()
tables<-list()
labels<-list()
test<-"ttest"
mtc<-"BY"
#options: "holm" "hochberg" "hommel" "bonferroni" "BH" "BY" "fdr"
counter=1
comps_df<-data.frame(comp='',val='')
for (active_group1 in unique(ann$segment)) {
for (active_group2 in unique(ann$segment)) {
#supress reduncant compares
if(active_group1==active_group2) {next}
comp<-paste(sort(c(active_group1,active_group2)),collapse = "_")
if(comp %in% comps_df$comp) {next}
temp_df<-data.frame(comp=comp ,val=1)
comps_df<-rbind(comps_df,temp_df)
labels[[counter]]<-paste(active_group1," vs ", active_group2)
group1<-log_q[,rownames(ann)[ann$segment==active_group1]]
group2<-log_q[,rownames(ann)[ann$segment==active_group2]]
#run t_tests
results<-as.data.frame ( apply(log_q, 1, function(x) t.test(x[colnames(group1)],x[colnames(group2)])$p.value) )
colnames(results)<-"raw_p_value"
#multiple_testing_correction
adj_p_value<- p.adjust(results$raw_p_value,method=mtc)
results<-cbind(results,adj_p_value)
#calc_fdr
FDR<- p.adjust(results$raw_p_value,method="fdr")
results<-cbind(results,FDR)
#fold_changes
#as base data is already log transformed, means need to be subtracted to get FC in log space
fchanges<-as.data.frame( apply(log_q, 1, function(x) (mean(x[colnames(group1)]) - mean(x[colnames(group2)]) ) ) )
colnames(fchanges)<-"FC"
#paste("FC",active_group1," / ",active_group2)
results<-cbind(results,fchanges)
#add genenames
results$Gene<-rownames(results)
#set categories based on P-value & FDR for plotting
results$Color <- "NS or FC < 0.5"
results$Color[results$adj_p_value < 0.05] <- "P < 0.05"
results$Color[results$FDR < 0.05] <- "FDR < 0.05"
results$Color[results$FDR < 0.001] <- "FDR < 0.001"
results$Color[abs(results$FC) < 1] <- "NS or FC < 1"
results$Color <- factor(results$Color,
levels = c("NS or FC < 1", "P < 0.05", "FDR < 0.05", "FDR < 0.001"))
#vulcanoplot
# pick top genes for either side of volcano to label
# order genes for convenience:
results$invert_P <- (-log10(results$adj_p_value)) * sign(results$FC)
top_g <- c()
top_g <- c(top_g,
results[ind, 'Gene'][
order(results[ind, 'invert_P'], decreasing = TRUE)[1:15]],
results[ind, 'Gene'][order(results[ind, 'invert_P'], decreasing = FALSE)[1:15]])
top_g<- unique(top_g)
results <- results[, -1*ncol(results)] # remove invert_P from matrix
# Graph results
plots[[counter]]<- ggplot(results,
aes(x = FC, y = -log10(adj_p_value),
color = Color, label = Gene)) +
geom_vline(xintercept = c(1, -1), lty = "dashed") +
geom_hline(yintercept = -log10(0.05), lty = "dashed") +
geom_point() +
labs(x = paste("Enriched in", active_group2," <- log2(FC) -> Enriched in", active_group1),
y = "Significance, -log10(P)",
color = "Significance") +
scale_color_manual(values = c(`FDR < 0.001` = "dodgerblue",
`FDR < 0.05` = "lightblue",
`P < 0.05` = "orange2",
`NS or FC < 0.5` = "gray"),
guide = guide_legend(override.aes = list(size = 4))) +
scale_y_continuous(expand = expansion(mult = c(0,0.05))) +
geom_text_repel(data = subset(results, FDR<0.001 & (-1>FC| FC>1)),
point.padding = 0.15, color = "black", size=3.5,
min.segment.length = .1, box.padding = .2, lwd = 2,
max.overlaps = 50) +
theme_bw(base_size = 20) +
theme(legend.position = "bottom") +
ggtitle(paste(test, mtc,"multitest corr"))
#store tables for display later
tables[[counter]]<-results
counter = counter+1
#datatable(subset(results, Gene %in% GOI), rownames=FALSE,caption = paste("DE results ", active_group1," vs ", active_group2))
}
}
grid.arrange(grobs=plots,ncol=2)
#strangly does not appear in html output??
for (c in (2:counter-1)) {
#Gene %in% GOI
print(datatable( subset(tables[[c]], Color == "FDR < 0.001" ),
rownames=FALSE,
extensions = 'Buttons', options = list (
dom = 'Bftrip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')
),
caption = paste("DE results ", labels[[1]]),filter='top') %>% formatRound(columns=c("raw_p_value","adj_p_value","FDR","FC"), digits=3))
cat('\n\n<!-- -->\n\n')
}
[normalised data example][1]Data type which is used as input:
[1]: https://i.stack.imgur.com/Yt0DJ.png
Any help would be greatly appreciated! Thanks

R Shiny - Groups are not represented as specified in plot

I've grouped some Data about the titanic the following way:
priceMutate <- mutate(titanic, PriceGroup = ifelse (Fare < 51,
'0 - 50',
ifelse(Fare >=51 & Fare < 101,
'51-100',
ifelse(Fare >= 101 & Fare < 151,
'101-150',
ifelse(Fare >= 151 & Fare < 201,
'151-200',
ifelse(Fare >= 201 & Fare < 251,
'201-250',
ifelse(Fare >= 251 & Fare < 301,
'251-300',
ifelse(Fare >= 301 & Fare < 351,
'301-350',
ifelse(Fare >= 351 & Fare < 401,
'351-400',
ifelse(Fare >= 401 & Fare < 451,
'401-450',
ifelse(Fare > 450,
'451+','?')))))))))))
"Fare" is the price payed for a ticket for the titanic. I've chosen steps of 50$.
Now here is my problem:
I've made a plot that shows the chance of survival regarding the price of the tickets:
output$ex15 <- renderPlot({
ggplot(priceMutate,
aes(x = PriceGroup,
fill = Status)) +
geom_bar(position = "fill")+
ggtitle("Überlebenschancen nach Preis des Tickets (gruppiert)")+
theme(plot.title = element_text(size = 16))+
scale_fill_manual(values = c("grey24", "snow"))+
labs(y= "Anzahl")
})
However this plot mixes up the groups I made and does not show the "?" for the not-available data!
Can anyone see a problem/mistake that I've made?
Here is a link to my dataset: https://drive.google.com/file/d/1xsIfkv1464etX23O0J9y35CviK0mKYQl/view?usp=sharing
Thank you a lot :)
As already mentioned by #YBS in the comments at least for your example data there is no observation which will be assigned a "?" as all values are in the range 0 to 512 and there are no missings.
Concerning your second issue, as you recoded the Fare column as a character your PriceGroups will be ordered alphabetically by default. And alphabetically a string starting with a 4 like 451+ comes before a string starting with a 5 like 51-100. If you want the categories to be ordered you have to convert to a factor with the levels set according to your desired order. This for example could be achieved via the cut function which makes it easy to recode a numeric to intervals and which will automatically convert to a factor. If you do that often I also would suggest to have a look at the santoku package which makes it even easier set nice labels.
Finally, instead of using your data I created a minimal reproducible example by using some fake random example data to mimic your real data:
library(shiny)
library(tidyverse)
# Create fake example data
set.seed(123)
titanic <- data.frame(
PassengerId = 1:100,
Survived = sample(0:1, 100, replace = TRUE),
Fare = runif(100, 0, 512)
)
# Set breaks and labels
breaks <- c(0, seq(51, 451, 50), Inf)
labels <- paste(breaks[-length(breaks)], breaks[-1], sep = "-")
labels[length(labels)] <- "451+"
priceMutate <- titanic %>%
mutate(PriceGroup = cut(Fare, breaks = breaks, labels = labels, right = FALSE),
Status = recode(Survived, "0" = "Dead", "1" = "Survived"))
ui <- fluidPage(
plotOutput("ex15")
)
server <- function(input, output, session) {
output$ex15 <- renderPlot({
ggplot(priceMutate,
aes(x = PriceGroup,
fill = Status)) +
geom_bar(position = "fill")+
ggtitle("Überlebenschancen nach Preis des Tickets (gruppiert)")+
theme(plot.title = element_text(size = 16))+
scale_fill_manual(values = c("grey24", "snow"))+
labs(y= "Anzahl")
})
}
shinyApp(ui, server)
#>
#> Listening on http://127.0.0.1:8734

Formatting an ftable in R

I have the following 3 way table I created in R.
with(dataset, ftable(xtabs(count ~ dos + sex + edu)))
The output looks like
edu high low medium unknown
dos sex
five-to-ten-years female 247776 44916 127133 23793
male 225403 37858 147821 20383
five-years-or-less female 304851 58018 182152 33649
male 253977 55720 193621 28972
more-than-ten-years female 709303 452605 539403 165675
male 629162 309193 689299 121336
native-born female 1988476 1456792 2094297 502153
male 1411509 1197395 2790522 395953
unknown female 57974 75480 73204 593141
male 40176 57786 93108 605542
I want to rename the variables and format the table so that I can include it in a report. I know that I can use dnn to rename the variables, but are there any other recommendations to rename the variables? And to format the table (similar to using kable)?
You could convert the output to a text matrix using the following function, after which you can style with kable however you choose:
ftab_to_matrix <- function(ft)
{
row_vars <- attr(ft, "row.vars")
for(i in seq_along(row_vars)){
row_vars[[i]] <- c(names(row_vars[i]), row_vars[[i]])}
rowvar_widths <- sapply(row_vars, function(x) max(nchar(x))) + 1
col_vars <- attr(ft, "col.vars")
rowvar_widths <- c(1, cumsum(c(rowvar_widths, max(nchar(names(col_vars))))))
ft_text <- capture.output(print(ft))
row_cols <- sapply(seq_along(rowvar_widths)[-1], function(x)
substr(ft_text, rowvar_widths[x - 1], rowvar_widths[x]))
ft_text <- substr(ft_text, rowvar_widths[length(rowvar_widths)] + 2, 100)
ft_breaks <- c(1, cumsum(lapply(strsplit(ft_text[length(ft_text)], "\\d "),
function(x) nchar(x) + 2)[[1]]))
col_cols <- sapply(seq_along(ft_breaks)[-1], function(x)
substr(ft_text, ft_breaks[x - 1], ft_breaks[x]))
trimws(cbind(row_cols, col_cols))
}
So, for example, using my example data from your last question, you could do something like:
my_tab <- with(`3waydata`, ftable(xtabs(count ~ duration + sex + education)))
as_image(kable_styling(kable(ftab_to_df(my_tab))), file = "kable.png")
Might have been easier had you given the full picture when you asked your first question... You could use gt to make fancy tables for reports. This is an edited version more fully demonstrating some capabilities.
library(dplyr)
library(gt)
way3data <- data %>%
group_by(duration, education, sex) %>%
summarise(count = sum(number)) %>%
ungroup
# Reorder with select and Titlecase with stringr
longer <- tidyr::pivot_wider(way3data,
values_from = count,
names_from = "education") %>%
select(duration, sex, high, medium, low, unknown) %>%
rename_with(stringr::str_to_title)
# Demonstrating some of the features of gt
# obviously could have done some of this
# to the original dataframe
myresults <- longer %>%
group_by(Duration) %>%
gt(rowname_col = "Sex") %>%
row_group_order(
groups = c("native-born",
"more-than-ten-years",
"five-to-ten-years",
"five-years-or-less",
"unknown")
) %>%
tab_spanner(label = "Education",
columns = matches("High|Low|Medium|Unknown")) %>%
tab_stubhead(label = "Duration or something") %>%
tab_style(
style = cell_text(style = "oblique", weight = "bold"),
locations = cells_row_groups()) %>%
tab_style(
style = cell_text(align = "right", style = "italic", weight = "bold"),
locations = cells_column_labels(
columns = vars(High, Low, Medium, Unknown)
)) %>%
tab_style(
style = cell_text(align = "right", weight = "bold"),
locations = cells_stub()) %>%
tab_header(
title = "Fancy table of counts with Duration, Education and Gender") %>%
tab_source_note(md("More information is available at https://stackoverflow.com/questions/62284264."))
# myresults
# Can save in other formats including .rtf
myresults %>%
gtsave(
"tab_1.png", expand = 10
)
You can read about all the formatting choices here
Data compliments of Allan
set.seed(69)
data <- data.frame(education = sample(c("high","low","medium","unknown"), 600, T),
sex = rep(c("Male", "Female"), 300),
duration = sample(c("unknown", "native-born",
"five-years-or-less", "five-to-ten-years",
"more-than-ten-years"), 600, T),
number = rpois(600, 10))

R 3.2.1 incorrect mapping of color

This is based on R 3.2.1, reverse colors on map
I have two data points, one is more than 66%, which should be green, other is less than 33%, which should be red.
However, the less than 33% is orange.
Below is the code, which looks correct (but something is wrong)
sep <- read.csv("Out_SEP_assets_csv.csv")
Sub1 <- sep[grep("SEP.12", names(sep))]
sep$newCol <- 100*rowSums(Sub1)/rowSums(sep[4:7])
# create a new grouping variable
Percent_SEP12_Assets <- ifelse(sep[,8] <= 33, "Less than 33%", ifelse(sep[,8] >= 66, "More than 66%", "Between 33% and 66%"))
Percent_SEP12_Assets <- factor(Percent_SEP12_Assets,
levels = c("More than 66%", "Between 33% and 66%", "Less than 33%"))
# get the map
bbox <- make_bbox(sep$Longitude, sep$Latitude, f = 1)
map <- get_map(bbox)
# plot the map and use the grouping variable for the fill inside the aes
ggmap(map) +
geom_point(data=sep, aes(x = Longitude, y = Latitude, color=Percent_SEP12_Assets ), size=9, alpha=0.6) +
scale_color_manual(values=c("green","orange","red"))
The dput(sep) is
structure(list(School = structure(1:2, .Label = c("Out of City\\00L001",
"Out of City\\O308"), class = "factor"), Latitude = c(40.821367,
41.310426), Longitude = c(-73.488313, -73.837612), Windows.SEP.11 = c(4L,
69L), Mac.SEP.11 = 0:1, Windows.SEP.12 = c(3L, 26L), Mac.SEP.12 = c(16L,
1L), newCol = c(82.6086956521739, 27.8350515463918)), .Names = c("School",
"Latitude", "Longitude", "Windows.SEP.11", "Mac.SEP.11", "Windows.SEP.12",
"Mac.SEP.12", "newCol"), row.names = c(NA, -2L), class = "data.frame")
Output is this (incorrect circled in red) ........ How to fix?
Responses
Coordinates are correct, I am asking why is the point incorrectly colored. I thought this logic is correct
Percent_SEP12_Assets <- ifelse(sep[,8] <= 33, "Less than 33%", ifelse(sep[,8] >= 66, "More than 66%", "Between 33% and 66%"))
Updated code
I tried this per #bondeded user and resulting map is same as before
sep <- read.csv("Out_SEP_assets_csv.csv")
Sub1 <- sep[grep("SEP.12", names(sep))]
sep$newCol <- 100*rowSums(Sub1)/rowSums(sep[4:7])
# create a new grouping variable
sep$Percent_SEP12_Assets <- ifelse(sep[,8] <= 33, "Less than 33%", ifelse(sep[,8] >= 66, "More than 66%", "Between 33% and 66%"))
sep$Percent_SEP12_Assets <- factor(sep$Percent_SEP12_Assets,
levels = c("More than 66%", "Between 33% and 66%", "Less than 33%"))
# get the map
bbox <- make_bbox(sep$Longitude, sep$Latitude, f = 1)
map <- get_map(bbox)
# plot the map and use the grouping variable for the fill inside the aes
ggmap(map) +
geom_point(data=sep, aes(x = Longitude, y = Latitude, color=sep$Percent_SEP12_Assets ), size=9, alpha=0.6) +
scale_color_manual(values=c("green","orange","red"))
Actual CSV
Here is actual CSV, two rows
School Latitude Longitude Windows-SEP-11 Mac-SEP-11 Windows-SEP-12 Mac-SEP-12
Out of City\00L001 40.821367 -73.488313 4 0 3 16
Out of City\O308 41.310426 -73.837612 69 1 26 1
The problem is that by default ggplot2 drops unused levels from factors. There are two options:
Specify drop = FALSE
ggmap(map) +
geom_point(data=sep, aes(x = Longitude, y = Latitude, color=sep$Percent_SEP12_Assets ), size=9, alpha=0.6) +
scale_color_manual(values=c("green","orange","red"), drop = FALSE)
Specify the values for each level:
ggmap(map) +
geom_point(data=sep, aes(x = Longitude, y = Latitude, color=sep$Percent_SEP12_Assets ), size=9, alpha=0.6) +
scale_color_manual(values=c(`More than 66%` = "green", `Between 33% and 66%` = "orange", `Less than 33%` = "red"))
Clearly you could also do both.
Now I got what you meant. The problem is in you ifelse structure. Maybe this can help:
ifelse(sep[,8] <= 33, "Less than 33%", ifelse(sep[,8] >= 66, "More than 66%", "Between 33% and 66%"))
[1] "More than 66%" "Less than 33%"

Resources