Set each column to its own palette - r

I'm making a column chart of the amount of time I spend on various projects, each one for one of a range of "clients" (actually different "areas" of my job) using the excellent togglr package to download my tracked time data and ggplot2.
The code I'm using is this: (data dput(SO) output pasted below question)
library("ggplot2")
library("RColorBrewer")
theme_set(theme_bw())
# Number of fill colours to generate (one per area_project value in SO).
colourCount <- 48 # nrow(projects)
# Interpolates the 12-colour "Paired" Brewer palette to any requested length.
getPalette <- colorRampPalette(brewer.pal(12, "Paired"))
# Stacked column chart: one column per client, one box per area_project.
ggplot(data = SO, aes(x = client, y = time_spent)) +
  geom_col(aes(fill = area_project), colour = "black") +
  scale_fill_manual(values = getPalette(colourCount)) +
  theme(legend.position = "right") +
  guides(fill = guide_legend(ncol = 2)) +
  # Dynamic title alternative:
  #   paste("From", date(min(df$start)), " to", date(max(df$stop)))
  # The `+` must come before any trailing comment; otherwise the chain ends at
  # ggtitle() and the axis labels below are never added to the plot.
  ggtitle("From Start to End") +
  xlab("Functional Area") +
  ylab("Hours")
Which produces this plot:
What I can't figure out how to do is to make each column its own palette, with different shades for each project.
I.E. I'd like all the boxes in the "0_Admin" column to be different blues, each box in the "1_Monitoring" column to be different greens, etc. The plot above is close, but mostly by coincidence and the number of projects/area. You'll notice for example that "3_Management" projects are both red and orange, and orange shades "bleed" all the way over to "7_Visitor Safety".
Over time, the number of projects will increase overall (but will be a subset when I'm reporting on smaller time periods) so a fully manual scale is not feasible, but the number of Areas will stay the same.
Any thoughts? Hints? Thanks!
SO <- structure(list(client = c("0_Admin", "0_Admin", "0_Admin", "0_Admin",
"0_Admin", "0_Admin", "0_Admin", "0_Admin", "0_Admin", "1_Monitoring",
"1_Monitoring", "1_Monitoring", "1_Monitoring", "1_Monitoring",
"1_Monitoring", "1_Monitoring", "2_Science", "2_Science", "2_Science",
"2_Science", "2_Science", "2_Science", "3_Management", "3_Management",
"3_Management", "3_Management", "3_Management", "3_Management",
"4_EA", "6_Fire", "6_Fire", "7_VisitorSafety", "8_ResConMisc",
"8_ResConMisc", "8_ResConMisc", "8_ResConMisc", "8_ResConMisc",
"8_ResConMisc", "8_ResConMisc", "8_ResConMisc", "8_ResConMisc",
"9_CrossFxn", "9_CrossFxn", "9_CrossFxn", "9_CrossFxn", "Z_Leave",
"Z_Leave", "Z_Leave"),
project = c("Email", "EPM", "Finance",
"HR", "Misc", "OHS", "RCPs", "Time mgmt", "Training", "Amphibians",
"Area burned", "Birds", "Rangeland Health", "Sediment", "Ungulates",
"Water Quality", "Bison GPS", "Bison science advisory group",
"Collaboration", "Corridor Use", "Grassland bird survey", "Misc",
"Beavers", "Bison", "Geese", "HUMP/HIP", "HWC", "Invasive Plants",
"Nest sweeps", "Fire crew", "Fire mgmt plan", "DO response",
"Duty Officer", "Media", "Misc", "Open Data", "Peer discussion",
"RC meeting", "Training", "Travel", "Work planning", "CC meeting",
"Events", "Misc", "Trails", "Appointments", "Stat holiday", "Vacation"
),
time_spent = c(174.709722222222, 15.2483333333333, 26.7827777777778,
127.603611111111, 21.7127777777778, 6.32222222222222, 11.9725,
3.32111111111111, 29.6375, 4.80333333333333, 0.498055555555556,
74.4958333333333, 21.8011111111111, 1.14111111111111, 21.5008333333333,
36.0780555555556, 1.44972222222222, 1.40694444444444, 6.83916666666667,
3.93027777777778, 6.94916666666667, 2, 28.7986111111111, 154.448888888889,
0.684444444444445, 12.5727777777778, 2.98861111111111, 1.89416666666667,
1.75, 21.2725, 11.0122222222222, 2.74333333333333, 0.817777777777778,
10.415, 84.9144444444444, 11.4, 19.7738888888889, 8.84444444444444,
38.7216666666667, 8, 11.6063888888889, 10.5191666666667, 3.41638888888889,
20.8216666666667, 0.298611111111111, 6.74611111111111, 30, 75.5
),
area_project = c("0_Email", "0_EPM", "0_Finance", "0_HR",
"0_Misc", "0_OHS", "0_RCPs", "0_Time mgmt", "0_Training", "1_Amphibians",
"1_Area burned", "1_Birds", "1_Rangeland Health", "1_Sediment",
"1_Ungulates", "1_Water Quality", "2_Bison GPS", "2_Bison science advisory group",
"2_Collaboration", "2_Corridor Use", "2_Grassland bird survey",
"2_Misc", "3_Beavers", "3_Bison", "3_Geese", "3_HUMP/HIP", "3_HWC",
"3_Invasive Plants", "4_Nest sweeps", "6_Fire crew", "6_Fire mgmt plan",
"7_DO response", "8_Duty Officer", "8_Media", "8_Misc", "8_Open Data",
"8_Peer discussion", "8_RC meeting", "8_Training", "8_Travel",
"8_Work planning", "9_CC meeting", "9_Events", "9_Misc", "9_Trails",
"Z_Appointments", "Z_Stat holiday", "Z_Vacation")), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -48L), vars = "client", labels = structure(list(
client = c("0_Admin", "1_Monitoring", "2_Science", "3_Management",
"4_EA", "6_Fire", "7_VisitorSafety", "8_ResConMisc", "9_CrossFxn",
"Z_Leave")), class = "data.frame", row.names = c(NA, -10L), vars = "client", drop = TRUE),
indices = list(0:8, 9:15, 16:21, 22:27, 28L, 29:30, 31L, 32:40, 41:44, 45:47),
drop = TRUE, group_sizes = c(9L, 7L, 6L, 6L, 1L, 2L, 1L, 9L, 4L, 3L), biggest_group_size = 9L)

Related

How do I change the shape of the lines in a plot generated by a for loop?

I'm not sure how to change the shape and color of the lines drawn by the for loop below, which uses data in a data frame, fish:
structure(list(Region = structure(c(7L, 7L, 7L, 7L, 7L), .Label = c("American Samoa",
"Johnston Atoll", "Line Islands", "MHI", "Musicians Seamounts",
"Northern Marianas", "NWHI", "Southern Marianas", "Tokelau Ridge",
"Wake Island"), class = "factor"), ObservationYear = c(2015,
2015, 2015, 2015, 2015), `Mega-Habitat` = c("bank", "bank", "tablemount",
"bank", "atoll"), Total_fish = c(6, 10, 21, 11, 7), Lat = c(23.2227305,
25.0840027, 26.8267143809524, 26.8188378, 27.5178584285714),
Long = c(-163.516748333333, -172.490419, -175.607307619048,
-176.315991, -175.460592857143), Temperature = c(1.82256666666667,
2.00518, 3.03555714285714, 2.01533, 1.5475), Salinity = c(34.64115,
34.61702, 34.4760619047619, 34.61106, 34.6673857142857),
Oxygen = c(3.16008333333333, 2.79735, 1.27077619047619, 2.58692,
3.73167142857143), Distance = c(350, 960, 1130, 360, 460),
`CTD Availability` = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_), depth_bin = c("2000-3000",
"1000-2000", "1000-2000", "1000-2000", "2000-3000"), EventID = c("D2-EX1504L2-01",
"D2-EX1504L2-06", "D2-EX1504L2-08", "D2-EX1504L2-10", "D2-EX1504L2-12"
), Average_Depth = c(2160.20383333333, 1880.4596, 1217.94385,
1890.1868, 2780.92557142857), POC_Flux = c(2.56732258067581,
2.86961424536357, 3.38129564627503, 3.38129564627503, 3.80216410589398
)), row.names = c(NA, -5L), class = c("tbl_df", "tbl", "data.frame"
))
I ran a GAM before the loop:
g1 = mgcv::gam(Total_fish ~ s(Average_Depth, by = Region) + Region + offset(log(Distance)), data =fish,family= nb)
I tried defining shapes, but it did not work
# Candidate symbol codes 0-10 (presumably meant as pch values - confirm).
shapes= c(0,1,2,3,4,5,6,7,8,9,10)
# Re-index the codes by each row's Region factor code, giving one entry per row of fish.
shapes <-shapes[as.numeric(fish$Region)]
# One rainbow colour per Region level.
colors.use = rainbow(nlevels(fish$Region))
# Draw one predicted curve per Region level on a shared set of axes.
for (i in 1:nlevels(fish$Region)) {
# Response-scale predictions from g1 for region i over a 1000-point depth grid (0-3000) at Distance = 1000.
predictions = predict(g1, type="response", newdata = data.frame(Distance= 1000, Average_Depth = seq(0,3000,length=1000), Region = levels(fish$Region)[i]))
# The first region opens the plot; every later region is overlaid with lines().
# NOTE(review): `shapes` is never referenced in this loop, and pch only selects
# point symbols, so it has no visible effect on curves drawn as lines
# (type = 'l' here, and the default for lines()). lty/lwd are the graphical
# parameters that change how a line looks.
if (i == 1) plot(xlab= "Depth (m)", ylab = "fish/1000 m",seq(0,3000,length=1000), predictions, type = 'l', col=colors.use[i], pch=i)
if (i > 1) lines(seq(0,3000,length=1000), predictions,col=colors.use[i],pch=i)
}
I just need to be able to differentiate between the regions and the current rainbow colors alone are not very useful for that

How do I label the sum the total of y-axis column values from consecutive bar values like in the example “Confirmed” Cases per x-axis “Date”

I have been working on this for some time, and am re-posting this hoping to simplify the definition of the problem and to bring some clarity from feedback on my previous attempt. I am able to label each individual column value, but not able to put together the code necessary to sum the total. The examples I have looked at never work the way I try to put them together, for example with group_by, or summarize, etc. I would like to only sum the values of "Confirmed Cases", and not show the other column values because, with many c("x", "Y", ... "data"), it becomes impossible to read.
Here is the data frame:
dput(COVID1[1:12, ])
structure(list(COUNTY = c("Antrim", "Antrim", "Antrim", "Charlevoix",
"Charlevoix", "Grand Traverse", "Grand Traverse", "Grand Traverse",
"Antrim", "Grand Traverse", "Grand Traverse", "Grand Traverse"
), Date = structure(c(18453, 18456, 18457, 18453, 18455, 18453,
18456, 18457, 18455, 18453, 18456, 18457), class = "Date"), CASE_STATUS = c("Confirmed",
"Confirmed", "Confirmed", "Confirmed", "Confirmed", "Confirmed",
"Confirmed", "Confirmed", "Probable", "Probable", "Probable",
"Probable"), Cases = c(1L, 1L, 2L, 1L, 3L, 2L, 2L, 1L, 1L, 1L,
1L, 1L)), row.names = c(NA, 12L), class = "data.frame")
Code:
# Stacked daily case counts for three counties, one segment per CASE_STATUS,
# with every segment labelled by its own count.
ggplot(filter(COVID1, COUNTY %in% c("Antrim", "Charlevoix", "Grand Traverse"), Cases > 0)) +
  geom_col(aes(x = Date, y = Cases, fill = CASE_STATUS), position = position_stack(reverse = TRUE), width = .88) +
  geom_text(aes(x = Date, y = Cases, label = Cases), position = position_stack(reverse = TRUE), vjust = 1.5, size = 3, color = "white") +
  scale_fill_manual(values = c("blue", "tomato")) +
  # An NA limit lets the scale take that end from the data. A quoted 'today()'
  # is never evaluated - as.Date() just coerces it to NA - so NA is written
  # explicitly here. Use Sys.Date() instead to extend the axis to today.
  scale_x_date(labels = date_format("%m/%d"), limits = c(as.Date("2020-07-09"), NA), breaks = "1 week") +
  theme(axis.text.x = element_text(angle = 0)) +
  labs(title = "Antrim - Grand Traverse - Charlevoix")
I'm not sure if I understood the question but I think you want to add the sum of the confirmed cases as labels. There might be a ggplot way of doing it but I think the most straightforward way is to make another dataset with your labels and feed it in.
# One row per Date holding the total of the "Confirmed" cases on that date;
# this is the label data set fed to geom_text() below.
date_labels <- filter(COVID1, COUNTY %in% c("Antrim", "Charlevoix", "Grand Traverse"), Cases > 0) %>%
  group_by(Date) %>%
  summarise(confirmed_cases = sum(Cases[CASE_STATUS == "Confirmed"]))
ggplot(filter(COVID1, COUNTY %in% c("Antrim", "Charlevoix", "Grand Traverse"), Cases > 0)) +
  geom_col(aes(x = Date, y = Cases, fill = CASE_STATUS), position = position_stack(reverse = TRUE), width = .88) +
  # Labels come from date_labels (one per date) and are placed at y = 1.
  geom_text(data = date_labels, aes(x = Date, y = 1, label = confirmed_cases), position = position_stack(reverse = TRUE), vjust = 1.5, size = 3, color = "white") +
  scale_fill_manual(values = c("blue", "tomato")) +
  # An NA limit lets the scale take that end from the data. A quoted 'today()'
  # is never evaluated - as.Date() just coerces it to NA - so NA is written
  # explicitly here. Use Sys.Date() instead to extend the axis to today.
  scale_x_date(labels = label_date("%m/%d"), limits = c(as.Date("2020-07-09"), NA), breaks = "1 week") +
  theme(axis.text.x = element_text(angle = 0)) +
  labs(title = "Antrim - Grand Traverse - Charlevoix")
Gives me this result:

Why ggplot2 geom_hlines plots more than intended?

Here is a sample of the dataframe I am working with.
> head(tbl[,c('logFC', 'CI_L', 'CI_R', "adj_P_Value","gene",'Group1','Group2', 'Study_ID')])
logFC CI_L CI_R adj_P_Value gene Group1 Group2 Study_ID
1 -0.09017596 -0.43955752 0.25920561 1 CD244 Male Female GSE2461
2 0.08704844 -0.26134341 0.43544028 1 CD244 ulcerative colitis irritable bowel syndrome GSE2461
3 -0.03501474 -0.12677636 0.05674688 1 CD244 nonlesional skin lesional skin GSE27887
4 0.01096914 -0.08064105 0.10257932 1 CD244 pretreatment posttreatment GSE27887
5 -0.03707265 -0.12407201 0.04992672 1 CD244 Infliximab Before treatment GSE42296
6 0.07644834 -0.02849309 0.18138977 1 CD244 Responder Nonresponder GSE42296
> dput(droplevels(head(tbl, 4)))
structure(list(Probe_gene = c("211828_s_at", "213107_at", "213109_at",
"211828_s_at"), logFC = c(0.299038590078202, 0.110797898105632,
0.183214738942169, -0.733505457149486), CI_L = c(-0.0332844208935414,
-0.246475718463096, -0.103358698007331, -1.06488707237429), CI_R = c(0.631361601049945,
0.46807151467436, 0.469788175891669, -0.402123841924678), AveExpr = c(7.38827278419383,
7.83576862202959, 6.68411901305011, 7.38827278419383), t = c(2.08930195860002,
0.720053829585981, 1.48442706763586, -5.13936340603241), P_Value = c(0.0714526369900392,
0.492771856681782, 0.177447421180599, 0.000998740960213292),
adj_P_Value = c(1, 1, 1, 1), B = c(-4.07430683864883, -5.56181503167371,
-4.83144498851773, -0.294306065125513), gene = c("TNIK",
"TNIK", "TNIK", "TNIK"), Study_ID = c("GSE2461", "GSE2461",
"GSE2461", "GSE2461"), Group1 = c("Male", "Male", "Male",
"ulcerative colitis"), Group2 = c("Female", "Female", "Female",
"irritable bowel syndrome"), Study_ID = c("GSE2461", "GSE2461",
"GSE2461", "GSE2461"), Disease = c("irritable bowel syndrome; ulcerative colitis",
"irritable bowel syndrome; ulcerative colitis", "irritable bowel syndrome; ulcerative colitis",
"irritable bowel syndrome; ulcerative colitis"), DOID = c(9778L,
9778L, 9778L, 9778L), Title = c("Control (IBS) & Ulcerative colitis (UC) subjects",
"Control (IBS) & Ulcerative colitis (UC) subjects", "Control (IBS) & Ulcerative colitis (UC) subjects",
"Control (IBS) & Ulcerative colitis (UC) subjects"), GEO_Platform_ID = c("GPL96",
"GPL96", "GPL96", "GPL96"), Platform = c("Affymetrix Human U133A Array",
"Affymetrix Human U133A Array", "Affymetrix Human U133A Array",
"Affymetrix Human U133A Array"), PMID = c(0L, 0L, 0L, 0L),
Organism = c("Homo sapiens", "Homo sapiens", "Homo sapiens",
"Homo sapiens"), Data_Type = c("RNA", "RNA", "RNA", "RNA"
), Biomaterial = c("Colonic Mucosal biopsy", "Colonic Mucosal biopsy",
"Colonic Mucosal biopsy", "Colonic Mucosal biopsy"), Study_Type = c("in vivo",
"in vivo", "in vivo", "in vivo"), Samples = c(8L, 8L, 8L,
8L), Time_Point = c("Baseline", "Baseline", "Baseline", "Baseline"
), Treatment = c("NA", "NA", "NA", "NA"), Treatment_Protocol = c("NA",
"NA", "NA", "NA"), Raw_Data = c(0L, 0L, 0L, 0L), Notes = c("controls are IBS, not healty",
"controls are IBS, not healty", "controls are IBS, not healty",
"controls are IBS, not healty"), ylab = c("Female → Male",
"Female → Male", "Female → Male", "irritable bowel syndrome → ulcerative colitis"
)), .Names = c("Probe_gene", "logFC", "CI_L", "CI_R", "AveExpr",
"t", "P_Value", "adj_P_Value", "B", "gene", "Study_ID", "Group1",
"Group2", "Study_ID", "Disease", "DOID", "Title", "GEO_Platform_ID",
"Platform", "PMID", "Organism", "Data_Type", "Biomaterial", "Study_Type",
"Samples", "Time_Point", "Treatment", "Treatment_Protocol", "Raw_Data",
"Notes", "ylab"), row.names = c(NA, 4L), class = "data.frame")
I am using this to construct a plot that has the GSE # (Study_ID), followed by the contrast (Group1 vs Group2) on the y-axis, and logFC as the x-axis. I want to plot a horizontal line between each of the different GSE #'s for visual clarity, but my code doesn't seem to be working.
datasetList <- tbl$Study_ID
# Row positions where a Study_ID appears for the first time, shifted by half a
# step so each value falls between two neighbouring rows.
hLines <- which(!duplicated(datasetList)) - 0.5
# Facet label of the form "Group2 -> Group1" (U+2192 is a right arrow).
tbl$ylab <- paste0(tbl$Group2, " \U2192 ", tbl$Group1)
p <- ggplot(data = tbl, aes(x = logFC, y = Probe_gene, group = Study_ID)) +
  geom_point() +
  # Reference lines at fold changes of 0.5, 2/3, 1.5 and 2 on the log2 scale.
  geom_vline(xintercept = log(0.5, 2), size = 0.2) +
  geom_vline(xintercept = log(2 / 3, 2), size = 0.2) +
  geom_vline(xintercept = log(1.5, 2), size = 0.2) +
  geom_vline(xintercept = log(2, 2), size = 0.2) +
  geom_hline(yintercept = hLines) +
  labs(title = tbl$gene, y = "Contrasts", x = bquote(~Log[2]~'(Fold Change)')) +
  geom_errorbarh(aes(x = logFC, xmin = CI_L, xmax = CI_R), height = .1) +
  # Bin the adjusted P values with explicit labels. cut() closes intervals on
  # the right by default, so its generated level is "(0.01,0.05]"; a colour key
  # spelled "(0.01,0.05)" never matches and leaves that bin unmapped. Naming
  # the bins here removes any dependence on cut()'s generated level text.
  geom_point(aes(colour = cut(adj_P_Value, c(-Inf, 0.01, 0.05, Inf),
                              labels = c("<= 0.01", "0.01 < P Value <= 0.05", "> 0.05")))) +
  scale_color_manual(name = "P Value",
                     values = c("<= 0.01" = "red",
                                "0.01 < P Value <= 0.05" = "orange",
                                "> 0.05" = "black")) +
  #theme_bw()+
  theme(axis.text.y = element_blank(), strip.text.y = element_text(angle = 180),
        panel.spacing.y = unit(0, 'lines'), axis.ticks.y = element_blank()) +
  facet_grid(Study_ID + ylab ~ ., scales = 'free', space = 'free', switch = 'both')
p
For some reason with the code I have now, ggplot prints many more horizontal lines than I need. It is printing a line in between each GSE #, when I only need it to print a line in between the unique GSE #'s. What am I doing wrong? hLines contains the y-intercepts of where the lines should go.
P.S. As a bit of a side question, if anyone knows of a way for me to specify the shapes that appear (similar to how I specify the colors), that would be very appreciated. In reference to the colors, I need red circles, orange squares, and black crosses for the same conditions that appear in the scale_color_manual() function.

Using mutate and a lookup/calc function

I wrote a function where I pass a company name to lookup in a 2nd table a set of records, calculate a complicated result, and return the result.
I want to process all companies and add a value to each record with that result.
I am using the following code:
`aa <- mutate(companies,newcol=sum_rounds(companies$company_name))`
But I get the following warning:
Warning message:
In c("Bwom", "Symple", "TravelTriangle", "Ark Biosciences", "Artizan Biosciences", :
longer object length is not a multiple of shorter object length
(each of these is a company name)
The company dataframe gets a new column, but all values are "false" where actually there should be both true and false.
Any advice would be welcome to a newbie.
Function follows:
# Tests whether a company's recorded funding rounds, read in order of
# announcement, begin with one or more "a" codes followed by at least one
# "m" or "v" code.
#
# co_name: company name, compared against rounds$company_name.x.
# Returns the logical result of grepl() on the concatenated round codes.
# Reads a data frame `rounds` (columns company_name.x, roundtype,
# announced_on) from the enclosing environment.
sum_rounds <- function(co_name) {
  # Rows for the requested company that have a round type recorded.
  company_rounds <- filter(rounds, company_name.x == co_name & !is.na(roundtype))
  # Earliest announcement first, so the codes read chronologically.
  by_date <- arrange(company_rounds, announced_on)
  # Collapse the roundtype column into a single string of codes.
  outval <- apply(select(by_date, roundtype), 2, paste, collapse = "")
  # ^a+    : one or more "a" at the start of the string
  # [mv]+  : then at least one "m" or "v"
  # The pattern is case-sensitive and expects lower-case codes (presumably
  # a = pure angel, m = mixed, v = venture - confirm how roundtype is coded).
  grepl("^a+[mv]+", outval)
}
DPUT from Companies table Follows:
structure(list(company_name = c("Bwom", "Symple", "TravelTriangle",
"Ark Biosciences", "Artizan Biosciences", "Audiense"), domain = c("b-wom.com",
"getsymple.com", "traveltriangle.com", "arkbiosciences.com",
NA, "audiense.com"), country_code = c("ESP", "USA", "USA", "CHN",
"USA", "GBR"), state_code = c(NA, "CA", "VA", NA, "NC", NA),
region = c("Barcelona", "SF Bay Area", "Washington, D.C.",
"Shanghai", "Raleigh", "London"), city = c("Barcelona", "San Francisco",
"Charlottesville", "Shanghai", "Durham", "London"), status = c("operating",
"operating", "operating", "operating", "operating", "operating"
), short_description = c("Bwom is a tool that offers a test and personalized exercises for women's intimate health.",
"Symple is the cloud platform for all your business payments. Pay, get paid, connect.",
"TravelTriangle enables travel enthusiasts to reserve a personalized holiday plan with a local travel agent.",
"Ark Biosciences is a biopharmaceutical company that is dedicated to the discovery and development",
"Artizan Biosciences", "SaaS developer delivering unique consumer insight and engagement capabilities to many of the world’s biggest brands and agencies."
), category_list = c("health care", "cloud computing|machine learning|mobile apps|mobile payments|retail technology",
"e-commerce|personalization|tourism|travel", "health care",
"biopharma", "analytics|apps|marketing|market research|social crm|social media|social media marketing"
), category_group_list = c("health care", "apps|commerce and shopping|data and analytics|financial services|hardware|internet services|mobile|payments|software",
"commerce and shopping|travel and tourism", "health care",
"biotechnology|health care|science and engineering", "apps|data and analytics|design|information technology|internet services|media and entertainment|sales and marketing|software"
), employee_count = c("1 to 10", "11 to 50", "101 to 250",
NA, "1 to 10", "51 to 100"), funding_rounds = c(2L, 1L, 4L,
2L, 2L, 5L), funding_total_usd = c(1075791, 120000, 19900000,
NA, 3e+06, 8013391), founded_on = structure(c(16555, 16770,
15156, 16071, NA, 14975), class = "Date"), first_funding_on = structure(c(16526,
17204, 15492, 16532, 17091, 15294), class = "Date"), last_funding_on = structure(c(17204,
17204, 17204, 17203, 17203, 17203), class = "Date"), closed_on = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_), email = c("hello#b-wom.com", "info#getsymple.com",
"admin#traveltriangle.com", "info#arkbiosciences.com", NA,
"moreinfo#audiense.com"), phone = c(NA, NA, "'+91 98 99 120408",
"###############################################################################################################################################################################################################################################################",
NA, "###############################################################################################################################################################################################################################################################"
), cb_url = c("https://www.crunchbase.com/organization/bwom",
"https://www.crunchbase.com/organization/symple-2", "https://www.crunchbase.com/organization/traveltriangle-com",
"https://www.crunchbase.com/organization/ark-biosciences",
"https://www.crunchbase.com/organization/artizan-biosciences",
"https://www.crunchbase.com/organization/socialbro"), twitter_url = c("https://www.twitter.com/hellobwom",
NA, "https://www.twitter.com/traveltriangle", NA, NA, "https://www.twitter.com/socialbro"
), facebook_url = c("https://www.facebook.com/hellobwom/?fref=ts",
NA, "http://www.facebook.com/traveltriangle", NA, NA, "http://www.facebook.com/socialbro"
), uuid = c("e6096d58-3454-d982-0dbe-7de9b06cd493", "fd0ab78f-0dc4-1f18-21d1-7ce9ff7a173b",
"742043c1-c17a-4526-4ed0-e911e6e9555b", "8e27eb22-ce03-a2af-58ba-53f0f458f49c",
"ed07ac9e-1071-fca0-46d9-42035c2da505", "fed333e5-2754-7413-1e3d-5939d70541d2"
), isbio = c("other", "other", "other", "other", "bio", "other"
), co_type = c("m", "m", "m", "v", "v", "m")), .Names = c("company_name",
"domain", "country_code", "state_code", "region", "city", "status",
"short_description", "category_list", "category_group_list",
"employee_count", "funding_rounds", "funding_total_usd", "founded_on",
"first_funding_on", "last_funding_on", "closed_on", "email",
"phone", "cb_url", "twitter_url", "facebook_url", "uuid", "isbio",
"co_type"), row.names = c(NA, -6L), class = c("tbl_df", "tbl",
"data.frame"))
>

Stacked Bar ordered by Sum of Fill with ggplot2

With the following data (an already melted data frame):
df1<-structure(list(Speciality = structure(27:32, .Label = c("Addiction Medicine",
"Anesthesiology", "Cardiac Electrophysiology", "Cardiology",
"Dermatology", "Emergency Medicine", "Family Medicine", "Gastroenterology",
"General Surgery", "Hematology & Oncology", "Hospitalist", "Internal Medicine",
"Nephrology", "Neurological Surgery", "Neurology", "Obstetrics & Gynecology",
"Otolaryngology", "Pain Medicine", "Pathology", "Pediatric Critical Care Medicine",
"Pediatric Hematology-Oncology", "Pediatric Pulmonology", "Pediatric Radiology",
"Pediatric Surgery", "Pediatrics", "Psychiatry", "Pulmonology",
"Radiation Oncology", "Radiology", "Surgical Oncology", "Urology",
"Vascular Surgery"), class = "factor"), PhysAge = structure(c(5L,
5L, 1L, 3L, 5L, 5L), .Label = c("25-34", "35-44", "45-54", "55-64",
"65+"), class = "factor"), value = c(0.0035, 0.0058, 0.0089, 0, 0.00512820512820513,
0.00512820512820513)), .Names = c("Speciality", "PhysAge", "value"
), row.names = 155:160, class = "data.frame")
How can I reorder in ggplot based on the sum of values for each Speciality in a stacked bar chart. I've found some options where the value is multiple columns, but in this case it's one value column.
Currently plotting by:
# Stacked bars of value per Speciality, split by PhysAge, using the data
# frame posted above (df1).
ggplot(df1, aes(x = Speciality, y = value, fill = PhysAge)) +
  geom_bar(stat = "identity")
You could try
set.seed(1)
# Pad the posted six-row sample (df1) with two synthetic PhysAge groups so
# that every Speciality has several segments to stack and sum.
df <- rbind(
  df1,
  transform(df1, PhysAge = "1", value = runif(6, 0, 0.01)),
  transform(df1, PhysAge = "10", value = runif(6, 0, 0.01))
)
# reorder() orders the Speciality levels by the per-level sum of value.
ggplot(df, aes(x = reorder(Speciality, value, sum), y = value, fill = PhysAge)) +
  geom_bar(stat = "identity")

Resources