How to change the legend title of my ggplots based on outcomes? - r

I wonder if there is an efficient way to change the legend title of my ggplots based on outcomes.
Example
I have a function which helps me to filter a data base by type and by county.
df without filter
county | date | value | type
-----------------------------------
Alameda 2020-01-01 6 positive
Alameda 2020-01-02 2 negative
Alameda 2020-01-03 1 positive
LA 2020-01-04 4 positive
LA 2020-01-03 1 positive
** Function **
#' Plot `value` over `date` for the selected counties and result types.
#'
#' @param data Data frame with the columns `county`, `date`, `value` and
#'   `type`.
#' @param select_county Character vector of counties to keep.
#' @param type_order Character vector of result types to keep; defaults to
#'   every type present in `data`.
#' @return A ggplot object with one coloured line per `type`.
function_forggplot <- function(data = df,
                               select_county = "Alameda",
                               type_order = unique(data$type)) {
  # Filter data base. Work on the `data` argument rather than on the global
  # `df`, so a data frame passed in by the caller is actually used.
  df_outcome <- data[data$county %in% select_county, ]
  df_outcome <- df_outcome[df_outcome$type %in% type_order, ]

  gg_outcome <- ggplot(
    data = df_outcome,
    aes(x = date, y = value, color = type)
  ) +
    geom_line(size = .5)

  # Return the plot so the caller can print it or add further layers.
  gg_outcome
}
I want to change the title of my ggplot based on the outcome. For example, if the user selects county = LA and type = positive, I want a title in my ggplot like "Results positives for LA".
This works with if/else conditionals, but I have more than 100 cases, so I don't think that is a good option.
Expect outcome
function_forggplot(county="Alameda", type = "negatives")
A ggplot object with this title "Results negatives for Alameda"
function_forggplot(county="Fresno", type = "positives")
A ggplot object with this title "Results positives for Fresno"
Thanks

You can adapt the code you showed in the function directly like this. Also you can use paste0() for the title (no need of other packages):
library(ggplot2)
#Function
#' Plot the values of one county and one result type over time.
#'
#' @param df Data frame with the columns `county`, `date`, `value`, `type`.
#' @param select_county County to keep, e.g. "Alameda".
#' @param type_order Result type to keep, e.g. "positive".
#' @return A ggplot line chart titled
#'   "Results <type_order> for <select_county>".
funplot <- function(df, select_county, type_order) {
  # Filter data base. %in% never returns NA, so rows with a missing county or
  # type are dropped instead of turning into all-NA rows as with `==`.
  df_outcome <- df[df$county %in% select_county, ]
  df_outcome <- df_outcome[df_outcome$type %in% type_order, ]
  # Plot
  ggplot(df_outcome, aes(x = date, y = value)) +
    geom_line() +
    # Type first, then county, as requested: "Results positive for Alameda"
    ggtitle(paste0("Results ", type_order, " for ", select_county))
}
#Apply
funplot(mydf, 'Alameda', 'positive')
Output:
Some data used:
#Data
mydf <- structure(list(county = c("Alameda", "Alameda", "Alameda", "LA",
"LA"), date = structure(c(18262, 18263, 18264, 18265, 18264), class = "Date"),
value = c(6L, 2L, 1L, 4L, 1L), type = c("positive", "negative",
"positive", "positive", "positive")), row.names = c(NA, -5L
), class = "data.frame")

We can create the function with glue, which is very flexible: it interpolates any object named inside {}. Of course, we can use paste or sprintf as well. ggplot2 is an external package anyway, so using another package from the tidyverse keeps the code tidier.
library(ggplot2)
library(dplyr)
# Line chart of `value` over `date` for a single county and result type.
# The title is interpolated with glue: "Results <type_nm> for <county_nm>".
f1 <- function(dat, county_nm, type_nm) {
  selected <- filter(dat, county == county_nm, type == type_nm)
  plot_title <- glue::glue("Results {type_nm} for {county_nm}")

  ggplot(selected, aes(x = date, y = value)) +
    geom_line() +
    ggtitle(plot_title)
}
then, we call as
f1(df, 'LA', 'positive')
-output
Or without any packages (other than ggplot2, dplyr)
# Same chart as the glue version, but the title is built with base sprintf().
f2 <- function(dat, county_nm, type_nm) {
  rows_to_plot <- dat %>%
    filter(county == county_nm, type == type_nm)
  heading <- sprintf("Results %s for %s", type_nm, county_nm)

  base_plot <- ggplot(rows_to_plot, aes(x = date, y = value))
  base_plot + geom_line() + ggtitle(heading)
}
data
df <- structure(list(county = c("Alameda", "Alameda", "Alameda", "LA",
"LA"), date = structure(c(18262, 18263, 18264, 18265, 18264), class = "Date"),
value = c(6L, 2L, 1L, 4L, 1L), type = c("positive", "negative",
"positive", "positive", "positive")), row.names = c(NA, -5L
), class = "data.frame")

Related

Estimate_richness for all phyla in phyloseq

Is there an easy way to get ASV richness for each Phylum for each Station using the estimate_richness function in phyloseq? Or is there another simple way of extracting the abundance data for each taxonomic rank and calculating richness that way?
So far I have just been subsetting individual Phyla of interest using for example:
# Restrict the phyloseq object to a single phylum.
ps.Prymnesiophyceae <- subset_taxa(ps, Phylum == "Prymnesiophyceae")
# Shannon index and observed richness per sample; the argument is spelled out
# as `measures` instead of relying on partial matching of `measure`.
alpha_diversity <- estimate_richness(ps.Prymnesiophyceae,
                                     measures = c("Shannon", "Observed"))
H <- alpha_diversity$Shannon
S1 <- alpha_diversity$Observed
S <- log(S1)
# Evenness as Shannon / log(richness)
evenness <- H / S
# Use the subsetted object here; a plain `Prymnesiophyceae` object is never
# created above.
alpha <- cbind(Shannon = H, Richness = S1, Evenness = evenness,
               sample_data(ps.Prymnesiophyceae))
But this is rather a pain when having to do it for e.g. the top 20 phyla.
EDIT:
suggestion by #GTM works well until last step. See comment + dput:
> dput(head(sample_names(ps.transect), n=2)) c("2-1-DCM_S21_L001_R1_001.fastq", "2-1-SA_S9_L001_R1_001.fastq" )
> dput(head(alpha, n=2)) structure(list(Observed = c(31, 25), Shannon = c(2.84184012598765,
2.53358345702604), taxon = c("Prymnesiophyceae", "Prymnesiophyceae" ), sample_id = c("X2.1.DCM_S21_L001_R1_001.fastq", "X2.1.SA_S9_L001_R1_001.fastq" ), S = c(3.43398720448515,
3.2188758248682), evenness = c(0.827562817437384,
0.787101955736294)), row.names = c("X2.1.DCM_S21_L001_R1_001.fastq", "X2.1.SA_S9_L001_R1_001.fastq"), class = "data.frame")
> dput(head(smpl_data, n=1)) new("sample_data", .Data = list("001_DCM", 125L, structure(1L, .Label = "DCM", class = "factor"), structure(1L, .Label = "Transect", class = "factor"), structure(1L, .Label = "STZ", class = "factor"),
structure(1L, .Label = "STFW", class = "factor"), "Oligotrophic",
16L, -149.9978333, -29.997, 130.634, 17.1252, 35.4443, 1025.835008,
1.1968, 1e-12, 5.387, 2.8469, 52.26978546, 98.0505, 0, 0,
0.02, 0.9, 0, 0, 2069.47, 8.057, 377.3), names = c("Station_neat", "Depth_our", "Depth_bin", "Loc", "Front", "Water", "Zone", "Bottle", "Lon", "Lat", "pressure..db.", "Temperature", "Salinity", "Density_kgm.3", "Fluorescence_ugL", "PAR", "BottleO2_mLL", "CTDO2._mLL", "OxygenSat_.", "Beam_Transmission", "N_umolL", "NO3_umolL", "PO4_umolL", "SIL_umolL", "NO2_umolL", "NH4_umolL", "DIC_uMkg", "pH", "pCO2_matm"), row.names = "2-1-DCM_S21_L001_R1_001.fastq",
.S3Class = "data.frame")
You can wrap your code in a for loop to do so. I've slightly modified your code to make it a bit more flexible, see below.
require("phyloseq")
require("dplyr")
#' Calculate alpha diversity measures for one taxon at a given rank.
#'
#' @param ps A phyloseq object.
#' @param taxon_name Name of the taxon to keep, e.g. "Proteobacteria".
#' @param tax_rank Column of the taxonomy table that `taxon_name` is looked
#'   up in.
#' @param ... Passed on to `estimate_richness()` (e.g. `measures`).
#' @return The data frame returned by `estimate_richness()` with two extra
#'   columns: `taxon` and `sample_id`.
estimate_diversity_for_taxon <- function(ps, taxon_name, tax_rank = "Phylum", ...) {
  # Subset to taxon of interest; %in% treats unassigned (NA) taxa as FALSE.
  tax_tbl <- as.data.frame(tax_table(ps))
  keep <- tax_tbl[, tax_rank] %in% taxon_name
  ps_taxon <- prune_taxa(keep, ps)

  # Calculate alpha diversity and generate a table
  alpha_diversity <- estimate_richness(ps_taxon, ...)
  alpha_diversity$taxon <- taxon_name

  # The row names of the result can be altered copies of the sample names
  # (e.g. "2-1-DCM_S21" becomes "X2.1.DCM_S21"), which breaks later joins
  # against the sample data. Prefer the untouched sample names; this assumes
  # the result has one row per sample in sample_names() order - otherwise
  # fall back to the row names.
  ids <- sample_names(ps_taxon)
  if (length(ids) == nrow(alpha_diversity)) {
    alpha_diversity$sample_id <- ids
  } else {
    alpha_diversity$sample_id <- row.names(alpha_diversity)
  }

  alpha_diversity
}
# Load data
data(GlobalPatterns)
ps <- GlobalPatterns

# Estimate alpha diversity for each phylum
phyla <- get_taxa_unique(ps, taxonomic.rank = "Phylum")
phyla <- phyla[!is.na(phyla)]

# Build one table per phylum and bind them once at the end; calling rbind()
# inside a loop copies the growing table on every iteration.
alpha_by_phylum <- lapply(phyla, function(phylum) {
  estimate_diversity_for_taxon(ps = ps,
                               taxon_name = phylum,
                               measures = c("Shannon", "Observed"))
})
alpha <- do.call(rbind, alpha_by_phylum)

# Calculate the additional alpha diversity measures
alpha$S <- log(alpha$Observed)
alpha$evenness <- alpha$Shannon / alpha$S

# Add sample data. Coerce with as(): as.data.frame() leaves the object in the
# S4 "sample_data" class, which is not a plain data frame for joining.
smpl_data <- as(sample_data(ps), "data.frame")
alpha <- left_join(alpha,
                   smpl_data,
                   by = c("sample_id" = "X.SampleID"))
This is a reproducible example with GlobalPatterns. Make sure to alter the code to match your data by replacing X.SampleID in the left join with the name of the column that contains the sample IDs in your sample_data. If there is no such column, you can create it from the row names:
# Coerce to a plain data frame; as.data.frame() keeps the S4 "sample_data"
# class.
smpl_data <- as(sample_data(ps), "data.frame")
# Create the join key from the row names. This must be an assignment (`<-`);
# a bare `<` only compares and leaves the column undefined.
smpl_data$sample_id <- row.names(smpl_data)
# The keys must match exactly: row names produced by estimate_richness() can
# differ from the sample names (a leading "X", "-" replaced by "."), so check
# alpha$sample_id against these values before joining.
alpha <- left_join(alpha,
                   smpl_data,
                   by = "sample_id")

Making 2-way graph (ggplot2) out of a tabyl table changing values

male FALSE TRUE
0 50.0% 66.7%
1 50.0% 33.3%
structure(list(male = 0:1, `FALSE` = c("50.0%", "50.0%"), `TRUE` = c("66.7%",
"33.3%")), row.names = c(NA, -2L), core = structure(list(male = 0:1,
`FALSE` = c(1, 1), `TRUE` = c(4, 2)), class = "data.frame", row.names = c(NA,
-2L)), tabyl_type = "two_way", var_names = list(row = "male",
col = "dummy"), class = c("tabyl", "data.frame"))
How can I make a plot using ggplot2 of this table constructed with janitor? The thing is that I would like two plots side-by-side: one for dummy=TRUE and the other for dummy=FALSE (but changing the labels such that TRUE is replaced by a and FALSE by b -- i am having difficulties with this because TRUE and FALSE are logical). I would also like to replace the values 0 and 1 for c and d respectively.
You can try a tidyverse approach. The trick is to transform the data from wide to long, since long data is the preferred input for ggplot. Here I used pivot_longer, but you can also use reshape or melt.
library(tidyverse)
df %>%
  # Wide to long: one row per `male` value and dummy level ("FALSE"/"TRUE")
  pivot_longer(-1) %>%
  mutate(
    # Relabel the dummy levels: TRUE -> a, FALSE -> b
    name = ifelse(name == "TRUE", "a", "b"),
    # "66.7%" -> 66.7, so the bars get a continuous y axis instead of one
    # discrete level per percentage string
    value = parse_number(value),
    # Relabel 0 -> c and 1 -> d
    male = factor(male, levels = c(0, 1), labels = c("c", "d"))
  ) %>%
  ggplot(aes(male, value, fill = name)) +
  geom_col(position = position_dodge())
Using base R you can try
# transform percentages to numerics ("66.7%" -> 66.7)
df$a <- as.numeric(sub("%", "", df$`TRUE`, fixed = TRUE))
df$b <- as.numeric(sub("%", "", df$`FALSE`, fixed = TRUE))
# Grouped bars per value of `male`; TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
barplot(cbind(a, b) ~ male, df, beside = TRUE, legend.text = TRUE)

Plot multiple geom_line and geom_smooth objects in one plot

I have somewhat messy looking dataframes, like this one:
df0
# A tibble: 3 x 9
# Groups: Sequ [1]
Sequ Speaker Utterance A_intpl A_dur B_intpl B_dur C_intpl C_dur
<int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 2 ID16.A cool >wha… 31.44786152… 10.5,17,1… 32.86993284… 9.5,16,17… 58.3368399… 14,17,17…
2 2 NA (0.228) 32.75735987… 15.5,17,1… 30.83469006… 14.5,16.9… 26.0386462… 3,17,16,…
3 2 ID16.B u:m Tenne… 32.05752604… 4.5,17,16… 29.95825107… 3.5,16,17… 55.9298614… 8,17,17,…
I want to plot the *_intpl values for each speaker (A, B, or C) for each of the three Utterances in a single chart both as line charts and as trend lines.
I'm just half successful doing this:
library(tidyr)
library(ggplot2)
library(dplyr)
# Reshape df0 to long form and plot log2(intpl) over cumulative time, one
# facet per utterance.
df0 %>%
# Columns such as A_intpl / A_dur are split at the last underscore: the prefix
# goes to Event_by, the suffix (intpl, dur) names the value columns.
pivot_longer(cols = contains("_"),
names_to = c("Event_by", ".value"),
names_pattern = "^(.*)_([^_]+$)") %>%
# Split the comma-separated strings into one row per value and convert them
# to numbers.
separate_rows(c(intpl, dur), sep = ",", convert = TRUE) %>%
# NOTE(review): df0 is grouped by Sequ only, so this running total continues
# across all Event_by series and utterances instead of restarting for each
# one. That is presumably why the series end up next to each other along x;
# adding group_by(Event_by, .add = TRUE) before this step looks like the fix
# - confirm against the intended time axis.
mutate(Time = cumsum(dur)) %>%
# Prefix each utterance with the last character of Speaker (a missing Speaker
# becomes the text "NA") and keep the utterances in order of appearance.
mutate(Utterance = paste0(sub(".*(.)$", "\\1",Speaker), ": ", Utterance),
Utterance = factor(Utterance, levels = unique(Utterance))) %>%
ggplot(aes(x = Time, y = log2(intpl),
group = Event_by,
colour = Event_by)) +
geom_line()+
# Linear trend line per Event_by group, always drawn in red
geom_smooth(method = 'lm', color = "red", formula = y~x)+
# One panel per utterance, stacked in a single column with independent x axes
facet_wrap(~ Utterance, ncol = 1, scales= "free_x")
Half successful because the line plots and trend lines are side-by-side, as if in three columns, whereas they should be in rows, one below the other - how can that be achieved?
Reproducible data:
structure(list(Sequ = c(2L, 2L, 2L), Speaker = c("ID16.A", NA,
"ID16.B"), Utterance = c("cool >what part?<", "(0.228)", "u:m Tennessee="
), A_intpl = c("31.4478615210995,31.5797510648522,31.7143985369445,31.651083739602,31.5806035086034,36.8956763912703,36.2882129597292,35.2124499461012,34.1366869324732,34.1366869324732,32.1927035724058,30.2487202123383,28.3047368522709,26.3607534922035,30.5278334848495,30.5919390424853,30.8898529369568,31.578968913188,31.9011198738002,32.1543265113196,31.9708002079533,31.966536408565,31.8762658607759,31.8994741472105,31.4215913971938,32.1510578328563,31.7863350712876,32.4685052625667,31.7422271490296,32.3286054977263,31.9998974949481,32.5177992323864,32.4727499785435,32.9310888953766,32.7592010033585,33.2231711877427,33.1593949301066,33.2432973964816,33.2569729073414,33.492144800249,33.317650964723,33.4835787832119,33.2377190454279,32.9200836384356,32.9684568771567,32.6400987016883,27.5447101464944,29.3948945479171,35.3449171857603,33.5932932239592,31.8416692621581,30.0900453003569,32.7850431084597,32.7589003618266,32.8365550655013,32.386716057622,32.8420792704881,32.6909995562489,32.6269434402016,32.7370944106334,32.7529759209752,32.6528826975113,32.3663573764448,32.7326853004792,32.6930038462418,32.8975978772676,33.1752899475416,33.2034433355001,33.0667431432803,32.6322933080614,33.2503168843178,32.7573598713719",
"32.7573598713719,32.7531704791313,32.7366130631104,32.918942216354,32.8309939530596,32.3856893430525,32.5368873543441,32.5628510484821,32.5628510484821,32.5628510484821,32.5506564332008,32.7477119716583,32.3458470743288,32.0575260428013",
"32.0575260428013,32.1628824338111,32.0093334061923,32.1461460586991,31.9080762250966,31.9469105074833,31.7431187667232,31.7194255656503,31.7394296413187,31.8594986292975,31.7498243274746,31.9069142374258,32.0835520942767,31.6257067057109,31.757232379438,31.9036689124911,32.1319749301918,31.7203280774998,31.7877137245706,32.3030946636177,32.2800139298454,32.164646135728,32.3636504940227,32.5657818936495,32.3859453482697,32.4797898358193,32.5319835105237,32.92233491509,32.8240561109448,32.664496027779,33.1835064752029,33.0366413969703,33.0406288190821,33.3232964677672,33.2206260057731,33.1537134269402,33.2783471506207,33.2933281566788,33.5322350394609,33.3815736723684,33.7905544185063,33.6143820666896,33.7490659591585,33.7260102344634,34.0721931066557,34.0455026427054,34.3735788774521,34.2888420421073,34.3913721165542,34.5982135545306,34.4417202731001,34.6586347152449,31.1590521215434,31.3276405983897,28.2379253186548,31.133030931336,34.0715906921349,35.8967950760285,35.9334551147377,35.8565504335515,35.7446081905229,35.6300325834155,35.8390086948751,35.9711743270411,36.0029493274176,35.8891056768339"
), A_dur = c("10.5,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,0.5",
"15.5,17,17,16,17,17,16,17,17,16,17,17,16,12.5", "4.5,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,5.5"
), B_intpl = c("32.8699328424689,32.8154348109057,32.5454364786882,32.408257038977,32.5304564519672,32.3270203236281,31.9233218634346,32.0166346064182,31.7360745988363,31.7546527359571,31.8603220354065,31.6520061326962,31.5603191463274,31.3357561466519,31.0976090032219,31.1405090978825,31.1697180784961,31.0863999545386,31.3126984044729,30.580776446803,30.7137016246273,31.0801914571091,31.2343922096768,31.2749857511594,31.3488604642844,30.9327390960718,31.0750482778561,31.1849119826023,31.4180114886183,31.5284273181104,31.147361398529,31.1128597713973,31.5551385744611,31.7479939892741,31.5890352680344,31.5470790538009,31.5427330200078,31.3901913024084,31.5423214446953,31.4814325586741,31.4937336232021,31.3483738841556,31.2516462059018,31.2233881922543,31.2572951780583,31.0087226975291,31.1197589042273,31.053748381687,30.8202174718598,30.845143129195,30.8727194789634,30.4231467151428,30.7254093759809,30.2757746547116,30.6047530953025,29.6835591414008,28.257421076205,29.4634886416064,29.183064807185,28.6935506287734,29.3989017421637,30.8936090542518,30.6884831327852,30.805770713392,30.6938909098627,30.8317757801268,30.8509115577427,30.6836198471168,30.7979978629801,31.0260101704105,30.6248844591805,30.8346900656087",
"30.8346900656087,30.9826158466835,29.814086001996,29.7839590794955,30.7928804535206,31.1589874726521,31.0547403039501,31.2268131145794,31.155503802286,31.3036925274762,31.4782621660348,31.0928322383151,31.589958621025,29.9582510795225",
"29.9582510795225,29.9796434055214,29.9405638729798,30.2602098442174,30.5011865525849,30.6753859842987,28.9331380886365,30.7736467776919,30.8457967803438,30.843630408183,30.8767570425033,30.9178344980247,30.734598946287,30.8877440413271,30.9225051837881,30.9534076039184,31.0172861192043,30.9371712793451,30.9806052132295,31.0593603717961,31.1156928565737,30.4713263393479,26.028518302418,28.1426546887905,29.4308434671559,30.7190322455213,31.2289674937063,31.7389027418913,32.2488379900763,32.7587732382613,33.2687084864463,33.7786437346312,34.2885789828162,34.7985142310012,35.3084494791862,35.8183847273712,36.3283199755562,36.8382552237412,37.3481904719262,37.8581257201112,38.3680609682962,25.5986933949893,29.7968031963901,30.5336819967028,30.1876589408847,30.4260367500101,30.2997107671214,30.3429716412578,30.3537316791924,30.4111899964144,30.7293520851914,30.7778983966343,30.9712137067708,30.9072589183658,31.0696990205164,30.5713926084448,31.3458855877875,31.4169903025083,31.5148974986093,31.5972499257413,31.2293401943969,31.2033325602348,31.1657434266985,30.6784877073261,30.6991365599664,30.6763195188897"
), B_dur = c("9.5,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,2.5",
"14.5,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,13.4999999999854",
"3.5,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,7.5"
), C_intpl = c("58.3368399069697,58.249224089011,59.5198368051218,58.8722012497097,58.4418996252205,58.5849059154389,59.2752163985494,52.8407480422202,51.6276603912397,48.0255346632529,44.753541512539,41.4815483618252,38.2095552111114,34.9375620603975,31.6655689096837,28.3935757589698,25.121582608256,19.4712933827274,22.0108873782783,24.5504813738291,24.8441573376901,24.6902151101703,24.4029572181118,24.9753161974674,24.8664406826514,24.8486668451201,25.1137001504163,25.1142578332509,25.4902077628339,25.4075561268027,25.6622548410237,61.2421678149908,25.1600975771354,25.6667198263373,25.442560744158,25.8736383423437,25.5859074180431,24.7860400673889,24.4337707697216,24.3214953242744,23.915753514736,23.7363185577661,23.7186569801299,23.4313514771952,23.5730151254578,62.5124513171595,23.3260531660862,23.4498217326665,23.2145314844252,57.5586745434594,63.4646233226955,23.0706406704345,23.3318690599491,62.044649715831,62.2720656330432,22.2532276715887,62.7059140614625,22.9511208849958,22.5603175709988,23.3456453893988,63.2523901625561,60.6655429980934,60.2358824325868,59.957910796633,57.3999702562457,54.8277282980263,43.0269305132552,31.2261327284841,19.425334943713,22.7319906068577,26.0386462700023",
"26.0386462700023,29.345301933147,32.6519575962917,35.9586132594364,48.3773995023798,60.7961857453232,49.4980424442242,55.9907960862667,57.2956837917999,58.1409925994177,59.025022056064,60.0098263540792,60.4028460580062,61.2629030450653,55.9298614021542",
"55.9298614021542,55.3877180252389,61.3547152702855,61.7847919095391,56.2457623439544,62.5477315546977,62.3078007189967,62.4272469013149,57.6479672147315,62.9844338801191,58.0081708266629,63.3872796098875,59.0138830718112,58.0612924481098,58.38680047729,58.687179350318,63.8724230039733,63.4126777597892,63.6865154626743,63.5670658627636,63.4496590540706,63.7595297692908,58.9069708176601,63.4547681163061,64.3198376700797,63.415319961042,64.0985879957056,64.1201809531605,63.677902665454,64.1934303628317,64.4682003346273,64.2868853545462,24.8444135816353,64.1579626357752,63.8897139146875,58.5472675827292,64.5784992977498,64.0848591719068,63.8841268679761,64.2901359712354,64.395692486112,64.5425896391638,64.8060565909917,64.3618830026368,64.7088481705444,64.5005944199885,64.5540289192148,64.7408010459365,63.378880767685,63.3415589069662,63.5362700331647,63.5924807719723,63.575801461932,63.6799360982113,64.0041021410894,64.3144923757986,63.8692943755376,63.8594574363473,64.2731841085802,63.3314657812309,64.2758880216293,64.1011768977101,64.0261661917799,64.2865302330478,63.724697791255,64.1202175712152"
), C_dur = c("14,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,14",
"3,17,16,17,17,16,17,17,16,17,17,16,17,17,8", "8,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,2"
)), row.names = c(NA, -3L), groups = structure(list(Sequ = 2L,
.rows = structure(list(1:3), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -1L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
One possible solution uses the grid.arrange() function from the gridExtra package (together with grid).
I built a separate chart for each series and utterance and then combined them into a single arranged figure.
# Long-format table behind the charts: one row per sampled value, with the
# running time (Time) and a labelled, ordered Utterance factor.
df1 <- df0 %>%
  pivot_longer(
    cols = contains("_"),
    names_to = c("Event_by", ".value"),
    names_pattern = "^(.*)_([^_]+$)"
  ) %>%
  separate_rows(c(intpl, dur), sep = ",", convert = TRUE) %>%
  mutate(
    Time = cumsum(dur),
    Utterance = paste0(sub(".*(.)$", "\\1", Speaker), ": ", Utterance),
    Utterance = factor(Utterance, levels = unique(Utterance))
  )
Create the chart objects:
# Each library() call needs its own line (or a `;` between them).
library(gridExtra)
library(grid)

# Build one chart per series (Event_by) and utterance and keep them in a named
# list instead of assign()-ing nine separate objects into the environment.
series <- unique(df1$Event_by)
utterances <- levels(df1$Utterance)
# expand.grid() varies its first column fastest, so the panels are ordered
# series by series, with the utterances in order of occurrence inside each.
panels <- expand.grid(utterance = utterances,
                      series = series,
                      stringsAsFactors = FALSE)

plots <- Map(
  function(s, u) {
    ggplot(data = df1[df1$Event_by == s & df1$Utterance == u, ],
           aes(x = Time, y = log2(intpl))) +
      geom_line() +
      geom_smooth(method = "lm", color = "red", formula = y ~ x) +
      # Label each panel, since the panels are no longer named objects
      ggtitle(paste0(s, " / ", u))
  },
  panels$series,
  panels$utterance
)
names(plots) <- paste0(panels$series, panels$utterance)

# Create the gridded chart: one row per series, one column per utterance.
grid.arrange(grobs = plots, nrow = length(series))
Although i think there should be better solution for that.
You can also explore the articles below on arranging plots:
http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/81-ggplot2-easy-way-to-mix-multiple-graphs-on-the-same-page/
https://ggplot2-book.org/facet.html
Note also that no theming has been added to my solution.

Comparing "Unlimited" value to numerical values in ggplot

I am trying to make a visual comparison between an input vector and my database. However, the input vector or the database may contain the value "UL", which stands for an unlimited (infinite) amount. Think of it as your unlimited voice plan, with which you can make an unlimited number of calls.
Here is the code I have used to try to make a visual comparison between "UL" and other numerical values.
# d is the database data.frame, with which we want to compare the input vector
d = structure(list(Type = c("H1", "H2", "H3"),
P1 = c(2000L, 1500L, 1000L),
P2 = c(60L, 40L, 20L),
P3 = c("UL", 3000L, 2000L)),
class = "data.frame",
row.names = c(NA, -3L))
# d2 is the input vector
d2 = structure(list(Type = "New_offre", P1 = 1200L, P2 = "UL", P3 = 2000),
class = "data.frame",
row.names = c(NA, -1L))
# Replace "UL" (unlimited) entries in P2/P3 by twice the largest numeric value
# of that column, then plot every database value against the input value.
# Stack the database rows (d) and the input row (d2) so both share one scale.
y1 <-rbind(d,d2)
y <- y1
# P3: if any "UL" is present, take the largest non-"UL" value and store twice
# that value in place of every "UL".
if("UL" %in% y$P3){
max_P3_scale <- max(as.numeric(y[y$P3!="UL","P3"]))
y[y$P3=="UL","P3"]= 2*max_P3_scale
}
# P2: same replacement as for P3.
if("UL" %in% y$P2){
max_P2_scale <- max(as.numeric(y[y$P2!="UL","P2"]))
y[y$P2=="UL","P2"]= 2*max_P2_scale
}
# Convert P1..P3 to numeric now that no "UL" text is left in P2/P3.
y <- transform(y,P1=as.numeric(P1),
P2=as.numeric(P2),
P3=as.numeric(P3))
# Split the stacked table back: the first nrow(d) rows are the database, the
# row after them is the input vector.
d <- y[1:nrow(d),]
d2<- y[nrow(d)+1,]
# Long form: one row per Type and variable (var1) holding the database value.
d %>% gather(var1, current, -Type) %>%
# `new` is the input value for the same variable, looked up in d2 through a
# (row, column) index matrix: always row 1, column matched by name.
mutate(new = as.numeric(d2[cbind(rep(1, max(row_number())),
match(var1, names(d2)))]),
# Sign of (current - new) as a factor with the levels -1, 0, 1
slope = factor(sign(current - new), -1:1)) %>%
# Stack `current` and `new` into one value column (val) keyed by var2.
gather(var2, val, -Type, -var1, -slope) %>%
ggplot(aes(x = factor(var2,levels = c("new","current")), y = val, group = 1)) +
geom_point(aes(fill = var2), shape = 2,size=4) +
geom_line(aes(colour = slope)) +
# NOTE(review): presumably meant as -1 -> green, 0 -> green, 1 -> red; verify
# how the colours are assigned when some slope levels do not occur.
scale_colour_manual(values = c("green","green", "red")) +
# One panel per Type/variable combination, each with its own axes
facet_wrap(Type ~ var1,scales = "free")
My first attempt was to find if there is "UL" values in P2 and P3. If yes, I try to find the maximum numeric value other than "UL". Then, I replace all "UL" occurrences by this maximum value* 2, so the graphical representations will always show that "UL" is maximum.
The issue with this is that I am not able to differentiate between actual values and "UL" ones.
Here is how my plot looks like using this solution

R barplot cumulative - x date, y freq

I'm trying to recreate this kind of plot in R but I'm not very successful.
Where X = date and Y = frequency of a discrete variable, cumulative on one bar.
Also I'm trying to put it in a function so it would be easier to use this kind of plot for different variables.
Link to the plot image <---
I'd appreciate any help!
Data example:
Excel plot example <---
Purchase_date Phone
2014-10-23 Sony
2014-10-23 Apple
2014-10-23 Nokia
2014-10-23 Nokia
2014-10-24 NA
2014-10-24 Nokia
2014-10-24 Sony
2014-10-24 Other
2014-10-24 Apple
2014-10-25 Sony
2014-10-25 NA
2014-10-25 Apple
2014-10-25 Sony
2014-10-25 Nokia
Also
I have something like this but it's definitely far from universal method for different variables:
# Count purchases per phone type and month, turn each type into its own time
# series and draw stacked bar charts (absolute counts, then shares).
# Keep the years 2014-2017, convert the dates to year-month and count the rows
# per phone type and month.
base_table %>%
filter(year(as.Date(BUY_DATE)) >= 2014, year(as.Date(BUY_DATE)) <= 2017) %>%
mutate(BUY_DATE = as.yearmon(as.Date(BUY_DATE))) %>%
group_by(PHONETYPE, BUY_DATE) %>% summarise(n = n()) -> applPerTypeAndMonth
# Frequency table of the phone types in the summarised data
applPerTypeAndMonth %>% pull(PHONETYPE) %>% table()
# One xts series of monthly counts per phone type; `x` is reused as a
# temporary for each filter. type1 holds the rows with an empty PHONETYPE.
filter(applPerTypeAndMonth, PHONETYPE == '') -> x
xts(x$n, order.by = x$BUY_DATE) -> type1
filter(applPerTypeAndMonth, PHONETYPE == 'NOKIA') -> x
xts(x$n, order.by = x$BUY_DATE) -> type2
filter(applPerTypeAndMonth, PHONETYPE == 'APPLE') -> x
xts(x$n, order.by = x$BUY_DATE) -> type3
filter(applPerTypeAndMonth, PHONETYPE == 'SONY') -> x
xts(x$n, order.by = x$BUY_DATE) -> type4
filter(applPerTypeAndMonth, PHONETYPE == 'HUAWEI') -> x
xts(x$n, order.by = x$BUY_DATE) -> type5
filter(applPerTypeAndMonth, PHONETYPE == 'LG') -> x
xts(x$n, order.by = x$BUY_DATE) -> type6
filter(applPerTypeAndMonth, PHONETYPE == 'OTHER') -> x
xts(x$n, order.by = x$BUY_DATE) -> type7
# Combine the seven series into one object and replace the gaps (NA) with 0.
merge(type1,type2,type3,type4,type5,type6,type7) -> types
na.fill(types, fill = 0.0) -> types
# Bar chart of the merged series, seven colours
barplot(types, col = rainbow(7))
# Same chart after dividing every row by its row sum (shares per month)
types %>% apply(1, function(x) x / sum(x)) %>% barplot(col = rainbow(7))
# legend("topright", legend = names(types), fill = rainbow(7))
Using data.table first create a summary table that details the frequency of each phone by each day.
# `purchases` is assumed to be a data.table with the columns Purchase_date
# and Phone. Count the purchases of each phone on each day (the closing `]`
# was missing here).
summary <- purchases[, list(Purchases = .N), by = list(Purchase_date, Phone)]
# Then, within each phone type, order by date and add a cumulative purchases
# variable. A grouped `:=` does this in place, without the split / lapply /
# rbindlist round trip. The date column is named Purchase_date throughout.
# Unlike split(), grouping keeps rows with a missing Phone as their own group.
setorder(summary, Phone, Purchase_date)
summary[, CumulativePurchases := cumsum(Purchases), by = Phone]
# The result is a single table again, so it can go straight into ggplot.
plotted <- ggplot(summary,
                  aes(x = Purchase_date,
                      y = CumulativePurchases,
                      fill = Phone)) +
  geom_col()
something along the lines of this,
dta <- structure(list(Purchase_date = structure(c(1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L), .Label = c("2014-10-23",
"2014-10-24", "2014-10-25"), class = "factor"), Phone = structure(c(4L,
1L, 2L, 2L, NA, 2L, 4L, 3L, 1L, 4L, NA, 1L, 4L, 2L), .Label = c("Apple",
"Nokia", "Other", "Sony"), class = "factor")), .Names = c("Purchase_date",
"Phone"), class = "data.frame", row.names = c(NA, -14L))
# Stacked bar chart of the number of purchases per date, filled by phone.
# install.packages(c("ggplot2"), dependencies = TRUE)
library(ggplot2)
# Purchase_date on the x axis; geom_bar() counts the rows for each date.
g <- ggplot(dta, aes(Purchase_date))
# Split every bar by Phone (rows with a missing Phone are part of the data).
g + geom_bar(aes(fill = Phone))
updated, here's the plot wrapped in a function,
#' Stacked bar chart of row counts per x value, filled by a second variable.
#'
#' @param df Data frame to plot.
#' @param x_var Name of the column for the x axis (a string). Defaults to
#'   "Purchase_date", so existing calls keep working.
#' @param fill_var Name of the column used to split and colour the bars (a
#'   string). Defaults to "Phone".
#' @return A ggplot object.
function.name <- function(df, x_var = "Purchase_date", fill_var = "Phone") {
  stopifnot(
    is.data.frame(df),
    x_var %in% names(df),
    fill_var %in% names(df)
  )
  # Namespaced calls instead of require(): a missing ggplot2 now fails with a
  # clear error rather than require() returning FALSE and the next line
  # failing.
  ggplot2::ggplot(
    df,
    ggplot2::aes(x = .data[[x_var]], fill = .data[[fill_var]])
  ) +
    ggplot2::geom_bar() +
    # Label the axes with the column names, not with the `.data[[...]]` code
    ggplot2::labs(x = x_var, fill = fill_var)
}
function.name(dta)
I'll obviously recommend you take a look at this site to learn how to label, color, reorder, etc.
# load packages
library(tidyverse)
library(lubridate)
# create a dataframe from your data
# tribble() replaces the deprecated frame_data(). Missing phones are real NA
# values everywhere; the string "NA" would be plotted as a phone called "NA".
df <- tribble(
  ~Purchase_date, ~Phone,
  "2014-10-23", "Sony",
  "2014-10-23", "Apple",
  "2014-10-23", "Nokia",
  "2014-10-23", "Nokia",
  "2014-10-24", NA_character_,
  "2014-10-24", "Nokia",
  "2014-10-24", "Sony",
  "2014-10-24", "Other",
  "2014-10-24", "Apple",
  "2014-10-25", "Sony",
  "2014-10-25", NA_character_,
  "2014-10-25", "Apple",
  "2014-10-25", "Sony",
  "2014-10-25", "Nokia"
)
# make dates dates, if you want to
df <- df %>%
  mutate(Purchase_date = as_date(Purchase_date))
# and plot it
df %>%
  ggplot(aes(Purchase_date, fill = Phone)) +
  geom_bar()
ggplot() and geom_bar() are functions, and they do what you want (and actually a whole lot more if desired). How to build such plots is covered, for example, in the R Graphics Cookbook, which is a helpful reference whenever you need it.

Resources