Related
I'm very very new to R, so thanks in advance for the help
I did the lda analysis on my dataset (tme.lda), in the console I get all my results with LD1, LD2, LD3, LD4, LD5 and LD6 but when I try to plot it I tried a lot of different methods but I get every kind of error: Error LD1 object not found - Error in fortify - Error in as.data.frame just to say a few.
This is my dataset:
dput(head(tme.lda))
structure(list(Word = structure(1:6, levels = c("bene", "bile",
"casa", "come", "posso", "tutto", "vero"), class = "factor"),
f0min = c(184L, 193L, 189L, 199L, 175L, 144L), f0max = c(229L,
226L, 198L, 225L, 192L, 188L), F1 = c(600L, 347L, 980L, 531L,
550L, 432L), F2 = c(2406L, 2695L, 1759L, 997L, 996L, 1901L
), F4 = c(4125L, 4403L, 3837L, 3988L, 3909L, 4171L), max_F0 = c(143L,
130L, 124L, 133L, 123L, 120L)), row.names = c(NA, 6L), class = "data.frame")
And this is the code I wrote, how can I get from here the scatterplot LD1 vs LD2?
View(tme.lda)
#lDFA analysis with "WORD" as grouping factor
tme.lda<-cbind(tme[,5],tme.lda[,1:6])
names(tme.lda)
#> [1] "tme[, 5]" "f0min" "f0max" "F1" "F2" "F4" "max_F0"
names(tme.lda)=c("Word","f0min","f0max","F1","F2","F4","max_F0")
names(tme.lda)
#> [1] "Word" "f0min" "f0max" "F1" "F2" "F4" "max_F0"
library(MASS)
lda(Word~f0min+f0max+F1+F2+F4+max_F0,data = tme.lda)
I tried this:
plot(Word, panel = tme.lda, abbrev = FALSE, xlab = "LD1", ylab = "LD2")
plot(x, panel = panel.lda, cex = 0.7, dimen=2, abbrev = FALSE, xlab = "LD1", ylab = "LD2")
ggplot(Word, panel = tme.lda, cex = 0.7, dimen=2, xlab = "LD1", ylab = "LD2")
ggplot2::aes(LD1,LD2) (Word, panel = tme.lda, cex = 0.7, dimen=2, xlab = "LD1", ylab = "LD2")
plot.lda<-lda(Word~f0min+f0max+F1+F2+F4+max_F0,data = tme.lda)
ggp <- ggplot(plot.lda, aes(x = LD1, y=LD2)) +
geom_point(mapping = aes(colour=Word)) +
ggtitle("LD1 Vs. LD2")
ggp <- ggplot(plot.lda, aes(x = LD1, y=LD2))
Just to say a few things I tried
I am currently trying to plot a 3D bubble graph with 2 (then later i will try with 3) axes, as in excel, but on R (here is an example of the 3D bubble plot i am trying to plot) :
https://fr.extendoffice.com/documents/excel/2017-excel-create-bubble-chart.html
library(ggplot2)
library(scales)
p <- ggplot(plot_3D, aes(x = var_2020_2021_valeur, y = var_2020_2021_CA)) +
geom_point(aes(color = Specialite, size = CA.annee.N), alpha = 0.5) +
scale_color_manual(values = c("#00AFBB", "#E7B800", "#FC4E07",
"#FFB5C5", "#BF87B3", "#7F5AA2", "#3F2D91", "#000080",
"#2468A0", "#a32cc4", "#9073db", "#c51f5d",
"#5800ff", "#4455ff", "#48ff50")) +
scale_size(range = c(0.5, 12)) + # Réglage de la plage de tailles des points
ylim(-100, 100) +
xlim (-100, 100) +
geom_hline(yintercept=0) +
geom_vline(xintercept=0) +
scale_y_continuous(labels = percent) +
scale_x_continuous(labels = percent)
I get the following message of error :
"Scale for 'y' is already present. Adding another scale for 'y',
which
will replace the existing scale.
Scale for 'x' is already present. Adding another scale for 'x',
which
will replace the existing scale."
Even when i abandon the two last lines of code, it doesn't work...
Here is the structure of my dataset plot_3D :
structure(list(Specialite = c("ANESTHESIE REANIMATION", "Autres",
"CHIRURGIE GENERALE ET VISCERALE", "CHIRURGIE PLASTIQUE", "GASTRO ENTEROLOGIE",
"GYNECOLOGIE OBSTETRIQUE", "IMAGERIE", "MAXILLO STOMATO", "MEDECINE GENERALE et
INTERNE",
"OPHTALMOLOGIE", "ORL", "ORTHOPEDIE", "PNEUMOLOGIE", "URGENTISTE",
"UROLOGIE"), CA.annee.N = c(64310L, 25298L, 1205537L, 42020L,
3694964L, 344370L, 3454L, 588033L, 228439L, 1849804L, 27358L,
2739286L, 0L, 916L, 432907L), Nombre.de.sejours.annee.N = c(171L,
34L, 1504L, 56L, 9224L, 682L, 9L, 1103L, 888L, 2276L, 57L, 4068L,
0L, 2L, 720L), CA.annee.N.1 = c(50135L, 454L, 790559L, 15531L,
2644858L, 304242L, 3026L, 402195L, 459813L, 1308933L, 20597L,
2269691L, 0L, 3901L, 318352L), Nombre.de.sejours.annee.N.1 = c(150L,
1L, 1067L, 25L, 7276L, 627L, 9L, 802L, 1918L, 1693L, 43L, 3519L,
0L, 7L, 547L), CA.annee.N.2 = c(48583L, 453L, 941610L, 16675L,
3140507L, 385813L, 2950L, 642017L, 691982L, 1704005L, 51602L,
2261368L, 7145L, 4648L, 308169L), Nombre.de.sejours.annee.N.2 = c(154L,
1L, 1264L, 28L, 8317L, 831L, 8L, 1286L, 3231L, 2269L, 127L, 3610L,
26L, 10L, 551L), CA_par_sejour_N = c(376.081871345029, 744.058823529412,
801.553856382979, 750.357142857143, 400.581526452732, 504.941348973607,
383.777777777778, 533.121486854034, 257.251126126126, 812.743409490334,
479.964912280702, 673.374139626352, NaN, 458, 601.259722222222
), CA_par_sejour_N1 = c(334.233333333333, 454, 740.917525773196,
621.24, 363.504398020891, 485.234449760766, 336.222222222222,
501.490024937656, 239.735662148071, 773.14412285883, 479, 644.981813015061,
NaN, 557.285714285714, 581.99634369287), CA_par_sejour_N2 = c(315.474025974026,
453, 744.944620253165, 595.535714285714, 377.600937838163, 464.275571600481,
368.75, 499.235614307932, 214.169606932838, 750.993829881005,
406.314960629921, 626.417728531856, 274.807692307692, 464.8,
559.290381125227), var_2020_2021_valeur = c(0.125207553640259,
0.638896087069189, 0.0818395145215454, 0.207837780659878, 0.101999119223065,
0.0406131494220115, 0.141440846001322, 0.063074957314078, 0.0730615704860669,
0.051218505658529, 0.00201443064864667, 0.0440203522616658, NaN,
-0.178159446295822, 0.0330987964754596), var_2020_2021_CA = c(0.282736611149895,
54.7224669603524, 0.524917178856986, 1.70555662867813, 0.397036816343259,
0.131895004634469, 0.141440846001322, 0.462059448774848, -0.503191514811456,
0.413215191304673, 0.328251687138904, 0.206898207729598, NaN,
-0.765188413227378, 0.35983753832236)), class = "data.frame", row.names = c(NA,
-15L))
Could anyone help ?
You should remove xlim and ylim because they can be used when you don't specify anything else on your axis. So your could add the limits to both scale_*_continuous in the limits arguments like this:
library(ggplot2)
library(scales)
p <- ggplot(plot_3D, aes(x = var_2020_2021_valeur, y = var_2020_2021_CA)) +
geom_point(aes(color = Specialite, size = CA.annee.N), alpha = 0.5) +
scale_color_manual(values = c("#00AFBB", "#E7B800", "#FC4E07",
"#FFB5C5", "#BF87B3", "#7F5AA2", "#3F2D91", "#000080",
"#2468A0", "#a32cc4", "#9073db", "#c51f5d",
"#5800ff", "#4455ff", "#48ff50")) +
scale_size(range = c(0.5, 12)) + # Réglage de la plage de tailles des points
geom_hline(yintercept=0) +
geom_vline(xintercept=0) +
scale_y_continuous(labels = percent, limits = c(-100, 100)) +
scale_x_continuous(labels = percent, limits = c(-100, 100))
p
#> Warning: Removed 1 rows containing missing values (geom_point).
Created on 2022-07-12 by the reprex package (v2.0.1)
I'm trying to add date labels to each individual bar that i've plotted. The data looks like this:
structure(list(lakeID = c("WE1", "WE1", "WE1", "WE1", "WE1",
"WE1", "WE1", "WE1", "WE1", "WE1"), depth = c("hypolimnion",
"hypolimnion", "hypolimnion", "hypolimnion", "hypolimnion", "hypolimnion",
"hypolimnion", "hypolimnion", "hypolimnion", "hypolimnion"),
date = structure(c(1505779200, 1529366400, 1534723200, 1537142400,
1559088000, 1590624000, 1592352000, 1596153600, 1597881600,
1599696000), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
year = c(2017L, 2018L, 2018L, 2018L, 2019L, 2020L, 2020L,
2020L, 2020L, 2020L), BACI = c("Control", "Control", "Control",
"Control", "Impact", "Impact", "Impact", "Impact", "Impact",
"Impact"), N = c(2L, 2L, 1L, 1L, 7L, 56L, 10L, 58L, 3L, 2L
), meanlengths = c(1.44092870850016, 1.90413253399558, 2.08356379729054,
1.24630120343851, 6.39410976055929, 6.07576588787627, 5.63215818993585,
6.00116183368038, 5.42957068546558, 3.62096915399567), sd = c(0.261562571663927,
0.237860661401758, NA, NA, 0.454442531596122, 0.703831022329251,
0.465831548164042, 1.1448481521142, 1.19135665831808, 1.41507359493585
), SE = c(0.184952668128155, 0.1681928866547, NA, NA, 0.171763131967707,
0.0940533765621035, 0.14730886981608, 0.150325927020161,
0.687830087380796, 1.00060813485716), Upper2SE = c(1.80343593803135,
2.23379059183879, NA, NA, 6.73076549921599, 6.26011050593799,
5.92088357477536, 6.2958006506399, 6.77771765673194, 5.58216109831572
), Lower2SE = c(1.07842147896898, 1.57447447615237, NA, NA,
6.05745402190258, 5.89142126981455, 5.34343280509633, 5.70652301672087,
4.08142371419922, 1.65977720967563)), row.names = 22:31, class = "data.frame")
I've attempted to plot the dates with this code but have not been able to get them to show:
xxx <- barplot(ChiSum$meanlength, axisnames=TRUE, beside = TRUE, names.arg=c(ChiSum$date), col=ifelse(ChiSum$BACI=="Control",'white','gray'), ylim = c(-0.01,8), ylab = "Mean body length",
xlab="Date",xaxt="n")
arrows(xxx,ChiSum$Lower2SE,xxx,ChiSum$Upper2SE, code=3, length=0.02, angle = 90,
col=ifelse(ChiSum$BACI=="Control",'black','black'))
axis(side=1,at=XXX[c(1,20)], labels=format(ChiSum$date))
How about this:
bar_mids <- cumsum(c(.7, rep(1.2,9)))
par(mar=c(8,4,2,1)+.1)
xxx <- barplot(ChiSum$meanlength, axisnames=TRUE, beside = TRUE, names.arg=c(ChiSum$date), col=ifelse(ChiSum$BACI=="Control",'white','gray'), ylim = c(-0.01,8), ylab = "Mean body length",
xlab="",xaxt="n")
arrows(xxx,ChiSum$Lower2SE,xxx,ChiSum$Upper2SE, code=3, length=0.02, angle = 90,
col=ifelse(ChiSum$BACI=="Control",'black','black'))
axis(side=1,at=bar_mids, labels=format(ChiSum$date), las=2)
The key is to know where the mid-points of the bars are. The default settings are that the bars are one-unit wide with a space (before every bar) of 0.2. So the mid-point of the first bar is at 0.7. The mid-point of the second bar is 0.7 + 1.2, the mid-point of the third is 0.7 + 1.2*2, etc... These mid-points should be specified as at and then you can use the dates as the labels. To make them plot nicely, you can use las=2 to turn the labels perpendicular to the axis.
You could also do it with ggplot2:
ChiSum %>%
arrange(date) %>%
mutate(obs = 1:n()) %>%
ggplot(aes(x=obs, y=meanlengths, ymin = Lower2SE, ymax=Upper2SE, fill=BACI)) +
geom_bar(stat="identity", col="black") +
geom_errorbar(width=.15) +
scale_x_continuous(breaks=1:10, labels=ChiSum$date) +
theme_classic() +
theme(axis.text.x = element_text(angle=45, hjust=1),
legend.position = c(.15, .85)) +
labs(x="", y="Mean Length") +
scale_fill_manual(values=c("white", "gray50"))
I have been working on this for some time, and am re-posting this hoping to simplify the definition of the problem and to bring some clarity from feedback of my previous attempt. I am able to label each individual column value, but not able to put the code together necessary to sum the total. The examples I have looked at never work the way I try to put them together, for example with goup_by, or summarize etc.. I would like to only sum the values of "Confirmed Cases", and not show the other column values as with many c("x", "Y", ... "data"), it becomes impossible to read.
Here is the data frame:
dput(COVID1[1:12, ])
structure(list(COUNTY = c("Antrim", "Antrim", "Antrim", "Charlevoix",
"Charlevoix", "Grand Traverse", "Grand Traverse", "Grand Traverse",
"Antrim", "Grand Traverse", "Grand Traverse", "Grand Traverse"
), Date = structure(c(18453, 18456, 18457, 18453, 18455, 18453,
18456, 18457, 18455, 18453, 18456, 18457), class = "Date"), CASE_STATUS = c("Confirmed",
"Confirmed", "Confirmed", "Confirmed", "Confirmed", "Confirmed",
"Confirmed", "Confirmed", "Probable", "Probable", "Probable",
"Probable"), Cases = c(1L, 1L, 2L, 1L, 3L, 2L, 2L, 1L, 1L, 1L,
1L, 1L)), row.names = c(NA, 12L), class = "data.frame")
Code:
ggplot(filter(COVID1, COUNTY %in% c("Antrim", "Charlevoix", "Grand Traverse"), Cases > 0)) +
geom_col(aes(x = Date, y = Cases, fill = CASE_STATUS), position = position_stack(reverse = TRUE), width = .88)+
geom_text(aes(x = Date, y = Cases, label = (Cases)), position = position_stack(reverse = TRUE), vjust = 1.5, size = 3, color = "white") +
scale_fill_manual(values = c('blue',"tomato"))+
scale_x_date(labels = date_format("%m/%d"), limits = as.Date(c('2020-07-09','today()')), breaks = "1 week")+
theme(axis.text.x = element_text(angle=0))+
labs(title = "Antrim - Grand Traverse - Charlevoix")
I'm not sure if I understood the question but I think you want to add the sum of the confirmed cases as labels. There might be a ggplot way of doing it but I think the most straightforward way is to make another dataset with your labels and feed it in.
date_labels <- filter(COVID1, COUNTY %in% c("Antrim", "Charlevoix", "Grand Traverse"), Cases > 0) %>% group_by(Date) %>% summarise(confirmed_cases = sum(Cases[CASE_STATUS == "Confirmed"]))
ggplot(filter(COVID1, COUNTY %in% c("Antrim", "Charlevoix", "Grand Traverse"), Cases > 0)) +
geom_col(aes(x = Date, y = Cases, fill = CASE_STATUS), position = position_stack(reverse = TRUE), width = .88)+
geom_text(data = date_labels, aes(x = Date, y = 1, label = confirmed_cases), position = position_stack(reverse = TRUE), vjust = 1.5, size = 3, color = "white") +
scale_fill_manual(values = c('blue',"tomato"))+
scale_x_date(labels = label_date("%m/%d"), limits = as.Date(c('2020-07-09','today()')), breaks = "1 week")+
theme(axis.text.x = element_text(angle=0))+
labs(title = "Antrim - Grand Traverse - Charlevoix")
Gives me this result:
My code was working fine until I added another geom_hline() layer to my plot. Now I keep getting this error and I don't understand what I'm doing wrong. I want to have two horizontal lines both red but one either dotted or dashed at the yintercept provided.
Error in `[[<-.data.frame`(`*tmp*`, v, value = c(1, 2)) :
replacement has 2 rows, data has 1
Code:
box_whisk_graph<-ggplot(data = box_fresh_chloride, aes(x = factor(year), y = val)) +
geom_boxplot(coef=10) +
geom_hline(aes(yintercept = 230,color="red"),size=1.3)+
geom_hline(aes(yintercept = 860,color="red"),size=1.3)+
scale_color_manual("",
values = c("red"="red","red"="red"),
labels=c("Freshwater Aquatic Life (chronic)\nCriteria for chloride = 230 mg/L","Freshwater Aquatic Life Criteria (acute) for chloride = 860 mg/L"),
guide=guide_legend(override.aes=list(linetype=c(1,2), lwd=c(1,0.5))))
dput of data:
structure(list(orgid = c("USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ",
"USGS-NJ", "USGS-NJ"), locid = c("USGS-01482500", "USGS-0146453250",
"USGS-01392150", "USGS-01411035", "USGS-01411466", "USGS-01411444"
), stdate = structure(c(16394, 16610, 16328, 16583, 16602, 16602
), class = "Date"), sttime = c("09:45:00", "11:00:00", "10:40:00",
"09:45:00", "12:00:00", "10:30:00"), charnam = c("Chloride",
"Chloride", "Chloride", "Chloride", "Chloride", "Chloride"),
val = c(23.6, 221, 144, 10.8, 10.5, 5.76), valunit = c("mg/l",
"mg/l", "mg/l", "mg/l", "mg/l", "mg/l"), swqs = c("FW2-NT",
"FW2-NT", "FW2-NT", "PL", "FW2-NT", "PL"), region = c("Lower Delaware",
"Lower Delaware", "Northeast", "Atlantic Coast", "Lower Delaware",
"Atlantic Coast"), WMA = c(17L, 20L, 4L, 15L, 17L, 16L),
year = c(2014, 2015, 2014, 2015, 2015, 2015)), .Names = c("orgid",
"locid", "stdate", "sttime", "charnam", "val", "valunit", "swqs",
"region", "WMA", "year"), row.names = c(NA, 6L), class = "data.frame")