I am trying to create this
but in doing so R says the following:
`In plot.xy(xy, type, ...) : NAs introduced by coercion`
I will attach my entire code below:
# Pull the columns of interest out of MoleRat as stand-alone vectors.
lnmass <- MoleRat$lnMass
lnenergy <- MoleRat$lnEnergy
caste <- MoleRat$caste
# Split the rows by caste: "lazy" rows vs "worker" rows.
infrequent <- MoleRat[caste == "lazy", ]
frequent <- MoleRat[caste == "worker", ]
# Fit one simple linear regression of lnEnergy on lnMass per caste.
lm.infrequent <- lm(lnEnergy ~ lnMass, data = infrequent) #, subset=caste=="lazy")
lm.frequent <- lm(lnEnergy ~ lnMass, data = frequent)
# NOTE(review): in the dput() output caste is a character vector, so
# as.numeric(caste) returns NA for every element; this is presumably what
# triggers the "NAs introduced by coercion" warning.
plot(lnmass, lnenergy, pch = as.numeric(caste), col = as.numeric(caste))
# Overlay the two fitted regression lines.
abline(lm.infrequent)
abline(lm.frequent)
Here is my data:
dput(MoleRat)
structure(list(caste = c("worker", "worker", "worker", "worker",
"worker", "worker", "worker", "worker", "worker", "worker", "worker",
"worker", "worker", "worker", "worker", "worker", "worker", "worker",
"worker", "worker", "worker", "lazy", "lazy", "lazy", "lazy",
"lazy", "lazy", "lazy", "lazy", "lazy", "lazy", "lazy", "lazy",
"lazy", "lazy"), lnMass = c(3.850147602, 3.988984047, 4.110873864,
4.17438727, 4.248495242, 4.262679877, 4.343805422, 4.48863637,
4.510859507, 3.951243719, 3.988984047, 4.158883083, 4.234106505,
4.276666119, 4.248495242, 4.465908119, 4.532599493, 4.510859507,
4.828313737, 4.753590191, 4.875197323, 4.382026635, 4.543294782,
4.912654886, 4.700480366, 4.700480366, 4.762173935, 4.859812404,
5.056245805, 5.262690189, 5.147494477, 5.087596335, 4.997212274,
4.875197323, 4.812184355), lnEnergy = c(3.688879454, 3.688879454,
3.688879454, 3.663561646, 3.871201011, 3.850147602, 3.931825633,
3.688879454, 3.951243719, 4.110873864, 4.189654742, 4.143134726,
4.262679877, 4.248495242, 4.510859507, 4.394449155, 4.219507705,
4.48863637, 4.644390899, 5.017279837, 5.043425117, 3.828641396,
4.143134726, 3.555348061, 4.060443011, 4.094344562, 4.304065093,
4.094344562, 4.418840608, 4.234106505, 4.49980967, 4.574710979,
4.532599493, 4.615120517, 4.48863637)), class = "data.frame", row.names = c(NA,
-35L))
Why not use ggplot ?
# Map the data-frame columns (lnMass, lnEnergy) rather than the stand-alone
# vectors lnmass / lnenergy, so the plot depends only on MoleRat itself.
# Points are coloured by caste, with one least-squares line per caste.
ggplot(MoleRat, aes(x = lnMass, y = lnEnergy, color = caste)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme_minimal()
You can do it with plot:
# Draw every observation first, then overlay each subset in red:
# filled symbols for `infrequent`, open symbols for `frequent`.
plot(lnEnergy ~ lnMass, data = MoleRat)
points(lnEnergy ~ lnMass, data = infrequent, pch = 19, col = "red")
points(lnEnergy ~ lnMass, data = frequent, col = "red")
# Add the fitted line of each per-caste model.
invisible(lapply(list(lm.infrequent, lm.frequent), abline))
or (easier) with ggplot:
library(ggplot2)
# Scatter plot coloured by caste with a separate least-squares line per caste.
ggplot(data = MoleRat,
       mapping = aes(x = lnMass, y = lnEnergy, colour = caste)) +
  geom_point(size = 2) +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  theme_classic() +
  xlab("ln(body mass)") +
  ylab("ln(daily energy expenditure)")
However, the image you posted is created by this other model:
lm(lnEnergy ~ lnMass + caste, data = MoleRat)
And based on that, that's the image you will get:
# Additive model: one common slope for lnMass plus a caste offset.
lm.graph <- lm(lnEnergy ~ lnMass + caste, data = MoleRat)
lmcoef <- coef(lm.graph)
# All observations, then each subset overlaid in red (filled / open symbols).
plot(lnEnergy ~ lnMass, data = MoleRat)
points(lnEnergy ~ lnMass, data = infrequent, pch = 19, col = "red")
points(lnEnergy ~ lnMass, data = frequent, col = "red")
# Two parallel lines: coefficient 1 is the intercept, coefficient 2 the shared
# slope, and coefficient 3 the shift added to the intercept for the second line.
abline(a = lmcoef[1], b = lmcoef[2])
abline(a = sum(lmcoef[c(1, 3)]), b = lmcoef[2])
And with ggplot:
# Store the fitted values of lm.graph next to the data, then draw the observed
# points and the fitted values as lines, both coloured by caste.
MoleRat[["prd"]] <- predict(lm.graph, newdata = MoleRat)
ggplot(data = MoleRat, mapping = aes(x = lnMass, colour = caste)) +
  geom_point(mapping = aes(y = lnEnergy), size = 2) +
  geom_line(mapping = aes(y = prd), size = 1) +
  theme_classic() +
  xlab("ln(body mass)") +
  ylab("ln(daily energy expenditure)")
Related
I create a line chart by plot() in R and I don't know how to add label at each line end. All I can find online is to by using ggplot() instead. Can anyone do that by plot()?
My table is "case_study1", and I assign each row to a vector as "V20xx" and plot it as a line chart by using plot(). (Since I just beginning using R so this is what I can do for plotting a line chart.)
# One numeric vector per year, taken row by row from case_study1.
# V2015 is plotted below but was never defined; it is assumed to live in
# row 1, following the pattern of the other rows (2014 in row 2, ..., 2007 in
# row 9) -- TODO confirm against the real table.
V2015 <- as.numeric(case_study1[1,])
V2014 <- as.numeric(case_study1[2,])
V2013 <- as.numeric(case_study1[3,])
V2012 <- as.numeric(case_study1[4,])
V2011 <- as.numeric(case_study1[5,])
V2010 <- as.numeric(case_study1[6,])
V2009 <- as.numeric(case_study1[7,])
V2008 <- as.numeric(case_study1[8,])
V2007 <- as.numeric(case_study1[9,])
# The first series sets up the axes and title; the others are added with lines().
plot(V2015, type = "o", col = "red", xlab = "Month", ylab = "Temperature", main = "Temperature chart")
lines(V2014, type = "o", col = "blue")
lines(V2013, type = "o", col = "yellow")
lines(V2012, type = "o", col = "chartreuse")
lines(V2011, type = "o", col = "chocolate")
lines(V2010, type = "o", col = "brown")
lines(V2009, type = "o", col = "burlywood")
lines(V2008, type = "o", col = "coral")
lines(V2007, type = "o", col = "cyan")
Sample data,
case_study1 <- structure(c(18.1265311110765, 18.1445806138217, 20.29444403667, 19.3943562610075, 18.4467029515654, 19.1106318701059, 18.913005082868, 20.1609526062384, 19.4320843210444, 19.3427841216326, 17.5637686550617, 21.8841708265245, 18.4573832275346, 16.7219386268407, 17.2237817412242, 18.5469768662006, 18.4829838024452, 20.7631137836725, 31.0647214353085, 29.5135084576905, 32.6102900002152, 29.3641985105351, 25.1434186547995, 27.994190084748, 27.0048756664619, 27.5333320870996, 31.3106615673751, 41.3828093148768, 40.9396879393607, 40.9130710875615, 37.9195794519037, 35.6223422279581, 39.8471474340186, 35.4306885786355, 38.9832761064172, 39.5527733126655, 50.0248330328614, 49.7703500119969, 51.4038161402568, 47.6908210013062, 44.733499577269, 49.0990697136149, 47.0381244253367, 50.6246182974428, 50.8727697608992, 48.1883749496192, 49.9094971902668, 51.4024006286636, 47.858812619932, 43.8532895939425, 51.0264792805538, 47.2120952093974, 50.0444536274299, 52.3428046917543, 59.7227923031896, 58.1244931928813, 60.2706275274977, 59.0147362891585, 52.6608879743144, 62.2188456263393, 57.7123434329405, 58.3746858304366, 62.6367503125221, 60.9077196735889, 56.8138210857287, 59.812376066111, 60.5437361020595, 51.2970413453877, 60.3391999211162, 57.3215152909979, 56.4301307220012, 62.8979858038947, 50.1909973882139, 45.9235293632373, 49.24725244008, 52.3605857389048, 42.8149003852159, 51.3142714761198, 47.4335271269083, 47.0058694463223, 52.0639638807625, 41.0728860516101, 36.0847078282386, 39.904410911724, 42.5762362144887, 32.67773448769, 42.9504046197981, 35.6693080635741, 37.4426508890465, 40.1027930378914), .Dim = 9:10, .Dimnames = list(Year = c("2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015"), Month = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10")))
case_study1
# Month
# Year 1 2 3 4 5 6 7 8 9 10
# 2007 18.12653 19.34278 31.06472 41.38281 50.02483 48.18837 59.72279 60.90772 50.19100 41.07289
# 2008 18.14458 17.56377 29.51351 40.93969 49.77035 49.90950 58.12449 56.81382 45.92353 36.08471
# 2009 20.29444 21.88417 32.61029 40.91307 51.40382 51.40240 60.27063 59.81238 49.24725 39.90441
# 2010 19.39436 18.45738 29.36420 37.91958 47.69082 47.85881 59.01474 60.54374 52.36059 42.57624
# 2011 18.44670 16.72194 25.14342 35.62234 44.73350 43.85329 52.66089 51.29704 42.81490 32.67773
# 2012 19.11063 17.22378 27.99419 39.84715 49.09907 51.02648 62.21885 60.33920 51.31427 42.95040
# 2013 18.91301 18.54698 27.00488 35.43069 47.03812 47.21210 57.71234 57.32152 47.43353 35.66931
# 2014 20.16095 18.48298 27.53333 38.98328 50.62462 50.04445 58.37469 56.43013 47.00587 37.44265
# 2015 19.43208 20.76311 31.31066 39.55277 50.87277 52.34280 62.63675 62.89799 52.06396 40.10279
Similar plot, using matplot (to reduce your iteration):
# One colour per row (year) of case_study1, in row order.
cols <- c(
  "red", "blue", "yellow", "chartreuse", "chocolate",
  "brown", "burlywood", "coral", "cyan"
)
# matplot() draws one series per column, so the matrix is transposed to get
# one line per year. The x-range is widened by 3 units, presumably to leave
# room for the legend on the right.
matplot(
  t(case_study1),
  type = "b",
  pch = 1,
  lty = 1,
  col = cols,
  xlim = c(1, ncol(case_study1) + 3),
  main = "Temperature chart",
  xlab = "Month",
  ylab = "Temperature"
)
legend("bottomright", legend = 2007:2015, col = cols, pch = 1)
To adapt this to your code using multiple calls to lines, add xlim= to your first plot call, then add the legend(..) call.
Doing this in ggplot2 is not difficult, but the data should first be melted into a long format.
# Long format: one row per (Year, Month) cell of the matrix.
melted <- reshape2::melt(case_study1)
# One line per year, coloured by year, with x-axis breaks at every other month.
ggplot(
  data = melted,
  mapping = aes(x = Month, y = value, color = factor(Year), group = Year)
) +
  geom_line() +
  ggtitle("Temperature chart") +
  xlab("Month") +
  ylab("Temperature") +
  scale_color_discrete(name = "Year") +
  scale_x_continuous(breaks = seq(1, 10, by = 2))
I'm using the code below to plot two variables on the same graph so that I can compare them.
Some of them are in the AW group and the others are in the EWC group.
# Reshape every column except the first into long format (the two characters
# of each column name become `try` and `exp`), then plot the values with a
# smoother and mean +/- one standard deviation error bars.
Data1 %>%
  pivot_longer(
    cols = -1,
    names_to = c("try", "exp"),
    names_pattern = "(.)(.)") %>%
  ggplot(aes(x = exp, y = value, group = exp)) +
  geom_point(aes(shape = exp, colour = exp)) +
  geom_smooth(alpha = 0, aes(colour = exp)) +
  # fun / fun.min / fun.max replace the deprecated fun.y / fun.ymin / fun.ymax
  # arguments of stat_summary() (ggplot2 >= 3.3.0).
  stat_summary(fun = mean,
               fun.min = function(x) mean(x) - sd(x),
               fun.max = function(x) mean(x) + sd(x),
               aes(color = exp),
               geom = "errorbar") +
  scale_y_continuous(expand = c(0, 0), breaks = seq(0, 700, by = 200), name = "AW (%)",
                     sec.axis = sec_axis(~. + 10, name = "EWC (%)"))
Data that I'm working with:
structure(list(Sample = c("AW DL", "AW", "AW ambient temp", "AW DL ambient temp",
"EWC DL", "EWC", "EWC DL ambient temp", "EWC ambient temp"),
A1 = c(418.181818181874, 288.888888888889, 319.999999999996,
173.333333333325, 80.701754385967, 74.2857142857143, 63.4146341463404,
76.190476190476), B1 = c(483.333333333305, 517.647058823533,
565.384615384621, 375.000000000032, 82.857142857142, 83.8095238095239,
78.947368421054, 84.9710982658961), C1 = c(606.06060606057,
542.10526315789, 496.551724137933, 587.500000000377, 85.8369098712439,
84.4262295081966, 85.4545454545534, 83.2369942196532), A2 = c(368.750000000047,
46.428571428571, 216.39344262295, 104.651162790703, 78.6666666666688,
31.7073170731706, 51.136363636365, 68.3937823834196), B2 = c(417.857142857153,
123.913043478263, 213.63636363636, 180.769230769273, 80.6896551724142,
55.3398058252432, 64.3835616438409, 68.1159420289851), C2 = c(283.928571428547,
169.230769230771, 271.428571428566, 95.2380952380727, 73.9534883720913,
62.8571428571431, 48.7804878048721, 73.0769230769227), A3 = c(564.10256410254,
194.285714285712, 314.999999999998, 362.162162162103, 84.9420849420844,
66.0194174757279, 78.3625730994124, 75.9036144578312), B3 = c(656.249999999929,
26.4705882352953, 263.492063492065, 443.243243243154, 86.776859504131,
20.9302325581403, 81.592039800992, 72.4890829694324), C3 = c(634.883720930251,
330.555555555559, 304.444444444446, 416.666666666644, 86.3924050632915,
76.7741935483873, 80.6451612903217, 75.2747252747254), A4 = c(260.00000000002,
96.3636363636384, 285.714285714287, 174.509803921584, 72.2222222222237,
49.0740740740746, 63.5714285714306, 74.0740740740742), B4 = c(196.721311475417,
41.4285714285729, 245.762711864405, 190.566037735832, 66.2983425414373,
29.29292929293, 65.5844155844135, 71.0784313725489), C4 = c(262.264150943415,
58.6206896551738, 194.444444444444, 214.634146341482, 72.3958333333348,
36.956521739131, 68.2170542635678, 66.0377358490565)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -8L))
I found some examples of `if` statements, but I am stuck on getting them to work.
Thank you.
I'm using ggplot2 to make violin plots of module scores from Seurat, and I want to add statistics to them. I made the following violin plot, but I want to switch the violins around so that they read "1" and "0" from left to right instead of "0" and "1". (Thanks to @StupidWolf for the example!)
library(Seurat)
library(SeuratObject)
library(ggplot2)
library(ggpubr)
library(reshape2)
#add Seurat's module scores and create Seurat object from them =====================
ERlist <- list(c("CPB1", "RP11-53O19.1", "TFF1", "MB", "ANKRD30B",
"LINC00173", "DSCAM-AS1", "IGHG1", "SERPINA5", "ESR1",
"ILRP2", "IGLC3", "CA12", "RP11-64B16.2", "SLC7A2",
"AFF3", "IGFBP4", "GSTM3", "ANKRD30A", "GSTT1", "GSTM1",
"AC026806.2", "C19ORF33", "STC2", "HSPB8", "RPL29P11",
"FBP1", "AGR3", "TCEAL1", "CYP4B1", "SYT1", "COX6C",
"MT1E", "SYTL2", "THSD4", "IFI6", "K1AA1467", "SLC39A6",
"ABCD3", "SERPINA3", "DEGS2", "ERLIN2", "HEBP1", "BCL2",
"TCEAL3", "PPT1", "SLC7A8", "RP11-96D1.10", "H4C8",
"PI15", "PLPP5", "PLAAT4", "GALNT6", "IL6ST", "MYC",
"BST2", "RP11-658F2.8", "MRPS30", "MAPT", "AMFR", "TCEAL4",
"MED13L", "ISG15", "NDUFC2", "TIMP3", "RP13-39P12.3", "PARD68"))
tnbclist <- list(c("FABP7", "TSPAN8", "CYP4Z1", "HOXA10", "CLDN1",
"TMSB15A", "C10ORF10", "TRPV6", "HOXA9", "ATP13A4",
"GLYATL2", "RP11-48O20.4", "DYRK3", "MUCL1", "ID4", "FGFR2",
"SHOX2", "Z83851.1", "CD82", "COL6A1", "KRT23", "GCHFR",
"PRICKLE1", "GCNT2", "KHDRBS3", "SIPA1L2", "LMO4", "TFAP2B",
"SLC43A3", "FURIN", "ELF5", "C1ORF116", "ADD3", "EFNA3",
"EFCAB4A", "LTF", "LRRC31", "ARL4C", "GPNMB", "VIM",
"SDR16C5", "RHOV", "PXDC1", "MALL", "YAP1", "A2ML1",
"RP1-257A7.5", "RP11-353N4.6", "ZBTB18", "CTD-2314B22.3", "GALNT3",
"BCL11A", "CXADR", "SSFA2", "ADM", "GUCY1A3", "GSTP1",
"ADCK3", "SLC25A37", "SFRP1", "PRNP", "DEGS1", "RP11-110G21.2",
"AL589743.1", "ATF3", "SIVA1", "TACSTD2", "HEBP2"))
# Build a random count matrix with one row per signature gene and 500 cells,
# wrap it in a Seurat object, assign each cell a random 0/1 cluster label and
# score both gene lists with AddModuleScore().
genes <- unlist(c(ERlist, tnbclist))
mat <- matrix(
  rnbinom(500 * length(genes), mu = 500, size = 1),
  ncol = 500,
  dimnames = list(genes, paste0("cell", 1:500))
)
sobj <- CreateSeuratObject(mat)
sobj <- NormalizeData(sobj)
sobj$ClusterName <- factor(sample(0:1, ncol(sobj), replace = TRUE))
sobj <- AddModuleScore(
  object = sobj,
  features = tnbclist,
  name = "TNBC_List",
  ctrl = 5
)
sobj <- AddModuleScore(
  object = sobj,
  features = ERlist,
  name = "ER_List",
  ctrl = 5
)
# Pull the two score columns and the cluster label into a plain data frame.
sobjlists <- FetchData(
  object = sobj,
  vars = c("ER_List1", "TNBC_List1", "ClusterName")
)
#violin plot =======================================================
# Pairs of ClusterName levels to compare.
my_comparisons <- list(c("0", "1"))
ggplot(data = sobjlists, mapping = aes(x = ClusterName, y = ER_List1)) +
  geom_violin(mapping = aes(fill = ClusterName)) +
  geom_boxplot(width = 0.1) +
  labs(y = "ER+ Signature", x = "ClusterName") +
  ggtitle(label = "Object") +
  theme(plot.title = element_text(hjust = 0.5)) +
  # Add pairwise comparisons p-value
  stat_compare_means(comparisons = my_comparisons, method = "wilcox.test") +
  # Second stat_compare_means() layer, with its label placed at y = 0.75
  stat_compare_means(label.y = 0.75)
Using the solution from How to reorder plots in combined ggplot2 graph?, I can reorder the plots by adding
+ scale_x_discrete(limits = c("1", "0"))
to the very end of the code I have now. However, doing that, the "Wilcoxon, p = 0.13" disappears, and gives the following error:
ggplot(sobjlists,aes(x= ClusterName, y = ER_List1)) +
geom_violin(aes(fill=ClusterName)) +
geom_boxplot(width=0.1) + labs(y= "ER+ Signature", x = "ClusterName") + ggtitle(label = "Object") +
theme(plot.title = element_text(hjust = 0.5)) + stat_compare_means(comparisons = my_comparisons, method = "wilcox.test")+ # Add pairwise comparisons p-value
stat_compare_means(label.y = 0.75) + scale_x_discrete(limits = c("1", "0"))
Warning messages:
1: Unknown or uninitialised column: `p`.
2: Computation failed in `stat_compare_means()`:
argument "x" is missing, with no default
For a TL;DR, how do I keep the top statistic in and reorder the violins?
Thanks for reading!
Here is what worked for me (From https://www.datanovia.com/en/blog/how-to-change-ggplot-legend-order/)
sobjlists$ClusterName <- factor(sobjlists$ClusterName, levels = c("1", "0"))
I keep getting an error because the bar graphs are used for quarterly data, while the line will show data from the entire year, so it will have many points.
The only issue is with the geom_line function, which I am new to using. The error is -->
Error in scale_fill_manual(values = c("green", "yellow")) + geom_line(aes(x = dts2, : non-numeric argument to binary operator
# Ticker symbol passed to getFinancials() and getSymbols() below.
t="DG"
fin=getFinancials(t, auto.assign = F)
# Second set of labels of fin$BS$A; presumably the statement dates, since
# years are parsed from their first four characters below -- TODO confirm.
dts = labels(fin$BS$A)[[2]]
# Range strings of the form "<first 7 characters of dts>::<dts>".
dts2 = paste(substr(dts,1,7),"::",dts, sep="")
stockprices = getSymbols(t, auto.assign = F)
# For each range in dts2, keep the last value of column 6 of stockprices.
price = rep(0,NROW(dts))
for(i in 1:NROW(price))
{
price[i]=as.vector(last(stockprices[dts2[i],6]))
}
# Range strings of the form "<year - 2>::<year>".
yr= as.numeric(substr(dts,1,4))
pastyr = yr -2
betayr = paste(pastyr,"::",yr,sep="")
# Named rows pulled from fin$BS$A, fin$IS$A and fin$CF$A (presumably the annual
# balance sheet, income statement and cash-flow statement -- TODO confirm).
os = fin$BS$A["Total Common Shares Outstanding", ]
gw= fin$BS$A["Goodwill, Net", ]
ta= fin$BS$A["Total Assets", ]
td= fin$BS$A["Total Debt", ]
ni= fin$IS$A["Net Income", ]
btax = fin$IS$A["Income Before Tax", ]
atax = fin$IS$A["Income After Tax",]
intpaid = fin$CF$A["Cash Interest Paid, Supplemental",]
# Treat missing goodwill and interest-paid values as zero.
gw[is.na(gw)]=0
intpaid[is.na(intpaid)]=0
# (Total assets - goodwill) divided by shares outstanding.
taa = (ta - gw)/os
# Interest paid divided by total debt; stays 0 wherever total debt is 0.
Rd = rep(0,NROW(dts))
for(i in 1:NROW(dts))
{
if(td[i]!=0)
{
Rd[i] = intpaid[i]/td[i]
}
}
gspc = getSymbols("^GSPC", auto.assign = F)
gs5 = getSymbols("GS5", src = "FRED", auto.assign = F)
marketRisk = rep(0,NROW(dts))
riskFree = rep(0,NROW(dts))
beta = rep(0,NROW(dts))
# For each "<year - 2>::<year>" window: mean yearly return of gspc, mean of
# gs5, and CAPM.beta() of the stock's weekly returns against gspc's.
for(i in 1:NROW(dts))
{
marketRisk[i]= mean(yearlyReturn(gspc[betayr[i]]))
riskFree[i] = mean(gs5[betayr[i]])
gspc.weekly = weeklyReturn(gspc[betayr[i]])
stockprices.weekly = weeklyReturn(stockprices[betayr[i]])
beta[i] = CAPM.beta(stockprices.weekly,gspc.weekly)
}
# riskFree is divided by 100 here, presumably because gs5 is quoted in
# percent -- TODO confirm.
Re = (riskFree/100) + beta * (marketRisk-(riskFree/100))
E = os*price
V=E+td
Tc = (btax - atax)/btax
wacc = (E/V)*Re + (td/V)*Rd*(1-Tc)
margin = (ni/wacc)/os - taa
# Stack the two series into one long data frame with a `cat` column that
# tells them apart.
taadf = data.frame(dts,val = taa,cat="ta")
margindf = data.frame(dts,val = margin ,cat="margin")
mdf=rbind(margindf,taadf)
#linrng = paste(dts[NROW(dts)],"::",dts[1],sep="")
#dfdt = data.frame(stockprices[linrng,6])
#dfdt2 = data.frame(dt = labels(dfdt)[[1]],dfdt$AAPL.Adjusted,cat="taa")
#names(dfdt2)=c("dt,price,cat")
# NOTE(review): i still holds the value left by the last for loop, so only a
# single range of dts2 is used here.
pricedf = data.frame(as.vector((stockprices[dts2[i],6])))
# NOTE(review): the next line does not end with `+`, so the ggplot expression
# stops there. The lines after it form a separate expression,
# scale_fill_manual(...) + geom_line(...), which is the expression named in the
# reported "non-numeric argument to binary operator" error.
ggplot(mdf, aes(x=dts,y=val,fill=cat)) + geom_bar(stat="identity",color="black")
scale_fill_manual(values = c("green","yellow")) +
geom_line(aes(x=dts2, y=stockprices), stat = "identity",
position = "identity", na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE)
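Note that the object stockprices is an ‘xts’ object, so you can't map it directly inside a ggplot aesthetic; convert it to a data frame first. The error message itself comes from the missing `+` after `geom_bar(...)`: without it, R evaluates `scale_fill_manual(...) + geom_line(...)` as a separate expression that is not attached to any `ggplot()` call. I picked the first variable of the stockprices object to show the code, but you probably want another one.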
library(dplyr)
library(quantmod)
library(PerformanceAnalytics)
library(ggplot2)
# Convert the xts object to a data frame, expose its row names as a `date`
# column and keep only the rows whose date appears in dts.
stockprices_df <- as.data.frame(stockprices) %>%
  mutate(date = rownames(.)) %>%
  filter(date %in% dts)

# Bars come from mdf and the line from stockprices_df; each layer is given its
# own data and mapping because the base ggplot() call has none.
ggplot() +
  geom_col(data = mdf, mapping = aes(x = dts, y = val, fill = cat)) +
  geom_line(data = stockprices_df, mapping = aes(x = date, y = DG.Open, group = 1)) +
  scale_fill_manual(values = c("green", "yellow"))
[
I have a data frame that comes from a lm subset composed of the intercept (ordenada) and the slope (velocidad1) calculated for each subject.
A
UT1 UT2 UT3 UT4
ordenada 1213.8 2634.8 3.760000e+02 -11080.8
velocidad1 1.5 -2.5 6.615954e-14 20.0
UT5 UT6 UT7
ordenada 1711.8 1.739000e+03 1.800000e+01
velocidad1 -2.5 5.039544e-14 -9.154345e-16
UT8 UT9 UT10 UT11 UT12
ordenada 5659.2 -2791 3422.6 418.2 2802.2
velocidad1 -6.0 5 -1.0 -0.5 -1.5
UT13 UT14 TR1 TR2
ordenada 2.832000e+03 -411.2 -15722.0 -1105.4
velocidad1 1.405114e-13 3.5 25.5 25.0
TR3 TR4 TR5 TR6
ordenada 1.14600e+03 299.6 1943.4 6.840000e+02
velocidad1 -5.11402e-14 2.0 -2.5 6.479414e-14
TR7 TR8 TR9 TR10
ordenada 354.8 1.317000e+03 33284.6 -3742.6
velocidad1 1.0 -3.475548e-14 -52.0 8.0
TR11 TR12 TR13
ordenada 7.400000e+02 2205.4 -4542.6
velocidad1 -8.018585e-14 -2.5 8.0
TR14
ordenada 5.880000e+02
velocidad1 -4.406498e-14
dput(A)
structure(list(UT1 = c(1213.79999999971, 1.50000000000047), UT2 = c(2634.80000000021,
-2.50000000000033), UT3 = c(375.999999999959, 6.61595351840473e-14
), UT4 = c(-11080.8000000008, 20.0000000000013), UT5 = c(1711.80000000007,
-2.50000000000012), UT6 = c(1738.99999999997, 5.03954433109254e-14
), UT7 = c(18.0000000000006, -9.15434469010036e-16), UT8 = c(5659.20000000026,
-6.00000000000041), UT9 = c(-2791.00000000024, 5.00000000000039
), UT10 = c(3422.59999999968, -0.99999999999948), UT11 = c(418.199999999958,
-0.499999999999932), UT12 = c(2802.20000000017, -1.50000000000028
), UT13 = c(2831.99999999991, 1.40511433073812e-13), UT14 = c(-411.200000000294,
3.50000000000048), TR1 = c(-15722.0000000017, 25.5000000000028
), TR2 = c(-1105.40000000264, 25.0000000000043), TR3 = c(1146.00000000003,
-5.11402035568996e-14), TR4 = c(299.599999999803, 2.00000000000032
), TR5 = c(1943.40000000013, -2.50000000000021), TR6 = c(683.99999999996,
6.47941413997612e-14), TR7 = c(354.800000000011, 0.999999999999982
), TR8 = c(1317.00000000002, -3.47554781454658e-14), TR9 = c(33284.6000000025,
-52.000000000004), TR10 = c(-3742.60000000058, 8.00000000000094
), TR11 = c(740.00000000005, -8.0185853149896e-14), TR12 = c(2205.40000000021,
-2.50000000000034), TR13 = c(-4542.60000000042, 8.00000000000067
), TR14 = c(588.000000000027, -4.40649812201441e-14)), .Names = c("UT1",
"UT2", "UT3", "UT4", "UT5", "UT6", "UT7", "UT8", "UT9", "UT10",
"UT11", "UT12", "UT13", "UT14", "TR1", "TR2", "TR3", "TR4", "TR5",
"TR6", "TR7", "TR8", "TR9", "TR10", "TR11", "TR12", "TR13", "TR14"
), row.names = c("ordenada", "velocidad1"), class = "data.frame")
My goal is to get a barplot of the data in second row ( A[2,] ) splitting by group (UT which contains UT1,UT2... and TR) in the same graph. I am trying to do some ggplot but keep failing over and over again. I get no layers in plot error or margin error in base graphics.
The output should look like this
I KNOW the answer is in the reshape package but I wish there's another way to do that.
Thank you in advance.
Using base graphics:
# Reshape the slope row (row 2 of the data frame A) into a two-row matrix,
# filled row by row. With A's column order (UT1..UT14 followed by TR1..TR14)
# the first row holds the UT values and the second row the TR values.
# The original snippet indexed `df`, which is not defined here; the data
# frame from the question is `A`.
m <- matrix(unlist(A[2, ]), nrow = 2, byrow = TRUE)
# Grouped bar plot: one pair of bars (blue = row 1, red = row 2) per column.
barplot(m, beside = TRUE, col = c("blue", "red"), names.arg = seq_len(ncol(m)))
Possibly add a legend:
legend("topright", legend = c("UT", "TR"), fill = c("blue", "red"))
EDIT: Not using reshape per request in comments
library(ggplot2)
# One row per column of A: the column name and its value in row 2.
plot_data <- data.frame(names(A), t(A[2, ]))
colnames(plot_data) <- c("variable", "value")
# TRUE when the column name starts with "TR", FALSE otherwise.
plot_data[["group"]] <- grepl("^TR", plot_data[["variable"]])
# Strip everything but the digits, then order the factor levels numerically.
plot_data[["variable"]] <- gsub("[^0-9]", "", as.character(plot_data[["variable"]]))
plot_data[["variable"]] <- factor(
  plot_data[["variable"]],
  unique(sort(as.numeric(plot_data[["variable"]])))
)
# Side-by-side bars for the two groups at each subject number.
p <- ggplot(data = plot_data, mapping = aes(x = variable, y = value, fill = group))
p + geom_bar(stat = "identity", position = "dodge")
Here is another option that incorporates your complete dataset. I'm not sure whether this is useful for you.
I've used reshape2; it's actually easier. You just have to call melt(yourdataframe); for your particular case there is no need to specify any other arguments to melt.
# library() stops with an error if a package is missing, whereas require()
# only returns FALSE and lets the script fail later.
library("ggplot2")
library("reshape2")
# Work on a copy of the question's data frame A under the name df. The
# original line was `A <- df`, which overwrote A instead of creating df.
df <- A
# Melt each row separately and tag it with the quantity it holds.
df1 <- melt(df[1, ])
df1$origen <- "ORDENADA"
df2 <- melt(df[2, ])
df2$origen <- "VELOCIDAD"
# Both rows come from the same columns, so the melted keys must line up.
stopifnot(identical(df1$variable, df2$variable))
df3 <- rbind(df1, df2)
# Group label from the column-name prefix; subject number from its digits.
df3$group <- ifelse(grepl("^TR", df3$variable), "TR", "UT")
df3$vble <- gsub("[^0-9]", "", as.character(df3$variable))
df3$vble <- factor(df3$vble, levels = as.numeric(unique(df3$vble)))
# One facet per quantity, each with its own y scale.
ggplot(aes(y = value, x = vble, fill = group), data = df3) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(origen ~ ., scales = "free")
Using Functions
#' Reshape a two-row coefficient table into long format for plotting
#'
#' Melts the first and second row of `data` separately, labels them
#' "ORDENADA" and "VELOCIDAD", stacks them, and derives a group label and a
#' subject number from the original column names.
#'
#' @param data A data frame with at least two rows and one column per
#'   subject. Column names starting with "TR" are labelled "TR"; all others
#'   are labelled "UT".
#' @return A data frame with the columns produced by `melt()` plus `origen`
#'   ("ORDENADA" or "VELOCIDAD"), `group` ("TR" or "UT") and `vble` (the
#'   digits of the column name, as a factor).
prepare <- function(data) {
  first_row <- melt(data[1, ])
  first_row$origen <- "ORDENADA"
  second_row <- melt(data[2, ])
  second_row$origen <- "VELOCIDAD"
  # The original computed this comparison and discarded the result; enforce
  # it so that misaligned rows cannot be stacked silently.
  stopifnot(identical(first_row$variable, second_row$variable))
  long <- rbind(first_row, second_row)
  long$group <- ifelse(grepl("^TR", long$variable), "TR", "UT")
  long$vble <- gsub("[^0-9]", "", as.character(long$variable))
  long$vble <- factor(long$vble, levels = as.numeric(unique(long$vble)))
  long
}
# Print the reshaped data.
prepare(df)
# This would work, but is a bit manual for many plots:
ggplot(data = prepare(df), mapping = aes(x = vble, y = value, fill = group)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(origen ~ ., scales = "free")
#' Draw the faceted, dodged bar chart for a long-format data frame
#'
#' @param data A data frame with columns `vble`, `value`, `group` and
#'   `origen`, such as the one returned by `prepare()`.
#' @return The result of printing the plot; warnings raised while printing
#'   are suppressed, as in the original.
plot_fun <- function(data) {
  # aes() with bare column names replaces the deprecated aes_string().
  p <- ggplot(data, aes(x = vble, y = value, fill = group)) +
    geom_bar(stat = "identity", position = "dodge") +
    facet_grid(origen ~ ., scales = "free")
  suppressWarnings(print(p))
}
plot_fun(prepare(df))
I guess you could loop in order to plot several data frames using the same plot function.
You could probably adapt it further to your needs, but this should get you started.