How to plot data with bounds - r

I have data such as this:
yr X lower upper
1 2004 0.2852 0.3927 0.1888
2 2005 0.3710 0.2385 0.5093
3 2006 0.3297 0.2177 0.4557
4 2007 0.2230 0.1424 0.3138
5 2008 0.3028 0.1952 0.4237
6 2009 0.3906 0.2798 0.5226
7 2010 0.3382 0.2343 0.4467
Here is some reproducible data:
# Reproducible example data: one row per year with the estimate X and the
# two bound columns exactly as given in the table above.
dt <- data.frame(
  yr    = 2004:2010,
  X     = c(0.2852, 0.371, 0.3297, 0.223, 0.3028, 0.3906, 0.3382),
  lower = c(0.3927, 0.2385, 0.2177, 0.1424, 0.1952, 0.2798, 0.2343),
  upper = c(0.1888, 0.5093, 0.4557, 0.3138, 0.4237, 0.5226, 0.4467)
)
I would like to plot this, and the results will go in a presentation, so I would like to make it look as nice as possible - I'm sorry to use the subjective "nice" but I don't know how else to say it ! I have tried this:
library(ggplot2)
# First attempt: yearly estimates joined by a line, with an error bar
# spanning the lower/upper columns and a blue-filled point at each year.
ggplot(data = dt, mapping = aes(x = yr, y = X, group = 1)) +
  geom_line() +
  geom_errorbar(mapping = aes(ymin = lower, ymax = upper), width = 0.1) +
  geom_point(fill = "blue", size = 3, shape = 21) +
  ylim(0, 0.6)
But I don't like the results - it just seems too plain and boring:

You could use a ribbon instead of the errorbars
# Same example data as in the question, built column by column.
dt <- data.frame(
  yr    = 2004:2010,
  X     = c(0.2852, 0.371, 0.3297, 0.223, 0.3028, 0.3906, 0.3382),
  lower = c(0.3927, 0.2385, 0.2177, 0.1424, 0.1952, 0.2798, 0.2343),
  upper = c(0.1888, 0.5093, 0.4557, 0.3138, 0.4237, 0.5226, 0.4467)
)
library(ggplot2)
# Draw the interval as a translucent band behind the line instead of error bars.
# The 2004 row of `dt` has its bounds swapped (lower = 0.3927, upper = 0.1888),
# so the band edges are taken as pmin()/pmax() of the two columns: the ribbon
# then always runs from the smaller to the larger bound and does not fold over
# itself at that year.
ggplot(dt, aes(x = yr, y = X, group = 1,
               ymin = pmin(lower, upper),
               ymax = pmax(lower, upper))) +
  geom_ribbon(alpha = 0.2) +
  geom_line() +
  geom_point(shape = 21, size = 3, fill = "blue") +
  ylim(0, 0.6)

Related

Scale/Position R ggplot2 visualization: don't know what package to use

I had an idea for a visualization, that includes generating a plot for each row in my dataset (58 rows), showing the relative position of the value that i select, in a scale (e.g.: 58 cities and the position of the population size of one city relative to others).
Here's a code sample showing my data structure (nregs the name of regions I'm studying). I want to create a 'rank plot' as I've showed for each row, one plot ranking based in total_pop and other based in urban_pop.
structure(list(nregs = c("1.1 Javari e Interbacias Javari - Juruá",
"1.2 Transf. da Margem Esquerda do Solimões", "1.3 Juruá e Interbacias Juruá - Jutaí",
"1.4 Purus e Interbacias Purus - Juruá", "1.5 Negro", "1.6 Madeira e Interbacias Madeira - Purus",
"1.7 Estaduais Margem Esquerda do Amazonas", "1.8 Tapajós e Interbacias Tapajós - Madeira",
"1.9 Estaduais PA", "1.10 Xingu e Interbacias Xingu - Tapajós"
), urban_pop = c(63777, 83237, 265725, 717181, 2122424, 1693933,
837519, 1169865, 171045, 515124), total_pop = c(111120, 141473,
405955, 910484, 2357696, 2320307, 933181, 1639624, 304181, 831595
)), row.names = c(NA, -10L), class = c("tbl_df", "tbl", "data.frame"
))
As english is not my native language, i'm finding it difficult to even search a solution online. I usually do my dataviz with R and tidyverse. Can anybody give me at least a direction? Thanks in advance.
It sounds like you're looking for something like this:
library(ggplot2)
library(dplyr)
# Rank plot: one horizontal track per region, with a dot showing where the
# region ranks on each population measure.
# The number of rank positions is taken from the data instead of being
# hard-coded, so the track length, tick marks and axis labels always agree.
n_regions <- nrow(df)

df %>%
  # replace the raw populations by their rank (rank() gives 1 to the smallest)
  mutate(urban_pop = rank(urban_pop),
         total_pop = rank(total_pop)) %>%
  # long format: one row per region and population measure (columns name/value)
  tidyr::pivot_longer(-1) %>%
  ggplot(aes(value, nregs)) +
  # the track itself, spanning every rank position
  geom_segment(aes(x = 1, y = nregs, xend = n_regions, yend = nregs)) +
  # short vertical tick at every rank position on every track
  geom_segment(data = expand.grid(x = seq_len(n_regions),
                                  y = seq_len(n_regions) - 0.1),
               aes(x = x, y = y, xend = x, yend = y + 0.2)) +
  # labels are reversed so that the largest value is displayed as rank 1
  scale_x_continuous(breaks = seq_len(n_regions),
                     labels = rev(seq_len(n_regions)),
                     name = "Rank") +
  # dodge the two measures so coinciding ranks stay visible
  geom_point(aes(color = name), position = position_dodge(width = 0.5),
             size = 4) +
  scale_color_manual(values = c("red", "forestgreen")) +
  # theme_void() removes everything; re-enable only the texts that are needed
  theme_void() +
  theme(axis.text.y = element_text(hjust = 1),
        axis.text.x = element_text(),
        axis.title.x = element_text(size = 16))
Note that the ranks of urban and total population appear to be the same for each city in your sample

ggplot2 passing vectors into scale_alpha_manual() and scale_size_manual() is not working

I am trying to customize colors, size, and alpha of a ggplot with lines using data from a data frame. Specifically I want to highlight an average line superimposed over the other lines. I can get the colors to work, but neither alpha nor size parameters. The structure and code are below.
Appreciate any advice!
library(ggplot2)
library(reshape2)
# Example data: three recorded traces (V20-V22), their average, and a time index.
df <- structure(list(V20 = c(0.021331, 0.034844, 0.023003, -0.001772,
-0.028383, -0.016064, 0, 0.016824, 0.045668, 0.029395), V21 = c(0.088426,
0.096454, 0.064565, 0.03234, 0.03058, 0.00994800000000001, 0,
0, 0, 0), V22 = c(0.091629, 0.061091, 0.039268, 0.00933999999999996,
0.005019, 0.000396000000000007, 0.009496, -0.013611, -0.020616,
-0.010839), avg = c(0.0130733014285714, -0.00393914761904762,
-0.00299403809523809, 0.00435276190476191, 0.00829649999999999,
0.00264482857142857, -0.0120784133333333, -0.00930774761904762,
0.00377989047619047, 0.00355469323809524), time = 1:10), row.names = c(NA,
10L), class = "data.frame")

# Styling table with one row per plotted series, i.e. every column except `time`.
# The series are selected by name: the previous `1:length(colnames(df))-1`
# parsed as `(1:n) - 1` and only worked because a zero index is silently dropped.
series <- setdiff(colnames(df), "time")
colors.df <- data.frame(key = series,
                        value = rep("gray", length(series)),
                        alpha = rep(0.2, length(series)),
                        size = rep(1.0, length(series)),
                        stringsAsFactors = FALSE)

# Highlight the average: opaque, red and thicker than the individual traces.
is_avg <- colors.df$key == "avg"
colors.df$value[is_avg] <- "red"
colors.df$alpha[is_avg] <- 1.0
colors.df$size[is_avg] <- 3.0
# Reshape to long format: one row per (time, spike) pair holding its voltage.
df.melt <- melt(df, id.vars = "time",
                variable.name = "spike", value.name = "voltage")

# One line per spike. Note that only `colour` is mapped to `spike` in aes();
# the alpha and size scales are added without a matching aesthetic.
ggplot(data = df.melt, mapping = aes(x = time, y = voltage, colour = spike)) +
  geom_line() +
  scale_alpha_manual(values = colors.df$alpha) +
  scale_color_manual(values = colors.df$value) +
  scale_size_manual(values = colors.df$size)
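Thanks @PoGibas for the hint! The manual alpha and size scales only take effect once `alpha` and `size` are also mapped to `spike` inside `aes()`: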
# Map colour, alpha and size to `spike` so that all three manual scales apply.
# The scale values are named by series key, so each style is matched to its
# series by name rather than by the order of the rows in `colors.df`.
ggplot(df.melt, aes(x = time, y = voltage,
                    colour = spike, alpha = spike, size = spike)) +
  geom_line() +
  scale_alpha_manual(values = setNames(colors.df$alpha, colors.df$key)) +
  scale_color_manual(values = setNames(colors.df$value, colors.df$key)) +
  scale_size_manual(values = setNames(colors.df$size, colors.df$key))

Overlapping density plots in a ggtern ternary diagram

I have two datasets looking like the following:
#dataset 1
structure(list(dataset1 = 1:86, x = c(24.22055, 24.61821, 24.60858,
24.5963, 24.66904, 24.682, 24.74323, 24.84038, 25.02606, 25.00763,
24.99861, 25.00901, 24.99273, 24.98789, 24.99308, 24.97615, 24.9572,
24.95962, 24.93451, 25.08111, 24.97653, 24.92734, 24.96208, 25.03111,
25.00242, 24.95385, 24.99345, 25.03311, 24.93516, 24.95163, 24.94859,
25.07071, 25.15814, 25.22433, 25.3163, 25.22823, 25.34902, 25.4118,
25.40885, 25.35868, 25.34709, 25.24046, 25.31097, 25.32868, 25.41141,
24.92474, 24.90951, 24.9927, 25.0052, 24.94954, 25.15449, 25.10164,
25.03112, 24.97345, 25.03352, 25.11059, 25.05391, 25.05766, 25.06176,
25.17039, 25.17868, 25.1053, 25.0568, 25.08028, 25.137, 25.36559,
25.06363, 25.26306, 25.16708, 25.14826, 25.06046, 24.99418, 25.19738,
25.20072, 25.24073, 25.18705, 25.18142, 25.16747, 25.1235, 25.38767,
25.37099, 25.30558, 25.35074, 25.33528, 25.32482, 25.32328),
y = c(22.25462, 21.88752, 21.89172, 21.88356, 21.86319, 21.80782,
21.7451, 21.70914, 21.68861, 21.66829, 21.67942, 21.67475,
21.67994, 21.67462, 21.67405, 21.67494, 21.66842, 21.65091,
21.6657, 21.68427, 21.66878, 21.6711, 21.66772, 21.63123,
21.64916, 21.65174, 21.65686, 21.63292, 21.64039, 21.53591,
21.64633, 21.62177, 21.61304, 21.60609, 21.594, 21.60413,
21.59069, 21.58264, 21.58277, 21.57736, 21.57457, 21.57674,
21.56562, 21.49258, 21.48584, 21.74852, 21.73081, 21.75594,
21.66646, 21.70782, 21.67075, 21.66456, 21.64514, 21.65763,
21.66863, 21.64658, 21.63672, 21.62677, 21.65441, 21.61994,
21.61754, 21.65159, 21.62676, 21.61157, 21.60181, 21.65121,
21.61303, 21.61424, 21.61419, 21.6258, 21.59797, 21.61477,
21.5879, 21.58918, 21.61834, 21.56725, 21.61358, 21.61456,
21.57619, 21.592, 21.58095, 21.52847, 21.57284, 21.56755,
21.56847, 21.49455), z = c(53.52483, 53.49427, 53.49971,
53.52014, 53.46777, 53.51018, 53.51168, 53.45048, 53.28533,
53.32408, 53.32197, 53.31623, 53.32733, 53.33749, 53.33287,
53.34891, 53.37439, 53.38947, 53.39978, 53.23462, 53.35469,
53.40156, 53.3702, 53.33767, 53.34843, 53.39441, 53.34969,
53.33398, 53.42445, 53.51247, 53.40507, 53.30752, 53.22882,
53.16958, 53.0897, 53.16764, 53.06029, 53.00556, 53.00838,
53.06396, 53.07834, 53.1828, 53.12341, 53.17874, 53.10275,
53.32674, 53.35968, 53.25136, 53.32834, 53.34264, 53.17476,
53.2338, 53.32374, 53.36892, 53.29785, 53.24283, 53.30937,
53.31556, 53.28384, 53.20967, 53.20378, 53.24311, 53.31644,
53.30816, 53.26118, 52.9832, 53.32334, 53.1227, 53.21872,
53.22594, 53.34158, 53.39105, 53.21472, 53.2101, 53.14093,
53.2457, 53.205, 53.21797, 53.30031, 53.02033, 53.04806,
53.16595, 53.07643, 53.09717, 53.10672, 53.18217)), class = "data.frame", row.names = c(NA,
-86L))
#dataset2
structure(list(dataset2 = 1:16, x1 = c(24.702, 24.64061, 24.64624,
24.699, 24.68064, 24.65854, 24.75148, 24.58633, 24.73463, 24.59992,
24.65293, 24.60753, 24.62394, 25.3416, 24.71006, 24.67719), y1 = c(21.87799,
21.89606, 21.9034, 21.8859, 21.89083, 21.90291, 21.8491, 21.93269,
21.87262, 21.87465, 21.90029, 21.87801, 21.87661, 21.64635, 21.83719,
21.90565), z1 = c(53.42002, 53.46333, 53.45036, 53.4151, 53.42853,
53.43855, 53.39942, 53.48098, 53.39274, 53.52543, 53.44677, 53.51446,
53.49945, 53.01205, 53.45276, 53.41716)), class = "data.frame", row.names = c(NA,
-16L))
I have written a code to plot kernel density contours according to the ggtern package.
# density plot for dataset 1
# The data frame is referred to as `test1`, matching the `test1`/`test2`
# naming used for the two datasets in the rest of this thread.
plot1 <- ggtern(data = test1, aes(x = x, y = y, z = z))
plot1 +
  # filled kernel-density contours; fill and transparency follow the level
  stat_density_tern(geom = "polygon",
                    aes(fill = ..level..,
                        alpha = ..level..)) +
  theme_rgbw() +
  labs(title = "Example Density/Contour Plot") +
  scale_fill_gradient(low = "lightblue", high = "blue") +
  guides(color = "none", fill = "none", alpha = "none") +
  # zoom the three ternary axes onto the region containing the data
  scale_T_continuous(limits = c(0.225, 0.215)) +
  scale_L_continuous(limits = c(0.255, 0.245)) +
  scale_R_continuous(limits = c(0.53, 0.54))
# density plot for dataset 2
plot2 <- ggtern(data = test2, mapping = aes(x = x1, y = y1, z = z1))
plot2 +
  # filled kernel-density contours, shaded from light green to green by level
  stat_density_tern(
    geom = "polygon",
    mapping = aes(fill = ..level.., alpha = ..level..)
  ) +
  theme_rgbw() +
  labs(title = "Example Density/Contour Plot") +
  scale_fill_gradient(low = "lightgreen", high = "green") +
  guides(color = "none", fill = "none", alpha = "none") +
  # same axis windows as used for the first dataset
  scale_T_continuous(limits = c(0.225, 0.215)) +
  scale_L_continuous(limits = c(0.255, 0.245)) +
  scale_R_continuous(limits = c(0.53, 0.54))
The next step I would like to do is to overlap plot1 with plot2. I was wondering if anyone knows how to achieve this. Thanks.
The easiest way to handle this is to add a column to both data frames identifying the source of the data and then to combine them into one large data frame.
Then in the mapping definition define the "group" parameter.
# Give test2 the same column names as test1 *before* touching its columns.
# test2 arrives with columns dataset2/x1/y1/z1, so doing `test2$z <- ...` first
# would create an extra column instead of updating one, and the later rename
# and rbind() would no longer line up.
names(test2) <- names(test1)

# Add a column to identify the data source
test1$id <- "Test1"
test2$id <- "Test2"

# NOTE(review): this shifts dataset 2 by 0.2 in y and z -- presumably only so
# the two point clouds separate visibly in this demo; confirm, and drop these
# two lines when plotting real data.
test2$z <- test2$z + 0.2
test2$y <- test2$y + 0.2

# combine both datasets into one data frame
totalTest <- rbind(test1, test2)

# plot and group by the new id column, so one density is estimated per dataset
plot1 <- ggtern(data = totalTest, aes(x = x, y = y, z = z, group = id, fill = id))
plot1 +
  stat_density_tern(geom = "polygon",
                    aes(fill = ..level..,
                        alpha = ..level..)) +
  theme_rgbw() +
  labs(title = "Example Density/Contour Plot") +
  scale_fill_gradient(low = "lightblue", high = "blue") +
  guides(color = "none", fill = "none", alpha = "none") +
  scale_T_continuous(limits = c(0.225, 0.215)) +
  scale_L_continuous(limits = c(0.255, 0.245)) +
  scale_R_continuous(limits = c(0.53, 0.54))

Plotting 2 Different ggplot2 charts with the Same y-axis

I'd like to plot 2 different charts side by side with a common y-axis using ggplot2:
p1) an "area rectangle" graph using geom_rect()
p2) a density plot using geom_density()
I've come close by using grid.arrange(), but the y-axes don't quite line up. I don't think facet_wrap or facet_grid will work here but I could be wrong.
rm(list=ls())
library(ggplot2)
library(gridExtra)
library(dplyr)
df<-structure(list(total_ulr = c(0.442, 0.679, 0.74, 0.773, 0.777,
0.8036, 0.87, 0.871, 0.895, 0.986, 1.003, 1.2054, 1.546, 1.6072
), width = c(4222L, 14335L, 2572L, 2460L, 1568L, 8143L, 3250L,
17119L, 3740L, 3060L, 2738L, 1L, 1L, 790L), w = c(4222L, 18557L,
21129L, 23589L, 25157L, 33300L, 36550L, 53669L, 57409L, 60469L,
63207L, 63208L, 63209L, 63999L), wm = c(0L, 4222L, 18557L, 21129L,
23589L, 25157L, 33300L, 36550L, 53669L, 57409L, 60469L, 63207L,
63208L, 63209L), wt = c(2111, 11389.5, 19843, 22359, 24373, 29228.5,
34925, 45109.5, 55539, 58939, 61838, 63207.5, 63208.5, 63604),
mainbuckets = c(" 4,222", "14,335", " 2,572", " 2,460", " 1,568",
" 8,143", " 3,250", "17,119", " 3,740", " 3,060", " 2,738",
"", "", " 790"), mainbucketsULR = c("0.44", "0.68", "0.74",
"0.77", "0.78", "0.80", "0.87", "0.87", "0.90", "0.99", "1.00",
"", "", "1.61")), .Names = c("total_ulr", "width", "w", "wm",
"wt", "mainbuckets", "mainbucketsULR"), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -14L))
# Base size for the text labels drawn on the rectangles.
textsize <- 4

# p1: one rectangle per bucket; its horizontal extent runs from wm to w and its
# height is the ratio total_ulr.
p1 <-
  ggplot(df, aes(ymin = 0)) +
  geom_rect(aes(xmin = wm, xmax = w, ymax = total_ulr, fill = total_ulr)) +
  scale_x_reverse() +
  # bucket size just above each rectangle
  geom_text(aes(x = wt, y = total_ulr + 0.02, label = mainbuckets),
            size = textsize, color = "black") +
  # ratio written vertically inside the rectangle, starting near its base
  geom_text(aes(x = wt, y = 0.02, label = mainbucketsULR),
            size = textsize + 1, color = "white", hjust = 0, angle = 90) +
  xlab("Frequency") +
  ylab("Ratio") +
  ggtitle("My Title") +
  theme_bw() +
  theme(legend.position = "none",
        axis.text.x = element_blank())

# p2: density of the ratio, flipped so the ratio runs along the vertical axis
# and mirrored so the curve grows towards the left.
p2 <- ggplot(df, aes(total_ulr, fill = width, ymin = 0)) +
  geom_density(color = "grey", fill = "grey") +
  ggtitle("Density") +
  xlab("") +
  ylab("") +
  theme_bw() +
  coord_flip() + scale_y_reverse() +
  theme(text = element_text(size = 10),
        axis.text.x = element_blank(),
        legend.position = "none"
        #,axis.text.y=element_blank()
  )

# Place the two plots side by side. This uses the plots built above (p2, p1);
# the call previously referenced p2.common.y/p1.common.y, which do not exist
# at this point in the script.
grid.arrange(p2, p1, ncol = 2, widths = c(1, 5))
Here you go. You need to define a common axis with the scale_y_* functions, then set the plot heights to match.
# Range and tick positions shared by the ratio axis of both plots.
limits <- c(0, 2)
breaks <- seq(from = limits[1], to = limits[2], by = 0.5)

# Give both plots the same ratio axis (p2 uses coord_flip(), so there it is the
# x scale) and render each one to a gtable. At this point the axes match but
# the panel heights may still differ.
p1.common.y <- ggplotGrob(p1 + scale_y_continuous(limits = limits, breaks = breaks))
p2.common.y <- ggplotGrob(p2 + scale_x_continuous(limits = limits, breaks = breaks))

# Copy the row heights of p1's layout onto p2 so the panels line up ...
p2.common.y$heights <- p1.common.y$heights

# ... and draw the two gtables side by side.
grid.arrange(p2.common.y, p1.common.y, ncol = 2, widths = c(1, 5))

ggplot2 plot table as lines

I would like to plot the following dataset
structure(list(X = structure(c(3L, 12L, 11L, 7L, 13L, 2L, 1L,
10L, 5L, 4L, 8L, 14L, 9L, 6L), .Label = c("BUM", "DDR", "ETB",
"EXP", "HED", "HEDOS", "KON", "LEIT", "MAIN", "MAT", "PER", "PMA",
"TRA", "TRADITION"), class = "factor"), Geschaeft = c(0.0468431771894094,
0.0916666666666667, 0.0654761904761905, 0.0905432595573441, 0.0761904761904762,
0.0672097759674134, 0.0869565217391304, 0.0650887573964497, 0.0762250453720508,
0.0518234165067179, 0.0561330561330561, 0.060077519379845, 0.0865384615384615,
0.0628683693516699), Gaststaette = c(0.0855397148676171, 0.0604166666666667,
0.0555555555555556, 0.0764587525150905, 0.0895238095238095, 0.0712830957230143,
0.075098814229249, 0.0631163708086785, 0.0780399274047187, 0.0383877159309021,
0.0561330561330561, 0.0581395348837209, 0.0596153846153846, 0.0648330058939096
), Bank = c(0.065173116089613, 0.0854166666666667, 0.0972222222222222,
0.0824949698189135, 0.060952380952381, 0.0529531568228106, 0.0731225296442688,
0.0828402366863905, 0.0725952813067151, 0.0806142034548944, 0.0686070686070686,
0.0503875968992248, 0.0807692307692308, 0.0550098231827112),
Hausarzt = c(0.0712830957230143, 0.0833333333333333, 0.0912698412698413,
0.0704225352112676, 0.0628571428571429, 0.0672097759674134,
0.106719367588933, 0.0710059171597633, 0.108892921960073,
0.0940499040307102, 0.0852390852390852, 0.0794573643410853,
0.0826923076923077, 0.110019646365422), Einr..F..Aeltere = c(0.10183299389002,
0.104166666666667, 0.107142857142857, 0.100603621730382,
0.12, 0.116089613034623, 0.112648221343874, 0.112426035502959,
0.121597096188748, 0.0998080614203455, 0.118503118503119,
0.131782945736434, 0.121153846153846, 0.104125736738703),
Park = c(0.0855397148676171, 0.0666666666666667, 0.0912698412698413,
0.0804828973843058, 0.0704761904761905, 0.0672097759674134,
0.0731225296442688, 0.0670611439842209, 0.0834845735027223,
0.0806142034548944, 0.0686070686070686, 0.0658914728682171,
0.0884615384615385, 0.0609037328094303), Sportstaette = c(0.0855397148676171,
0.0791666666666667, 0.0952380952380952, 0.0824949698189135,
0.0933333333333333, 0.114052953156823, 0.0810276679841897,
0.0788954635108481, 0.0780399274047187, 0.0825335892514395,
0.0831600831600832, 0.0852713178294574, 0.0884615384615385,
0.1237721021611), OEPNV = c(0.0529531568228106, 0.05625,
0.0456349206349206, 0.0583501006036217, 0.0666666666666667,
0.0366598778004073, 0.0434782608695652, 0.0571992110453649,
0.0344827586206897, 0.0633397312859885, 0.0478170478170478,
0.062015503875969, 0.0519230769230769, 0.0235756385068762
), Mangel.an.Gruenflaechen = c(0.0692464358452139, 0.0645833333333333,
0.0694444444444444, 0.0422535211267606, 0.0666666666666667,
0.0692464358452139, 0.0711462450592885, 0.0749506903353057,
0.0598911070780399, 0.0959692898272553, 0.0623700623700624,
0.0717054263565891, 0.0653846153846154, 0.0746561886051081
), Kriminalitaet = c(0.0672097759674134, 0.0541666666666667,
0.0476190476190476, 0.0422535211267606, 0.0628571428571429,
0.0509164969450102, 0.0454545454545455, 0.0532544378698225,
0.058076225045372, 0.072936660268714, 0.0602910602910603,
0.063953488372093, 0.0461538461538462, 0.0648330058939096
), Auslaender = c(0.0244399185336049, 0.04375, 0.0416666666666667,
0.0663983903420523, 0.0228571428571429, 0.0509164969450102,
0.0237154150197628, 0.0236686390532544, 0.0217785843920145,
0.0441458733205374, 0.024948024948025, 0.0232558139534884,
0.0230769230769231, 0.0451866404715128), Umweltbelastung = c(0.0468431771894094,
0.0479166666666667, 0.0476190476190476, 0.0402414486921529,
0.0438095238095238, 0.0468431771894094, 0.0454545454545455,
0.0512820512820513, 0.0417422867513612, 0.0518234165067179,
0.0478170478170478, 0.0445736434108527, 0.0442307692307692,
0.0451866404715128), Einr..f..Kinder = c(0.0753564154786151,
0.075, 0.0555555555555556, 0.0724346076458753, 0.0533333333333333,
0.0794297352342159, 0.075098814229249, 0.0788954635108481,
0.0598911070780399, 0.0460652591170825, 0.0977130977130977,
0.0930232558139535, 0.0634615384615385, 0.0451866404715128
), Einr..f..Jugendliche = c(0.122199592668024, 0.0875, 0.0892857142857143,
0.0945674044265594, 0.11047619047619, 0.109979633401222,
0.0869565217391304, 0.120315581854043, 0.105263157894737,
0.0978886756238004, 0.122661122661123, 0.11046511627907,
0.0980769230769231, 0.119842829076621)), .Names = c("X",
"Geschaeft", "Gaststaette", "Bank", "Hausarzt", "Einr..F..Aeltere",
"Park", "Sportstaette", "OEPNV", "Mangel.an.Gruenflaechen", "Kriminalitaet",
"Auslaender", "Umweltbelastung", "Einr..f..Kinder", "Einr..f..Jugendliche"
), row.names = c(NA, -14L), class = "data.frame")
So that it looks like this picture (or better, with each line in a separate plot) that I created with Excel.
But I can't figure out how...
Thanks a lot for your help.
Dominik
UPDATE: Here is just a map of what the groups (BUM,DDR,ETB etc.) mean.
This is an extension to @Andrie's solution. It combines the faceting idea with that of overplotting (stolen liberally from the learnr blog), which I find results in a cool visualization. Here is the code and the resulting output. Comments are welcome.
# Long format: one row per (milieu X, variable) pair.
mdf <- melt(df, id.vars = "X")
# Order the variables by their mean value, and keep a copy of the milieu in Y
# so the background layer can drop X and still draw one line per milieu.
mdf <- transform(mdf, variable = reorder(variable, value, mean), Y = X)
ggplot(mdf, aes(x = variable, y = value)) +
  # every milieu in light grey; this layer's data has no X column, so it is
  # not split by the facets and is repeated in each panel
  geom_line(data = transform(mdf, X = NULL), aes(group = Y), colour = "grey80") +
  # the panel's own milieu drawn on top
  geom_line(aes(group = X)) +
  facet_wrap(~X) +
  # theme()/element_text() replace opts()/theme_text(), which are defunct in
  # current ggplot2
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
EDIT: If you have groupings of milieus, then a better way to present might be the following
# One colour per code, in the row order of the lookup table below:
# 4 oranges (LEIT group), 4 greens (TRADITION), 3 blues (MAIN), 3 purples (HEDOS).
mycols <- c(RColorBrewer::brewer.pal(4, "Oranges"),
            RColorBrewer::brewer.pal(4, "Greens"),
            RColorBrewer::brewer.pal(3, "Blues"),
            RColorBrewer::brewer.pal(3, "PuRd"))

# Lookup table: code (V1) -> group it belongs to (V2).
# `text =` avoids leaving an open textConnection behind, and strip.white = TRUE
# removes the blank after each comma so V2 holds "LEIT" rather than " LEIT".
mdf2 <- read.table(text = "
V1, V2
ETB, LEIT
PMA, LEIT
PER, LEIT
LEIT, LEIT
KON, TRADITION
TRA, TRADITION
DDR, TRADITION
TRADITION, TRADITION
BUM, MAIN
MAT, MAIN
MAIN, MAIN
EXP, HEDOS
HED, HEDOS
HEDOS, HEDOS", sep = ",", header = TRUE, stringsAsFactors = FALSE,
                   strip.white = TRUE)
# keep the colours as character strings; the identity scale uses them verbatim
mdf2 <- data.frame(mdf2, mycols = mycols, stringsAsFactors = FALSE)
mdf3 <- merge(mdf, mdf2, by.x = "X", by.y = "V1")

# Three-letter codes are the individual milieus; the longer codes (LEIT,
# TRADITION, MAIN, HEDOS) are the group-level rows and get a thicker line.
# The `subset = .()` layer argument no longer exists in ggplot2, so each layer
# receives its pre-filtered data instead.
is_milieu <- nchar(as.character(mdf3$X)) == 3
p1 <- ggplot(mdf3, aes(x = variable, y = value, group = X, colour = mycols)) +
  geom_line(data = mdf3[is_milieu, ]) +
  geom_line(data = mdf3[!is_milieu, ], size = 1.5) +
  facet_wrap(~ V2) +
  # identity scales draw no legend by default, so request one explicitly
  scale_color_identity(name = "Milieus", breaks = mdf2$mycols,
                       labels = mdf2$V1, guide = "legend") +
  theme_bw() +
  # theme()/element_text() replace the defunct opts()/theme_text()
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
The trick is to reshape your data into tall format before you pass it to ggplot. This is easy when using the melt function in package reshape2:
Assuming your data is a variable called df:
library(reshape2)
library(ggplot2)
# Tall format: one row per (X, variable) pair with its value.
mdf <- melt(df, id.vars = "X")
str(mdf)
# One coloured line per level of X across the variables.
# theme()/element_text() replace opts()/theme_text(), which are defunct in
# current ggplot2.
ggplot(mdf, aes(x = variable, y = value, colour = X, group = X)) +
  geom_line() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Edit: As @Chase suggests, you can use faceting to make the plot more readable:
# One panel per variable, with a point for each level of X.
# theme()/element_text() replace opts()/theme_text(), which are defunct in
# current ggplot2.
ggplot(mdf, aes(x = X, y = value)) +
  geom_point() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  facet_wrap(~variable)
First, melt the data to put it in a long format.
# Long format: X stays as the identifier, every other column is stacked into
# variable/value pairs.
melted_data <- melt(data = the_data, id.vars = "X")
Now draw the plot with a numeric x axis, and fix up the labels.
# Plot the variables at their integer positions on a continuous x axis, then
# relabel those positions with the variable names.
p <- ggplot(melted_data, aes(as.numeric(variable), value, colour = X)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq_len(nlevels(melted_data$variable)),
    labels = levels(melted_data$variable)
  ) +
  # theme()/element_text() replace opts()/theme_text(), which are defunct in
  # current ggplot2
  theme(axis.text.x = element_text(angle = 90))
p
Having answered this, I'm not sure what the plot tells you – it's just a jumble of lines to me. You might be better off greying out most of the lines and highlighting one or two interesting ones.
Add a column that picks out, e.g., EXP.
# Logical flag: TRUE for the rows that belong to the "EXP" group.
melted_data$is_EXP <- melted_data$X == "EXP"
Ignore my previous answer; Andrie's is better. Use manual colour and size scales to highlight your new column.
# One line per level of X; colour and thickness depend on the is_EXP flag, so
# the flagged group is black and thick while all other lines are thin grey.
p <- ggplot(melted_data,
            aes(variable, value, colour = is_EXP, size = is_EXP, group = X)) +
  geom_line() +
  # values are given in the order FALSE, TRUE
  scale_colour_manual(values = c("grey80", "black")) +
  scale_size_manual(values = c(0.5, 1.5)) +
  # theme()/element_text() replace opts()/theme_text(), which are defunct in
  # current ggplot2
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
p

Resources