Need plot labels from separate column in ggplot - r

I am plotting number of people against the number of certain incidents per month, and need to plot each month's label on the side of each point in the plot. The labels are in a separate column (column 'month') and I need to find the synthax that can help me put the abbreviated 3-letter month label besides each associated point in the plot. I have done this in base plot previously but can't get it done in ggplot.
My script:
library(dplyr)
library(ggplot2)
new_labels <- c("1995-\n2001","2002-\n2011","2012-\n2019")
df %>%
mutate(period=factor(period,levels = unique(period),
labels = new_labels,ordered = T)) %>%
ggplot(aes(people,inc)) +
geom_point(cex=3.5) +
scale_y_continuous(breaks=seq(0,12,by=2),limit=c(0,12),expand=c(0,1)) +
scale_x_continuous(breaks=seq(0,75000,by=10000),limit=c(0,75000),expand=c(0,0)) +
theme_bw(base_size=20) +
facet_grid(class~category) +
facet_grid(rows=vars(period)) +
stat_smooth(method="glm", method.args = list(family = "poisson"),col="black") +
theme(strip.background = element_rect(fill="lightgrey", size=1, color="black")) +
theme(strip.text.y = element_text(size=19, color="black",angle=0)) +
labs(x = "Number of people per month", y = "Incidents per month")
My dataframe:
dput(df)
structure(list(period = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("1",
"2", "3"), class = "factor"), month = structure(c(5L, 4L, 8L,
1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L,
6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 7L, 6L, 2L, 12L,
11L, 10L, 3L), .Label = c("APR", "AUG", "DEC", "FEB", "JAN",
"JUL", "JUN", "MAR", "MAY", "NOV", "OCT", "SEP"), class = "factor"),
people = c(4068L, 7251L, 14384L, 20513L, 18748L, 17760L,
23433L, 22878L, 12815L, 8101L, 7477L, 5018L, 6830L, 16278L,
30244L, 45747L, 31807L, 41184L, 54124L, 52565L, 24365L, 12759L,
8307L, 6038L, 16711L, 32187L, 45810L, 53932L, 40082L, 58506L,
71259L, 67564L, 33556L, 22818L, 16508L, 15848L), inc = c(2L,
1L, 3L, 5L, 3L, 0L, 2L, 5L, 1L, 1L, 0L, 0L, 0L, 2L, 1L, 5L,
5L, 2L, 7L, 6L, 1L, 0L, 0L, 2L, 0L, 0L, 2L, 2L, 0L, 1L, 1L,
1L, 0L, 0L, 0L, 0L)), row.names = c(NA, -36L), class = "data.frame")

Related

ggarrange() function overvrites the color of my boxplots

I am making two boxplots and want to arrange them beside each other. I have made each of them look like I want when displaying them separately but when I use ggarrange() the colors disappear. This is my code for the plots:
BOX1_data <- read.table(file = "clipboard",
sep = "\t", header=TRUE)
BOX1_data$Diagnosis <- as.factor(BOX1_data$Diagnosis)
BOX1plot <- ggplot(BOX1_data, aes(x=Diagnosis, y=No.Variants, fill= Diagnosis)) + geom_boxplot() +
scale_fill_brewer(palette = "Dark2") +
scale_x_discrete(labels = c("AC\nN=38", "SqCC\nN=15", "SCLC\nN=8", "BL disease\nN=16"))
BOX2_data <- read.table(file = "clipboard",
sep = "\t", header=TRUE)
BOX2_data$Stage <- as.factor(BOX2_data$Stage)
BOX2plot <- ggplot(BOX2_data, aes(x=Stage, y=No.Variants, fill = Stage)) + geom_boxplot(width = 0.4) +
scale_fill_brewer(palette = "Dark2") +
scale_x_discrete(labels = c("Stage I-III\nN=24", "Stage IV\nN=37"))
To arrange the plots I then write:
BOX_list <- list(BOX1plot, BOX2plot)
ggarrange(plotlist = BOX_list, labels = c('A', 'B'), ncol = 2)
The easiest way of getting rid of gridlines etc I thought was by using theme_set() and I think that this might be my problem.
My code is:
theme_set(theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.background = element_blank(),
axis.line = element_line(colour = "grey")))
I realize that theme_bw() overwrites my colors in the boxes. But I have tried removing it, switching it for theme_transparent() (this removes all my labels) and neither works. I have searched for a way of just adding a transparency to my boxes in the theme so that my colors will shine through. I am also suspicious that maybe the palette that I chose might give me the same colors in the two plots which I also do not want. To add, if it matters, I have 4 groups in the first plot and 2 in the second.
dput(BOX1_data)
structure(list(Diagnosis = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("1", "2", "3", "4"), class = "factor"),
No.Variants = c(3L, 4L, 6L, 14L, 3L, 3L, 4L, 3L, 3L, 3L,
8L, 6L, 22L, 10L, 6L, 9L, 1L, 9L, 3L, 4L, 8L, 2L, 13L, 3L,
11L, 19L, 5L, 5L, 3L, 12L, 4L, 2L, 4L, 18L, 8L, 7L, 7L, 12L,
4L, 1L, 6L, 3L, 2L, 8L, 10L, 3L, 15L, 9L, 13L, 13L, 15L,
10L, 10L, 12L, 6L, 3L, 12L, 9L, 15L, 10L, 18L, 3L, 6L, 3L,
6L, 1L, 3L, 3L, 7L, 1L, 2L, 10L, 7L, 7L, 1L, 0L, 2L)), row.names = c(NA,
-77L), class = "data.frame")
dput(BOX2_data)
structure(list(No.Variants = c(3L, 4L, 6L, 14L, 3L, 3L, 4L, 3L,
3L, 3L, 8L, 6L, 22L, 10L, 6L, 9L, 1L, 9L, 3L, 4L, 8L, 2L, 13L,
3L, 11L, 19L, 5L, 5L, 3L, 12L, 4L, 2L, 4L, 18L, 8L, 7L, 7L, 12L,
4L, 1L, 6L, 3L, 2L, 8L, 10L, 3L, 15L, 9L, 13L, 13L, 15L, 10L,
10L, 12L, 6L, 3L, 12L, 9L, 15L, 10L, 18L), Stage = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("1",
"2"), class = "factor")), row.names = c(NA, -61L), class = "data.frame")
Grateful for any tips!
As already pointed out, it seems the OP's issue with theme_set() removing the fill colors set in your two plots was solved by updating to a new version of ggplot2. Herein, I have a solution for the second part of OP's question (that was clarified in the comments). Represented here for convenience:
Now it is just the problem that I want the palette to continue on the second plot's boxes and not restart so that I will get different colors on all boxes.
In order to do this, one has to realize that there are 4 fill colors for the first plot BOX1plot, and 2 fill colors for BOX2plot. For BOX1plot, we want the color palette to begin at the first color, but for BOX2plot, we want the palette to start on the 5th color sequence in the palette. There's no way to do this through the scale_*_brewer() functions, so the approach here will be to access the Brewer palette from RcolorBrewer::brewer.pal(), and then assign where to begin and end in that sequence based on the number of levels of each factor using scale_fill_manual() to just set the color values from the extracted Brewer color palette.
You can just "know" that you need to "use colors 1-4" for BOX1plot and "use color 5 and 6" for BOX2plot; however, it is much more elegant to just calculate this automatically based on the number of levels (in case you want to run this again). The code below does this:
library(ggplot2)
library(ggpubr)
library(RColorBrewer)
# ... read in your data as before
# create factors (as OP did before)
BOX1_data$Diagnosis <- as.factor(BOX1_data$Diagnosis)
BOX2_data$Stage <- as.factor(BOX2_data$Stage)
# make color palette based on Brewer "Dark2" palette
lev_diag <- length(levels(BOX1_data$Diagnosis))
lev_stage <- length(levels(BOX2_data$Stage))
lev_total <- lev_diag + lev_stage
my_colors <- brewer.pal(lev_total, "Dark2")
BOX1plot <- ggplot(BOX1_data, aes(x=Diagnosis, y=No.Variants, fill= Diagnosis)) + geom_boxplot() +
scale_fill_manual(values=my_colors[1:lev_diag]) +
scale_x_discrete(labels = c("AC\nN=38", "SqCC\nN=15", "SCLC\nN=8", "BL disease\nN=16"))
BOX2plot <- ggplot(BOX2_data, aes(x=Stage, y=No.Variants, fill = Stage)) + geom_boxplot(width = 0.4) +
scale_fill_manual(values = my_colors[(lev_diag+1):lev_total]) +
scale_x_discrete(labels = c("Stage I-III\nN=24", "Stage IV\nN=37"))
BOX_list <- list(BOX1plot, BOX2plot)
ggarrange(plotlist = BOX_list, labels = c('A', 'B'), ncol = 2)
If you have issues with ggarrange() I would suggest next approach using patchwork:
library(ggplot2)
library(patchwork)
#Data format
BOX1_data$Diagnosis <- as.factor(BOX1_data$Diagnosis)
#Plot 1
BOX1plot <- ggplot(BOX1_data, aes(x=Diagnosis, y=No.Variants, fill= Diagnosis)) + geom_boxplot() +
scale_fill_brewer(palette = "Dark2") +
scale_x_discrete(labels = c("AC\nN=38", "SqCC\nN=15", "SCLC\nN=8", "BL disease\nN=16"))
#Data format
BOX2_data$Stage <- as.factor(BOX2_data$Stage)
#Plot 2
BOX2plot <- ggplot(BOX2_data, aes(x=Stage, y=No.Variants, fill = Stage)) + geom_boxplot(width = 0.4) +
scale_fill_brewer(palette = "Dark2") +
scale_x_discrete(labels = c("Stage I-III\nN=24", "Stage IV\nN=37"))
#Arrange plots
BOX1plot+BOX2plot+plot_annotation(tag_levels = 'A')
The output:

How can I add percentage-labels to a facetted pie chart created with Ggplot2 in R

I'm quite new with R and I'm trying to create facetted pie charts with percentages.
My data is a dataframe with 31 patients. These either have a rapid progression of the disease or a slow progression. And each patient has a genetic diagnosis.
Here's the data
pie <- structure(list(progression = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("rapid", "slow"), class = "factor"), Gene = structure(c(3L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 6L, 5L, 5L, 8L, 8L, 8L, 8L, 8L, 8L, 1L, 2L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L), .Label = c("ALS2", "BSCL2", "C9orf72", "C9orf72/SOD1", "FUS", "FUS/TBK1", "SETX", "SOD1 het", "SOD1 hom"), class = "factor")), class = "data.frame", row.names = c(NA, -31L))
I have now created a facetted pie chart using the following R code:
library(ggplot2)
ggplot(data = pie, aes(x=factor(1), fill=Gene)) +
geom_bar(position="fill", aes(y = stat(count/sum(count)))) +
facet_grid(facets=. ~ progression) +
coord_polar("y") +
scale_y_continuous(labels = scales::percent_format())
I tried to add the percentage of each genetic diagnosis among the rapid and the slow progression patients using geom_text but I have always failed.
It would be great to have your help here!
Many thanks,
Matias

change the order of a common legend, in a superimposed graph

I would like to change the order of my legend, and not to display them in alphabetical order as you can see below. I would like to have
"NONE","LIGHT","MEDIUM","HEAVY","V_COLD","COLD","MEDIUM","HOT".
Is it possible? I tried with several arguments but without success.
Below, my table :
structure(list(SOUNAME = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "BALLYSHANNON (CATHLEENS FALL)", class = "factor"),
year_month = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L,
6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L), .Label = c("2013-03",
"2013-04", "2013-05", "2013-06", "2013-07", "2013-08", "2013-09",
"2013-10", "2013-12"), class = "factor"), pre_type = structure(c(4L,
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L), .Label = c("HEAVY", "LIGHT", "MEDIUM",
"NONE"), class = "factor"), pre_value = c(13L, 2L, 11L, 5L,
9L, 3L, 10L, 7L, 2L, 6L, 13L, 10L, 10L, 1L, 15L, 4L, 16L,
2L, 7L, 5L, 2L, 2L, 17L, 9L, 7L, 3L, 13L, 6L, 5L, 2L, 10L,
14L, 1L, 5L, 19L, 6L), tem_type = structure(c(4L, 3L, 2L,
1L, 4L, 3L, 2L, 1L, 4L, 3L, 2L, 1L, 4L, 3L, 2L, 1L, 4L, 3L,
2L, 1L, 4L, 3L, 2L, 1L, 4L, 3L, 2L, 1L, 4L, 3L, 2L, 1L, 4L,
3L, 2L, 1L), .Label = c("COLD", "HOT", "MEDIUM", "V_COLD"
), class = "factor"), tem_value = c(0L, 7L, 0L, 23L, 0L,
29L, 0L, 1L, 0L, 29L, 2L, 0L, 0L, 21L, 9L, 0L, 0L, 5L, 25L,
0L, 0L, 18L, 13L, 0L, 0L, 21L, 9L, 0L, 0L, 26L, 5L, 0L, 0L,
24L, 0L, 7L), cnt_vehicle = c(NA, 2754406, NA, NA, NA, 2846039,
NA, NA, NA, 3149377, NA, NA, NA, 3058810, NA, NA, NA, 3362614,
NA, NA, NA, 3415716, NA, NA, NA, 3020812, NA, NA, NA, 3076665,
NA, NA, NA, 2775306, NA, NA), x = c(1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L,
6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L
)), .Names = c("SOUNAME", "year_month", "pre_type", "pre_value",
"tem_type", "tem_value", "cnt_vehicle", "x"), row.names = c(NA,
-36L), class = "data.frame")
Below my graph:
ggplot(data = b_complet_2013, aes(x = x, y = pre_value*100000, fill = pre_type), stat = "identity") +
scale_x_continuous(breaks=(1:9)+0.2, labels=unique(b_complet_2013$year_month)) +
geom_bar(stat = "identity", width=0.3) +
xlab("date") + ylab ("Number of days of précipitations(left) and temperatures (ritght)") +
ggtitle("Precipitation per month") +
geom_bar(data=b_complet_2013,aes(x=x+0.4, y=tem_value*100000, fill=tem_type), width=0.3, stat = "identity") +
xlab("date") + ylab("Number of days of precipitations(left) and temperatures (ritght)") +
ggtitle("Impact of weather on road traffics") + theme( axis.title.y = element_text(color = "blue", face = "bold")) +
theme(axis.text.y = element_text(color = "blue", face = "bold", size=9)) + theme( axis.title.y.right = element_text(color = "black", face = "bold")) +
theme(axis.text.y.right = element_text(color = "black", size = 9, face = "bold")) +
geom_line(mapping = aes(x= x+0.2, y = as.numeric(cnt_vehicle)), colour = I("blue"), size = 0.8) +
geom_point(aes(x= x+0.2, y = as.numeric(cnt_vehicle), colour = I("blue")), show.legend=FALSE, stat = "identity") +
scale_y_continuous(sec.axis = sec_axis(~./100000,name="Number of days of precipitations(left) and temperatures (ritght)")) +
theme( plot.title = element_text(size = 17)) + theme(axis.title.x = element_text(size = 12)) + theme(axis.title.y = element_text(size = 12)) +
labs(y = "Number of vehicles", color ="black") +
theme(panel.background = element_rect(linetype = "dashed", fill="white"), plot.background = element_rect(linetype = "dashed",fill="grey90" ))

Faceting bars in ggplot2

I have this problem: I want to build a stacked bar plot with the faceting capabilities, so I can compare the distribution of frequencies for five common categories, within two different objects, separated according to three groups. I have six objects, five categories and three groups. The problem is that each group has only two different and exclusive objects to plot, but so far I can only produce a plot in which the six objects are plotted across the three groups. This is not optimal, since for each group I have four objects with no data.
Is it possible to plot just two objects for each group with the faceting capabilities?
EDITED
This is my data:
structure(list(Face = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 6L, 6L, 6L, 6L, 6L), .Label = c("LGH002", "LGH003", "LGM009",
"SCM018", "VAH022", "VAM028"), class = "factor"), Race = structure(c(1L,
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L,
3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L),
.Label = c("1. Amerindian", "2. White", "3. Mestizo", "4. Other races",
"5. Cannot tell"), class = "factor"), Count = c(19L, 0L, 13L, 8L, 0L, 2L,
7L, 23L, 6L, 2L, 1L, 1L, 29L, 6L, 3L, 29L, 0L, 11L, 0L, 0L, 0L, 38L, 1L, 0L,
1L, 0L, 30L, 9L, 0L, 1L), Density = c(0.475, 0, 0.325, 0.2, 0,
0.05, 0.175, 0.575, 0.15, 0.05, 0.025, 0.025, 0.725, 0.15,
0.075, 0.725, 0, 0.275, 0, 0, 0, 0.95, 0.025, 0, 0.025, 0,
0.75, 0.225, 0, 0.025), School = structure(c(1L, 1L, 1L,
1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Municipal",
"Private Fee-Paying", "Private-Voucher"), class = "factor")),
.Names =c("Face", "Race", "Count", "Density", "School"),
class = "data.frame", row.names = c(NA, -30L))
This is the code I'm using to build the plot:
P <- ggplot(data = races.df, aes(x = Face, y = Density, fill = Race)) +
geom_bar(stat="identity") +
scale_y_continuous(labels=percent)
P + facet_grid(School ~ ., scales="free") + coord_flip()
As you can imagine, I only want to see the x-values "SCM018" and "LGH002" in "Municipal"; "LGM009" and "LGH003" in "Private-Voucher"; and "VAH022" and "VAM028" in "Private Fee-Paying" (only two objects per group). Is it possible? Any help?
All the best,
Mauricio.

Reshape a large matrix with missing values and multiple vars of interest [duplicate]

This question already has answers here:
Convert data from long format to wide format with multiple measure columns
(6 answers)
Closed 4 years ago.
I need to reorganize a large dataset into a specific format for further analysis. Right now the data are in long format, with multiple records through time for each point. I need to reshape the data so that each point has a single record, but it will add many new columns of the time-specific data. I’ve looked at previous similar posts but I need to ultimately convert several of the current variables into columns, and I can’t find an example of such. Is there a way to accomplish this in a single reshape, or will I have to do several and then concatenate the new columns back together? Another wrinkle before I post the example is that not all points were sampled at each time-step, so I need those values to show up as NA. For example, (see data below) SitePoint A1 was not sampled at all in 2012, SitePoint A10 was not sampled during the first round in 2012, but K83 was sampled all nine times.
mydatain <- structure(list(SitePoint = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 6L, 6L), .Label = c("A1", "A10", "K145", "K83", "T15",
"T213"), class = "factor"), Year_Rotation = structure(c(1L, 2L,
3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 8L, 9L, 1L, 2L, 4L, 5L,
6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 1L, 7L), .Label = c("2010_1", "2010_2",
"2010_3", "2011_1", "2011_2", "2011_3", "2012_1", "2012_2", "2012_3"
), class = "factor"), MR_Fire = structure(c(5L, 6L, 6L, 2L, 9L,
9L, 5L, 6L, 6L, 2L, 9L, 9L, 7L, 8L, 16L, 17L, 21L, 22L, 23L,
25L, 3L, 4L, 10L, 11L, 12L, 13L, 14L, 15L, 18L, 19L, 20L, 1L,
2L, 2L, 5L, 6L, 6L, 11L, 11L, 12L, 7L, 24L), .Label = c("0",
"1", "10", "11", "12", "13", "14", "15", "2", "23", "24", "25",
"35", "36", "37", "39", "40", "47", "48", "49", "51", "52", "53",
"8", "9"), class = "factor"), fire_seas = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 3L), .Label = c("dry", "fire", "wet"
), class = "factor"), OptTSF = c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L,
0L, 1L, 1L)), .Names = c("SitePoint", "Year_Rotation", "MR_Fire",
"fire_seas", "OptTSF"), row.names = c(31L, 32L, 33L, 34L, 35L,
36L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 10543L, 10544L,
10545L, 10546L, 10547L, 10548L, 10549L, 10550L, 14988L, 14989L,
14990L, 14991L, 14992L, 14993L, 14994L, 14995L, 14996L, 17370L,
17371L, 17372L, 17373L, 17374L, 17375L, 17376L, 17377L, 17378L,
19353L, 19354L), class = "data.frame")
Ultimately I need something like this:
myfinal <- structure(list(SitePoint = structure(1:6, .Label = c("A1", "A10",
"K145", "K83", "T15", "T213"), class = "factor"), MR_Fire_2010_1 = c(12L,
12L, 39L, 23L, 0L, 14L), MR_Fire_2010_2 = c(13L, 13L, 40L, 24L,
1L, NA), MR_Fire_2010_3 = c(13L, 13L, NA, 25L, 1L, NA), MR_Fire_2011_1 = c(1L,
1L, 51L, 35L, 12L, NA), MR_Fire_2011_2 = c(2L, 2L, 52L, 36L,
13L, NA), MR_Fire_2011_3 = c(2L, 2L, 53L, 37L, 13L, NA), MR_Fire_2012_1 = c(NA,
NA, 9L, 47L, 24L, 8L), MR_Fire_2012_2 = c(NA, 14L, 10L, 48L,
24L, NA), MR_Fire_2012_3 = c(NA, 15L, 11L, 49L, 25L, NA), season_2010_1 = structure(c(2L,
2L, 1L, 2L, 2L, 1L), .Label = c("dry", "fire"), class = "factor"),
season_2010_2 = structure(c(2L, 2L, 1L, 2L, 2L, NA), .Label = c("dry",
"fire"), class = "factor"), season_2010_3 = structure(c(1L,
1L, NA, 1L, 1L, NA), .Label = "fire", class = "factor"),
season_2011_1 = structure(c(2L, 2L, 1L, 2L, 2L, NA), .Label = c("dry",
"fire"), class = "factor"), season_2011_2 = structure(c(2L,
2L, 1L, 2L, 2L, NA), .Label = c("dry", "fire"), class = "factor"),
season_2011_3 = structure(c(2L, 2L, 1L, 2L, 2L, NA), .Label = c("dry",
"fire"), class = "factor"), season_2012_1 = structure(c(NA,
NA, 2L, 1L, 1L, 2L), .Label = c("fire", "wet"), class = "factor"),
season_2012_2 = structure(c(NA, 1L, 2L, 1L, 1L, NA), .Label = c("fire",
"wet"), class = "factor"), season_2012_3 = structure(c(NA,
1L, 2L, 1L, 1L, NA), .Label = c("fire", "wet"), class = "factor"),
OptTSF_2010_1 = c(1L, 1L, 0L, 1L, 1L, 1L), OptTSF_2010_2 = c(1L,
1L, 0L, 1L, 1L, NA), OptTSF_2010_3 = c(1L, 1L, NA, 1L, 1L,
NA), OptTSF_2011_1 = c(1L, 1L, 0L, 0L, 1L, NA), OptTSF_2011_2 = c(1L,
1L, 0L, 0L, 1L, NA), OptTSF_2011_3 = c(1L, 1L, 0L, 0L, 1L,
NA), OptTSF_2012_1 = c(NA, NA, 1L, 0L, 0L, 1L), OptTSF_2012_2 = c(NA,
1L, 1L, 0L, 0L, NA), OptTSF_2012_3 = c(NA, 1L, 1L, 0L, 0L,
NA)), .Names = c("SitePoint", "MR_Fire_2010_1", "MR_Fire_2010_2",
"MR_Fire_2010_3", "MR_Fire_2011_1", "MR_Fire_2011_2", "MR_Fire_2011_3",
"MR_Fire_2012_1", "MR_Fire_2012_2", "MR_Fire_2012_3", "season_2010_1",
"season_2010_2", "season_2010_3", "season_2011_1", "season_2011_2",
"season_2011_3", "season_2012_1", "season_2012_2", "season_2012_3",
"OptTSF_2010_1", "OptTSF_2010_2", "OptTSF_2010_3", "OptTSF_2011_1",
"OptTSF_2011_2", "OptTSF_2011_3", "OptTSF_2012_1", "OptTSF_2012_2",
"OptTSF_2012_3"), class = "data.frame", row.names = c(NA, -6L
))
The actual dataset is about 23656 records X 15 variables, so doing it by hand is likely to cause major headaches and potential for mistakes. Any help or suggestions are appreciated. If this has been answered elsewhere, apologies. I couldn’t find anything directly applicable; everything seemed to related to three columns and only one of those being extracted as new variables. Thanks.
SP
dcast from the devel version of data.table i.e., v1.9.5 can cast multiple columns simultaneously. It can be installed from here.
library(data.table) ## v1.9.5+
dcast(setDT(mydatain), SitePoint~Year_Rotation,
value.var=c('MR_Fire', 'fire_seas', 'OptTSF'))
You can use reshape to change the structure of your dataframe from long to wide using the following code:
reshape(mydatain,timevar="Year_Rotation",idvar="SitePoint",direction="wide")

Resources