I'm trying to create a publication-ready table using the ggtexttable function from ggpubr. I have a data frame:
dput(df)
structure(list(feature = list("start_codon", "stop_codon", "intergenic",
"3UTR", "5UTR", "exon", "intron", "ncRNA", "pseudogene"),
observed = list(structure(1L, .Names = "start_codon"), structure(1L, .Names = "stop_codon"),
structure(418L, .Names = "intergenic"), structure(48L, .Names = "3UTR"),
structure(28L, .Names = "5UTR"), structure(223L, .Names = "exon"),
structure(578L, .Names = "intron"), structure(20L, .Names = "ncRNA"),
structure(1L, .Names = "pseudogene")), expected = list(
0.286, 0.286, 369.02, 72.461, 33.165, 257.869, 631.189,
48.491, 3.172), fc = list(3.5, 3.5, 1.1, 0.7, 0.8, 0.9,
0.9, 0.4, 0.3), test = list("enrichment", "enrichment",
"enrichment", "depletion", "depletion", "depletion",
"depletion", "depletion", "depletion"), sig = list("F",
"F", "T", "T", "F", "T", "T", "T", "F"), p_val = list(
"0.249", "0.249", "0.00186", "0.00116", "0.209", "0.00814",
"0.00237", "<1e-04", "0.175")), class = "data.frame", row.names = c(NA,
-9L), .Names = c("feature", "observed", "expected", "fc", "test",
"sig", "p_val"))
And when I try to turn this into a table:
ggtexttable(df)
I get the error:
Error in (function (label, parse = FALSE, col = "black", fontsize =
12, : unused arguments (label.feature = dots[[5]][1],
label.observed = dots[[6]][1], label.expected = dots[[7]][1],
label.fc = dots[[8]][1], label.test = dots[[9]][1], label.sig_val
= dots[[10]][1], label.p_val = dots[[11]][1])
Does anyone know what might be causing this?
This works fine:
df <- head(iris)
ggtexttable(df)
I have found the problem and a solution that should work for you. First of all, your data is not in the proper format (its columns are nested lists), which is why you were getting this error when trying to display it. You can easily check the structure of the dataset by running this in your console: str(data)
Here is the solution to convert your data to data.frame:
# `data` is the data frame whose columns are lists (called `df` in the question).
# Flatten every list column into a plain atomic vector.
first.step <- lapply(data, unlist)
# Rebuild a regular data frame from the flattened columns; spell out FALSE
# because the shorthand `F` is an ordinary variable that can be reassigned.
second.step <- as.data.frame(first.step, stringsAsFactors = FALSE)
Then you can easily use the function ggtexttable(second.step) and it displays the table with your data.
Related
Suppose I have three tables as below:
table1 <- structure(list(Pos = 1:6, A = c(16.8508287292818, 0, 0.552486187845304,
0, 1.10497237569061, 1.38121546961326), C = c(1.93370165745856,
0.276243093922652, 0.828729281767956, 0.276243093922652, 0, 0.552486187845304
), G = c(1.10497237569061, 2.48618784530387, 0.276243093922652,
0.828729281767956, 0.276243093922652, 0), T = c(0.828729281767956,
0, 0.828729281767956, 1.10497237569061, 0, 0)), .Names = c("Pos",
"A", "C", "G", "T"), row.names = c(NA, 6L), class = "data.frame")
table2<- structure(list(Pos = 1:6, A = c(4.15584415584416, 1.03896103896104,
0.779220779220779, 0.692640692640693, 2.25108225108225, 2.94372294372294
), C = c(1.12554112554113, 0.173160173160173, 0.173160173160173,
0.519480519480519, 0.173160173160173, 0.173160173160173), G = c(1.03896103896104,
0.346320346320346, 0.0865800865800866, 0.432900432900433, 0.519480519480519,
0.0865800865800866), T = c(2.77056277056277, 0.606060606060606,
0.25974025974026, 0.692640692640693, 0.346320346320346, 0.25974025974026
)), .Names = c("Pos", "A", "C", "G", "T"), row.names = c(NA,
6L), class = "data.frame")
table3 <- structure(list(Pos = 1:6, A = c(10.3492063492063, 0.317460317460317,
0.349206349206349, 0.920634920634921, 1.96825396825397, 1.23809523809524
), C = c(0.825396825396825, 0.126984126984127, 0.349206349206349,
0.317460317460317, 0.19047619047619, 0.253968253968254), G = c(0.761904761904762,
0.952380952380952, 0.285714285714286, 0.412698412698413, 0.126984126984127,
0.19047619047619), T = c(1.07936507936508, 0.412698412698413,
0.476190476190476, 0.253968253968254, 0.19047619047619, 0.253968253968254
)), .Names = c("Pos", "A", "C", "G", "T"), row.names = c(NA,
6L), class = "data.frame")
I have now saved the table names as files.table:
files.table <- paste0("table", seq(1:3))
My problem is that I could not run this bind_rows function to bind table1, table2 and table3 using files.table instead of listing all three tables. This is the error I get: Error in bind_rows_(x, .id) : Argument 1 must have names
This is the code I tried:
bind.table <- bind_rows(files.table, .id = "table") %>%
gather(Base, Percent, -Pos, -table)
The .id argument for bind_rows sets the name of the variable containing the name of the list item each row came from, not these names themselves. You set the table names by naming the items in the list. Then, bind_rows will get those names and put them into a column with a name you specify:
# Collect the data frames in a list and name each element; bind_rows() copies
# those element names into the column named by `.id`.
table_list <- list(table1, table2, table3)
# seq_along() follows the list length, so adding a table needs no other change.
names(table_list) <- paste0("table", seq_along(table_list))
bind.table <- bind_rows(table_list, .id = "id")
From ?bind_rows:
Each argument can either be a data frame, a list that could be a data
frame, or a list of data frames
The easiest way to get the data frames into bind_rows is to assemble them into a list and then just pass the list of data frames in. As @joran suggests, the easiest way to do this is to load or generate them inside an lapply call, which automatically outputs a list that can go into bind_rows.
My data is structured as follows:
dput(head(CharacterAnalysis,5))
structure(list(Character = c("A", "a", "B", "b", "C"),
Descriptor = c("Jog", "Change Direction", "Shuffle", "Walk", "Stop"),
.Names = c("Character", "Descriptor"),
row.names = c(NA, 5L), class = "data.frame")
I wish to lookup the Character and relevant Descriptor in the following data frame, but am unsure how to do so:
dput(head(StringAnalysis,3))
structure(list(MovementString = c("ACb", "aAaB", "BbCa"),
.Names = c("MovementString"),
row.names = c(NA, 3L), class = "data.frame")
My expected outcome/ data frame would be:
dput(head(Output,3))
structure(list(MovementString = c("ACb", "aAaB", "BbCa"),
MovementPerformed = c("Jog/ Stop/ Walk", "Change Direction/ Jog/ Change Direction/ Shuffle", "Shuffle/ Walk/ Stop/ Change Direction")
.Names = c("MovementString", "MovementPerformed"),
row.names = c(NA, 3L), class = "data.frame")
I would like a forward slash (/) or similar to separate each Descriptor, as it signals a new movement. Any advice on how to complete this, please? My data frame CharacterAnalysis is over 1 million rows long, so I do not wish to have to search for each MovementString separately!
Thank you.
# Reproducible copy of the lookup table: one Descriptor per Character.
CharacterAnalysis <-
  structure(list(Character = c("A", "a", "B", "b", "C"),
                 Descriptor = c("Jog", "Change Direction", "Shuffle", "Walk", "Stop")),
            .Names = c("Character", "Descriptor"),
            row.names = c(NA, 5L), class = "data.frame")
# Reproducible copy of the strings to decode (with the expected result).
Output <-
  structure(list(MovementString = c("ACb", "aAaB", "BbCa"),
                 MovementPerformed = c("Jog/ Stop/ Walk", "Change Direction/ Jog/ Change Direction/ Shuffle", "Shuffle/ Walk/ Stop/ Change Direction")),
            .Names = c("MovementString", "MovementPerformed"),
            row.names = c(NA, 3L), class = "data.frame")
# A simple approach based on names
# Build the lookup table just once: a character vector of descriptors whose
# names are the single-character codes, so m[x] translates codes by name.
m <- CharacterAnalysis$Descriptor
names(m) <- CharacterAnalysis$Character
# Build the MovementPerformed column: split each string into its characters,
# look each one up, and join the descriptors with "/ ".
# vapply() (rather than sapply()) guarantees a character vector with exactly
# one element per input string, whatever the input looks like.
Output$MovementPerformed <-
  vapply(strsplit(Output$MovementString, ""),
         FUN = function(x) paste(m[x], collapse = "/ "),
         FUN.VALUE = character(1))
I have this code
ggplot() +
stat_density(kernel = "biweight",aes(x=fd, colour=id), data=foo1,position="identity",geom="line")+
coord_cartesian(xlim = c(0, 200))+
xlab("Flood Duration")+
ylab("Density")+
ggtitle("PDFs of Flood Duration")+
ggsave("pdf_fd_conus.png")
And I wrote this function
# Question's version of pdf.plot: wraps the density plot above in a function
# that takes the data, the x column name (string), axis labels, title and
# output file name.
# NOTE(review): this is the version that raises the "data of class uneval"
# error reported in the question.
pdf.plot<-function(data,x,xl,yl,title,save){
ggplot() +
# `data` is passed positionally here; the first parameter of stat_density()
# is `mapping`, not `data`, which is what triggers the error.
stat_density(data, kernel = "biweight",aes_string(x=x, colour='id'),
position="identity",geom="line")+
coord_cartesian(xlim = c(0, 200))+
xlab(xl)+
ylab(yl)+
ggtitle(title)+
ggsave(save)
}
Calling using this:
pdf.plot(data=foo1,x='fd',xl='b',
yl='a',title='a',save='y.png')
But I am getting this error:
Error: ggplot2 doesn't know how to deal with data of class uneval
Called from: eval(expr, envir, enclos)
This is dput(head(foo1,4))
structure(list(id = structure(c(1L, 1L, 1L, 1L), .Label = c("dfa",
"dfb", "cfa", "csb", "bsk"), class = "factor"), lon = c(-70.978611,
-70.978611, -70.945278, -70.945278), lat = c(42.220833, 42.220833,
42.190278, 42.190278), peakq = c(14.7531, 17.3865, 3.3414, 2.7751
), area = c(74.3327, 74.3327, 11.6549, 11.6549), fd = c(29, 54.75,
23, 1), tp = c(14.25, 19.75, 13.5, 0.5), rt = c(14.75, 35, 9.5,
0.5), bl = c(15485.3, 15485.3, 8242.64, 8242.64), el = c(0.643551,
0.643551, 0.474219, 0.474219), k = c(0.325279, 0.325279, 0.176624,
0.176624), r = c(81.947, 81.947, 38.7003, 38.7003), si = c(0.0037157,
0.0037157, -9999, -9999), rr = c(0.00529193, 0.00529193, 0.00469513,
0.00469513)), .Names = c("id", "lon", "lat", "peakq", "area",
"fd", "tp", "rt", "bl", "el", "k", "r", "si", "rr"), row.names = c(NA,
4L), class = "data.frame")
Could you please help?
Your problem is that you didn't specify what argument data is in stat_density. If you look at ?stat_density you'll see the first implied argument is actually mapping=. You need to change pdf.plot to:
# Plot biweight kernel density curves of one column of `data`, one line per
# `id` level, and save the figure to disk.
#
# Args:
#   data:   data frame containing the column named by `x` and an `id` column.
#   x:      column name (string) mapped to the x axis.
#   xl, yl: x- and y-axis labels.
#   title:  plot title.
#   save:   output file name passed to ggsave().
#
# Returns the ggplot object.
pdf.plot <- function(data, x, xl, yl, title, save) {
  # `data` must be named: the first positional parameter of stat_density() is
  # `mapping`, not `data`.
  p <- ggplot() +
    stat_density(data = data, kernel = "biweight",
                 aes_string(x = x, colour = "id"),
                 position = "identity", geom = "line") +
    coord_cartesian(xlim = c(0, 200)) +
    xlab(xl) +
    ylab(yl) +
    ggtitle(title)
  # Save explicitly rather than adding ggsave() to the plot: ggsave() is not a
  # plot component, and newer ggplot2 versions reject `+ ggsave()`.
  ggsave(save, plot = p)
  p
}
Hi I have this piece of code that I use again and again
ggplot(foo1, aes(x=log(area), y=log(fd), colour = id)) +
geom_point()+
scale_color_manual(name = "Regions",values=cols)+
xlab('John')+
ylab('Peter')+
ggtitle("xyz")+
ggsave("x.png")
And I wrote this:
# Question's first attempt at wrapping the scatter plot above in a function.
# arg1 is used as the plot data; arg2 and arg3 are the x and y aesthetics
# given as strings.
# NOTE(review): this version does not work as called in the question: `id`
# is unquoted inside aes_string(), and the call passes the data frame's name
# as a string instead of the data frame itself.
my.function<-function(arg1,arg2,arg3){
ggplot(arg1, aes_string(x=arg2, y=arg3, colour = id)) +
geom_point()+
# `cols` is not defined in this function; it is taken from the enclosing
# environment.
scale_color_manual(name = "Regions",values=cols)+
xlab('John')+
ylab('Peter')+
ggtitle("xyz")+
ggsave("x.png")
}
I am calling this way
my.function(arg1='foo1',arg2='log(area)',arg3='log(fd)')
But it doesn't work. I have never written functions before. I want to save the figure in every function call. Could you please help?
dput(head(foo1,4))
structure(list(id = structure(c(1L, 1L, 1L, 1L), .Label = c("dfa",
"dfb", "cfa", "csb", "bsk"), class = "factor"), lon = c(-70.978611,
-70.978611, -70.945278, -70.945278), lat = c(42.220833, 42.220833,
42.190278, 42.190278), peakq = c(14.7531, 17.3865, 3.3414, 2.7751
), area = c(74.3327, 74.3327, 11.6549, 11.6549), fd = c(29, 54.75,
23, 1), tp = c(14.25, 19.75, 13.5, 0.5), rt = c(14.75, 35, 9.5,
0.5), bl = c(15485.3, 15485.3, 8242.64, 8242.64), el = c(0.643551,
0.643551, 0.474219, 0.474219), k = c(0.325279, 0.325279, 0.176624,
0.176624), r = c(81.947, 81.947, 38.7003, 38.7003), si = c(0.0037157,
0.0037157, -9999, -9999), rr = c(0.00529193, 0.00529193, 0.00469513,
0.00469513)), .Names = c("id", "lon", "lat", "peakq", "area",
"fd", "tp", "rt", "bl", "el", "k", "r", "si", "rr"), row.names = c(NA,
4L), class = "data.frame")
How about
# Scatter plot of two columns (or expressions) of a data frame, coloured by
# `id`, saved to "x.png" on every call.
#
# Args:
#   arg1: the data frame itself (not its name as a string).
#   arg2: string giving the x aesthetic, e.g. "log(area)".
#   arg3: string giving the y aesthetic, e.g. "log(fd)".
#
# Returns the ggplot object. `cols` (the manual colour values) is not defined
# here and must exist in an enclosing environment.
my.function <- function(arg1, arg2, arg3) {
  # Every aesthetic is given as a string because aes_string() is used.
  p <- ggplot(arg1, aes_string(x = arg2, y = arg3, colour = "id")) +
    geom_point() +
    scale_color_manual(name = "Regions", values = cols) +
    xlab("John") +
    ylab("Peter") +
    ggtitle("xyz")
  # Save explicitly rather than adding ggsave() to the plot: ggsave() is not a
  # plot component, and newer ggplot2 versions reject `+ ggsave()`.
  ggsave("x.png", plot = p)
  p
}
and you call it with
my.function(arg1=foo1,arg2='log(area)',arg3='log(fd)')
Note that now you're passing the data.frame itself, not the name of the data.frame as a string. And since you're passing the column names as strings, you need to pass everything in aes_string as strings.
If you really want to pass the data.frame names as a string, you can change the first ggplot() call to
ggplot(get(arg1), aes_string(x=arg2, y=arg3, colour ="id")) +
I have over 800 dbf files which I need to import and merge in R. I have been able to bring in all of the files using this code:
library(foreign)
setwd("c:/temp/help/")
files <- list.files(pattern="\\.dbf$")
all.the.data <- lapply(files, read.dbf, as.is=FALSE)
DATA <- do.call("rbind",all.the.data)
However, these dbf files have different numbers of columns and even if they sometimes have the same number of columns, those headers may be different. Here are four of the dbf files to provide an example:
file01 <- structure(list(PLOTBUFFER = structure(1L, .Label = "1002_2km", class = "factor"),
VALUE_11 = 11443500, VALUE_31 = 13500, VALUE_42 = 928800,
VALUE_43 = 162000, VALUE_90 = 18900), .Names = c("PLOTBUFFER",
"VALUE_11", "VALUE_31", "VALUE_42", "VALUE_43", "VALUE_90"), row.names = c(NA,
-1L), class = "data.frame", data_types = c("C", "F", "F", "F",
"F", "F"))
file02 <- structure(list(PLOTBUFFER = structure(1L, .Label = "1002_5km", class = "factor"),
VALUE_11 = 66254400, VALUE_21 = 125100, VALUE_31 = 80100,
VALUE_41 = 4234500, VALUE_42 = 3199500, VALUE_43 = 4194000,
VALUE_52 = 376200, VALUE_90 = 72000), .Names = c("PLOTBUFFER",
"VALUE_11", "VALUE_21", "VALUE_31", "VALUE_41", "VALUE_42", "VALUE_43",
"VALUE_52", "VALUE_90"), row.names = c(NA, -1L), class = "data.frame", data_types = c("C",
"F", "F", "F", "F", "F", "F", "F", "F"))
file03 <- structure(list(PLOTBUFFER = structure(1L, .Label = "1003_2km", class = "factor"),
VALUE_11 = 1972800, VALUE_31 = 125100, VALUE_41 = 5316300,
VALUE_42 = 990900, VALUE_43 = 1995300, VALUE_52 = 740700,
VALUE_90 = 1396800, VALUE_95 = 25200), .Names = c("PLOTBUFFER",
"VALUE_11", "VALUE_31", "VALUE_41", "VALUE_42", "VALUE_43", "VALUE_52",
"VALUE_90", "VALUE_95"), row.names = c(NA, -1L), class = "data.frame", data_types = c("C",
"F", "F", "F", "F", "F", "F", "F", "F"))
file04 <- structure(list(PLOTBUFFER = structure(1L, .Label = "1003_5km", class = "factor"),
VALUE_11 = 43950600, VALUE_31 = 270000, VALUE_41 = 12969900,
VALUE_42 = 5105700, VALUE_43 = 12614400, VALUE_52 = 1491300,
VALUE_90 = 2055600, VALUE_95 = 70200), .Names = c("PLOTBUFFER",
"VALUE_11", "VALUE_31", "VALUE_41", "VALUE_42", "VALUE_43", "VALUE_52",
"VALUE_90", "VALUE_95"), row.names = c(NA, -1L), class = "data.frame", data_types = c("C",
"F", "F", "F", "F", "F", "F", "F", "F"))
I would like the dataframe to match this:
merged <- structure(list(PLOTBUFFER = structure(1:2, .Label = c("1002_2km",
"1002_5km"), class = "factor"), VALUE_11 = c(11443500, 66254400
), VALUE_21 = c(0, 125100), VALUE_31 = c(13500, 80100), VALUE_41 = c(0,
4234500), VALUE_42 = c(928800, 3199500), VALUE_43 = c(162000,
4194000), VALUE_52 = c(0, 376200), VALUE_90 = c(18900, 72000)), .Names = c("PLOTBUFFER",
"VALUE_11", "VALUE_21", "VALUE_31", "VALUE_41", "VALUE_42", "VALUE_43",
"VALUE_52", "VALUE_90"), class = "data.frame", row.names = c(NA,
-2L))
Where if there is a missing column from one dataset it simply is filled in with a zero or NULL.
Thanks
-al
The suggestion by @infominer worked for the 4 files I included as an example, but when I tried to use merge_recurse on the large list of 802 elements, I received an error:
files <- list.files(pattern="\\.dbf$")
all.the.data <- lapply(files, read.dbf, as.is=FALSE)
merged <- merge_recurse(all.the.data)
Error: evaluation nested too deeply: infinite recursion / options(expressions=)?
Error during wrapup: evaluation nested too deeply: infinite recursion / options(expressions=)?
Use the package reshape
library(reshape)
merged.files <-merge_recurse(list(file01,file02,file03,file04))
Edit:
Try this code thanks to Ramnath
# Merge the data frames pairwise, left to right. all = TRUE keeps unmatched
# rows from both sides and fills the cells they lack with NA. TRUE is spelled
# out because the shorthand `T` is an ordinary variable that can be reassigned.
Reduce(function(...) merge(..., all = TRUE), all.the.data)
adapted from https://stackoverflow.com/a/6947326/2747709