I have a small issue with ggvis in RStudio.
I want to plot something and show more information about each point when I move my cursor over it. To do this, I am using the ggvis package and its add_tooltip() function.
However when I run the code below, I obtain the plot but not the additional info when I move my cursor on the points.
Furthermore, I want to use the separate function (tooltip_test) because my real code is a bit more complex and the function would help me a lot.
library(ggvis)
# Demo data: ten points, each tagged with a letter in EXTRA.
test <- data.frame(
  ID = 1:10,
  TIME = 1:10,
  COUNTS = rep(1:2, 5),
  EXTRA = LETTERS[1:10]
)

# Tooltip callback handed to add_tooltip().
# `x` is the payload received for the hovered mark. Returns the tooltip text
# for the row(s) of `test` whose ID equals x$ID, or NULL when `x` or its ID
# is missing.
tooltip_test <- function(x) {
  if (is.null(x) || is.null(x$ID)) {
    return(NULL)
  }
  hovered <- test$ID == x$ID
  paste0("Category: ", test$EXTRA[hovered])
}
# Scatter plot of COUNTS over TIME with a hover tooltip.
# `key := ~ID` registers ID as the key prop so that the hover payload passed
# to tooltip_test() contains ID. Without it x$ID is NULL, tooltip_test()
# returns NULL, and the plot is drawn but no tooltip ever appears.
test %>%
  ggvis(x = ~TIME, y = ~COUNTS, key := ~ID) %>%
  layer_points() %>%
  add_tooltip(tooltip_test, "hover")
library(ggvis)
# Demo data: ten points, each tagged with a letter in EXTRA.
test <- data.frame(
  ID = 1:10,
  TIME = 1:10,
  COUNTS = rep(1:2, 5),
  EXTRA = c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J")
)

# Tooltip callback handed to add_tooltip().
# `x` is the payload received for the hovered mark; it only contains ID when
# the plot declares `key := ~ID`.
# Returns the tooltip text, or NULL when there is nothing to show (no
# payload, no ID in the payload, or an ID that is not present in `test`).
tooltip_test <- function(x) {
  if (is.null(x) || is.null(x$ID)) {
    return(NULL)
  }
  # Look the row up by ID *value*; indexing EXTRA with x$ID directly only
  # works while ID happens to equal the row number.
  rows <- match(x$ID, test$ID)
  rows <- rows[!is.na(rows)]
  if (length(rows) == 0) {
    return(NULL)
  }
  paste0("Category: ", test$EXTRA[rows])
}
# Scatter plot of COUNTS over TIME; ID is declared as the key prop and
# tooltip_test() supplies the text shown on hover.
test %>%
  ggvis(
    x = ~TIME,
    y = ~COUNTS,
    key := ~ID
  ) %>%
  layer_points() %>%
  add_tooltip(html = tooltip_test, on = "hover")
This should be sufficient: you forgot to add ID as the key in the ggvis() call.
Related
I have a subset data that has a total count for each observation from a bigger dataset. If I want to drop duplicates based on a higher count and drop codes that appear less if the name is the same, how would I go about that? So for instance:
# Example input: one row per (name, code) observation with its count n.
name <- rep(c("a", "b", "c", "d", "e"), times = c(2, 3, 1, 1, 3))
code <- c(1, 1, 2, 3, 4, 1, 1, 2, 2, 3)
n <- c(1, 10, 2, 3, 5, 4, 8, 100, 90, 40)
data <- data.frame(name, code, n)
The end product would be left with these:
# Desired result: for each name, only the row with the highest count n.
name <- letters[1:5]
code <- c(1, 4, 1, 1, 2)
n <- c(10, 5, 4, 8, 100)
data2 <- data.frame(name, code, n)
If you can use dplyr, this should do the trick:
library(dplyr)
# Keep exactly one row per name: the one with the largest n.
# which.max() returns the position of the first maximum, so a tie on n still
# leaves a single row per name (filter(n == max(n)) would keep every tied
# row, i.e. the duplicates would survive).
data %>%
  group_by(name) %>%
  slice(which.max(n)) %>%
  ungroup()
I have a long list of variables and for each I want to create a dummy variable. I am using the below dplyr mutate code to do this, but know that something like an array in SAS could be used (so I don't have to copy this line out multiple times). I just haven't been able to find an answer on Stack or anywhere else that fits.
# Dummy-code one grade column: A/B/C -> 1, D/F/W/Q -> 0, anything else -> NA.
Grade_Dist2 <- Grade_Dist2 %>%
  mutate(
    ACCT2301_FA15_z = ifelse(
      ACCT2301_FA15 %in% c("A", "B", "C"),
      1,
      ifelse(ACCT2301_FA15 %in% c("D", "F", "W", "Q"), 0, NA)
    )
  )
The columns/vars are arranged together--all vars in the table are similar except an ID var.
In the tidyverse you should probably look at something like mutate_all(), but in the meantime I would think something like this base R solution would work:
# Dummy-code every grade column of Grade2: A/B/C -> 1, D/F/W/Q -> 0,
# anything else -> NA. Each result is stored in a new "<column>_z" column.
# NOTE(review): `Grade2` is presumably the question's `Grade_Dist2` table --
# confirm before running.
#
# The pattern is anchored with `$` so that the "_z" columns created below
# are not picked up again (producing "_z_z" columns) if this is re-run.
all_names <- grep("FA[0-9]+$", names(Grade2), value = TRUE)
for (id in all_names) {
  cur_var <- Grade2[[id]]
  Grade2[[paste0(id, "_z")]] <-
    ifelse(cur_var %in% c("A", "B", "C"), 1,
      ifelse(cur_var %in% c("D", "F", "W", "Q"), 0, NA))
}
Here's a try at using a tidyverse approach with mutate_all as suggested by @BenBolker.
library(tidyverse)
Grade_Dist2 <- tibble(ACCT2301_FA15_z = c("A", "F", "C", "Z"))

# Recode every column in place: A/B/C -> 1, D/F/W/Q -> 0, anything else -> NA.
# A formula lambda is used instead of funs(), which dplyr has deprecated;
# inside the lambda `.` stands for the column being transformed.
Grade_Dist2 <- Grade_Dist2 %>%
  mutate_all(
    ~ if_else(
      . %in% c("A", "B", "C"), 1,
      if_else(. %in% c("D", "F", "W", "Q"), 0, NA_real_)
    )
  )
Grade_Dist2
#> # A tibble: 4 x 1
#> ACCT2301_FA15_z
#> <dbl>
#> 1 1
#> 2 0
#> 3 1
#> 4 NA
If you want to append the dummy variables to the existing data instead of overwriting then
mutate_all(., list(dummy = ~ if_else(. %in% c("A", "B", "C"), 1,
  if_else(. %in% c("D", "F", "W", "Q"), 0, NA_real_))))
will append variables with names like ACCT2301_FA15_z_dummy (or be called dummy if there is only one variable being mutated).
The data I have contain pair-wise distance between different locations (x,y,z) and (a,b,c,d,e,f,g,h,i,j). See below:
# Simulated distances: columns x, y, z are the target locations and rows
# a..j are the source locations.
set.seed(123)
x <- rnorm(10, mean = 15, sd = 1)
y <- rnorm(10, mean = 7, sd = 0.1)
z <- rnorm(10, mean = 3, sd = 0.01)
distdat <- data.frame(x, y, z)
rownames(distdat) <- letters[1:10]
I need to create another data frame that includes, for each column: 1) the column name, 2) the row names of its three smallest values, and 3) those three minimum values. So in total, the new data frame will contain
three columns and nine rows. Here are the first three rows:
# Expected shape of the result, shown for column x only.
col_name <- rep("x", 3)
row_name <- c("h", "g", "a")
min_val <- c(14.21208, 14.88804, 14.98797)
newdat <- data.frame(col_name, row_name, min_val)
Similarly, we need to repeat this for column y and z.
How about this:
set.seed(123)
x <- rnorm(10, mean = 15, sd = 1)
y <- rnorm(10, mean = 7, sd = 0.1)
z <- rnorm(10, mean = 3, sd = 0.01)
distdat <- data.frame(x, y, z)
rownames(distdat) <- letters[1:10]

# For every column of distdat, the row positions of its three smallest
# values (one column of idx per column of distdat).
idx <- vapply(distdat, function(v) order(v)[1:3], integer(3))

# Long-format result: three rows per column, smallest value first.
# c(idx) walks idx column by column, which lines up with
# rep(..., each = 3); the two-column matrix picks one (row, column) cell
# per output row.
data.frame(
  col_name = rep(colnames(distdat), each = 3),
  row_name = row.names(distdat)[c(idx)],
  min_val = as.matrix(distdat)[cbind(c(idx), rep(1:3, each = 3))]
)
Also, with the given seed I could not replicate your example, let me know if I missed something.
It's not very pretty, but this could work:
set.seed(123)
x <- rnorm(10, mean = 15, sd = 1)
y <- rnorm(10, mean = 7, sd = 0.1)
z <- rnorm(10, mean = 3, sd = 0.01)
distdat <- data.frame(x, y, z)
rownames(distdat) <- letters[1:10]

# Keep the row names as an ordinary column so they travel through the
# dplyr verbs below.
distdat$row_name <- rownames(distdat)

# Three smallest x values, labelled with their source column.
newdat_x <- distdat %>%
  select(x, row_name) %>%
  arrange(x) %>%
  head(3) %>%
  mutate(col_name = "x") %>%
  rename(min_val = x)

# Same for y.
newdat_y <- distdat %>%
  select(y, row_name) %>%
  arrange(y) %>%
  head(3) %>%
  mutate(col_name = "y") %>%
  rename(min_val = y)

# Same for z.
newdat_z <- distdat %>%
  select(z, row_name) %>%
  arrange(z) %>%
  head(3) %>%
  mutate(col_name = "z") %>%
  rename(min_val = z)

# Stack the three per-column results into one data frame.
newdat <- bind_rows(newdat_x, newdat_y, newdat_z)
Certainly, we could (and should) write a function that creates those newdat_ data frames, and then run it for each of the variables x, y, and z.
You can use the dplyr and tidyr packages to do this. They make transformations much more readable.
# Reshape to long format (one row per row-name/column pair), sort by
# distance within each column, and keep the first three rows per column.
# slice() is used rather than top_n() because top_n() keeps every tied row
# and can therefore return more than three rows for a column; ungroup()
# makes sure the result is not left grouped.
newdat <- distdat %>%
  mutate(row = rownames(.)) %>%
  gather(col, dist, -row) %>%
  arrange(col, dist) %>%
  group_by(col) %>%
  slice(seq_len(3)) %>%
  ungroup()
I tried to run this simple R script in both RStudio and R x64, but nothing appeared as a result. No plot, nothing... Could someone help, please?
# Load package
library(networkD3)
# Create fake data: each row of networkData links a `src` node to a
# `target` node.
src <- rep(c("A", "B", "C", "D"), times = c(4, 2, 2, 1))
target <- c("B", "C", "D", "J", "E", "F", "G", "H", "I")
networkData <- data.frame(src, target)

# Plot
simpleNetwork(networkData)
Code taken from: http://christophergandrud.github.io/networkD3/
I appreciate all your answers! I found a solution that worked for me.
Using the command:
# Write the network to an HTML file instead of relying on the viewer.
simpleNetwork(networkData) %>%
  saveNetwork(file = "File.html")
saves an .html file, which can then be opened in a browser.
I am having trouble with ggsave() from the ggplot2 library. I wrote a function that I pass arguments to, and that is supposed to produce and then save the results with ggsave().
Here is some example data and code to reproduce the error:
# First example table: 20 observations (rows A..T) of five variables.
example.df.1 <- data.frame(matrix(1:100, nrow = 20, ncol = 5))
dimnames(example.df.1) <- list(LETTERS[1:20], paste0("var", 1:5))

# Second example table: 24 identifiers (A..X) in the first column and a
# random value for each in the second.
example.df.2 <- data.frame(
  X1 = LETTERS[1:24],
  X2 = rnorm(24, mean = 10, sd = 2),
  stringsAsFactors = FALSE
)
# Scatter-plot the first column of `data1` against the second column of
# `data2`, annotate the plot with their correlation, and save it as
# "my_example_plot_<var>.png".
#
# data1: data frame whose row names identify the observations; only its
#        first column is plotted.
# col:   point colour.
# title: plot title.
# var:   x-axis label, also used in the output file name.
# data2: data frame with the observation identifiers in column 1 and the
#        y values in column 2.
problematic_func <- function(data1, col, title, var, data2) {
  # only include rows without missing values
  loc1 <- subset(data1, rowSums(is.na(data1)) == 0)
  loc1 <- cbind(loc1, rank = rank(-loc1[, 1], ties.method = "first"))
  # sort by row name so that loc1 lines up row-for-row with loc2.ordered,
  # which is sorted by the same identifiers below
  loc1 <- loc1[order(rownames(loc1)), , drop = FALSE]

  # reduce data2 to only those rows that correspond to rows in data1
  loc2 <- data2[data2[, 1] %in% rownames(loc1), ]
  # order loc2
  loc2.ordered <- loc2[order(loc2[, 1]), ]

  # correlation between loc1 and loc2.ordered
  corr <- cor(loc1[, 1], loc2.ordered[, 2])

  # creating the plot
  # The y aesthetic must be a single column: passing the whole
  # loc2.ordered data frame is what raised "Unknown input:data.frame"
  # when the plot was later rendered by ggsave().
  p <- ggplot(loc1, aes_q(x = loc1[, 1], y = loc2.ordered[, 2]))
  p <- p + geom_point(colour = col, size = 4)
  p <- p + ggtitle(title)
  p <- p + xlab(var)
  p <- p + ylab("y-axis")
  p <- p + coord_cartesian(xlim = c(0, max(loc1[, 1])),
                           ylim = c(0, max(loc2.ordered[, 2]) * 1.2))
  p <- p + annotate("text", x = max(loc1[, 1]) * .5, y = 1,
                    label = paste("Correlation coef: ", as.character(corr)),
                    size = 3)

  # saving the plot
  ggsave(filename = paste0("my_example_plot_", var, ".png"),
         plot = p, device = png, width = 625, height = 625, limitsize = FALSE)
}
# Plot every variable of example.df.1 against example.df.2.
# seq_len() keeps the loop from running at all when there are no columns
# (1:ncol() would iterate over c(1, 0)); drop = FALSE extracts the column
# as a one-column data frame that already carries the row names.
for (i in seq_len(ncol(example.df.1))) {
  sv <- example.df.1[, i, drop = FALSE]
  problematic_func(sv, "orange", colnames(example.df.1)[i],
                   colnames(example.df.1)[i], data2 = example.df.2)
}
Edit: Sorry, I forgot to add the error I get:
Error in FUN(X[[2L]], ...) : Unknown input:data.frame
I found the mistake myself. The error was not in the ggsave() function, but in the ggplot() function with which I created the plot in the first place. The correct code must be:
i <- ggplot(loc1, aes_q(x = loc1[,1], y = loc2.ordered[,2]))
The difference is that the y aesthetic needs to be passed a column of loc2.ordered, not the entire dataframe.