Color parts of x-axis characters [duplicate] - r

This question already has an answer here:
Change the color of the axis labels
(1 answer)
Closed 5 years ago.
Edit: This question addresses how to color only subsets of the x-axis labels. This is not a duplicate question.
I have made the x-axis labels to represent a nucleotide sequence, and I would like to add color to different sections of the nucleotides. How?
Thanks.
# Line plot of Count_combined against Position from `miRNA3`, with one x-axis
# tick per integer position (1..150), each labelled with a nucleotide letter.
ggplot(data = miRNA3) +
geom_line(mapping = aes(x = Position, y = Count_combined)) +
# y-axis ticks every 10 units from 0 to 120
scale_y_continuous(breaks = seq(0, 120, 10)) +
ylab("Count") +
# `labels` supplies the text shown at each break, so it needs one entry per
# element of `breaks`; expand = c(0, 0) drops the padding at both axis ends.
scale_x_continuous(breaks=1:150, labels=c("T", "G", "A", "T", "G", "T", "C", "C", "G", "T", "G", "T", "C", "C", "A", "C", "T", "C", "G", "T", "T", "G", "T", "T", "T", "T", "C", "A", "A", "C", "T", "T", "C", "T", "T", "C", "C", "C", "G", "C", "A", "A", "T", "T", "T", "A", "C", "C", "T", "T", "C", "A", "T", "G", "G", "T", "T", "A", "A", "A", "C", "A", "A", "T", "A", "A", "A", "T", "C", "A", "G", "C", "T", "A", "A", "G", "G", "T", "A", "T", "G", "G", "A", "C", "A", "C", "T", "G", "T", "A", "A", "C", "T", "A", "C", "T", "C", "T", "G", "A", "A", "G", "G", "T", "A", "A", "G", "T", "T", "G", "C", "G", "A", "G", "A", "G", "G", "A", "A", "G", "T", "T", "T", "C", "A", "A", "G", "T", "A", "G", "C", "A", "T", "T", "G", "G", "A", "T", "T", "C", "G", "G", "A", "C", "G", "T", "T", "A", "T", "G"), expand = c(0, 0)) +
xlab("Supercontig_1.4289:xxx-xxx") +
# Hide all vertical grid lines and the minor horizontal ones
theme(panel.grid.minor.x=element_blank(),
panel.grid.major.x=element_blank(),
panel.grid.minor.y=element_blank())
Edit: I would like to make something like this (see the letters on the x-axis):

# Minimal example: colour individual x-axis tick labels.
# `my_cols` is passed as a vector to element_text(), so the i-th colour is
# applied to the i-th label in `my_labs`.
# NOTE(review): recent ggplot2 releases warn that vectorized input to
# element_text() is not officially supported -- confirm on your version.
df <- data.frame(x = 1:4, y = 1:4)
my_labs <- c("G", "A", "A", "T")
my_cols <- c("red", "blue", "blue", "chartreuse")

ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_point() +
  scale_x_continuous(breaks = 1:4, labels = my_labs) +
  theme(axis.text.x = element_text(color = my_cols))
I had no idea this was possible until I saw @UnivStudent's comment. Pretty cool!

Related

Sort qualitative variable with groups and keeping the indexes

I have a variable made up of 6 different letters. I need to sort it and obtain 6 different indexes (one per letter), so that I can sort a dataset according to this qualitative variable.
here's the variable:
# Sample input: 48 single-letter codes drawn from six distinct letters
# (A, B, E, H, I, R), kept in their original, unsorted order.
data <- c(
  "H", "H", "A", "A", "B", "R", "E", "B", "E", "B", "A", "E",
  "R", "R", "I", "B", "I", "I", "H", "A", "E", "I", "B", "I", "H",
  "B", "R", "E", "B", "R", "H", "R", "I", "A", "B", "E", "A", "E",
  "I", "H", "A", "E", "I", "H", "R", "H", "A", "R"
)
if I sort this I'm obtaining only the alphabetic order:
# With index.return = TRUE, sort() returns a list: `x` holds the sorted values
# and `ix` the positions those values occupied in `data`.
data_idx = sort(data, index.return = TRUE)
How can I obtain these indexes and reorder this variable?
We can extract with either $ or [[ as it is a list output when we use index.return = TRUE
# `ix` is the permutation of positions that puts `data` in sorted order
sort(data, index.return = TRUE)$ix
Another option is order
# order() returns the sorting permutation directly; data[order(data)] is sorted
order(data)
If we need a group index for each element (groups numbered in order of first appearance):
# Group id per element: 1 for the first distinct letter encountered, 2 for the
# next new letter, and so on
match(data, unique(data))
Or maybe, to get the positions of each letter as a list:
# Named list holding, for each letter, the positions at which it occurs in `data`
split(seq_along(data), data)
Or with ave
# Running counter within each letter: 1 at its first occurrence, 2 at its
# second, and so on
ave(seq_along(data), data, FUN = seq_along)

Efficient subsetting of a data.frame based on another jagged data.frame

I'm working on a project where I need to repeatedly subset a data.frame based on different combinations of attributes. Right now I'm subsetting the data.frame using the merge function as I don't know what the attributes input will be at run time, and this works. However, I'm wondering if there is a faster way to create the subsets.
# library() fails loudly when data.table is missing; require() would only
# return FALSE and let the script continue.
library(data.table)

# Example data: 21 rows, four character attribute columns.
df <- structure(list(att1 = c("e", "a", "c", "a", "d", "e", "a", "d", "b", "a", "c", "a", "b", "e", "e", "c", "d", "d", "a", "e", "b"),
att2 = c("b", "d", "c", "a", "e", "c", "e", "d", "e", "b", "e", "e", "c", "e", "a", "a", "e", "c", "b", "b", "d"),
att3 = c("c", "b", "e", "b", "d", "d", "d", "c", "c", "d", "e", "a", "d", "c", "e", "a", "d", "e", "d", "a", "e"),
att4 = c("c", "a", "b", "a", "e", "c", "a", "a", "b", "a", "a", "e", "c", "d", "b", "e", "b", "d", "d", "b", "e")),
.Names = c("att1", "att2", "att3", "att4"), class = "data.frame", row.names = c(NA, -21L))

# Create combinations of attributes: every subset of two or more attribute
# columns. seq_along(...)[-1L] is empty when there are fewer than two columns,
# whereas 2:length(cnames) would count backwards (2, 1).
cnames <- colnames(df)
col_sets <- unlist(
  lapply(seq_along(cnames)[-1L], function(k) combn(cnames, k, simplify = FALSE)),
  recursive = FALSE
)

# For each column subset, collect the distinct value combinations present in
# `df`, then stack them once. Columns absent from a subset are filled with NA.
# A single rbindlist() avoids re-copying the growing table on every iteration.
att_combos <- rbindlist(
  lapply(col_sets, function(cols) unique(df[, cols, drop = FALSE])),
  fill = TRUE
)
rm(cnames, col_sets)

for (i in seq_len(nrow(att_combos))) {
  att_sub <- att_combos[i, ]
  # Keep only the attributes that take part in this combination.
  # NOTE(review): a genuine NA value in `df` would be dropped here as well.
  keep_cols <- names(att_sub)[!vapply(att_sub, is.na, logical(1))]
  att_sub <- att_sub[, keep_cols, with = FALSE]
  # need to subset data.frame here - very slow on large data.frames
  # (the result is overwritten on every iteration; the analysis step that
  # would consume it is not shown)
  df_subset_for_analysis <- merge(df, att_sub)
}
I would use data.table keys on the columns you want to subset on, and then generate a data.table (at runtime) with the combinations you are interested in, and then merge the two.
Here is an example with a single combination of attributes (simple_combinations) and one with multiple combinations of attributes (multiple_combinations):
# library() stops with an error if data.table is not installed; require()
# would only return FALSE.
library(data.table)

df <- structure(list(att1 = c("e", "a", "c", "a", "d", "e", "a", "d", "b", "a", "c", "a", "b", "e", "e", "c", "d", "d", "a", "e", "b"),
att2 = c("b", "d", "c", "a", "e", "c", "e", "d", "e", "b", "e", "e", "c", "e", "a", "a", "e", "c", "b", "b", "d"),
att3 = c("c", "b", "e", "b", "d", "d", "d", "c", "c", "d", "e", "a", "d", "c", "e", "a", "d", "e", "d", "a", "e"),
att4 = c("c", "a", "b", "a", "e", "c", "a", "a", "b", "a", "a", "e", "c", "d", "b", "e", "b", "d", "d", "b", "e")),
.Names = c("att1", "att2", "att3", "att4"), class = "data.frame", row.names = c(NA, -21L))

# Convert to data.table
dt <- as.data.table(df)

# Set key on the columns used for "subsetting"
setkey(dt, att1, att2, att3, att4)

# Simple subset on a single set of attributes
simple_combinations <- data.table(att1 = "d", att2 = "e", att3 = "d", att4 = "e")
setkey(simple_combinations, att1, att2, att3, att4)

# Merge to generate simple output subset (simple_combinations of att present in dt)
simple_subset <- merge(dt, simple_combinations)

# Complex (multiple) sets of attributes.
# expand.grid() converts strings to factors by default; the columns of `dt`
# are character, so keep these as character too and join on matching types.
multiple_combinations <- data.table(expand.grid(
  att1 = c("d"), att2 = c("c", "d", "e"),
  att3 = c("d"), att4 = c("b", "e"),
  stringsAsFactors = FALSE
))
setkey(multiple_combinations, att1, att2, att3, att4)

# Merge to generate output subset (multiple_combinations of att present in dt)
multiple_subset <- merge(dt, multiple_combinations)
The output is in simple_subset and multiple_subset.

R: How do I produce a state transition matrix from a vector that represent states over discrete time steps? [closed]

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 7 years ago.
Improve this question
I'm using R and need some help.
Background:
I video-recorded participants in a behavioural study. I then coded different aspects of their behaviour from the videos so that I now have one data frame per participant. The df has many unordered factors, each representing the discrete temporal sequence of the participant's states for one specific behavioural dimension (e.g. gaze direction). Each row holds the value for one second for that dimension. To simplify, let's assume one such vector might look like this:
# Example state sequence for one participant: one observed state per time step,
# stored as a factor whose levels are the distinct states that occur.
p01.gaze = factor(x = c("a", "b", "b", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "b", "b", "a", "d", "d", "d", "a", "a", "a", "e", "e", "d", "e", "e", "a","a", "e", "a", "a", "a", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "d", "b", "b", "b", "d", "d", "d", "d", "d", "d", "d", "b", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d", "a", "b", "a", "d", "d", "a", "c", "e", "e", "e", "c", "c", "a", "e", "e", "a", "a", "a"))
Problem:
For each vector I want to construct a 'state transition matrix' by calculating the frequency of transitions (using counts or alternatively proportion) between all possible pairs of states. So the matrix would be:
# Empty 5 x 5 state-by-state matrix (all NA) whose rows and columns are both
# labelled with the states "a" to "e"; it is the shape the transition counts
# should eventually fill.
p01.gaze.m <- matrix(
  nrow = 5,
  ncol = 5,
  dimnames = rep(list(c("a", "b", "c", "d", "e")), 2)
)
NOTES:
1) I'm new to programming and couldn't find the right functions. I did search thoroughly but didn't find appropriate solutions so any help would be welcome.
2) The function markovchainFit (package markovchain) sounded tempting but I don't think I want/need to fit a Markov Model to my data (because of implications and commitments I don't want to make).
3) The function count.transitions (package RDS) also sounded tempting but I couldn't figure out how to coerce my data into rds.data object.
Many thanks =]
moe
Use the markovchain package for your #1 & #3.
Here is some sample code for your data that shows counting state transitions, and then graphing the transition probability matrix:
library(markovchain)
# Observed state sequence, one state per time step
p01.gaze = factor(x = c("a", "b", "b", "a", "a", "a",
"a", "a", "a", "a", "a", "a",
"a", "b", "b", "a", "d", "d",
"d", "a", "a", "a", "e", "e",
"d", "e", "e", "a","a", "e",
"a", "a", "a", "e", "e", "e",
"e", "e", "e", "e", "e", "e",
"e", "d", "b", "b", "b", "d",
"d", "d", "d", "d", "d", "d",
"b", "d", "d", "d", "d", "d",
"d", "d", "d", "d", "d", "d",
"d", "d", "d", "d", "d", "d",
"d", "d", "d", "d", "d", "d",
"d", "d", "d", "d", "d", "a",
"b", "a", "d", "d", "a", "c",
"e", "e", "e", "c", "c", "a",
"e", "e", "a", "a", "a"))
# Tabulate transitions between consecutive states; toRowProbs = TRUE asks for
# row-wise probabilities rather than raw counts
# NOTE(review): set toRowProbs = FALSE to obtain the plain transition counts
# -- confirm against the markovchain documentation
p01_gaze_tpm = createSequenceMatrix(p01.gaze, toRowProbs = TRUE)
# Wrap the transition probability matrix in a markovchain object for plotting
p01_gaze_mc = as(p01_gaze_tpm, "markovchain")
# Draw the transition graph with smaller arrow heads
plot(p01_gaze_mc, edge.arrow.size = 0.2)
This gives the following graph:
Once you upload sample data for your second problem, I will update my answer to address that as well.

How can I generate a user password in R [closed]

Closed. This question does not meet Stack Overflow guidelines. It is not currently accepting answers.
This question does not appear to be about programming within the scope defined in the help center.
Closed 8 years ago.
Improve this question
I want to generate passwords for users in R. Until now I have been using Excel and the following VBA script. How can I translate this functionality into an appropriate R script? Thank you very much.
' Character pool for the generated passwords. Element 0 is an empty
' placeholder: the generator draws indices 1..UBound(myArr), so it is never
' selected (assuming the default Option Base 0).
' Every physical line of this statement except the last must end with the
' line-continuation marker " _".
myArr = Array("", 2, 3, 4, 5, 6, 7, 8, 9, "A", "B", _
"C", "D", "E", "F", "G", "H", "J", "K", "L", "M", _
"N", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", _
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", _
"p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", _
"!", "§", "$", "%", "&", "(", ")", "*")
' Highest valid index of the character pool
intArr = UBound(myArr)
' Ask how many passwords to create (default 10; the final argument 1 is the
' InputBox Type, presumably restricting the input to a number -- confirm)
intA = Application.InputBox("Wieviele Passwörter sollen erstellt werden?", "PasswortGenerator", 10, , , , , 1)
' Skip everything when the dialog returned a Boolean (i.e. it was cancelled)
If Not TypeName(intA) = "Boolean" Then
Randomize
' Output starts at the currently selected cell and runs down its column
intC = ActiveCell.Column
intZ = ActiveCell.Row
' NOTE(review): the inclusive upper bound intZ + intA makes this loop run
' intA + 1 times -- confirm that one extra password is intended
For intZ = intZ To intZ + intA 'Number of passwords
For intP = 1 To 8 'Number of characters per password
' Int(intArr * Rnd + 1) yields an index in 1..intArr
strP = strP & myArr(Int(intArr * Rnd + 1))
Next intP
' Write the password only if it does not already occur in the active column;
' on a duplicate the cell is left untouched (no retry)
If Application.WorksheetFunction.CountIf(ActiveCell.EntireColumn, strP) = 0 Then
ActiveSheet.Cells(intZ, intC).Value = strP
End If
' Reset the buffer for the next password
strP = ""
Next intZ
End If
End Sub
Thank you very much.
OK, based on the answer below I'm trying to set up a function that will generate a password for each employee (mitarbeiter). I want to add a new variable 'passwort' holding the generated password for each employee. So, thanks again for your help.
# Generate random passwords.
#
# num: number of passwords to generate; 0 yields character(0).
# len: number of characters per password (default 8).
# Returns a character vector of length `num`.
#
# NOTE(review): sample() uses R's general-purpose random number generator,
# which is not designed for security-sensitive secrets -- confirm this is
# acceptable for the intended use.
genPsw <- function(num, len = 8) {
  # Define the password character set. The leading "" of the VBA array is
  # deliberately left out: it was only an index-0 placeholder there, and
  # sampling it here would produce passwords shorter than `len`.
  myArr <- c(2:9, "A", "B",
    "C", "D", "E", "F", "G", "H", "J", "K", "L", "M",
    "N", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
    "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
    "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
    "!", "§", "$", "%", "&", "(", ")", "*")
  # vapply() always returns a character vector, including for num = 0
  vapply(
    seq_len(num),
    function(i) paste(sample(myArr, size = len, replace = TRUE), collapse = ""),
    character(1)
  )
}

# Return `mitarbeiter` with a new column `passwort` holding one generated
# password per row.
#
# The passwords are created in a single call outside genPsw(): calling
# genPsw() from inside its own body recursed without end. No seed is set per
# row, because seeding with the row number would make every password
# reproducible by anyone who knows the row order.
#
# Usage: mitarbeiter <- addPasswort(mitarbeiter)
addPasswort <- function(mitarbeiter, len = 8) {
  mitarbeiter$passwort <- genPsw(nrow(mitarbeiter), len = len)
  mitarbeiter
}
# Generate random passwords.
#
# num: number of passwords to generate; 0 yields character(0).
# len: number of characters per password (default 8).
# Returns a character vector of length `num`, each element built from `len`
# symbols drawn with replacement from the alphabet below.
#
# NOTE(review): sample() uses R's general-purpose random number generator,
# which is not designed for security-sensitive secrets -- confirm this is
# acceptable for the intended use.
genPsw <- function(num, len = 8) {
  # The leading "" of the VBA array is deliberately left out: it was only an
  # index-0 placeholder there, and sampling it here would produce passwords
  # shorter than `len`.
  myArr <- c(2:9, "A", "B",
    "C", "D", "E", "F", "G", "H", "J", "K", "L", "M",
    "N", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
    "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
    "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
    "!", "§", "$", "%", "&", "(", ")", "*")
  # vapply() always returns a character vector, including for num = 0
  vapply(
    seq_len(num),
    function(i) paste(sample(myArr, size = len, replace = TRUE), collapse = ""),
    character(1)
  )
}
# Seeding is for a reproducible demonstration only; do not seed when creating
# real passwords.
set.seed(1)
genPsw(3)
# Returns a character vector of three passwords built from 8 symbols each; the
# exact strings depend on the seed and on the R version's sampling algorithm.

tkplot in latex via knitr and igraph

This may be a wild, strange dream: I dreamt that I could put a tkplot from igraph inside a LaTeX document via knitr. I know Yihui is known for animation stuff, so I thought maybe this is possible. A Google search didn't show what I was after, so here's a non-working attempt:
\documentclass[a4paper]{scrartcl}
\begin{document}
% Setup chunk: loads igraph; include=FALSE keeps the chunk out of the output.
% Each code chunk must be closed by a line containing only "@".
<<setup, include=FALSE, cache=FALSE>>=
library(igraph)
@
% Build a directed graph from a 30 x 2 edge list (person -> choice) and open
% it with tkplot().
% NOTE(review): tkplot() draws into a separate interactive window, so this
% chunk on its own is not expected to place a figure in the document.
<<network>>=
edges <- structure(c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J",
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "A", "B", "C",
"D", "E", "F", "G", "H", "I", "J", "E", "G", "G", "F", "H", "G",
"D", "J", "J", "D", "B", "C", "D", "I", "I", "H", "A", "B", "G",
"I", "F", "D", "F", "J", "D", "B", "E", "E", "A", "E"), .Dim = c(30L,
2L), .Dimnames = list(NULL, c("person", "choice")))
g <- graph.data.frame(edges, directed=TRUE)
tkplot(g)
@
\end{document}
OK, a quick and dirty answer:
\documentclass{article}
\begin{document}
% Setup chunk: loads the packages and registers a chunk hook named "igraph".
% Each code chunk must be closed by a line containing only "@".
<<setup, include=FALSE, cache=FALSE>>=
library(igraph)
library(tcltk)
# Chunk hook: runs for every chunk that sets the `igraph` option. After the
# chunk has been evaluated it exports the canvas of the tkplot window whose id
# is given by that option to an EPS file and returns the LaTeX code that
# includes the file.
knit_hooks$set(igraph = function(before, options, envir) {
  # Nothing to do before the chunk runs; the window does not exist yet
  if (before) return()
  path <- knitr:::fig_path('.eps')
  # NOTE(review): .tkplot.get() is an unexported igraph internal and may
  # change between igraph versions -- confirm it exists in yours
  tkpostscript(igraph:::.tkplot.get(options$igraph)$canvas,
               file = path)
  sprintf('\\includegraphics{%s}', path)
})
@
% igraph=1 is passed to the hook as the tkplot window id (presumably the first
% window opened in the session).
<<network, igraph=1>>=
edges <- structure(c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J",
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "A", "B", "C",
"D", "E", "F", "G", "H", "I", "J", "E", "G", "G", "F", "H", "G",
"D", "J", "J", "D", "B", "C", "D", "I", "I", "H", "A", "B", "G",
"I", "F", "D", "F", "J", "D", "B", "E", "E", "A", "E"), .Dim = c(30L,
2L), .Dimnames = list(NULL, c("person", "choice")))
g <- graph.data.frame(edges, directed=TRUE)
tkplot(g)
@
\end{document}
Feel free to polish it with hook_plot_custom.

Resources