How to change the x axis labels using scale_x_discrete - r

Using the following script:
# Read the coverage table; in the sample data below it has one row per config
# and one CoverageTimeline_T* column per stored sample.
df <- read.csv("/covpl.csv")
# Reshape to long format (presumably reshape2::melt -- the library() call is not shown).
m <- melt(df)
# The x values are taken directly from the melted `variable` column.
Time <- m$variable
# One line and one point series per config.
coverage_plot <- ggplot(data=m, aes(x=Time, y=value, group=config, color=config)) +
geom_line(size=1) +
geom_point(aes(shape=config, colour = config), show.legend = T, size=3) +
# Relabels the discrete x positions with 1..60. The sample data has 60
# CoverageTimeline_T* columns, so this supplies exactly one label per position;
# NOTE(review): a shorter label vector (e.g. 1..30) no longer matches the
# number of positions, which is presumably why that attempt fails.
scale_x_discrete(labels = seq(1, 60.0, by=1)) +
theme(legend.position="bottom", axis.text.x = element_text(angle = 90),text = element_text(size=13),legend.title=element_blank())+
labs(x = "Time (minutes)", y = "Coverage") +
# Show only the point symbols (no line) in the shape legend.
guides(shape=guide_legend(override.aes=list(size=3, linetype=0)))
I get the following plot:
On the x-axis, I would like the labels to run from 1 to 30 (so the position currently labelled 60 should be shown as 30): a value is stored every half minute, which is why there are 60 data points, but I want the axis to show the 30 minutes they cover.
To do that, I changed scale_x_discrete(labels = seq(1, 30.0, by=1)) but this gives the following:
Do you have any idea how to fix this?
Reproducible data:
structure(list(config = structure(1:5, .Label = c("f1", "f2",
"f3", "f4", "f5"), class = "factor"), class = structure(c(1L,
1L, 1L, 1L, 1L), .Label = "com.google.common.base.Joiner", class = "factor"),
CoverageTimeline_T1 = c(0.85390153, 0.841557035, 0.8381143561,
0.8404624807, 0.8448297462), CoverageTimeline_T2 = c(0.9431633586,
0.9192875446, 0.9010343959, 0.9126220049, 0.938583703), CoverageTimeline_T3 = c(0.9881426292,
0.9793648538, 0.9406397492, 0.9507933561, 0.9762333662),
CoverageTimeline_T4 = c(0.9937107313, 0.9933404876, 0.9632557533,
0.9706779854, 0.9946485039), CoverageTimeline_T5 = c(0.9966666667,
1, 0.9799043011, 0.9830096664, 0.9966666667), CoverageTimeline_T6 = c(0.9966666667,
1, 0.9930106526, 0.9866666667, 0.9966666667), CoverageTimeline_T7 = c(0.9966666667,
1, 1, 0.991560876, 0.9966666667), CoverageTimeline_T8 = c(0.9966666667,
1, 1, 0.9966666667, 0.9966666667), CoverageTimeline_T9 = c(0.9966666667,
1, 1, 0.9966666667, 0.9966666667), CoverageTimeline_T10 = c(0.9966666667,
1, 1, 0.9966666667, 0.9966666667), CoverageTimeline_T11 = c(0.9966666667,
1, 1, 0.9966666667, 0.9966666667), CoverageTimeline_T12 = c(0.9966666667,
1, 1, 0.9966666667, 0.9966666667), CoverageTimeline_T13 = c(0.9966666667,
1, 1, 0.9966666667, 0.9966666667), CoverageTimeline_T14 = c(0.9966666667,
1, 1, 0.9966666667, 0.9966666667), CoverageTimeline_T15 = c(0.9966666667,
1, 1, 0.9966666667, 0.9966666667), CoverageTimeline_T16 = c(0.9966666667,
1, 1, 0.9966666667, 0.9966666667), CoverageTimeline_T17 = c(0.9966666667,
1, 1, 0.9966666667, 0.9966666667), CoverageTimeline_T18 = c(0.9966666667,
1, 1, 0.9966666667, 0.9966666667), CoverageTimeline_T19 = c(0.9966666667,
1, 1, 0.9966666667, 0.9966666667), CoverageTimeline_T20 = c(0.9966666667,
1, 1, 0.9966666667, 0.9989709749), CoverageTimeline_T21 = c(0.9966666667,
1, 1, 0.9966666667, 1), CoverageTimeline_T22 = c(0.9966666667,
1, 1, 0.9966666667, 1), CoverageTimeline_T23 = c(0.9966666667,
1, 1, 0.9966666667, 1), CoverageTimeline_T24 = c(0.9966666667,
1, 1, 0.9966666667, 1), CoverageTimeline_T25 = c(0.9966666667,
1, 1, 0.9966666667, 1), CoverageTimeline_T26 = c(0.9966666667,
1, 1, 0.9966666667, 1), CoverageTimeline_T27 = c(0.9966666667,
1, 1, 0.9966666667, 1), CoverageTimeline_T28 = c(0.9966666667,
1, 1, 0.9966666667, 1), CoverageTimeline_T29 = c(0.9966666667,
1, 1, 0.9966666667, 1), CoverageTimeline_T30 = c(0.9966666667,
1, 1, 0.9966666667, 1), CoverageTimeline_T31 = c(0.9966666667,
1, 1, 0.9966666667, 1), CoverageTimeline_T32 = c(0.9966666667,
1, 1, 0.9966666667, 1), CoverageTimeline_T33 = c(0.9966666667,
1, 1, 0.9966666667, 1), CoverageTimeline_T34 = c(0.9966666667,
1, 1, 0.9966666667, 1), CoverageTimeline_T35 = c(0.9966666667,
1, 1, 0.9966666667, 1), CoverageTimeline_T36 = c(0.9966666667,
1, 1, 0.9966666667, 1), CoverageTimeline_T37 = c(0.9966666667,
1, 1, 0.9966666667, 1), CoverageTimeline_T38 = c(0.9966666667,
1, 1, 0.9966666667, 1), CoverageTimeline_T39 = c(0.9966666667,
1, 1, 0.9966666667, 1), CoverageTimeline_T40 = c(0.9966666667,
1, 1, 1, 1), CoverageTimeline_T41 = c(0.9966666667, 1, 1,
1, 1), CoverageTimeline_T42 = c(0.9966666667, 1, 1, 1, 1),
CoverageTimeline_T43 = c(0.9966666667, 1, 1, 1, 1), CoverageTimeline_T44 = c(0.9966666667,
1, 1, 1, 1), CoverageTimeline_T45 = c(0.9966666667, 1, 1,
1, 1), CoverageTimeline_T46 = c(0.9966666667, 1, 1, 1, 1),
CoverageTimeline_T47 = c(0.9966666667, 1, 1, 1, 1), CoverageTimeline_T48 = c(0.9966666667,
1, 1, 1, 1), CoverageTimeline_T49 = c(0.9966666667, 1, 1,
1, 1), CoverageTimeline_T50 = c(0.9966666667, 1, 1, 1, 1),
CoverageTimeline_T51 = c(0.9966666667, 1, 1, 1, 1), CoverageTimeline_T52 = c(0.9966666667,
1, 1, 1, 1), CoverageTimeline_T53 = c(0.9966666667, 1, 1,
1, 1), CoverageTimeline_T54 = c(0.9966666667, 1, 1, 1, 1),
CoverageTimeline_T55 = c(0.9966666667, 1, 1, 1, 1), CoverageTimeline_T56 = c(0.9966666667,
1, 1, 1, 1), CoverageTimeline_T57 = c(0.9966666667, 1, 1,
1, 1), CoverageTimeline_T58 = c(0.9966666667, 1, 1, 1, 1),
CoverageTimeline_T59 = c(0.9966666667, 1, 1, 1, 1), CoverageTimeline_T60 = c(0.9966666667,
1, 1, 1, 1)), class = "data.frame", row.names = c(NA, -5L
))

Edit: It would be better if you define Time as a numeric vector based on the factor you have (m$variable). Using a simple regular expression we can pull out the number and divide by 2:
# Plot coverage over time with a numeric x axis expressed in minutes.
df <- read.csv("/covpl.csv")
m <- melt(df)
# The melted `variable` values look like "CoverageTimeline_T17": strip
# everything up to "_T" to get the sample number, then halve it because two
# samples are stored per minute. Keeping the result as a column of `m` ties
# each x value to its row, so it stays aligned if `m` is later filtered or
# faceted.
m$Time <- as.numeric(gsub(".*_T", "", m$variable)) / 2
# Also kept as a stand-alone vector for any later code that refers to `Time`.
Time <- m$Time
coverage_plot <- ggplot(data = m, aes(x = Time, y = value, group = config, color = config)) +
  geom_line(size = 1) +
  # Colour is inherited from the plot-level mapping; only shape is added here.
  geom_point(aes(shape = config), show.legend = TRUE, size = 3) +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90),
    text = element_text(size = 13),
    legend.title = element_blank()
  ) +
  labs(x = "Time (minutes)", y = "Coverage") +
  # Show only the point symbols (no line) in the shape legend.
  guides(shape = guide_legend(override.aes = list(size = 3, linetype = 0)))

Related

Subset data based on string values

I would like to subset my data frame based on the index column: I want to keep only those cases whose index is stored in myvars (e.g. 110, 111). I don't understand why I receive 0 observations when running this code:
newdata <- df[ which(df$index=="myvars"), ]
Sample data:
df<-structure(list(index = c(111, 110, 101, 111), et = c(1, 1, 1,
1), d1_t2 = c(0, 1, 1, 1), d1_t3 = c(0, 0, 1, 1), d1_t4 = c(0,
1, 0, 1), d2_t1 = c(0, 0, 1, 1), d2_t2 = c(0, 1, 1, 1), d2_t3 = c(0,
0, 0, 1), d2_t4 = c(1, 0, 1, 1), d3_t1 = c(1, 0, 1, 1), d3_t2 = c(1,
1, 0, 1), d3_t3 = c(1, 0, 1, 1), d3_t4 = c(1, 1, 0, 1), d4_t1 = c(0,
0, 1, 1), d4_t2 = c(1, 1, 0, 1), d4_t3 = c(0, 0, 1, 1), d4_t4 = c(1,
0, 1, 1), d5_t1 = c(1, 0, 0, 1), d5_t2 = c(0, 1, 1, 1), d5_t3 = c(1,
0, 1, 1), d5_t4 = c(0, 0, 1, 1), d6_t1 = c(1, 0, 0, 1), d6_t2 = c(0,
0, 1, 1), d6_t3 = c(1, 0, 1, 1), d6_t4 = c(1, 0, 1, 1), d7_t1 = c(1,
1, 1, 1), d7_t2 = c(1, 1, 1, 1), d7_t3 = c(1, 0, 1, 1), d7_t4 = c(1,
0, 1, 1)), row.names = c(NA, 4L), class = "data.frame")
Code:
myvars<-c("110", "111")
try
# Numeric lookup values, matching the numeric `index` column (no quotes).
myvars <- c(110, 111)
# `%in%` gives a plain TRUE/FALSE mask (never NA), so it can select rows
# directly. Note that `myvars` is the variable here, not the string "myvars".
df[df$index %in% myvars, ]
There are several basic problems with what you are trying to do.
You are not using the variable 'myvars' -- you are using a string with the value "myvars". None of your rows has the index "myvars".
You are using == which is good for one value (e.g. values==4), but myvars has multiple values in it. Instead, you could use df$index %in% myvars
Matching against the strings "110" and "111" does work, but your index column is numeric and you are comparing it with strings. This is unnecessary, and could lead to problems in other places.
You may be confused because of your very large and complex example data. You only need one column to test -- not twenty.

ggplot line graph shows overlap for discrete Y axis variable

I am trying to plot a time series plot using ggplot. I have a discrete variable that just describes if a vehicle engine was on or off through a given time.
Naturally, it should either be 0 or 1, there can't be an overlap. But as seen in the below plot, there's a considerable overlap.
I am pretty new to ggplot. Could someone please let me know what the issue is.
The code I am using is:
# Plot the ignition status (Value converted to a factor) against SampleTime as
# a line, with hourly x-axis breaks labelled by hour of day.
# NOTE(review): because y is a factor here, ggplot2's default grouping by
# discrete variables presumably draws one separate line per level (0 and 1),
# each spanning all of its samples -- the likely cause of the apparent
# overlap. Confirm against the ggplot2 grouping documentation.
pd %>%
ggplot(aes(x = SampleTime, y = as.factor(Value))) + geom_line(color = 'black') +
labs(x = 'Time', y = 'Ignition Status', title = 'Ignition Status by Time') +
scale_x_datetime(date_breaks = '1 hour', date_labels = '%H') +
theme(axis.title = element_text(size = 18, face = 'bold'),
axis.text = element_text(size = 14, color = 'black'))
structure of data:
str(pd)
'data.frame': 474 obs. of 2 variables:
$ SampleTime: POSIXct, format: "2020-11-30 00:17:46" "2020-11-30 01:17:47" "2020-11-30 02:17:48" "2020-11-30 03:17:49" ...
$ Value : num 0 0 0 0 0 0 0 0 0 1 ...
data used:
structure(list(SampleTime = structure(c(1606675666, 1606679267,
1606682868, 1606686469, 1606690070, 1606693671, 1606697271, 1606700872,
1606704473, 1606706974, 1606707066, 1606707129, 1606707192, 1606707255,
1606707318, 1606707382, 1606707445, 1606707508, 1606707571, 1606707631,
1606707694, 1606707754, 1606707817, 1606707880, 1606707943, 1606708006,
1606708069, 1606708132, 1606708195, 1606708258, 1606708318, 1606708381,
1606708441, 1606708504, 1606708567, 1606708630, 1606708691, 1606708751,
1606708814, 1606708877, 1606708940, 1606709003, 1606709066, 1606709129,
1606709192, 1606709255, 1606709318, 1606709381, 1606709444, 1606709507,
1606709570, 1606709633, 1606709696, 1606709759, 1606709822, 1606709885,
1606709948, 1606710011, 1606710074, 1606710137, 1606710200, 1606710263,
1606710348, 1606710411, 1606710474, 1606710537, 1606710600, 1606710663,
1606710726, 1606710789, 1606710852, 1606710915, 1606710978, 1606711041,
1606711104, 1606711167, 1606711230, 1606711293, 1606711356, 1606711419,
1606711482, 1606711545, 1606711608, 1606711671, 1606711734, 1606711797,
1606711860, 1606711923, 1606711986, 1606712049, 1606712112, 1606712175,
1606712238, 1606712301, 1606712365, 1606712428, 1606712491, 1606712553,
1606712617, 1606712679, 1606712756, 1606712909, 1606712970, 1606713007,
1606713034, 1606713094, 1606713157, 1606713220, 1606713283, 1606713346,
1606713409, 1606713472, 1606713535, 1606713598, 1606713661, 1606713724,
1606713787, 1606713850, 1606713913, 1606713976, 1606714039, 1606714102,
1606714165, 1606714228, 1606714291, 1606714354, 1606714417, 1606714480,
1606714543, 1606714606, 1606714669, 1606714732, 1606714795, 1606714858,
1606714921, 1606714984, 1606715047, 1606715110, 1606715173, 1606715236,
1606715299, 1606715362, 1606715425, 1606715488, 1606715551, 1606715614,
1606715677, 1606715740, 1606715803, 1606715866, 1606715929, 1606715992,
1606716055, 1606716118, 1606716181, 1606716244, 1606716307, 1606716370,
1606716433, 1606716464, 1606716826, 1606716886, 1606716949, 1606717012,
1606717075, 1606717138, 1606717201, 1606717264, 1606717327, 1606717390,
1606717453, 1606717516, 1606717579, 1606717642, 1606717705, 1606717768,
1606717831, 1606717894, 1606717957, 1606718021, 1606718084, 1606718147,
1606718210, 1606718273, 1606718336, 1606718399, 1606718462, 1606718525,
1606718588, 1606718651, 1606718714, 1606718777, 1606718839, 1606718903,
1606718965, 1606719029, 1606719091, 1606719155, 1606719218, 1606719281,
1606719344, 1606719407, 1606719470, 1606719533, 1606719596, 1606719659,
1606719722, 1606719785, 1606719848, 1606719911, 1606719974, 1606720037,
1606720100, 1606720163, 1606720226, 1606720289, 1606720352, 1606720415,
1606720478, 1606720498, 1606721559, 1606721619, 1606721682, 1606721745,
1606721808, 1606721871, 1606721934, 1606721997, 1606722060, 1606722123,
1606722186, 1606722249, 1606722312, 1606722375, 1606722438, 1606722501,
1606722564, 1606722627, 1606722690, 1606722753, 1606722816, 1606722858,
1606722892, 1606722952, 1606723015, 1606723078, 1606723141, 1606723204,
1606723267, 1606723330, 1606723393, 1606723456, 1606723519, 1606723582,
1606723645, 1606723708, 1606723771, 1606723834, 1606723897, 1606723960,
1606724023, 1606724086, 1606724149, 1606724212, 1606724275, 1606724338,
1606724401, 1606724464, 1606724527, 1606724590, 1606724653, 1606724716,
1606724779, 1606724842, 1606724905, 1606724968, 1606725031, 1606725094,
1606725157, 1606725220, 1606725283, 1606725346, 1606725409, 1606725472,
1606725535, 1606725598, 1606725661, 1606725721, 1606725784, 1606725840,
1606729441, 1606730753, 1606730814, 1606730857, 1606731021, 1606731081,
1606731144, 1606731207, 1606731270, 1606731333, 1606731396, 1606731459,
1606731522, 1606731585, 1606731648, 1606731711, 1606731774, 1606731837,
1606731900, 1606731963, 1606732026, 1606732089, 1606732138, 1606732956,
1606733017, 1606733080, 1606733143, 1606733206, 1606733269, 1606733332,
1606733395, 1606733458, 1606733521, 1606733584, 1606733647, 1606733710,
1606733773, 1606733836, 1606733899, 1606733962, 1606734025, 1606734088,
1606734151, 1606734203, 1606735128, 1606735189, 1606735252, 1606735315,
1606735378, 1606735441, 1606735504, 1606735567, 1606735630, 1606735683,
1606739285, 1606742885, 1606744969, 1606745030, 1606745093, 1606745156,
1606745219, 1606745282, 1606745345, 1606745408, 1606745471, 1606745534,
1606745597, 1606745660, 1606745723, 1606745786, 1606745849, 1606745912,
1606745975, 1606746038, 1606746101, 1606746164, 1606746370, 1606746433,
1606746496, 1606746559, 1606746702, 1606746736, 1606746765, 1606746828,
1606746891, 1606746954, 1606747017, 1606747077, 1606747140, 1606747203,
1606747266, 1606747329, 1606747392, 1606747455, 1606747518, 1606747581,
1606747644, 1606747707, 1606747770, 1606747833, 1606747896, 1606747959,
1606748022, 1606748085, 1606748145, 1606748206, 1606748269, 1606748332,
1606748395, 1606748458, 1606748521, 1606748584, 1606748647, 1606748707,
1606748770, 1606748833, 1606748896, 1606748959, 1606749022, 1606749085,
1606749148, 1606749211, 1606749274, 1606749337, 1606749400, 1606749463,
1606749526, 1606749589, 1606749652, 1606749715, 1606749778, 1606749841,
1606749904, 1606749967, 1606750030, 1606750093, 1606750156, 1606750219,
1606750282, 1606750345, 1606750408, 1606750471, 1606750534, 1606750597,
1606750657, 1606750720, 1606750781, 1606750841, 1606750904, 1606750967,
1606751030, 1606751084, 1606754686, 1606758287, 1606759153, 1606759213,
1606759276, 1606759339, 1606759402, 1606759465, 1606759528, 1606759592,
1606759654, 1606759718, 1606759780, 1606759843, 1606759906, 1606759969,
1606760032, 1606760096, 1606760159, 1606760222, 1606760285, 1606760348,
1606760411, 1606760474, 1606760537, 1606760600, 1606760663, 1606760726,
1606760788, 1606760851, 1606760914, 1606760977), class = c("POSIXct",
"POSIXt"), tzone = ""), Value = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1)), row.names = c(NA, -474L), class = "data.frame")
ggplot automatically connects the gaps between observations if you use geom_line. Try to use geom_point instead to see the gaps:
# Scatter version: every sample is drawn as its own point, so nothing is drawn
# between observations and gaps in time stay visible.
ggplot(data = pd, mapping = aes(x = SampleTime, y = as.factor(Value))) +
  geom_point(colour = "black") +
  scale_x_datetime(date_breaks = "1 hour", date_labels = "%H") +
  labs(
    title = "Ignition Status by Time",
    x = "Time",
    y = "Ignition Status"
  ) +
  theme(
    axis.text = element_text(size = 14, colour = "black"),
    axis.title = element_text(size = 18, face = "bold")
  )
Alternatively, you can use geom_line but don't convert Value to a factor and use the numeric value instead. This way you can see the change from 0 to 1 more clearly:
# Line version on the raw numeric 0/1 values: y is left as a number instead of
# being converted to a factor, so the line moves between 0 and 1.
ggplot(data = pd, mapping = aes(x = SampleTime, y = Value)) +
  geom_line(colour = "black") +
  scale_x_datetime(date_breaks = "1 hour", date_labels = "%H") +
  labs(
    title = "Ignition Status by Time",
    x = "Time",
    y = "Ignition Status"
  ) +
  theme(
    axis.text = element_text(size = 14, colour = "black"),
    axis.title = element_text(size = 18, face = "bold")
  )
Lastly, a slightly more appropriate option for categorical data might be to use geom_step instead, which at least connects the two categories with a vertical line:
# Step version on the raw numeric 0/1 values: changes between the two states
# are drawn as vertical jumps rather than sloped segments.
ggplot(data = pd, mapping = aes(x = SampleTime, y = Value)) +
  geom_step(colour = "black") +
  scale_x_datetime(date_breaks = "1 hour", date_labels = "%H") +
  labs(
    title = "Ignition Status by Time",
    x = "Time",
    y = "Ignition Status"
  ) +
  theme(
    axis.text = element_text(size = 14, colour = "black"),
    axis.title = element_text(size = 18, face = "bold")
  )
If that doesn't suffice, check out this post on how to connect the lines between each consecutive dot per group via geom_segment.

poLCA in R is not executing. I am getting a weird error message

I'm pretty new to R and I'm encountering an error that I'm not sure why I'm getting. I'm running R Studio Version 1.3.959 on Windows 10 and trying to use the package poLCA.
Here is my code:
library(scatterplot3d)
library(MASS)
library(poLCA)
# Load a CSV chosen interactively, recode the values, and fit a 3-class
# latent class model with poLCA.
data <- read.csv(file.choose(), header = TRUE)
# Text substitution on every column except the third: each "1" becomes "2",
# then each "0" becomes "1" (so 0/1 values end up as 1/2).
# NOTE(review): this also rewrites digits in non-item columns such as the ID
# column -- confirm that is intended.
data[-3] <- lapply(data[-3],gsub,pattern = "1", replacement = "2", fixed = TRUE)
data[-3] <- lapply(data[-3],gsub,pattern = "0", replacement = "1", fixed = TRUE)
# gsub() returns character vectors, so the five item columns are converted
# back to numeric.
cols.num<- c("A", "B", "C", "D", "E")
data[cols.num] <- sapply(data[cols.num],as.numeric)
# Keep only the rows for the AMERICAS sales level.
AMER_all <- data[which(data$SALES_LEVEL_1 == "AMERICAS"),]
# This binds five strings into a 1 x 5 character matrix; it is not a formula.
AMER_ALL_LSA <- cbind("A", "B", "C", "D", "E")
# NOTE(review): the sample data's column is named WALLET_BIN (no trailing S)
# and contains no "<$2k" value, so this filter may select zero rows -- confirm.
AMER_less_1000 <- AMER_all[which(AMER_all$WALLET_BINS == "<$2k"),]
AMER_less_1000_LCA <- poLCA(AMER_ALL_LSA, data = AMER_less_1000, nclass = 3, graphs = TRUE)
This is the error that I'm getting -
Error in runif(R * K.j[j]) : invalid arguments
In addition: There were 50 or more warnings (use warnings() to see the first 50)
and when I look at the warning I get this:
In FUN(newX[, i], ...) : no non-missing arguments to max; returning -Inf
Any idea why I'm getting this error? I'm wondering if it's the version of R that I downloaded. I recently had to reimage my laptop and install R and RStudio afresh.
Sample Data
structure(list(ï..COMPANY_ID = c("GUC_123",
"GUC_111", "GUC_112", "GUC_113", "GUC_114",
"GUC_115", "GUC_116", "GUC_117",
"GUC_118", "GUC_119", "GUC_120", "GUC_121",
"GUC_122", "GUC_123", "GUC_124",
"GUC_125", "GUC_126", "GUC_127",
"GUC_128", "GUC_129"), SALES_LEVEL_1 = c("AMERICAS",
"APJC__", "AMERICAS", "AMERICAS", "APJC__", "AMERICAS", "EMEAR-REGION",
"AMERICAS", "AMERICAS", "EMEAR-REGION", "AMERICAS", "AMERICAS",
"AMERICAS", "AMERICAS", "EMEAR-REGION", "AMERICAS", "AMERICAS",
"AMERICAS", "APJC__", "EMEAR-REGION"), WALLET_BIN = c("$1k-$15k",
"$15k-$50", "$1k-$15k", "$100k-$200k", "$1k-$15k", "$15k-$50",
"$1k-$15k", "$1k-$15k", "$15k-$50", "$15k-$50", "$1k-$15k", "$1k-$15k",
"$15k-$50", "$1k-$15k", "$1k-$15k", "$50k-$100k", "$50k-$100k",
"$15k-$50", "$50k-$100k", "$1k-$15k"), A = c(1,
1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1), B = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), C = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), D = c(2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1), E = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), F = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), G = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), H = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1), I = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1), J = c(2,
1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1), K = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), L = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), M = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), N = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), O = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1), P = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1), row.names = c(NA,
20L), class = "data.frame")
It looks like the fix is simply to define the model as a formula with unquoted column names (cbind(...) ~ 1, rather than a cbind() of quoted strings), as also noted in this answer:
library(poLCA)
# Load the data, recode the item columns, and fit a 3-class latent class
# model on the AMERICAS rows.
data <- read.delim("https://pastebin.com/raw/TNJNCxkH")
# The item columns run from column 5 to the last column.
item_cols <- 5:ncol(data)
# Recode each item by text substitution ("1" -> "2", then "0" -> "1", i.e.
# 0/1 becomes 1/2) and convert back to numeric; poLCA expects categories coded
# as integers starting at 1. lapply() with list-style indexing always yields
# one vector per column, whatever the number of columns or rows.
data[item_cols] <- lapply(data[item_cols], function(item) {
  item <- gsub("1", "2", item, fixed = TRUE)
  item <- gsub("0", "1", item, fixed = TRUE)
  as.numeric(item)
})
AMER_all <- data[which(data$SALES_LEVEL_1 == "AMERICAS"), ]
# The model must be a formula with bare (unquoted) column names on the left;
# "~ 1" on the right means no covariates.
AMER_ALL_LSA <- cbind(PB_1, PB_2, PB_3, PB_4, PB_5) ~ 1
AMER_less_1000_LCA <- poLCA(AMER_ALL_LSA, data = AMER_all, nclass = 3, graphs = TRUE)
#Conditional item response (column) probabilities,
# by outcome variable, for each class (row)
#...
#=========================================================
#Fit for 3 latent classes:
#=========================================================
#number of observations: 1016
#number of estimated parameters: 17
#residual degrees of freedom: 14
#maximum log-likelihood: -1068.675
#AIC(3): 2171.351
#BIC(3): 2255.052
#G^2(3): 1.362823 (Likelihood ratio/deviance statistic)
#X^2(3): 0.7509686 (Chi-square goodness of fit)
#
#ALERT: iterations finished, MAXIMUM LIKELIHOOD NOT FOUND

ComplexHeatmap - Highlight specific values in a heatmap

I'm using the ComplexHeatmap package in R to plot a matrix.
I would like to plot cells with specific values in different colors such as:
Red if value < threshold_low
Yellow if threshold_low < value < threshold_high
Green gradient if threshold_high < value < 0.9
White if value > 0.9
The code below produces something close to what I'm trying to achieve but not exactly...
There should be 3 values highlighted in 'red' (but fewer than 3 are shown).
The legend doesn't show the red value.
I've tried different values in colorRamp2() but I still can't achieve the results I want.
Link to the package: https://github.com/jokergoo/ComplexHeatmap
Relevant documentation: https://bioconductor.statistik.tu-dortmund.de/packages/3.1/bioc/vignettes/ComplexHeatmap/inst/doc/ComplexHeatmap.html
Reproducible example:
library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)
mat <- structure(c(1, 0.154616894313456, 0.168336307012768, 1, 1, 0.453975613676526,
1, 1, 1, 1, 0.00056792615275985, 1, 1, 1, 1, 1, 0.105149642433548,
0.425158360263792, 1, 1, 0.501057722133202, 1, 1, 1, 0.00278294210196398,
1, 0.168271031634512, 1, 0.830063213792425, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0.451156419955746, 1, 0.0274775797365075, 1, 1, 0.717017044202449,
1, 1, 1, 0.717321290931695, 0.0845704812428829, 1, 0.0699956751028108,
1, 0.275329648641519, 1, 0.0201980178890521, 0.0455613967501329,
1, 1, 0.536035636664544, 1, 1, 1, 1, 0.358910397894989, 1, 1,
1, 1, 1, 0.0467579359404697, 0.100390014456881, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 0.00196501415273741, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0.52718575076658, 0.589648919728843, 0.016642521684359, 1, 1,
0.100523837174713, 0.62858401844093, 1, 1, 1, 0.00272095544860866,
1, 0.00152884219028376, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.00715615768151061,
1, 1, 0.0017044025565658, 1, 0.900838683850751, 1, 0.00286177891699905,
0.964009307250068, 1, 1, 1, 1, 0.712505373834541, 0.156595117903911,
0.698630689400136, 0.0497797620046933, 0.0432669236844255, 1,
0.482930753871518, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.863198034258964,
0.0914794415922655, 1, 1, 1, 1, 1, 0.130770966476073, 0.133939889164611,
1, 0.357244624441367, 0.0574932940137384, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 0.10152332525958, 1, 1, 1, 1, 1, 1, 1,
0.613891861387759, 1, 1, 1, 1, 0.0194347260355869, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.836989723814452,
0.262496570009738, 0.0266926025635128, 1, 1, 1, 0.409918984740791,
1, 1, 1, 0.000826603369076878, 0.251385649690035, 0.262496570009738,
1, 0.636285376736921, 1, 1, 1, 0.131975914025068, 1, 0.302372382987271,
1, 0.829657680365184, 0.234804722298138, 1, 0.480189087472427,
1, 1, 0.434351639718704, 1, 1, 1, 1, 1, 1, 1, 1, 0.827162805570832,
1, 0.000754621951469572, 1, 1, 1, 0.0607557227589793, 0.0466844602596043,
0.166584131048057, 1, 0.00375850252234914, 1, 0.00714919767503871,
1, 1, 0.0274923567061487, 0.317749204479313, 0.523992490899291,
0.0154308196673378, 1, 1, 1, 1, 0.283874713710679, 1, 1, 0.0879611832530085,
0.00614031465663928, 1, 0.44858333972042, 1, 1, 1, 1, 1, 1, 0.0999740979976858,
1, 0.0857648171802508, 0.129753363733359, 0.205913325652849,
0.164132124317898, 1, 1, 0.135339969223928, 0.441555384681617,
1, 1, 1, 0.619544298689775, 1, 1, 1, 1, 1, 0.00237701375005596,
1, 1, 0.149979241487463, 5.65028970245137e-06, 0.00435404367430347,
0.102437190438328, 0.159652054514079, 1.34807420832168e-05, 0.0781174367417338,
0.00168329325806749, 1, 1, 0.0178846842165573, 0.562000511433683,
1, 0.226033494133538, 1, 1, 1, 0.0116432115170938, 0.0158685033438527,
1, 1, 1, 1, 1, 0.226315459031352, 1, 0.0839844171448521, 0.00448638209236464,
0.741937003903222, 1, 1, 0.0969877781197935, 1, 1, 0.899533885687393,
0.11038695014775, 0.0691406897023532, 0.00864645807981477, 0.000409055625639548,
0.0432931123545735, 0.000183686382465392, 1, 1, 1, 1, 1, 0.262742531844371,
1, 0.695486348184785, 0.129359897480463, 0.7456928940893, 0.85934081881042,
1, 1, 1, 1, 1, 0.608552296219399, 1, 0.39067450494575, 1, 1,
1, 1, 0.411281105041672, 0.484196212791728, 0.198292538014945,
0.677053068881975, 1, 1, 1, 0.0318317218601013, 1, 1, 0.791347998335414,
1, 1, 1, 1, 0.240600397196919, 0.135644227364626, 1, 1, 1, 8.78013264925257e-05,
0.259727834661945, 0.305025003277675, 0.0492101609648877, 1,
0.667608208227447, 0.15068985721195, 1, 0.00197605148651341,
0.194624037083573, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0758606965055501,
1, 1, 1, 1, 0.146826771370067, 1, 0.0318317218601013, 1, 0.267193726901327,
1, 0.0214353975572249, 1, 1), .Dim = c(53L, 8L), .Dimnames = list( NULL, NULL))
# Thresholds that separate the colour classes.
threshold_low <- 0.00015
threshold_high <- 0.05
# Four break points: 0, the two thresholds, and 0.9.
color_breaks <-c(0, threshold_low, threshold_high, 0.9)
# One colour per break. length(color_breaks) is 4, so n = 1 and the "Greens"
# ramp contributes a single colour: the palette is red, yellow, one green, white.
my_palette <- c( "red",
"yellow",
colorRampPalette(rev(brewer.pal(8, "Greens")))(n = length(color_breaks)-3),
"white")
# NOTE(review): colorRamp2 interpolates between neighbouring breaks, so a value
# between 0 and threshold_low is presumably drawn as a red-to-yellow blend
# rather than solid red -- confirm against the circlize documentation.
col_fun = colorRamp2(color_breaks, my_palette)
# Draw the matrix without clustering rows or columns.
Heatmap(mat,
cluster_rows = FALSE,
show_column_names = FALSE, # We use colored anno_block instead
#clustering_distance_rows = "euclidean",
cluster_columns=FALSE,
col = col_fun,
# Legend
# The three tick positions are threshold_low, threshold_high and 0.5; the
# label '1' is therefore attached to the tick at 0.5, and no tick sits at 0.
heatmap_legend_param = list(
title = "values", direction = "horizontal",
legend_width = unit(.25, "npc"),
at = c(threshold_low, threshold_high, .5),
labels = c("<0.00015" , "<0.05", '1')
),
# Cell separator
rect_gp = gpar(col = "lightgray", lwd = 1),
width=unit(0.6,'npc'),
)
Following is the color mapping function I created:
# Step-like colour mapping: each threshold appears twice, once at the break
# and once just above it (break + offset), so the colour switches abruptly at
# the threshold. Only the 0.05-0.9 interval is a real gradient
# (light green to dark green).
offset <- 1e-10
col_fun <- colorRamp2(
  breaks = c(
    0, 0.00015,
    0.00015 + offset, 0.05,
    0.05 + offset, 0.9,
    0.9 + offset, 1
  ),
  colors = c(
    "red", "red",
    "yellow", "yellow",
    "lightgreen", "darkgreen",
    "white", "white"
  )
)
The correspondence is
0 "red"
0.00015 "red"
0.00015+offset "yellow"
0.05 "yellow"
0.05+offset "lightgreen"
0.9 "darkgreen"
0.9+offset "white"
1 "white"
The point is that you have a different color scheme in each interval, so you need to be careful with the endpoints of each interval.

How to plot a different colour depending on the value in geom_line ggplot

I have a ggplot like so:
I would like to have a line that is red if it is above the topmost geom_hline, preferably only when it is above the line. Failing this geom_point colour change will do:
The minimal dataset
structure(list(chr = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1), leftPos = c(480000, 600000, 2520000, 2760000, 2880000,
3000000, 3120000, 3480000, 3600000, 4440000, 5880000, 6120000,
7680000, 8400000, 8520000, 8640000, 8760000, 8880000, 9000000,
9120000, 10320000, 11280000, 12240000, 12360000, 12840000, 13080000,
14760000, 17520000, 17880000, 18120000, 18240000, 18480000, 18600000,
18720000, 19200000, 19320000, 20640000, 21000000, 21120000, 21840000,
22560000, 22680000, 24360000, 24600000, 24720000, 25080000, 25200000,
25440000, 25560000, 25680000, 25920000, 26160000, 26280000, 27000000,
27360000, 27480000, 27840000, 27960000, 28320000, 28440000, 28560000,
28680000, 29160000, 29280000, 29520000, 29760000, 29880000, 30000000,
30120000, 30360000, 30840000, 31200000, 31680000, 32760000, 33000000,
33360000, 33480000, 33600000, 33720000, 33840000, 34080000, 34200000,
34320000, 34440000, 34920000, 35040000, 35160000, 35400000, 35520000,
35640000, 35760000, 35880000, 36000000, 36120000, 36480000, 36720000,
36840000, 37440000, 37680000, 37800000), Means = c(45.1721105399911,
58.3236234466928, 10.701781072969, 81.1844097193854, 15.9764258576336,
72.9857717254786, 100.909692015799, 42.3842652589944, 48.5758167032384,
60.5307788344342, 9.72605226749078, 30.952302909114, 17.860141968134,
30.8530526895484, 20.4628162381967, 116.623581135536, 38.6678878887186,
78.9150157908195, 247.360337239763, 24.8838914877946, 23.429988222356,
53.2615600034024, 52.3424770503346, 170.222454449925, 50.5781138394727,
68.3087962976007, 89.4230029840393, 330.313625666045, 5.03584032143116,
89.2643864875047, 27.8653665885641, 201.299758574135, -5.42494623204948,
46.35601686698, 5.79103997302425, 112.341455197127, -4.28592603543802,
7.41453012965141, 54.7629963052909, 23.3995967033728, 38.50598295151,
51.1117491264575, 9.70064959823299, 145.915122377063, -1.93394721482472,
223.668891284723, 0.967813870514552, 3.98951268889225, 103.991761780532,
25.360339056567, 110.629872488342, 98.3711372064266, 4.29902758331359,
-6.49705043591403, 119.885301893303, 11.7856758287972, 2.14257993846943,
0.895276964913781, -4.13978377824383, 32.8265982947408, 102.894448568246,
16.4150366530933, 3.50639248897849, 96.4740592727108, 163.538533768379,
11.7417544456533, 271.336168593183, 21.9737935918722, 69.3505139183734,
39.0812235546367, -0.0979741416490998, 11.2593492499262, 1.09352485411191,
125.197079877443, 3.72187246405846, 60.0443035930762, 64.269810330157,
16.8509153029321, -1.53165469412626, 17.8174419767041, 12.4924416240186,
77.1154197241883, 50.0459666078323, 44.1948097160243, 7.87958496186204,
24.2661434245405, 36.080204093108, 57.8551517801946, 130.247092097128,
56.2802152403655, 24.1732000185415, 0.805669460114936, 20.805136831573,
89.3895887677943, 12.0889852368913, 189.261728063453, 18.3254402417783,
0.914727515857528, 26.4441526771417, 60.3908804957335)), .Names = c("chr",
"leftPos", "Means"), row.names = c(NA, 100L), class = "data.frame")
The code I am using for one of the plots (facet_wrap comes later)
# y positions of the three horizontal reference lines drawn below.
UL1=400
LL1=0
Median1=200
# Line plot of Means against leftPos, one panel per chr.
# xend/yend are presumably left over from the commented-out geom_segment layer.
ggplot(Zoutliers1,aes(x = leftPos,
y = as.numeric(Means),
group = chr,
xend = leftPos,
yend=0))+
#geom_bar(stat="identity",fill = "red", size = 1, colour = "red")+
geom_line()+
#geom_segment(linetype= 1, colour = "#919191")+
#ggtitle(TBBName)+
# Fix the y axis to the range -100..480.
ylim(-100,480)+
# One column of panels per chr, with x scale and panel width free per panel.
facet_grid(.~chr, space="free_x", scales="free_x")+
# Reference lines at UL1, Median1 (orange) and LL1.
geom_hline(yintercept = UL1,size=0.5)+
geom_hline(yintercept = Median1,size=0.5,colour='orange')+
geom_hline(yintercept = LL1,size=0.5)+
# NOTE(review): newer ggplot2 releases name this setting panel.spacing and
# deprecate panel.margin -- confirm against the installed version.
theme(panel.margin = unit(0.1, "lines"))+
# Hide the x tick labels and both axis titles; outline each panel with a
# dashed dark-red border.
theme(axis.text.x = element_blank())+
theme(panel.border = element_rect(fill=NA,color="darkred", size=0.5,
linetype="dashed"))+
theme(axis.title = element_blank())

Resources