Building legends with time series data, in ggplot - r

Aloha all,
I've struggled to build a legend for a mix/match of time series data I'm making. Here is some code:
My understanding is that I need to somehow clean my data and put it all in the same data frame, but the time series don't line up very well: some are sampled every 15 minutes, others every hour. Is there any way to force a legend for these datasets? I don't know what else to post here, since the 5 datasets are quite large.
Plot I'm working on:
# Overlay several point series from different data frames, each restricted to
# the window between startd and endd (both defined outside this snippet).
# Every layer sets its colour as a constant outside aes(), so no colour scale
# is trained and ggplot2 has nothing to build a legend from; the legend.*
# theme settings at the end therefore have nothing to act on.
q<- ggplot(subset(cr200_Auwai1, timedate>startd & timedate<endd), aes(timedate, Turb_SS)) +
geom_point(color="coral4")+
geom_point(data=subset(dsloi_wl, timedate>startd & timedate<endd), aes(timedate, level), color="blue")+
#geom_point(data=subset(flow_data, mdate>startd & mdate<endd), aes(as.POSIXct(mdate), flow_cfs*1000), color="red")+
# Lvl_m is multiplied by 1000 here (presumably metres -> mm, to match the
# "Water Level (mm)" axis label below -- confirm the logger's units)
geom_point(data=subset(cr300_Wai1, timedate>startd & timedate<endd), aes(timedate, Lvl_m*1000), color="forestgreen", size=1)+ #aquamarine3
geom_point(data=subset(cr300_Wai1, timedate>startd & timedate<endd), aes(timedate, Turb_SS), color="orange")+
#geom_point(data=subset(hihimanu_wl, timedate>startd & timedate<endd), aes(timedate, level), color="azure4", size=0.1)+
#geom_point(data=subset(rain_data, timedate>startd & timedate<endd), aes(timedate, rainmm), color="red",size=5)+
geom_point(data=subset(haptuk_ysi, datetime>startd & datetime<endd), aes(datetime, Turb), color="pink")+
#scale_x_date(breaks=date_breaks("month"), labels = date_format("%b-%y"))+
xlab("Date")+
ylab("Turbidity (NTU) and Water Level (mm)")+
# zoom the y-axis to 0..1500 without dropping points outside that range
coord_cartesian(ylim=c(0, 1500))+
theme_bw()+
theme(axis.text=element_text(size=14),
axis.title=element_text(size=16,face="bold"),
legend.justification = c(1, 1),
legend.position = c(1, 1),
legend.title=element_text(size=14),
legend.text=element_text(size=12))
Here is a sample of two of the datasets: Note that the times don't line up at all... since I'm mixing sources.
dsloi_wl:
structure(list(ReceptionTime = c(1533895414.1134, 1533895414.1733,
1533895414.19397, 1533895414.20708, 1533895414.22283, 1533895414.23634,
1533895414.25135, 1533895414.26387, 1533895414.27653, 1533895414.29126,
1533896013.68755, 1533896013.7638, 1533896013.79232, 1533896013.80917,
1533896013.82312, 1533896013.83648, 1533896013.84988, 1533896013.8648,
1533896013.87724, 1533896013.8894), d2w = c(776.7, 789.7, 790.2,
777.1, 777.2, 777.7, 778.4, 793.4, 779.6, 794.1, 819.9, 780.7,
794.1, 806.9, 781.9, 781.9, 782.7, 782.8, 783.1, 783.4), timedate = structure(c(1533895414.1134,
1533895414.1733, 1533895414.19397, 1533895414.20708, 1533895414.22283,
1533895414.23634, 1533895414.25135, 1533895414.26387, 1533895414.27653,
1533895414.29126, 1533896013.68755, 1533896013.7638, 1533896013.79232,
1533896013.80917, 1533896013.82312, 1533896013.83648, 1533896013.84988,
1533896013.8648, 1533896013.87724, 1533896013.8894), class = c("POSIXct",
"POSIXt"), tzone = ""), level = c(723.3, 710.3, 709.8, 722.9,
722.8, 722.3, 721.6, 706.6, 720.4, 705.9, 680.1, 719.3, 705.9,
693.1, 718.1, 718.1, 717.3, 717.2, 716.9, 716.6)), .Names = c("ReceptionTime",
"d2w", "timedate", "level"), row.names = c(NA, 20L), class = "data.frame")
CR300_Wai1
structure(list(RECORD = 73027:73046, Temp_C = c(24.62861, 24.62332,
24.61533, 24.60857, 24.60189, 24.59733, 24.59068, 24.58404, 24.57869,
24.57327, 24.56781, 24.5606, 24.55551, 24.55218, 24.54648, 24.5416,
24.5358, 24.5319, 24.52781, 24.52294), Turb_BS = c(94.50522,
88.65939, 109.354, 57.71527, 134.1903, 46.37191, 78.17719, 52.22319,
58.07111, 96.95719, 51.47488, 44.65616, 70.43825, 99.58217, 93.68374,
87.4787, 175.5395, 167.6757, 110.8119, 132.5971), Turb_SS = c(36.63349,
34.31228, 37.02223, 32.97258, 36.68553, 33.82083, 37.43391, 33.43639,
31.17306, 33.6327, 34.69954, 30.99891, 34.69988, 33.64369, 32.54948,
32.1177, 32.86558, 48.97706, 30.65004, 33.71646), Temp_C_2 = c(24.9014,
24.89474, 24.88837, 24.88279, 24.87574, 24.86852, 24.86357, 24.85751,
24.85236, 24.84759, 24.84091, 24.83577, 24.83192, 24.82713, 24.8229,
24.81832, 24.81237, 24.80821, 24.8051, 24.80015), WD_OBS = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), Lvl_m = c(0.6907353, 0.6905226, 0.6896195, 0.6890779,
0.6881586, 0.6878724, 0.6862501, 0.6848835, 0.6844589, 0.6837503,
0.6836612, 0.6831629, 0.6821692, 0.6812283, 0.6799452, 0.6791196,
0.6782504, 0.6772775, 0.6763596, 0.6755115), timedate = structure(c(1533895500,
1533895800, 1533896100, 1533896400, 1533896700, 1533897000, 1533897300,
1533897600, 1533897900, 1533898200, 1533898500, 1533898800, 1533899100,
1533899400, 1533899700, 1533900000, 1533900300, 1533900600, 1533900900,
1533901200), class = c("POSIXct", "POSIXt"), tzone = "")), .Names = c("RECORD",
"Temp_C", "Turb_BS", "Turb_SS", "Temp_C_2", "WD_OBS", "Lvl_m",
"timedate"), row.names = c(NA, 20L), class = "data.frame")

Here is a solution using mock data (next time provide a sample of your data) :
library(tidyverse)
library(lubridate)
#>
#> Attachement du package : 'lubridate'
#> The following object is masked from 'package:base':
#>
#> date
# mock data: three series covering the same day, sampled every 15, 30 and
# 60 minutes respectively
time_15m <- seq(
  from = as.POSIXct("2018-08-30 00:00:00"),
  to = as.POSIXct("2018-08-31 00:00:00"),
  by = "15 min"
)
time_30m <- seq(
  from = as.POSIXct("2018-08-30 00:00:00"),
  to = as.POSIXct("2018-08-31 00:00:00"),
  by = "30 min"
)
time_60m <- seq(
  from = as.POSIXct("2018-08-30 00:00:00"),
  to = as.POSIXct("2018-08-31 00:00:00"),
  by = "60 min"
)

# one data frame per series; each value column is derived from its timestamps
data_1 <- data.frame(time = time_15m)
data_1$var_1 <- cos(hour(time_15m) + minute(time_15m))

data_2 <- data.frame(time = time_30m)
data_2$var_2 <- sin(hour(time_30m) + minute(time_30m))

data_3 <- data.frame(time = time_60m)
data_3$var_3 <- cos(1 - hour(time_60m) + minute(time_60m))
# the kind of plot you have (prefer the 2nd version): one layer per data
# frame, each with a hard-coded colour, hence no legend
ggplot(data = data_1, mapping = aes(x = time, y = var_1)) +
  geom_point(colour = "red") +
  geom_point(
    data = data_2,
    mapping = aes(x = time, y = var_2),
    colour = "green"
  ) +
  geom_point(
    data = data_3,
    mapping = aes(x = time, y = var_3),
    colour = "blue"
  ) +
  theme_bw()
# a version with long format data: join the three series on their shared
# "time" column, then stack the value columns with pivot_longer() (the
# maintained replacement for the superseded gather()) so that colour can be
# mapped to the variable name and ggplot2 builds the legend itself
data_1 %>%
  left_join(data_2) %>% # join data from data_2 (timestep = 30m), missing data is NA
  left_join(data_3) %>% # join data from data_3 (timestep = 60m), missing data is NA
  pivot_longer(
    cols = c(var_1, var_2, var_3), # stack var_1, var_2 and var_3 in a single column
    names_to = "variable_name",
    values_to = "variable_value"
  ) %>%
  ggplot(aes(x = time, y = variable_value, color = variable_name)) +
  geom_point(size = 2) +
  theme_bw()
#> Joining, by = "time"
#> Joining, by = "time"
#> Warning: Removed 120 rows containing missing values (geom_point).
Created on 2018-08-22 by the reprex package (v0.2.0).
EDIT 1 (include provided datasets)
library(tidyverse)
# Combine both loggers on their shared timestamp column. The sample rows of the
# two sources have no timestamps in common, so full_join() keeps every row of
# each source and fills the other source's columns with NA.
dsloi_wl %>%
  full_join(cr300_Wai1) %>%
  # Scale Lvl_m by 1000, as the question's plot does (Lvl_m * 1000 against a
  # "Water Level (mm)" axis), so it is comparable with `level`; a factor of
  # 100 would leave it an order of magnitude too small.
  mutate(Lvl_m = 1000 * Lvl_m) %>%
  # Stack the three plotted columns; pivot_longer() replaces the superseded
  # gather().
  pivot_longer(
    cols = c(level, Lvl_m, Turb_SS),
    names_to = "variable_name",
    values_to = "variable_value"
  ) %>%
  ggplot(aes(x = timedate, y = variable_value, color = variable_name)) +
  geom_point() +
  # Colour is mapped to variable_name, so this scale both fixes the colours
  # and produces the legend.
  scale_color_manual(
    "Legend title",
    values = c(
      "level" = "blue",
      "Lvl_m" = "forestgreen",
      "Turb_SS" = "orange"
    )
  )
#> Joining, by = "timedate"
#> Warning: Removed 60 rows containing missing values (geom_point).
Created on 2018-08-23 by the reprex package (v0.2.0).

Related

Plot multiple geom_line and geom_smooth objects in one plot

I have somewhat messy looking dataframes, like this one:
df0
# A tibble: 3 x 9
# Groups: Sequ [1]
Sequ Speaker Utterance A_intpl A_dur B_intpl B_dur C_intpl C_dur
<int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 2 ID16.A cool >wha… 31.44786152… 10.5,17,1… 32.86993284… 9.5,16,17… 58.3368399… 14,17,17…
2 2 NA (0.228) 32.75735987… 15.5,17,1… 30.83469006… 14.5,16.9… 26.0386462… 3,17,16,…
3 2 ID16.B u:m Tenne… 32.05752604… 4.5,17,16… 29.95825107… 3.5,16,17… 55.9298614… 8,17,17,…
I want to plot the *_intpl values for each speaker (A, B, or C) for each of the three Utterances in a single chart both as line charts and as trend lines.
I'm just half successful doing this:
library(tidyr)
library(ggplot2)
library(dplyr)
# Reshape the per-speaker columns (A_/B_/C_ intpl and dur) to long format,
# split the comma-separated strings into one row per sample, and plot
# log2(intpl) against cumulative duration, one facet per utterance.
# NOTE(review): df0 is grouped by Sequ only, so cumsum(dur) keeps accumulating
# across every Event_by and Utterance row within a Sequ instead of restarting
# for each series -- confirm whether that is intended.
df0 %>%
pivot_longer(cols = contains("_"),
names_to = c("Event_by", ".value"),
names_pattern = "^(.*)_([^_]+$)") %>%
separate_rows(c(intpl, dur), sep = ",", convert = TRUE) %>%
mutate(Time = cumsum(dur)) %>%
# prefix each utterance with the last character of Speaker and freeze the
# order of first appearance as the factor level order
mutate(Utterance = paste0(sub(".*(.)$", "\\1",Speaker), ": ", Utterance),
Utterance = factor(Utterance, levels = unique(Utterance))) %>%
ggplot(aes(x = Time, y = log2(intpl),
group = Event_by,
colour = Event_by)) +
geom_line()+
geom_smooth(method = 'lm', color = "red", formula = y~x)+
facet_wrap(~ Utterance, ncol = 1, scales= "free_x")
Half successful because the line plots and trend lines are side-by-side, as if in three columns, whereas they should be in rows, one below the other - how can that be achieved?
Reproducible data:
structure(list(Sequ = c(2L, 2L, 2L), Speaker = c("ID16.A", NA,
"ID16.B"), Utterance = c("cool >what part?<", "(0.228)", "u:m Tennessee="
), A_intpl = c("31.4478615210995,31.5797510648522,31.7143985369445,31.651083739602,31.5806035086034,36.8956763912703,36.2882129597292,35.2124499461012,34.1366869324732,34.1366869324732,32.1927035724058,30.2487202123383,28.3047368522709,26.3607534922035,30.5278334848495,30.5919390424853,30.8898529369568,31.578968913188,31.9011198738002,32.1543265113196,31.9708002079533,31.966536408565,31.8762658607759,31.8994741472105,31.4215913971938,32.1510578328563,31.7863350712876,32.4685052625667,31.7422271490296,32.3286054977263,31.9998974949481,32.5177992323864,32.4727499785435,32.9310888953766,32.7592010033585,33.2231711877427,33.1593949301066,33.2432973964816,33.2569729073414,33.492144800249,33.317650964723,33.4835787832119,33.2377190454279,32.9200836384356,32.9684568771567,32.6400987016883,27.5447101464944,29.3948945479171,35.3449171857603,33.5932932239592,31.8416692621581,30.0900453003569,32.7850431084597,32.7589003618266,32.8365550655013,32.386716057622,32.8420792704881,32.6909995562489,32.6269434402016,32.7370944106334,32.7529759209752,32.6528826975113,32.3663573764448,32.7326853004792,32.6930038462418,32.8975978772676,33.1752899475416,33.2034433355001,33.0667431432803,32.6322933080614,33.2503168843178,32.7573598713719",
"32.7573598713719,32.7531704791313,32.7366130631104,32.918942216354,32.8309939530596,32.3856893430525,32.5368873543441,32.5628510484821,32.5628510484821,32.5628510484821,32.5506564332008,32.7477119716583,32.3458470743288,32.0575260428013",
"32.0575260428013,32.1628824338111,32.0093334061923,32.1461460586991,31.9080762250966,31.9469105074833,31.7431187667232,31.7194255656503,31.7394296413187,31.8594986292975,31.7498243274746,31.9069142374258,32.0835520942767,31.6257067057109,31.757232379438,31.9036689124911,32.1319749301918,31.7203280774998,31.7877137245706,32.3030946636177,32.2800139298454,32.164646135728,32.3636504940227,32.5657818936495,32.3859453482697,32.4797898358193,32.5319835105237,32.92233491509,32.8240561109448,32.664496027779,33.1835064752029,33.0366413969703,33.0406288190821,33.3232964677672,33.2206260057731,33.1537134269402,33.2783471506207,33.2933281566788,33.5322350394609,33.3815736723684,33.7905544185063,33.6143820666896,33.7490659591585,33.7260102344634,34.0721931066557,34.0455026427054,34.3735788774521,34.2888420421073,34.3913721165542,34.5982135545306,34.4417202731001,34.6586347152449,31.1590521215434,31.3276405983897,28.2379253186548,31.133030931336,34.0715906921349,35.8967950760285,35.9334551147377,35.8565504335515,35.7446081905229,35.6300325834155,35.8390086948751,35.9711743270411,36.0029493274176,35.8891056768339"
), A_dur = c("10.5,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,0.5",
"15.5,17,17,16,17,17,16,17,17,16,17,17,16,12.5", "4.5,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,5.5"
), B_intpl = c("32.8699328424689,32.8154348109057,32.5454364786882,32.408257038977,32.5304564519672,32.3270203236281,31.9233218634346,32.0166346064182,31.7360745988363,31.7546527359571,31.8603220354065,31.6520061326962,31.5603191463274,31.3357561466519,31.0976090032219,31.1405090978825,31.1697180784961,31.0863999545386,31.3126984044729,30.580776446803,30.7137016246273,31.0801914571091,31.2343922096768,31.2749857511594,31.3488604642844,30.9327390960718,31.0750482778561,31.1849119826023,31.4180114886183,31.5284273181104,31.147361398529,31.1128597713973,31.5551385744611,31.7479939892741,31.5890352680344,31.5470790538009,31.5427330200078,31.3901913024084,31.5423214446953,31.4814325586741,31.4937336232021,31.3483738841556,31.2516462059018,31.2233881922543,31.2572951780583,31.0087226975291,31.1197589042273,31.053748381687,30.8202174718598,30.845143129195,30.8727194789634,30.4231467151428,30.7254093759809,30.2757746547116,30.6047530953025,29.6835591414008,28.257421076205,29.4634886416064,29.183064807185,28.6935506287734,29.3989017421637,30.8936090542518,30.6884831327852,30.805770713392,30.6938909098627,30.8317757801268,30.8509115577427,30.6836198471168,30.7979978629801,31.0260101704105,30.6248844591805,30.8346900656087",
"30.8346900656087,30.9826158466835,29.814086001996,29.7839590794955,30.7928804535206,31.1589874726521,31.0547403039501,31.2268131145794,31.155503802286,31.3036925274762,31.4782621660348,31.0928322383151,31.589958621025,29.9582510795225",
"29.9582510795225,29.9796434055214,29.9405638729798,30.2602098442174,30.5011865525849,30.6753859842987,28.9331380886365,30.7736467776919,30.8457967803438,30.843630408183,30.8767570425033,30.9178344980247,30.734598946287,30.8877440413271,30.9225051837881,30.9534076039184,31.0172861192043,30.9371712793451,30.9806052132295,31.0593603717961,31.1156928565737,30.4713263393479,26.028518302418,28.1426546887905,29.4308434671559,30.7190322455213,31.2289674937063,31.7389027418913,32.2488379900763,32.7587732382613,33.2687084864463,33.7786437346312,34.2885789828162,34.7985142310012,35.3084494791862,35.8183847273712,36.3283199755562,36.8382552237412,37.3481904719262,37.8581257201112,38.3680609682962,25.5986933949893,29.7968031963901,30.5336819967028,30.1876589408847,30.4260367500101,30.2997107671214,30.3429716412578,30.3537316791924,30.4111899964144,30.7293520851914,30.7778983966343,30.9712137067708,30.9072589183658,31.0696990205164,30.5713926084448,31.3458855877875,31.4169903025083,31.5148974986093,31.5972499257413,31.2293401943969,31.2033325602348,31.1657434266985,30.6784877073261,30.6991365599664,30.6763195188897"
), B_dur = c("9.5,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,2.5",
"14.5,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,13.4999999999854",
"3.5,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,7.5"
), C_intpl = c("58.3368399069697,58.249224089011,59.5198368051218,58.8722012497097,58.4418996252205,58.5849059154389,59.2752163985494,52.8407480422202,51.6276603912397,48.0255346632529,44.753541512539,41.4815483618252,38.2095552111114,34.9375620603975,31.6655689096837,28.3935757589698,25.121582608256,19.4712933827274,22.0108873782783,24.5504813738291,24.8441573376901,24.6902151101703,24.4029572181118,24.9753161974674,24.8664406826514,24.8486668451201,25.1137001504163,25.1142578332509,25.4902077628339,25.4075561268027,25.6622548410237,61.2421678149908,25.1600975771354,25.6667198263373,25.442560744158,25.8736383423437,25.5859074180431,24.7860400673889,24.4337707697216,24.3214953242744,23.915753514736,23.7363185577661,23.7186569801299,23.4313514771952,23.5730151254578,62.5124513171595,23.3260531660862,23.4498217326665,23.2145314844252,57.5586745434594,63.4646233226955,23.0706406704345,23.3318690599491,62.044649715831,62.2720656330432,22.2532276715887,62.7059140614625,22.9511208849958,22.5603175709988,23.3456453893988,63.2523901625561,60.6655429980934,60.2358824325868,59.957910796633,57.3999702562457,54.8277282980263,43.0269305132552,31.2261327284841,19.425334943713,22.7319906068577,26.0386462700023",
"26.0386462700023,29.345301933147,32.6519575962917,35.9586132594364,48.3773995023798,60.7961857453232,49.4980424442242,55.9907960862667,57.2956837917999,58.1409925994177,59.025022056064,60.0098263540792,60.4028460580062,61.2629030450653,55.9298614021542",
"55.9298614021542,55.3877180252389,61.3547152702855,61.7847919095391,56.2457623439544,62.5477315546977,62.3078007189967,62.4272469013149,57.6479672147315,62.9844338801191,58.0081708266629,63.3872796098875,59.0138830718112,58.0612924481098,58.38680047729,58.687179350318,63.8724230039733,63.4126777597892,63.6865154626743,63.5670658627636,63.4496590540706,63.7595297692908,58.9069708176601,63.4547681163061,64.3198376700797,63.415319961042,64.0985879957056,64.1201809531605,63.677902665454,64.1934303628317,64.4682003346273,64.2868853545462,24.8444135816353,64.1579626357752,63.8897139146875,58.5472675827292,64.5784992977498,64.0848591719068,63.8841268679761,64.2901359712354,64.395692486112,64.5425896391638,64.8060565909917,64.3618830026368,64.7088481705444,64.5005944199885,64.5540289192148,64.7408010459365,63.378880767685,63.3415589069662,63.5362700331647,63.5924807719723,63.575801461932,63.6799360982113,64.0041021410894,64.3144923757986,63.8692943755376,63.8594574363473,64.2731841085802,63.3314657812309,64.2758880216293,64.1011768977101,64.0261661917799,64.2865302330478,63.724697791255,64.1202175712152"
), C_dur = c("14,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,14",
"3,17,16,17,17,16,17,17,16,17,17,16,17,17,8", "8,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,2"
)), row.names = c(NA, -3L), groups = structure(list(Sequ = 2L,
.rows = structure(list(1:3), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -1L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
One possible solution is to use the grid.arrange() function from the gridExtra package (loaded together with grid).
I've built a separate chart for each combination in your data and then combined them into a single arranged chart.
# Long-format version of df0: one row per sample, with Event_by taken from the
# column prefix and intpl/dur split out of the comma-separated strings.
df1 <- df0 %>%
  pivot_longer(
    cols = contains("_"),
    names_to = c("Event_by", ".value"),
    names_pattern = "^(.*)_([^_]+$)"
  ) %>%
  separate_rows(c(intpl, dur), sep = ",", convert = TRUE) %>%
  mutate(
    Time = cumsum(dur),
    # prefix the utterance with the last character of Speaker ...
    Utterance = paste0(sub(".*(.)$", "\\1", Speaker), ": ", Utterance),
    # ... and keep the order of first appearance as the level order
    Utterance = factor(Utterance, levels = unique(Utterance))
  )
Store the chart objects in the environment:
# Build one chart per (Event_by, Utterance) pair and store it in the current
# environment under the name "<Event_by><Utterance>", e.g. "AA: cool >what part?<".
for (event in unique(df1$Event_by)) {
  for (utt in levels(df1$Utterance)) {
    assign(
      x = paste0(event, utt),
      value = ggplot(
        data = df1[df1$Event_by == event & df1$Utterance == utt, ],
        mapping = aes(x = Time, y = log2(intpl))
      ) +
        geom_line() +
        geom_smooth(method = "lm", colour = "red", formula = y ~ x)
    )
  }
}
Create the gridded chart:
# Two library() calls cannot share a line without a separator; load each
# package on its own line.
library(gridExtra)
library(grid)

# Arrange the nine stored charts on a 3-row grid. The charts are referenced by
# the backtick-quoted names under which they were stored in the environment;
# they are listed series by series (A, B, C), three utterances each.
grid.arrange(
  `AA: cool >what part?<`,
  `AB: u:m Tennessee=`,
  `ANA: (0.228)`,
  `BA: cool >what part?<`,
  `BB: u:m Tennessee=`,
  `BNA: (0.228)`,
  `CA: cool >what part?<`,
  `CB: u:m Tennessee=`,
  `CNA: (0.228)`,
  nrow = 3
)
That said, I think there should be a better solution for this.
You can also explore the articles below on arranging plots:
http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/81-ggplot2-easy-way-to-mix-multiple-graphs-on-the-same-page/
https://ggplot2-book.org/facet.html
Moreover, no theming has been added to my solution.

How to reorder a graph with multiple variable based on one value?

I am trying to reorder the following graph based on the rank of the lower confidence bound (conf.low). This means that Austria (AU) should be the first country, Bulgaria (BG) the second and Belgium (BE) the third. I know there is a way to do it manually by choosing the order of the country variable, but I would prefer to do it automatically since I have 30 countries. Could someone help?
Here is the data and the code:
# Example data: one estimate with lower/upper confidence bounds per country.
df <- data.frame(
  cntry = factor(c("AU", "BE", "BG"), levels = c("AU", "BE", "BG")),
  estimate = c(0.0053, 0.01740, 0.0036),
  conf.low = c(-0.0257, 0.0005, -0.0006),
  conf.high = c(0.0365, 0.0343, 0.0079)
)
# Dot-and-interval plot: one point per country at its estimate, a text label
# above it, and a horizontal range from conf.low to conf.high.
# arrange() only reorders rows; the y-axis order still follows the factor
# levels of cntry.
# NOTE(review): cntry is a factor with labels "AU"/"BE"/"BG", so `cntry==1`
# appears never to be TRUE and replace() blanks no label -- confirm intent.
df %>%
arrange(estimate) %>%
mutate(label = replace(round(estimate, 3),cntry==1, '')) %>%
ggplot(aes(estimate, cntry,label=label)) +
geom_point()+
geom_text(vjust= -1) +
geom_linerange(mapping=aes(xmin=conf.low , xmax=conf.high, y=cntry)) +
# NOTE(review): second point layer with the same x/y as the first one above
geom_point(mapping=aes(x=estimate, y=cntry))
Using forcats::fct_reorder() you could do this:
library(dplyr)
library(ggplot2)
library(forcats)
# Dot-and-interval plot with the countries ordered by their lower confidence
# bound. The reordering is done once on the column itself, so every layer
# inherits the same y mapping instead of some layers overriding it with the
# un-reordered factor.
df %>%
  mutate(
    # .desc = TRUE puts the smallest conf.low last in the level order, i.e.
    # at the top of the y-axis
    cntry = fct_reorder(cntry, conf.low, .desc = TRUE),
    # The former `replace(..., cntry==1, '')` never matched a factor label; its
    # only effect was to turn the rounded estimate into text, which is kept.
    label = as.character(round(estimate, 3))
  ) %>%
  ggplot(aes(x = estimate, y = cntry, label = label)) +
  geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
  geom_point() + # drawn once, on top of the interval
  geom_text(vjust = -1) +
  ylab("Country")
Created on 2021-04-22 by the reprex package (v2.0.0)
data
# Example data: one estimate with lower/upper confidence bounds per country.
df <- data.frame(
  cntry = factor(c("AU", "BE", "BG"), levels = c("AU", "BE", "BG")),
  estimate = c(0.0053, 0.01740, 0.0036),
  conf.low = c(-0.0257, 0.0005, -0.0006),
  conf.high = c(0.0365, 0.0343, 0.0079)
)

Adjust area from geom_area to a line from geom_line

I'm trying to make an hourly dispatch curve from generation and energy consumption data, which have the characteristic that a power balance (generation minus consumption) gives values very close to zero.
The generation data also include net interchange values, which are negative when the power system is exporting energy and positive when the system is importing energy to meet the consumption.
Thus, for the plot created with geom_area and geom_line to be correct, the black line (consumption) needs to line up with the top of the generation area, so that there is no gap between the area and the black line. In my attempts I couldn't achieve this. As you can see, even though the energy balance comes out to zero, there is a gap between 19 and 20 hours. I don't know what is wrong. Does anyone have an idea how to do that?
Thanks in advance.
Data to the plot:
# Hourly generation by source (four sources x four hours) and the matching
# net load, with the day/hour strings parsed into POSIXct.
generation <- data.frame(
  dayHour = rep(
    c("18/11/2018 18:00", "18/11/2018 19:00", "18/11/2018 20:00", "18/11/2018 21:00"),
    times = 4
  ),
  power = c(
    -1364.290, -433.110, 1132.39, 749.48, # net interchange
    463.75, 467.8, 469.35, 436.51,        # gas
    2025.5, 2133.07, 2306.85, 2304.91,    # hydro
    211.52, 213.16, 214.33, 214.59        # biomass
  ),
  label = rep(c("net interchange", "gas", "hydro", "biomass"), each = 4)
)
# fix the level order explicitly instead of relying on alphabetical order
generation[["label"]] <- factor(
  generation[["label"]],
  levels = c("net interchange", "gas", "hydro", "biomass")
)

net.load <- data.frame(
  dayHour = c("18/11/2018 18:00", "18/11/2018 19:00", "18/11/2018 20:00", "18/11/2018 21:00"),
  power = c(1336.48, 2380.91, 4122.91, 3705.49),
  label = rep("net load", times = 4)
)

# parse the day/month/year hour:minute strings in the session's time zone
generation[["dayHour"]] <- as.POSIXct(
  strptime(generation[["dayHour"]], format = "%d/%m/%Y %H:%M")
)
net.load[["dayHour"]] <- as.POSIXct(
  strptime(net.load[["dayHour"]], format = "%d/%m/%Y %H:%M")
)
Power balance
# Power balance per hour: the four generation sources added up in the order
# biomass, hydro, gas, net interchange, minus the net load.
balance_sources <- c("biomass", "hydro", "gas", "net interchange")
pb <- Reduce(
  `+`,
  lapply(balance_sources, function(src) filter(generation, label == src)$power)
) - net.load$power
summary(pb)
Dispatch curve
# Dispatch curve: generation drawn as areas filled by source, with the net
# load drawn as a line on top.
# NOTE: geom_line() maps colour = label but also sets the constant
# colour = "black"; the constant takes precedence, so the line has no colour
# legend and the `colour` title given to labs() has nothing to label.
ggplot() +
geom_area(data = generation,
aes(y = power,
x = dayHour,
fill = label)) +
geom_line(data = net.load,
aes(y = power,
x = dayHour,
colour = label),
size = 1.2,
colour = "black") +
labs(fill = "generation",
colour = 'net load')
It looks like position_stack is getting confused when the interpolation crosses the x-axis.
To fix it, you can interpolate manually before plotting (e.g. with approx):
library(tidyverse)
# Generation by source at four hourly timestamps: the same four POSIXct
# instants repeated for each of the four sources.
generation <- data.frame(
  dayHour = structure(
    rep(c(1542585600, 1542589200, 1542592800, 1542596400), times = 4),
    class = c("POSIXct", "POSIXt"),
    tzone = ""
  ),
  power = c(
    -1364.29, -433.11, 1132.39, 749.48, # net interchange
    463.75, 467.8, 469.35, 436.51,      # gas
    2025.5, 2133.07, 2306.85, 2304.91,  # hydro
    211.52, 213.16, 214.33, 214.59      # biomass
  ),
  label = factor(
    rep(c("net interchange", "gas", "hydro", "biomass"), each = 4),
    levels = c("net interchange", "gas", "hydro", "biomass")
  )
)
# Linearly interpolate each source onto 501 evenly spaced time points. Every
# source has the same four timestamps, so all sources end up on the same grid
# and can be stacked (and summed) point by point.
generation_interpolated <- generation %>%
  group_by(label) %>%
  summarise(data = list(as_tibble(approx(dayHour, power, n = 501)))) %>%
  # name the list-column to expand: calling unnest() with no column is
  # deprecated and warns in current tidyr
  unnest(data) %>%
  # approx() returns x as a plain number (seconds since the epoch); turn it
  # back into a date-time for plotting
  mutate(x = as.POSIXct(x, origin = "1970-01-01", tz = "UTC"))

# Sum of all sources at each interpolated time point; drawn as the line on top
# of the stacked areas.
net_power_interpolated <- generation_interpolated %>%
  group_by(x) %>%
  summarise(y = sum(y))
# Stacked areas per source with the summed series drawn as a line on top.
ggplot(data = generation_interpolated, mapping = aes(x = x, y = y)) +
  geom_area(mapping = aes(fill = label)) +
  geom_line(data = net_power_interpolated)
To see how approx works, a simpler, ungrouped example:
# Three known points, linearly interpolated onto 11 evenly spaced x values
# spanning the same range.
df <- data.frame(
  x = c(0, 5, 10),
  y = c(0, 20, 10)
)
interpolated <- with(df, approx(x, y, n = 11))
str(interpolated)
#> List of 2
#> $ x: int [1:11] 0 1 2 3 4 5 6 7 8 9 ...
#> $ y: num [1:11] 0 4 8 12 16 20 18 16 14 12 ...
# Interpolated points joined by a line, with the three original points
# highlighted as larger blue dots.
ggplot(data = as.data.frame(interpolated), mapping = aes(x = x, y = y)) +
  geom_line() +
  geom_point() +
  geom_point(data = df, colour = "dodgerblue", size = 4)

Faceting on y axis is plotting a single point in R shiny using ggplotly for visualisation

I have a dataframe; below I attach the dput output of that dataframe and the code snippet that plots it with ggplotly. I am using R Shiny to build the application. If I don't use faceting, I get the required output. But when I use faceting, the groupings appear on the y axis of the graph, yet only one point is plotted, at the bottom of the first row.
dput(head(sub_data))
Output of dput function.
structure(list(RT_ID = structure(c(1L, 1L, 1L, 1L, 1L, 1L),
.Label=c("RT_007606","RT_007950", "RT_007991", "RT_008100", "RT_008423",
"RT_009020", "RT_009122", "RT_009134", "RT_009715",
"RT_009838", "RT_009841", "RT_009976", "RT_010015",
"RT_010118", "RT_010154", "RT_010363", "RT_010448",
"RT_010927", "RT_011560", "RT_011722", "RT_000189",
"RT_000260", "RT_000278", "RT_000887", "RT_000891",
"RT_001080", "RT_001874", "RT_001925", "RT_001987",
"RT_002048", "RT_002249", "RT_002465", "RT_002806",
"RT_003107", "RT_003175", "RT_003251", "RT_003590",
"RT_003614", "RT_003918", "RT_004664", "RT_004719",
"RT_004807", "RT_005181", "RT_005298", "RT_005896",
"RT_005951", "RT_006598", "RT_006780", "RT_006851",
"RT_007061", "RT_007298", "RT_007588", "RT_008718",
"RT_008756", "RT_008992", "RT_009371", "RT_009449",
"RT_010044", "RT_010206", "RT_010550", "RT_010767",
"RT_010858", "RT_010892", "RT_011868", "RT_011941",
"RT_012216", "RT_012221", "RT_012355", "RT_012383",
"RT_012477", "RT_012742", "RT_012810", "RT_012870",
"RT_013172", "RT_013390", "RT_013442", "RT_013471",
"RT_013510", "RT_013764", "RT_000030", "RT_000139",
"RT_000142", "RT_000319", "RT_000330", "RT_000527",
"RT_000895", "RT_000987", "RT_001185", "RT_001188",
"RT_001397", "RT_001430", "RT_001663", "RT_001809",
"RT_002778", "RT_002788", "RT_003020", "RT_003678",
"RT_003838", "RT_004276", "RT_004539", "RT_004706",
"RT_004903", "RT_005367", "RT_005386", "RT_005796",
"RT_005936", "RT_006059", "RT_06162", "RT_006265",
"RT_006438", "RT_006547", "RT_006550", "RT_006653",
"RT_006758", "RT_006995", "RT_006996", "RT_007170",
"RT_007179", "RT_007291", "RT_007341", "RT_007576",
"RT_007639", "RT_007852", "RT_007939", "RT_008424",
"RT_008428", "RT_008498", "RT_008567", "RT_008710",
"RT_009103", "RT_009105", "RT_009336", "RT_009811",
"RT_009901", "RT_010525", "RT_010569", "RT_010635",
"RT_010900", "RT_010902", "RT_011118", "RT_011163",
"RT_011310", "RT_011351", "RT_011561", "RT_011630",
"RT_011854", "RT_011899", "RT_012077", "RT_012436",
"RT_012439", "RT_012523", "RT_012546", "RT_012814",
"RT_013262", "RT_013314", "RT_013854", "RT_000049",
"RT_000081", "RT_000267", "RT_000403", "RT_000477",
"RT_000624", "RT_000709", "RT_000739", "RT_001151",
"RT_001396", "RT_001507", "RT_002198", "RT_002500",
"RT_002524", "RT_002850", "RT_002855", "RT_003239",
"RT_003273", "RT_003344", "RT_003408", "RT_003826",
"RT_003882", "RT_004056", "RT_004082", "RT_004248",
"RT_004293", "RT_004610", "RT_004625", "RT_004780",
"RT_004810", "RT_005169", "RT_005526", "RT_005655",
"RT_006102", "RT_006334", "RT_006424", "RT_006506",
"RT_006507", "RT_006720", "RT_006889", "RT_006969",
"RT_007018", "RT_007069", "RT_007287", "RT_007478",
"RT_007897", "RT_007945", "RT_007999", "RT_008070",
"RT_008671", "RT_008679", "RT_008968", "RT_009252",
"RT_009615", "RT_009809", "RT_010119", "RT_010508",
"RT_010624", "RT_010835", "RT_010910", "RT_010927",
"RT_010933", "RT_011260", "RT_011671", "RT_011854",
"RT_012273", "RT_012641", "RT_012690", "RT_012706",
"RT_012839", "RT_013125"), class = "factor"), Date = structure(c(15939,
15940, 15943, 15946, 15948, 15951), class = "Date"), Event_type = structure(c(5L,
5L, 5L, 5L, 5L, 5L), .Label = c("Admission", "Blast-High", "Blast-Low",
"Discharge", "Examination", "RBC-High", "RBC-Low", "RBC-Normal",
"Ultrasound", "WBC-High", "WBC-Low", "WBC-Normal", "X-ray"), class = "factor"),
Value = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), x2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("PID001",
"PID002", "PID003", "PID004", "PID005", "PID006", "PID007",
"PID008", "PID009", "PID010", "PID011", "PID012", "PID013",
"PID014", "PID015", "PID016", "PID017", "PID018", "PID019",
"PID020", "PID021", "PID022", "PID023", "PID024", "PID025",
"PID026", "PID027", "PID028", "PID029", "PID030", "PID031",
"PID032", "PID033", "PID034", "PID035", "PID036", "PID037",
"PID038", "PID039", "PID040", "PID041", "PID042", "PID043",
"PID044", "PID045", "PID046", "PID047", "PID048", "PID049",
"PID050", "PID051", "PID052", "PID053", "PID054", "PID055",
"PID056", "PID057", "PID058", "PID059", "PID060", "PID061",
"PID062", "PID063", "PID064", "PID065", "PID066", "PID067",
"PID068", "PID069", "PID070", "PID071", "PID072", "PID073",
"PID074", "PID075", "PID076", "PID077", "PID078", "PID079",
"PID080", "PID081", "PID082", "PID083", "PID084", "PID085",
"PID086", "PID087", "PID088", "PID089", "PID090", "PID091",
"PID092"), class = "factor"), separate = c("EXM", "EXM",
"EXM", "EXM", "EXM", "EXM"), days = c(15939, 15940, 15943,
15946, 15948, 15951), Patient_ID = structure(c(264L, 264L,
264L, 264L, 264L, 264L), .Label = c("PID092_ADM", "PID091_EXM",
"PID091_DIS", "PID091_ADM", "PID090_EXM", "PID090_DIS", "PID090_ADM",
"PID089_EXM", "PID088_EXM", "PID088_DIS", "PID088_ADM", "PID087_EXM",
"PID087_DIS", "PID087_ADM", "PID086_EXM", "PID085_EXM", "PID085_DIS",
"PID085_ADM", "PID084_EXM", "PID084_DIS", "PID084_ADM", "PID083_EXM",
"PID083_DIS", "PID083_ADM", "PID082_EXM", "PID082_DIS", "PID082_ADM",
"PID081_EXM", "PID081_DIS", "PID081_ADM", "PID080_EXM", "PID080_DIS",
"PID080_ADM", "PID079_EXM", "PID079_DIS", "PID079_ADM", "PID078_EXM",
"PID078_DIS", "PID078_ADM", "PID077_EXM", "PID077_DIS", "PID077_ADM",
"PID076_EXM", "PID076_DIS", "PID076_ADM", "PID075_EXM", "PID075_DIS",
"PID075_ADM", "PID074_EXM", "PID074_DIS", "PID074_ADM", "PID073_EXM",
"PID073_DIS", "PID073_ADM", "PID072_EXM", "PID072_DIS", "PID072_ADM",
"PID071_EXM", "PID071_DIS", "PID070_EXM", "PID070_DIS", "PID070_ADM",
"PID069_EXM", "PID069_DIS", "PID069_ADM", "PID068_EXM", "PID068_DIS",
"PID068_ADM", "PID067_EXM", "PID067_DIS", "PID067_ADM", "PID066_EXM",
"PID066_DIS", "PID066_ADM", "PID065_EXM", "PID065_DIS", "PID065_ADM",
"PID064_EXM", "PID064_DIS", "PID064_ADM", "PID063_EXM", "PID063_DIS",
"PID063_ADM", "PID062_EXM", "PID062_DIS", "PID062_ADM", "PID061_EXM",
"PID061_DIS", "PID061_ADM", "PID060_EXM", "PID060_DIS", "PID060_ADM",
"PID059_EXM", "PID059_DIS", "PID059_ADM", "PID058_EXM", "PID058_DIS",
"PID058_ADM", "PID057_EXM", "PID057_DIS", "PID057_ADM", "PID056_EXM",
"PID056_DIS", "PID056_ADM", "PID055_EXM", "PID055_DIS", "PID055_ADM",
"PID054_EXM", "PID054_DIS", "PID054_ADM", "PID053_EXM", "PID053_DIS",
"PID053_ADM", "PID052_EXM", "PID052_DIS", "PID052_ADM", "PID051_EXM",
"PID051_DIS", "PID051_ADM", "PID050_EXM", "PID050_DIS", "PID050_ADM",
"PID049_EXM", "PID049_DIS", "PID049_ADM", "PID048_EXM", "PID048_DIS",
"PID048_ADM", "PID047_EXM", "PID047_DIS", "PID047_ADM", "PID046_EXM",
"PID046_DIS", "PID046_ADM", "PID045_EXM", "PID045_DIS", "PID045_ADM",
"PID044_EXM", "PID044_DIS", "PID044_ADM", "PID043_EXM", "PID043_DIS",
"PID043_ADM", "PID042_EXM", "PID042_DIS", "PID042_ADM", "PID041_EXM",
"PID041_DIS", "PID041_ADM", "PID040_EXM", "PID040_DIS", "PID040_ADM",
"PID039_EXM", "PID039_DIS", "PID039_ADM", "PID038_EXM", "PID038_DIS",
"PID038_ADM", "PID037_EXM", "PID037_DIS", "PID037_ADM", "PID036_EXM",
"PID036_DIS", "PID036_ADM", "PID035_EXM", "PID035_DIS", "PID035_ADM",
"PID034_EXM", "PID034_DIS", "PID034_ADM", "PID033_EXM", "PID033_DIS",
"PID033_ADM", "PID032_EXM", "PID032_DIS", "PID032_ADM", "PID031_EXM",
"PID031_DIS", "PID031_ADM", "PID030_EXM", "PID030_DIS", "PID030_ADM",
"PID029_EXM", "PID029_DIS", "PID029_ADM", "PID028_EXM", "PID028_DIS",
"PID028_ADM", "PID027_EXM", "PID027_DIS", "PID027_ADM", "PID026_DIS",
"PID025_EXM", "PID025_DIS", "PID025_ADM", "PID024_EXM", "PID024_DIS",
"PID024_ADM", "PID023_EXM", "PID023_DIS", "PID023_ADM", "PID022_EXM",
"PID022_DIS", "PID022_ADM", "PID021_EXM", "PID021_DIS", "PID021_ADM",
"PID020_EXM", "PID020_DIS", "PID020_ADM", "PID019_EXM", "PID019_DIS",
"PID019_ADM", "PID018_EXM", "PID018_DIS", "PID018_ADM", "PID017_EXM",
"PID017_DIS", "PID017_ADM", "PID016_EXM", "PID016_DIS", "PID016_ADM",
"PID015_EXM", "PID015_DIS", "PID015_ADM", "PID014_EXM", "PID014_DIS",
"PID014_ADM", "PID013_EXM", "PID013_DIS", "PID013_ADM", "PID012_EXM",
"PID012_DIS", "PID012_ADM", "PID011_EXM", "PID011_DIS", "PID011_ADM",
"PID010_EXM", "PID010_DIS", "PID010_ADM", "PID009_EXM", "PID009_DIS",
"PID008_EXM", "PID008_DIS", "PID008_ADM", "PID007_EXM", "PID007_DIS",
"PID007_ADM", "PID006_EXM", "PID006_DIS", "PID006_ADM", "PID005_EXM",
"PID005_DIS", "PID005_ADM", "PID004_EXM", "PID004_DIS", "PID004_ADM",
"PID003_EXM", "PID003_DIS", "PID003_ADM", "PID002_EXM", "PID002_DIS",
"PID002_ADM", "PID001_EXM", "PID001_DIS", "PID001_ADM"), class="factor")),.Names = c("RT_ID",
"Date", "Event_type", "Value", "x2", "separate", "days", "Patient_ID"
), row.names = c(NA, 6L), class = "data.frame")
code snippet for plotting the data using ggplot and ggplotly.
# Patient timeline: one square marker per event, coloured by event type,
# with one facet row per value of `x2`.
g <- ggplot(
  sub_data,
  aes(
    x = Date,
    y = Patient_ID,
    color = Event_type,
    # Extra `text` aesthetic carrying the per-point description.
    # NOTE(review): the sample data shown in this file lists an `RT_ID`
    # column rather than `MR_ID` -- confirm which column is intended.
    text = paste(
      "Event Date:", sub_data$Date, "<br>",
      "Value:", sub_data$Value, "<br>",
      "MR_ID: ", sub_data$MR_ID
    )
  )
) +
  geom_point(size = 1, shape = 22, stroke = 2) +
  labs(x = "Date", y = "Patient ID", color = "") +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    plot.margin = unit(c(0, 1, 1, 1), "cm")
  ) +
  # Keep every level of Event_type in the colour scale.
  scale_colour_discrete(drop = TRUE, limits = levels(sub_data$Event_type)) +
  scale_x_date(expand = c(0, 5)) +
  scale_y_discrete(expand = c(0, 1)) +
  facet_grid(factor(x2) ~ .)

# Convert to plotly; size comes from the `input` values, legend is laid out
# horizontally and anchored on the left.
ggplotly(
  g,
  width = as.numeric(input$width),
  height = as.numeric(input$height)
) %>%
  layout(
    legend = list(
      orientation = "h",
      x = 0,
      y = as.numeric(input$height),
      margin = 0,
      pad = 0,
      xanchor = "left"
    ),
    yaxis = list(title = "Patient ID")
  )

Plot two sets of coordinates on geographical map

I created two sets of vectors to plot two sets of data on a map.
Every time I run it, RStudio crashes.
What am I missing?
# Script from the question: split the sites on the computed percentage and
# overlay both groups on a ggmap base map.
library(ggmap)
setwd("d:/GIS/31R")
sep <- read.csv("California_SEP_assets_csv.csv")
# Keep only the columns whose names match the pattern "SEP.11".
Sub1 <- sep[grep("SEP.11", names(sep))]
# Percentage: row sums of the "SEP.11" columns over row sums of columns 4-7.
sep$newCol <- 100*rowSums(Sub1)/rowSums(sep[4:7])
library(sp)
# Split rows on whether column 8 is >= 50 (presumably column 8 is newCol --
# confirm against the CSV layout). When both groups are present, [[1]] is the
# FALSE group (< 50) and [[2]] is the TRUE group (>= 50).
lst <- split(sep, sep[,8] >= 50)
under50 <- lst[[1]]
over50 <- lst[[2]]
# Numeric coordinate matrices; these are separate objects and do not add any
# columns to under50 / over50.
coords <- cbind(Longitude = as.numeric(as.character(under50$Longitude)),Latitude=as.numeric(as.character(under50$Latitude)))
coords2 <- cbind(Longitude2 = as.numeric(as.character(over50$Longitude)),Latitude2=as.numeric(as.character(over50$Latitude)))
map <- qmap('Yorba Linda', zoom = 11, maptype = 'hybrid')
# NOTE(review): the second layer maps Longitude2/Latitude2 with data=over50,
# but nothing above creates columns with those names in over50 (they exist
# only as column names of the coords2 matrix) -- likely related to the
# reported failure; confirm.
map + geom_point(data=under50, aes(x = Longitude, y = Latitude), color="red", size = 5, alpha = 0.5) + geom_point(data=over50, aes(x = Longitude2, y = Latitude2), color="green", size = 5, alpha = 0.5)
Original Code
My original code plotted all points
# Original version: draw every site, first with sp's plot method and then as
# a single red layer on top of the ggmap base map.
library(ggmap)
setwd("d:/GIS/31R")
sep <- read.csv("California_SEP_assets_csv.csv")
library(sp)

# Coordinates as a plain numeric matrix (via character, in case the columns
# were read as factors).
coords <- cbind(
  Longitude = as.numeric(as.character(sep$Longitude)),
  Latitude = as.numeric(as.character(sep$Latitude))
)

# Attach the remaining attributes (all columns except 2 and 3).
sep.pts <- SpatialPointsDataFrame(
  coords,
  sep[, -(2:3)],
  proj4string = CRS("+init=epsg:4326")
)
plot(sep.pts, pch = ".", col = "darkred")

map <- qmap("Yorba Linda", zoom = 11, maptype = "hybrid")
map +
  geom_point(
    data = sep,
    aes(x = Longitude, y = Latitude),
    color = "red",
    size = 5,
    alpha = 0.5
  )
Gave this
I am able to plot points standalone, i.e.
# Standalone version: build one SpatialPointsDataFrame per group and plot a
# single group with sp's plot method.
library(ggmap)
setwd("d:/GIS/31R")
sep <- read.csv("California_SEP_assets_csv.csv")

# Percentage: row sums of the "SEP.11" columns over row sums of columns 4-7.
Sub1 <- sep[grep("SEP.11", names(sep))]
sep$newCol <- 100 * rowSums(Sub1) / rowSums(sep[4:7])

library(sp)

# Split rows on whether column 8 is >= 50.
lst <- split(sep, sep[, 8] >= 50)
under50 <- lst[[1]]
over50 <- lst[[2]]

coords <- cbind(
  Longitude = as.numeric(as.character(under50$Longitude)),
  Latitude = as.numeric(as.character(under50$Latitude))
)
under50.pts <- SpatialPointsDataFrame(
  coords,
  under50[, -(2:3)],
  proj4string = CRS("+init=epsg:4326")
)

coords2 <- cbind(
  Longitude2 = as.numeric(as.character(over50$Longitude)),
  Latitude2 = as.numeric(as.character(over50$Latitude))
)
over50.pts <- SpatialPointsDataFrame(
  coords2,
  over50[, -(2:3)],
  proj4string = CRS("+init=epsg:4326")
)

plot(over50.pts, pch = 22, col = "darkgreen")
and I replace the last line, plot(...
with
# Replacement for the final plot() call: draws the under50.pts object with
# square markers (pch 22) in dark red.
plot(under50.pts, pch = 22, col = "darkred")
I think you are making things more complicated than they need to be. If you want to color the points according to a grouping variable, just create such a variable. Based on the data you posted in this question, you can do this as follows:
library(ggmap)
library(ggplot2)

# Label every site by whether the value in column 8 reaches 50.
sep$newvar <- ifelse(sep[, 8] >= 50, "Over 50", "Under 50")

# Fetch the base map.
map <- get_map("Yorba Linda", zoom = 11, maptype = "hybrid")

# A single point layer: the grouping variable is mapped to the colour
# aesthetic inside aes(), and the manual scale assigns the two colours.
ggmap(map) +
  geom_point(
    data = sep,
    aes(x = Longitude, y = Latitude, color = newvar),
    size = 7,
    alpha = 0.6
  ) +
  scale_color_manual(
    breaks = c("Over 50", "Under 50"),
    values = c("green", "red")
  )
this gives:
Used data:
# Example data used in the answer: six sites with coordinates, four count
# columns, the computed percentage (newCol) and the grouping label (newvar).
sep <- structure(
  list(
    Site = factor(
      c("31R001", "31R002", "31R003", "31R004", "31R005", "31R006")
    ),
    Latitude = c(
      33.808874, 33.877256, 33.820825, 33.852373, 33.829697, 33.810274
    ),
    Longitude = c(
      -117.844048, -117.700135, -117.811845,
      -117.795516, -117.787532, -117.830429
    ),
    Windows.SEP.11 = c(63L, 174L, 11L, 85L, 163L, 71L),
    Mac.SEP.11 = c(0L, 1L, 4L, 0L, 0L, 50L),
    Windows.SEP.12 = c(124L, 185L, 9L, 75L, 23L, 5L),
    Mac.SEP.12 = c(0L, 1L, 32L, 1L, 0L, 50L),
    newCol = c(
      33.6898395721925, 48.4764542936288, 26.7857142857143,
      52.7950310559006, 87.6344086021505, 68.75
    ),
    newvar = c(
      "Under 50", "Under 50", "Under 50",
      "Over 50", "Over 50", "Over 50"
    )
  ),
  row.names = c(NA, 6L),
  class = "data.frame"
)
I fixed the code. However, if you can post more elegant code and explain it, I will mark as solution.
# Fixed version from the asker: both point layers map the Longitude and
# Latitude columns of their own data frame.
library(ggmap)
setwd("d:/GIS/31R")
sep <- read.csv("California_SEP_assets_csv.csv")

# Percentage: row sums of the "SEP.11" columns over row sums of columns 4-7.
Sub1 <- sep[grep("SEP.11", names(sep))]
sep$newCol <- 100 * rowSums(Sub1) / rowSums(sep[4:7])

library(sp)

# Split rows on whether column 8 is >= 50.
lst <- split(sep, sep[, 8] >= 50)
under50 <- lst[[1]]
over50 <- lst[[2]]

coords <- cbind(
  Longitude = as.numeric(as.character(under50$Longitude)),
  Latitude = as.numeric(as.character(under50$Latitude))
)
under50.pts <- SpatialPointsDataFrame(
  coords,
  under50[, -(2:3)],
  proj4string = CRS("+init=epsg:4326")
)

coords2 <- cbind(
  Longitude = as.numeric(as.character(over50$Longitude)),
  Latitude = as.numeric(as.character(over50$Latitude))
)
over50.pts <- SpatialPointsDataFrame(
  coords2,
  over50[, -(2:3)],
  proj4string = CRS("+init=epsg:4326")
)

map <- qmap("Yorba Linda", zoom = 11, maptype = "hybrid")

# Green layer for the >= 50 group, red layer for the < 50 group.
map +
  geom_point(
    data = over50,
    aes(x = Longitude, y = Latitude),
    color = "green",
    size = 5,
    alpha = 0.5
  ) +
  geom_point(
    data = under50,
    aes(x = Longitude, y = Latitude),
    color = "red",
    size = 5,
    alpha = 0.5
  )
Format of the .csv file
Site Latitude Longitude Windows.SEP.11 Mac.SEP.11 Windows.SEP.12 Mac.SEP.12 newCol
1 31R001 33.80887 -117.8440 63 0 124 0 33.68984
2 31R002 33.87726 -117.7001 174 1 185 1 48.47645
3 31R003 33.82082 -117.8118 11 4 9 32 26.78571
4 31R004 33.85237 -117.7955 85 0 75 1 52.79503
5 31R005 33.82970 -117.7875 163 0 23 0 87.63441
6 31R006 33.81027 -117.8304 71 50 5 50 68.75000

Resources