I have two datasets:
one is actual count and other one is predicted counts. I want to do a pearson correlation between them.
My actual count data look like this:
My predicted counts data look like this:
I want to do pearson correlation for these two datasets for each geneID.
I have written this code:
install.packages("Rcpp")
library(Rcpp)
library("reshape2")
library("ggplot2")
# import in the actual expression values and the gene predicted values
act_cts <- read.delim("GVDS_normalized_counts_2021v1.txt", header = TRUE, sep="\t")
## fix the column names
colnames(act_cts)[1]<-"gene"
colnames(act_cts)<- substr(colnames(act_cts), 1, 7)
pred_cts<-read.delim("GVDS_PrediXcan_Test_2021v1.txt", header=TRUE, sep="\t")
colnames(pred_cts)<-substr(colnames(pred_cts), 1, 15)
## melt the predict counts, so the columns change to row entries FID, IID, gene
melt_pred_cts<-melt(pred_cts, id.vars=c("FID","IID"), variable.name="gene", value.name = "gene_exp")
## melts the actual counts, so it can be easily joined to the final prediction
melt_act_cts<-melt(act_cts, id.vars="gene", variable.name="IID", value.name = "act_gene_exp")
final_cts<-merge(melt_pred_cts,melt_act_cts)
## this takes a minute/ several minutes to run because it is joining on both gene and IID
# runs the Pearson correlation for each gene
all_genes<-unique(final_cts$gene)
pear_cor_all_df<- data.frame(gene=character(), pear_coeff=double())
## runs the correlation
for(g in all_genes)
{
wrk_cts_all<-final_cts[which(final_cts$gene==g),]
# temp working df for each gene
pear_coef_all<-cor(wrk_cts_all$gene_exp, wrk_cts_all$act_gene_exp, method="pearson")
# runs the correlation for each gene between gene_exp and act_gene_exp
new_row_all<-c(g, pear_coef_all)
pear_cor_all_df<-rbind(pear_cor_all_df, new_row_all)
#saves this to the df
}
But its not giving me the correct results.
This is data for act_count:
dput(act_counts[1:10, 1:10])
structure(list(gene = c("ENSG00000152931.6", "ENSG00000183696.9",
"ENSG00000139269.2", "ENSG00000169129.8", "ENSG00000134602.11",
"ENSG00000136237.12", "ENSG00000259425.1", "ENSG00000242284.2",
"ENSG00000235027.1", "ENSG00000228169.3"), Gene_Sy = c("ENSG00000152931.6",
"ENSG00000183696.9", "ENSG00000139269.2", "ENSG00000169129.8",
"ENSG00000134602.11", "ENSG00000136237.12", "ENSG00000259425.1",
"ENSG00000242284.2", "ENSG00000235027.1", "ENSG00000228169.3"
), Chr = c("5", "7", "12", "10", "X", "7", "15", "X", "11", "10"
), Coord = c(59783540, 48128225, 57846106, 116164515, 131157293,
22396763, 23096869, 134953994, 1781578, 116450393), HG00096 = c(0.101857770468582,
8.1838049456063, 1.19991028786682, 0.831939826228749, 27.6464223725999,
3.78850273139249, 0.0540590649819536, 0.351716382898523, 0.200791414339667,
96.1821778045089), HG00097 = c(0.0781095249582053, 5.68691050653862,
1.57357169691446, 0.0697777450667378, 24.3955715036476, 2.05096276937706,
0.112185357489692, 0.444540251941709, 0.190137938062251, 101.17926156721
), HG00099 = c(0.0489806714207954, 2.43465332606958, 0.521615781673147,
0.93108575037257, 16.4453735152148, 4.00031300285966, 0.00359181983091798,
0.227707651999832, 0.0929246302159905, 58.7830634918037), HG00100 = c(0.118597118618172,
3.83089421985197, 1.44722544015787, 0.620940765480242, 24.8066495438254,
3.27161920134705, 0.00049968321150251, 0.714112406249513, 0.108789749488722,
105.483527339859), HG00101 = c(0.00403496367614745, 6.61228835251498,
3.56579072437701, 1.66066836204679, 25.1133488775017, 1.79821591847768,
0.0293976115522442, 0.450911709524112, 0.23244822901371, 105.818192023699
), HG00102 = c(0.0109253485646219, 4.70964559086586, 1.98268073472144,
0.570481056180073, 19.2339882617972, 1.51668840574531, 0.0312661751488703,
0.491437808951175, 0.250905117203001, 136.140843495464)), row.names = c(NA,
-10L), class = c("tbl_df", "tbl", "data.frame"))
This is prd_counts:
dput(prd_counts[1:10, 1:10])
structure(list(FID = c("HG00096", "HG00097", "HG00099", "HG00100",
"HG00101", "HG00102", "HG00103", "HG00105", "HG00106", "HG00107"
), IID = c("HG00096", "HG00097", "HG00099", "HG00100", "HG00101",
"HG00102", "HG00103", "HG00105", "HG00106", "HG00107"), ENSG00000182902.8 = c(0.0223611610092831,
0.0385031316687293, -0.0682504384265577, 0.00018098416274239,
-0.045492721345375, -0.10473163051734, -0.0215970711860838, 0.060455638944161,
-0.00889260689717109, -0.102096211855105), ENSG00000183307.3 = c(0.129041336028238,
-0.13226906002202, 0.005409246530295, -0.0539556427088601, -0.00699884042001628,
-0.204743560777908, -0.0534359750800079, -0.235648260835705,
-0.10230402771496, -0.0914043464852205), ENSG00000237438.1 = c(-0.758838434524167,
-0.579236418964912, -0.695762357174973, -0.368416879945024, -0.339555280234214,
-0.809438763600528, -0.359798980325098, -0.417769387016999, -0.724636782037491,
-0.309671271758401), ENSG00000243156.2 = c(-0.58456094489168,
0.105851861253113, -0.275061563982305, -0.0406543077034047, -0.522672785138957,
-0.126100301787985, -0.288382571274346, -0.354309857822533, -0.314842662063296,
-0.141401921597711), ENSG00000099968.13 = c(0.135357355615122,
0.157616292043257, 0.180059097593111, 0.250009792099489, 0.170653230854707,
0.316157576642492, 0.314671674077333, 0.224102148083679, 0.232969333848649,
0.14963210689311), ENSG00000069998.8 = c(-0.0346986034383362,
-0.0173493017191681, 0, -0.0173493017191681, -0.645266014640116,
-0.0346986034383362, -0.0173493017191681, -0.0173493017191681,
-0.0346986034383362, 0), ENSG00000184979.8 = c(-0.160573318589815,
0.54683218159596, 0.3503062647549, 0.653899917577768, 0.321280544783323,
0.653727041876318, 0.822864620159811, 1.03780221621802, -0.195295753744408,
-0.228590172992798), ENSG00000070413.12 = c(0.775225873145799,
0.602092262450708, 1.0198591935485, 0.65587457098494, 0.306445027670957,
0.581202299884586, 0.836112660742631, 0.559373823767867, 0.46977171007116,
0.84426113999649)), row.names = c(NA, -10L), class = c("tbl_df",
"tbl", "data.frame"))
The provided test samples will not work because there are no genes in common between act_counts and prd_counts. I took the liberty of fixing that by reassigning column names:
library(dplyr)
library(tidyr)
## the line below fixes the problem with test samples
colnames(prd_counts)[3:10] <- act_counts$gene[1:8]
acts <- pivot_longer(act_counts,
cols = starts_with("HG"),
names_to = "FID",
values_to = "Actual")
prds <- pivot_longer(prd_counts,
cols = starts_with("ENSG"),
names_to = "gene",
values_to = "Predicted")
inner_join(acts, prds,
by = c("gene", "FID")) |>
select(gene, FID, Actual, Predicted) |>
group_by(gene) |>
summarize(rho = cor(Actual, Predicted))
##> # A tibble: 8 × 2
##> gene rho
##> <chr> <dbl>
##> 1 ENSG00000134602.11 -0.445
##> 2 ENSG00000136237.12 0.446
##> 3 ENSG00000139269.2 0.543
##> 4 ENSG00000152931.6 0.770
##> 5 ENSG00000169129.8 -0.802
##> 6 ENSG00000183696.9 0.405
##> 7 ENSG00000242284.2 -0.503
##> 8 ENSG00000259425.1 -0.110
I have somewhat messy looking dataframes, like this one:
df0
# A tibble: 3 x 9
# Groups: Sequ [1]
Sequ Speaker Utterance A_intpl A_dur B_intpl B_dur C_intpl C_dur
<int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 2 ID16.A cool >wha… 31.44786152… 10.5,17,1… 32.86993284… 9.5,16,17… 58.3368399… 14,17,17…
2 2 NA (0.228) 32.75735987… 15.5,17,1… 30.83469006… 14.5,16.9… 26.0386462… 3,17,16,…
3 2 ID16.B u:m Tenne… 32.05752604… 4.5,17,16… 29.95825107… 3.5,16,17… 55.9298614… 8,17,17,…
I want to plot the *_intpl values for each speaker (A, B, or C) for each of the three Utterances in a single chart both as line charts and as trend lines.
I'm just half successful doing this:
library(tidyr)
library(ggplot2)
library(dplyr)
df0 %>%
pivot_longer(cols = contains("_"),
names_to = c("Event_by", ".value"),
names_pattern = "^(.*)_([^_]+$)") %>%
separate_rows(c(intpl, dur), sep = ",", convert = TRUE) %>%
mutate(Time = cumsum(dur)) %>%
mutate(Utterance = paste0(sub(".*(.)$", "\\1",Speaker), ": ", Utterance),
Utterance = factor(Utterance, levels = unique(Utterance))) %>%
ggplot(aes(x = Time, y = log2(intpl),
group = Event_by,
colour = Event_by)) +
geom_line()+
geom_smooth(method = 'lm', color = "red", formula = y~x)+
facet_wrap(~ Utterance, ncol = 1, scales= "free_x")
Half successful because the line plots and trend lines are side-by-side, as if in three columns, whereas they should be in rows, one below the other - how can that be achieved?
Reproducible data:
structure(list(Sequ = c(2L, 2L, 2L), Speaker = c("ID16.A", NA,
"ID16.B"), Utterance = c("cool >what part?<", "(0.228)", "u:m Tennessee="
), A_intpl = c("31.4478615210995,31.5797510648522,31.7143985369445,31.651083739602,31.5806035086034,36.8956763912703,36.2882129597292,35.2124499461012,34.1366869324732,34.1366869324732,32.1927035724058,30.2487202123383,28.3047368522709,26.3607534922035,30.5278334848495,30.5919390424853,30.8898529369568,31.578968913188,31.9011198738002,32.1543265113196,31.9708002079533,31.966536408565,31.8762658607759,31.8994741472105,31.4215913971938,32.1510578328563,31.7863350712876,32.4685052625667,31.7422271490296,32.3286054977263,31.9998974949481,32.5177992323864,32.4727499785435,32.9310888953766,32.7592010033585,33.2231711877427,33.1593949301066,33.2432973964816,33.2569729073414,33.492144800249,33.317650964723,33.4835787832119,33.2377190454279,32.9200836384356,32.9684568771567,32.6400987016883,27.5447101464944,29.3948945479171,35.3449171857603,33.5932932239592,31.8416692621581,30.0900453003569,32.7850431084597,32.7589003618266,32.8365550655013,32.386716057622,32.8420792704881,32.6909995562489,32.6269434402016,32.7370944106334,32.7529759209752,32.6528826975113,32.3663573764448,32.7326853004792,32.6930038462418,32.8975978772676,33.1752899475416,33.2034433355001,33.0667431432803,32.6322933080614,33.2503168843178,32.7573598713719",
"32.7573598713719,32.7531704791313,32.7366130631104,32.918942216354,32.8309939530596,32.3856893430525,32.5368873543441,32.5628510484821,32.5628510484821,32.5628510484821,32.5506564332008,32.7477119716583,32.3458470743288,32.0575260428013",
"32.0575260428013,32.1628824338111,32.0093334061923,32.1461460586991,31.9080762250966,31.9469105074833,31.7431187667232,31.7194255656503,31.7394296413187,31.8594986292975,31.7498243274746,31.9069142374258,32.0835520942767,31.6257067057109,31.757232379438,31.9036689124911,32.1319749301918,31.7203280774998,31.7877137245706,32.3030946636177,32.2800139298454,32.164646135728,32.3636504940227,32.5657818936495,32.3859453482697,32.4797898358193,32.5319835105237,32.92233491509,32.8240561109448,32.664496027779,33.1835064752029,33.0366413969703,33.0406288190821,33.3232964677672,33.2206260057731,33.1537134269402,33.2783471506207,33.2933281566788,33.5322350394609,33.3815736723684,33.7905544185063,33.6143820666896,33.7490659591585,33.7260102344634,34.0721931066557,34.0455026427054,34.3735788774521,34.2888420421073,34.3913721165542,34.5982135545306,34.4417202731001,34.6586347152449,31.1590521215434,31.3276405983897,28.2379253186548,31.133030931336,34.0715906921349,35.8967950760285,35.9334551147377,35.8565504335515,35.7446081905229,35.6300325834155,35.8390086948751,35.9711743270411,36.0029493274176,35.8891056768339"
), A_dur = c("10.5,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,0.5",
"15.5,17,17,16,17,17,16,17,17,16,17,17,16,12.5", "4.5,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,5.5"
), B_intpl = c("32.8699328424689,32.8154348109057,32.5454364786882,32.408257038977,32.5304564519672,32.3270203236281,31.9233218634346,32.0166346064182,31.7360745988363,31.7546527359571,31.8603220354065,31.6520061326962,31.5603191463274,31.3357561466519,31.0976090032219,31.1405090978825,31.1697180784961,31.0863999545386,31.3126984044729,30.580776446803,30.7137016246273,31.0801914571091,31.2343922096768,31.2749857511594,31.3488604642844,30.9327390960718,31.0750482778561,31.1849119826023,31.4180114886183,31.5284273181104,31.147361398529,31.1128597713973,31.5551385744611,31.7479939892741,31.5890352680344,31.5470790538009,31.5427330200078,31.3901913024084,31.5423214446953,31.4814325586741,31.4937336232021,31.3483738841556,31.2516462059018,31.2233881922543,31.2572951780583,31.0087226975291,31.1197589042273,31.053748381687,30.8202174718598,30.845143129195,30.8727194789634,30.4231467151428,30.7254093759809,30.2757746547116,30.6047530953025,29.6835591414008,28.257421076205,29.4634886416064,29.183064807185,28.6935506287734,29.3989017421637,30.8936090542518,30.6884831327852,30.805770713392,30.6938909098627,30.8317757801268,30.8509115577427,30.6836198471168,30.7979978629801,31.0260101704105,30.6248844591805,30.8346900656087",
"30.8346900656087,30.9826158466835,29.814086001996,29.7839590794955,30.7928804535206,31.1589874726521,31.0547403039501,31.2268131145794,31.155503802286,31.3036925274762,31.4782621660348,31.0928322383151,31.589958621025,29.9582510795225",
"29.9582510795225,29.9796434055214,29.9405638729798,30.2602098442174,30.5011865525849,30.6753859842987,28.9331380886365,30.7736467776919,30.8457967803438,30.843630408183,30.8767570425033,30.9178344980247,30.734598946287,30.8877440413271,30.9225051837881,30.9534076039184,31.0172861192043,30.9371712793451,30.9806052132295,31.0593603717961,31.1156928565737,30.4713263393479,26.028518302418,28.1426546887905,29.4308434671559,30.7190322455213,31.2289674937063,31.7389027418913,32.2488379900763,32.7587732382613,33.2687084864463,33.7786437346312,34.2885789828162,34.7985142310012,35.3084494791862,35.8183847273712,36.3283199755562,36.8382552237412,37.3481904719262,37.8581257201112,38.3680609682962,25.5986933949893,29.7968031963901,30.5336819967028,30.1876589408847,30.4260367500101,30.2997107671214,30.3429716412578,30.3537316791924,30.4111899964144,30.7293520851914,30.7778983966343,30.9712137067708,30.9072589183658,31.0696990205164,30.5713926084448,31.3458855877875,31.4169903025083,31.5148974986093,31.5972499257413,31.2293401943969,31.2033325602348,31.1657434266985,30.6784877073261,30.6991365599664,30.6763195188897"
), B_dur = c("9.5,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,2.5",
"14.5,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,13.4999999999854",
"3.5,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17.0000000000146,16.9999999999854,16,17,16.9999999999854,16.0000000000146,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,7.5"
), C_intpl = c("58.3368399069697,58.249224089011,59.5198368051218,58.8722012497097,58.4418996252205,58.5849059154389,59.2752163985494,52.8407480422202,51.6276603912397,48.0255346632529,44.753541512539,41.4815483618252,38.2095552111114,34.9375620603975,31.6655689096837,28.3935757589698,25.121582608256,19.4712933827274,22.0108873782783,24.5504813738291,24.8441573376901,24.6902151101703,24.4029572181118,24.9753161974674,24.8664406826514,24.8486668451201,25.1137001504163,25.1142578332509,25.4902077628339,25.4075561268027,25.6622548410237,61.2421678149908,25.1600975771354,25.6667198263373,25.442560744158,25.8736383423437,25.5859074180431,24.7860400673889,24.4337707697216,24.3214953242744,23.915753514736,23.7363185577661,23.7186569801299,23.4313514771952,23.5730151254578,62.5124513171595,23.3260531660862,23.4498217326665,23.2145314844252,57.5586745434594,63.4646233226955,23.0706406704345,23.3318690599491,62.044649715831,62.2720656330432,22.2532276715887,62.7059140614625,22.9511208849958,22.5603175709988,23.3456453893988,63.2523901625561,60.6655429980934,60.2358824325868,59.957910796633,57.3999702562457,54.8277282980263,43.0269305132552,31.2261327284841,19.425334943713,22.7319906068577,26.0386462700023",
"26.0386462700023,29.345301933147,32.6519575962917,35.9586132594364,48.3773995023798,60.7961857453232,49.4980424442242,55.9907960862667,57.2956837917999,58.1409925994177,59.025022056064,60.0098263540792,60.4028460580062,61.2629030450653,55.9298614021542",
"55.9298614021542,55.3877180252389,61.3547152702855,61.7847919095391,56.2457623439544,62.5477315546977,62.3078007189967,62.4272469013149,57.6479672147315,62.9844338801191,58.0081708266629,63.3872796098875,59.0138830718112,58.0612924481098,58.38680047729,58.687179350318,63.8724230039733,63.4126777597892,63.6865154626743,63.5670658627636,63.4496590540706,63.7595297692908,58.9069708176601,63.4547681163061,64.3198376700797,63.415319961042,64.0985879957056,64.1201809531605,63.677902665454,64.1934303628317,64.4682003346273,64.2868853545462,24.8444135816353,64.1579626357752,63.8897139146875,58.5472675827292,64.5784992977498,64.0848591719068,63.8841268679761,64.2901359712354,64.395692486112,64.5425896391638,64.8060565909917,64.3618830026368,64.7088481705444,64.5005944199885,64.5540289192148,64.7408010459365,63.378880767685,63.3415589069662,63.5362700331647,63.5924807719723,63.575801461932,63.6799360982113,64.0041021410894,64.3144923757986,63.8692943755376,63.8594574363473,64.2731841085802,63.3314657812309,64.2758880216293,64.1011768977101,64.0261661917799,64.2865302330478,63.724697791255,64.1202175712152"
), C_dur = c("14,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,14",
"3,17,16,17,17,16,17,17,16,17,17,16,17,17,8", "8,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,17,16,17,2"
)), row.names = c(NA, -3L), groups = structure(list(Sequ = 2L,
.rows = structure(list(1:3), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -1L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
There's a possible solution with use of grid.arrange() func from library(gridExtra) library(grid) packages.
I've wrapped your data into unique charts and combined them together into arranged chart.
df1 = df0 %>%
pivot_longer(cols = contains("_"),
names_to = c("Event_by", ".value"),
names_pattern = "^(.*)_([^_]+$)") %>%
separate_rows(c(intpl, dur), sep = ",", convert = TRUE) %>%
mutate(Time = cumsum(dur)) %>%
mutate(Utterance = paste0(sub(".*(.)$", "\\1",Speaker), ": ", Utterance),
Utterance = factor(Utterance, levels = unique(Utterance)))
Set chart objects into enviroment:
for (i in unique(df1$Event_by)){
for (j in levels(df1$Utterance)){
assign(x = paste0(i,j), value = ggplot(data = df1[df1$Event_by == i & df1$Utterance == j,], aes(x = Time, y = log2(intpl))) +
geom_line()+
geom_smooth(method = 'lm', color = "red", formula = y~x))
}
}
Create grided chart:
library(gridExtra) library(grid)
grid.arrange(
`AA: cool >what part?<`,
`AB: u:m Tennessee=` ,
`ANA: (0.228)` ,
`BA: cool >what part?<` ,
`BB: u:m Tennessee=` ,
`BNA: (0.228)` ,
`CA: cool >what part?<` ,
`CB: u:m Tennessee=` ,
`CNA: (0.228)` ,
nrow = 3)
Although i think there should be better solution for that.
You can also try to explore below articlesfor arranging plots:
http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/81-ggplot2-easy-way-to-mix-multiple-graphs-on-the-same-page/
https://ggplot2-book.org/facet.html
Moreover, there's is no themming added to my solution
Aloha all,
I've struggled to build a legend for a mix/match of time series data I'm making. Here is some code:
My understanding is that I need to somehow clean my data and put it all in the same data frame, but all of the time series don't line up very well. Some is at 15 minutes, other one hour. Is there any way to force a legend for these datasets? I don't know what else to post here - since the 5 datasets are quite large.
Plot I'm working on:
q<- ggplot(subset(cr200_Auwai1, timedate>startd & timedate<endd), aes(timedate, Turb_SS)) +
geom_point(color="coral4")+
geom_point(data=subset(dsloi_wl, timedate>startd & timedate<endd), aes(timedate, level), color="blue")+
#geom_point(data=subset(flow_data, mdate>startd & mdate<endd), aes(as.POSIXct(mdate), flow_cfs*1000), color="red")+
geom_point(data=subset(cr300_Wai1, timedate>startd & timedate<endd), aes(timedate, Lvl_m*1000), color="forestgreen", size=1)+ #aquamarine3
geom_point(data=subset(cr300_Wai1, timedate>startd & timedate<endd), aes(timedate, Turb_SS), color="orange")+
#geom_point(data=subset(hihimanu_wl, timedate>startd & timedate<endd), aes(timedate, level), color="azure4", size=0.1)+
#geom_point(data=subset(rain_data, timedate>startd & timedate<endd), aes(timedate, rainmm), color="red",size=5)+
geom_point(data=subset(haptuk_ysi, datetime>startd & datetime<endd), aes(datetime, Turb), color="pink")+
#scale_x_date(breaks=date_breaks("month"), labels = date_format("%b-%y"))+
xlab("Date")+
ylab("Turbidity (NTU) and Water Level (mm)")+
coord_cartesian(ylim=c(0, 1500))+
theme_bw()+
theme(axis.text=element_text(size=14),
axis.title=element_text(size=16,face="bold"),
legend.justification = c(1, 1),
legend.position = c(1, 1),
legend.title=element_text(size=14),
legend.text=element_text(size=12))
Here is a sample of two of the datasets: Note that the times don't line up at all... since I'm mixing sources.
dsloi_wl:
structure(list(ReceptionTime = c(1533895414.1134, 1533895414.1733,
1533895414.19397, 1533895414.20708, 1533895414.22283, 1533895414.23634,
1533895414.25135, 1533895414.26387, 1533895414.27653, 1533895414.29126,
1533896013.68755, 1533896013.7638, 1533896013.79232, 1533896013.80917,
1533896013.82312, 1533896013.83648, 1533896013.84988, 1533896013.8648,
1533896013.87724, 1533896013.8894), d2w = c(776.7, 789.7, 790.2,
777.1, 777.2, 777.7, 778.4, 793.4, 779.6, 794.1, 819.9, 780.7,
794.1, 806.9, 781.9, 781.9, 782.7, 782.8, 783.1, 783.4), timedate = structure(c(1533895414.1134,
1533895414.1733, 1533895414.19397, 1533895414.20708, 1533895414.22283,
1533895414.23634, 1533895414.25135, 1533895414.26387, 1533895414.27653,
1533895414.29126, 1533896013.68755, 1533896013.7638, 1533896013.79232,
1533896013.80917, 1533896013.82312, 1533896013.83648, 1533896013.84988,
1533896013.8648, 1533896013.87724, 1533896013.8894), class = c("POSIXct",
"POSIXt"), tzone = ""), level = c(723.3, 710.3, 709.8, 722.9,
722.8, 722.3, 721.6, 706.6, 720.4, 705.9, 680.1, 719.3, 705.9,
693.1, 718.1, 718.1, 717.3, 717.2, 716.9, 716.6)), .Names = c("ReceptionTime",
"d2w", "timedate", "level"), row.names = c(NA, 20L), class = "data.frame")
CR300_Wai1
structure(list(RECORD = 73027:73046, Temp_C = c(24.62861, 24.62332,
24.61533, 24.60857, 24.60189, 24.59733, 24.59068, 24.58404, 24.57869,
24.57327, 24.56781, 24.5606, 24.55551, 24.55218, 24.54648, 24.5416,
24.5358, 24.5319, 24.52781, 24.52294), Turb_BS = c(94.50522,
88.65939, 109.354, 57.71527, 134.1903, 46.37191, 78.17719, 52.22319,
58.07111, 96.95719, 51.47488, 44.65616, 70.43825, 99.58217, 93.68374,
87.4787, 175.5395, 167.6757, 110.8119, 132.5971), Turb_SS = c(36.63349,
34.31228, 37.02223, 32.97258, 36.68553, 33.82083, 37.43391, 33.43639,
31.17306, 33.6327, 34.69954, 30.99891, 34.69988, 33.64369, 32.54948,
32.1177, 32.86558, 48.97706, 30.65004, 33.71646), Temp_C_2 = c(24.9014,
24.89474, 24.88837, 24.88279, 24.87574, 24.86852, 24.86357, 24.85751,
24.85236, 24.84759, 24.84091, 24.83577, 24.83192, 24.82713, 24.8229,
24.81832, 24.81237, 24.80821, 24.8051, 24.80015), WD_OBS = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), Lvl_m = c(0.6907353, 0.6905226, 0.6896195, 0.6890779,
0.6881586, 0.6878724, 0.6862501, 0.6848835, 0.6844589, 0.6837503,
0.6836612, 0.6831629, 0.6821692, 0.6812283, 0.6799452, 0.6791196,
0.6782504, 0.6772775, 0.6763596, 0.6755115), timedate = structure(c(1533895500,
1533895800, 1533896100, 1533896400, 1533896700, 1533897000, 1533897300,
1533897600, 1533897900, 1533898200, 1533898500, 1533898800, 1533899100,
1533899400, 1533899700, 1533900000, 1533900300, 1533900600, 1533900900,
1533901200), class = c("POSIXct", "POSIXt"), tzone = "")), .Names = c("RECORD",
"Temp_C", "Turb_BS", "Turb_SS", "Temp_C_2", "WD_OBS", "Lvl_m",
"timedate"), row.names = c(NA, 20L), class = "data.frame")
Here is a solution using mock data (next time provide a sample of your data) :
library(tidyverse)
library(lubridate)
#>
#> Attachement du package : 'lubridate'
#> The following object is masked from 'package:base':
#>
#> date
# mock data
time_15m <- seq(as.POSIXct("2018-08-30 00:00:00"), as.POSIXct("2018-08-31 00:00:00"), by = "15 min")
time_30m <- seq(as.POSIXct("2018-08-30 00:00:00"), as.POSIXct("2018-08-31 00:00:00"), by = "30 min")
time_60m <- seq(as.POSIXct("2018-08-30 00:00:00"), as.POSIXct("2018-08-31 00:00:00"), by = "60 min")
data_1 <- data.frame(time = time_15m,
var_1 = cos(hour(time_15m) + minute(time_15m)))
data_2 <- data.frame(time = time_30m,
var_2 = sin(hour(time_30m) + minute(time_30m)))
data_3 <- data.frame(time = time_60m,
var_3 = cos(1 - hour(time_60m) + minute(time_60m)))
# the kind of plot you have (prefer the 2nd version)
ggplot(data_1, aes(x = time, y = var_1)) +
geom_point(color = "red") +
geom_point(data = data_2, aes(time, var_2), color = "green") +
geom_point(data = data_3, aes(time, var_3), color = "blue") +
theme_bw()
# a version with long format data and use of gather function
data_1 %>%
left_join(data_2) %>% # join data from data_2 (timestep = 30m), missing data is NA
left_join(data_3) %>% # join data from data_3 (timestep = 60m), missing data is NA
gather(variable_name, variable_value, var_1, var_2, var_3) %>% # gather var_1, var_2 and var_3 in a single column
ggplot(., aes(x = time, y = variable_value, color = variable_name)) +
theme_bw() +
geom_point(size = 2)
#> Joining, by = "time"
#> Joining, by = "time"
#> Warning: Removed 120 rows containing missing values (geom_point).
Created on 2018-08-22 by the reprex package (v0.2.0).
EDIT 1 (include provided datasets)
library(tidyverse)
dsloi_wl %>%
full_join(cr300_Wai1) %>%
mutate(Lvl_m = 100 * Lvl_m) %>%
gather(variable_name, variable_value, level, Lvl_m, Turb_SS) %>%
ggplot(., aes(x = timedate, y = variable_value, color = variable_name)) +
geom_point() +
scale_color_manual("Legend title",
values = c("level" = "blue",
"Lvl_m" = "forestgreen",
"Turb_SS" = "orange"))
#> Joining, by = "timedate"
#> Warning: Removed 60 rows containing missing values (geom_point).
Created on 2018-08-23 by the reprex package (v0.2.0).