I have a manhattan plot of genetic information:
It was generated using the qqman package (https://cran.r-project.org/web/packages/qqman/vignettes/qqman.html) in R, which takes a data frame of P-values, chromosome and a gene position (for any biologists reading, this is a per-gene Manhattan plot, hence the sparsity of signal). The data looks like this (with an example dataset below):
SNP P CHR BP
ABC 1.1e-300 16 875849
AAS 1.2e-150 4 2343
JTL 4.2e-07 3 436544
LKS 4.1e-06 2 23565
JKSA 0.000432 1 98043
LKF 0.0032 22 387235
A20 0.0054 10 3252
AKLF 0.0235 4 4543543
structure(list(Gene = c("ABC1", "HGT2", "SLC34A3_ENSG00000198569",
"OR9K2_ENSG00000170605", "NFKB2_ENSG00000077150", "EFR3A_ENSG00000132294",
"SLC7A9_ENSG00000021488", "SEMG1_ENSG00000124233", "EWSR1_ENSG00000182944",
"ATP5PD_ENSG00000167863", "MAST3_ENSG00000099308", "KRT31_ENSG00000094796",
"FOXI1_ENSG00000168269", "CHCHD7_ENSG00000170791", "MAPK6_ENSG00000069956",
"SPRYD3_ENSG00000167778", "HOXB13_ENSG00000159184", "SLC12A9_ENSG00000146828",
"EXOC2_ENSG00000112685", "KCNJ15_ENSG00000157551", "SLC22A18_ENSG00000110628",
"ARID4A_ENSG00000032219", "SKP2_ENSG00000145604", "ZNF831_ENSG00000124203",
"ZNF275_ENSG00000063587", "SLC16A2_ENSG00000147100", "ADRB1_ENSG00000043591",
"DSCAM_ENSG00000171587", "PPM1H_ENSG00000111110", "IFNA14_ENSG00000228083",
"STX2_ENSG00000111450", "VPS54_ENSG00000143952", "ANXA7_ENSG00000138279",
"MAP3K12_ENSG00000139625", "MED13L_ENSG00000123066", "CHRM2_ENSG00000181072",
"RBP7_ENSG00000162444", "DRD1_ENSG00000184845", "CCDC121_ENSG00000176714",
"HMG20B_ENSG00000064961", "POU5F1B_ENSG00000212993", "SESN1_ENSG00000080546",
"DNASE1_ENSG00000213918", "FBXO24_ENSG00000106336", "RAG2_ENSG00000175097",
"UTS2_ENSG00000049247", "KMT2B_ENSG00000272333", "RBM33_ENSG00000184863",
"SNRPB2_ENSG00000125870", "FOXO4_ENSG00000184481", "NBPF3_ENSG00000142794",
"PPL_ENSG00000118898", "LYPD6B_ENSG00000150556", "POLD3_ENSG00000077514",
"PIK3CB_ENSG00000051382", "BCL2L12_ENSG00000126453", "CDC45_ENSG00000093009",
"DUXA_ENSG00000258873", "MCM3_ENSG00000112118", "CAPN3_ENSG00000092529",
"FMO4_ENSG00000076258", "B3GALT2_ENSG00000162630", "MICB_ENSG00000204516",
"CCL22_ENSG00000102962", "JKAMP_ENSG00000050130", "GSDME_ENSG00000105928",
"IZUMO4_ENSG00000099840", "NCKAP5L_ENSG00000167566", "ZRANB1_ENSG00000019995",
"TAL1_ENSG00000162367", "SLTM_ENSG00000137776", "SPC25_ENSG00000152253",
"GAP43_ENSG00000172020", "FGD3_ENSG00000127084", "PTCD3_ENSG00000132300",
"PAH_ENSG00000171759", "MMP8_ENSG00000118113", "RSBN1L_ENSG00000187257",
"AC026740.3_ENSG00000286094", "FAM189A2_ENSG00000135063", "TMEM245_ENSG00000106771",
"DDX50_ENSG00000107625", "SP140_ENSG00000079263", "C21orf91_ENSG00000154642",
"MEIKIN_ENSG00000239642", "TNFRSF8_ENSG00000120949", "RNF24_ENSG00000101236",
"CDK5_ENSG00000164885", "HINT1_ENSG00000169567", "TYRO3_ENSG00000092445",
"KRT75_ENSG00000170454", "RBM44_ENSG00000177483", "MYH8_ENSG00000133020",
"UBXN11_ENSG00000158062", "APOL3_ENSG00000128284", "NRXN3_ENSG00000021645",
"PRSS16_ENSG00000112812", "BST1_ENSG00000109743", "FAM49A_ENSG00000197872",
"SLC3A2_ENSG00000168003", "OR1C1_ENSG00000221888", "MYMK_ENSG00000187616",
"RASSF1_ENSG00000068028", "ARID5A_ENSG00000196843", "UAP1L1_ENSG00000197355",
"DPH2_ENSG00000132768", "G6PC_ENSG00000131482", "SH2B1_ENSG00000178188",
"RELL1_ENSG00000181826", "ABCC5_ENSG00000114770", "ZNF333_ENSG00000160961",
"NIF3L1_ENSG00000196290", "COMMD2_ENSG00000114744", "ZCCHC14_ENSG00000140948",
"P3H1_ENSG00000117385", "KRT14_ENSG00000186847", "SPG7_ENSG00000197912",
"ERCC6L_ENSG00000186871", "UPF1_ENSG00000005007", "FCGR3A_ENSG00000203747",
"SLC39A13_ENSG00000165915", "ACYP2_ENSG00000170634", "AL162596.1_ENSG00000285946",
"MEF2D_ENSG00000116604", "ATPAF1_ENSG00000123472", "DNAL4_ENSG00000100246",
"ADRA2A_ENSG00000150594", "ALDH3B2_ENSG00000132746", "L3MBTL3_ENSG00000198945",
"NR2E1_ENSG00000112333", "OTUD1_ENSG00000165312", "MCMDC2_ENSG00000178460",
"TXNL1_ENSG00000091164", "CES5A_ENSG00000159398", "CCL16_ENSG00000275152",
"ZBTB12_ENSG00000204366", "OGDHL_ENSG00000197444", "ARHGEF7_ENSG00000102606",
"RBM20_ENSG00000203867", "SELENOK_ENSG00000113811", "HBB_ENSG00000244734",
"WDR3_ENSG00000065183", "MAPKBP1_ENSG00000137802", "LTB4R2_ENSG00000213906",
"SLC25A15_ENSG00000102743", "ZBTB26_ENSG00000171448", "FDX2_ENSG00000267673",
"HSD3B7_ENSG00000099377", "RBFOX3_ENSG00000167281"), Pvalue = c(1.4e-300,
2.4e-150, 2.6089114579797e-07, 2.0296620694138e-06, 0.000147497259292417,
0.000229023886289315, 0.000245084674285079, 0.000256308708221289,
0.000261527824152563, 0.000288694716678695, 0.000290173032394758,
0.000320594572326915, 0.000346135729902497, 0.000355400110852,
0.000365256352980237, 0.000409731023356175, 0.000434204786603609,
0.000439775242591978, 0.000489192731765176, 0.000496753250110893,
0.00049911036273298, 0.000570787086811797, 0.000817460863988795,
0.000909350865229142, 0.000939159281654778, 0.00101875263711804,
0.00104161722087825, 0.00104642519111031, 0.0011025121215934,
0.00110797190460954, 0.00115516532029414, 0.00119237737210043,
0.00122886113380205, 0.00123316670384388, 0.00126924175390097,
0.00133083135434398, 0.00135900612361495, 0.00139601886941515,
0.00140034988031684, 0.00144667154281775, 0.00152488013161856,
0.00163920217629621, 0.00165121328565765, 0.00174281606991877,
0.00177541992540164, 0.00190567015024483, 0.00197012178338563,
0.00201154365191081, 0.00217761616500045, 0.00218849598206619,
0.00219107805420338, 0.00219952638949095, 0.0022100400174857,
0.00224988976742913, 0.00227842036080439, 0.00231351589815465,
0.00233840710255306, 0.00239368490047076, 0.00240800589782486,
0.00243072813003242, 0.00244930354205075, 0.00250643393459327,
0.00251262640919065, 0.00251308387281417, 0.00263512458389692,
0.00278748971622167, 0.00285692531240396, 0.00294631292976411,
0.0029855292366705, 0.00300042887433971, 0.00303321747691876,
0.00303431537337207, 0.00303655747990805, 0.00305247991142066,
0.00305779719421262, 0.0030773769185013, 0.00309595279588104,
0.00320602521859303, 0.00332374190234568, 0.00335845666631385,
0.00343476781423846, 0.00352132856036713, 0.0035370791144882,
0.00361921945446442, 0.00362829729460107, 0.00362925899436917,
0.00371857751928739, 0.00379170913533391, 0.00381786051662956,
0.00384603142808415, 0.0040621114920355, 0.00409131954647834,
0.00421076475281379, 0.00426968726537658, 0.00434706101829539,
0.00440972006588558, 0.00441860470852284, 0.00442578968523244,
0.00442716922579578, 0.00452215526426547, 0.00455658711791962,
0.00456768818316559, 0.00459525378983388, 0.00470562811526665,
0.00479427416502232, 0.00480697291736709, 0.00487609777383424,
0.00487626066774249, 0.0048982035968409, 0.00495106368869058,
0.00495974901689888, 0.0051182254688722, 0.00511868853158659,
0.00517459699358158, 0.0051863728177568, 0.0052533748441207,
0.0053048513357663, 0.00535144603215779, 0.00536294574878726,
0.00551084451782391, 0.00554884846488313, 0.0057184975334863,
0.00579274777888456, 0.00589230566622367, 0.00598698264647979,
0.00611781183554826, 0.00620691435617104, 0.00623285869674561,
0.00627192651777919, 0.00631120768525961, 0.00638288332792991,
0.00640000445930411, 0.00640676243762089, 0.00651734394089964,
0.0065624463096069, 0.00663922011120555, 0.00664879787639161,
0.00670461778135323, 0.00687266504207529, 0.00695679654393111,
0.00703352727799, 0.0070826001238915, 0.00709135444023445, 0.007142701991454,
0.00715597471729579, 0.00717318609326256, 0.00717726401691021,
0.00723420182380741, 0.00734437099984853), CHR = c(16L, 4L, 4L,
1L, 14L, 16L, 5L, 6L, 20L, 9L, 9L, 7L, 22L, 3L, 14L, 3L, 8L,
8L, 21L, 16L, 4L, 16L, 12L, 14L, 4L, 1L, 12L, 15L, 5L, 4L, 21L,
22L, 1L, 1L, 14L, 6L, 15L, 9L, 20L, 20L, 17L, 7L, 15L, 6L, 20L,
7L, 8L, 9L, 1L, 13L, 11L, 12L, 4L, 7L, 20L, 12L, 7L, 5L, 12L,
21L, 5L, 8L, 14L, 9L, 10L, 17L, 21L, 19L, 4L, 21L, 18L, 21L,
7L, 12L, 21L, 2L, 15L, 7L, 14L, 15L, 4L, 12L, 5L, 14L, 21L, 8L,
21L, 15L, 18L, 12L, 11L, 20L, 2L, 22L, 14L, 17L, 3L, 4L, 14L,
15L, 9L, 7L, 20L, 15L, 18L, 15L, 19L, 13L, 15L, 6L, 7L, 8L, 3L,
4L, 21L, 7L, 18L, 4L, 13L, 16L, 14L, 22L, 2L, 2L, 6L, 16L, 15L,
8L, 7L, 19L, 13L, 6L, 21L, 8L, 18L, 22L, 19L, 21L, 16L, 2L, 4L,
5L, 15L, 6L, 3L, 21L, 15L, 4L, 11L), POS = c(40665L, 197088L,
107291L, 210681L, 43546L, 79324L, 84342L, 184478L, 153093L, 180926L,
186110L, 117933L, 40682L, 54752L, 42758L, 61354L, 60378L, 157811L,
154466L, 126398L, 31037L, 115113L, 151914L, 10177L, 149587L,
79681L, 199754L, 129963L, 127032L, 175940L, 213708L, 51165L,
2584L, 166487L, 56259L, 130923L, 89219L, 170034L, 178967L, 102826L,
16982L, 188528L, 185007L, 6373L, 23298L, 199514L, 10429L, 58720L,
124518L, 210323L, 52212L, 186662L, 166963L, 58802L, 97157L, 14448L,
205795L, 70401L, 41824L, 93825L, 107954L, 207638L, 58648L, 64942L,
184005L, 19239L, 326L, 167713L, 106774L, 9145L, 174348L, 116079L,
38916L, 561L, 140433L, 123765L, 92497L, 187902L, 32027L, 63696L,
141286L, 67825L, 131698L, 120443L, 72621L, 165143L, 188862L,
52376L, 16769L, 77430L, 38655L, 145317L, 188469L, 113143L, 198322L,
26732L, 165043L, 25287L, 72392L, 12505L, 134208L, 126649L, 86308L,
199525L, 204348L, 103538L, 78610L, 176290L, 175950L, 73590L,
148494L, 151769L, 135252L, 141200L, 73351L, 45244L, 136493L,
33343L, 11165L, 915L, 80714L, 164700L, 142935L, 137224L, 554L,
92823L, 143083L, 166581L, 121459L, 19037L, 325L, 59959L, 155468L,
20896L, 33721L, 4468L, 113639L, 17103L, 184481L, 164337L, 174760L,
96405L, 207423L, 46590L, 168811L, 205743L, 74180L, 178456L, 126892L
)), row.names = c(NA, -149L), class = c("data.table", "data.frame"
), .internal.selfref = <pointer: 0x55a80de817a0>)
In reality there are around 20,000 lines, one for each gene in the human genome.
Using qqman, one uses:
# Draw the Manhattan plot with qqman. The column names passed here are
# qqman's defaults and match the SNP / P / CHR / BP table shown above.
manhattan(gwas_data, chr = "CHR", bp = "BP", p = "P", snp = "SNP")
To get the plot.
I would like the same plot but with the axis broken between 8-149 and then again from 149-300 so that the bottom part isn't all compressed. qqman is unable to do this.
I have tried modifying the script from this website: https://danielroelfs.com/blog/how-i-create-manhattan-plots-using-ggplot/
And my code looks like this:
table above: gwas_data
# Offset of each chromosome along the x-axis: the running total of the largest
# BP of the chromosomes listed before it (0 for the first one).
data_cum <- gwas_data %>%
  group_by(CHR) %>%
  summarise(max_bp = max(BP)) %>%
  # as.numeric(): BP is stored as integer, and summing whole-chromosome
  # lengths can exceed .Machine$integer.max, which would turn the offsets
  # into NA.
  mutate(bp_add = lag(cumsum(as.numeric(max_bp)), default = 0)) %>%
  select(CHR, bp_add)

# Attach the per-chromosome offset and compute each point's cumulative
# position. The column is `BP` (upper case); R names are case-sensitive, so
# `bp` would not be found.
gwas_data <- gwas_data %>%
  inner_join(data_cum, by = "CHR") %>%
  mutate(bp_cum = BP + bp_add)
# Where to put each chromosome's tick label: the mean cumulative position of
# that chromosome's points.
axis_set <- summarize(group_by(gwas_data, CHR), center = mean(bp_cum))

# Upper y-axis limit: magnitude of the smallest P-value's exponent, plus 2 as
# headroom.
smallest_p <- filter(gwas_data, P == min(P))
ylim <- abs(floor(log10(smallest_p[["P"]]))) + 2

# Bonferroni-corrected significance threshold: 0.05 divided by the number of
# tests.
sig <- 0.05 / length(gwas_data[["P"]])
# Manhattan plot: cumulative genomic position on x, -log10(P) on y, points
# coloured in two alternating shades per chromosome and sized by significance.
manhplot <- ggplot(gwas_data, aes(x = bp_cum, y = -log10(P),
                                  color = as_factor(CHR), size = -log10(P))) +
  # Dashed line marks the Bonferroni threshold
  geom_hline(yintercept = -log10(sig), color = "grey40", linetype = "dashed") +
  geom_point(alpha = 0.75) +
  # axis_set stores the chromosome in column `CHR`; `axis_set$chr` does not
  # exist (names are case-sensitive), which left the axis without labels.
  scale_x_continuous(labels = axis_set$CHR, breaks = axis_set$center) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, ylim)) +
  # Repeat the two-colour palette once per chromosome so there are always
  # enough values for every level of as_factor(CHR).
  scale_color_manual(values = rep(c("#276FBF", "#183059"),
                                  length(unique(axis_set$CHR)))) +
  scale_size_continuous(range = c(0.5, 3)) +
  labs(x = NULL,
       y = "-log<sub>10</sub>(p)") +
  theme_minimal() +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    # element_markdown() renders the <sub> tag in the y-axis title
    axis.title.y = element_markdown(),
    axis.text.x = element_text(angle = 60, size = 8, vjust = 0.5)
  )
This gives me:
Which is wrong. However, if I try and then cut the axis using the ggbreak package with:
# `break` is a reserved word in R and cannot be used as an argument name;
# ggbreak's scale_y_cut() takes the cut points in `breaks`.
t <- manhplot + scale_y_cut(breaks = c(10, 140))
t + scale_y_cut(breaks = c(140, 300))
Which gives me:
How would I sort the chromosome x-axis and the breaks out so it looks like the qqman plot but with the y-axis compressed?
Many thanks
Related
I am trying to write code that finds the 3 consecutive months that are the coldest.
For now I have written code for the first 3 months (1,2,3), but then it should be applied to (4,5,6), (7,8,9), (10,11,12), (2,3,4), (5,6,7), (8,9,10), (11,12,1), (3,4,5), (6,7,8), (9,10,11) and (12,1,2), which are all the possible combinations of 3 consecutive months.
The code I wrote is here :
# Mean t_q per Site over months 1-3, reduced to one row per Site.
cold <- data_example %>%
  filter(Month %in% 1:3) %>%
  group_by(Site) %>%
  mutate(mean_temperature = mean(t_q)) %>%
  # distinct() with explicit columns keeps only those columns, so t_q and
  # Month are dropped here.
  distinct(Site, mean_temperature)

# Plain vector of the per-Site means for the (1, 2, 3) window
average_temp_month_1_2_3 <- cold[["mean_temperature"]]
Then I replaced the c(1,2,3) with each of the other possibilities, creating a new column for each output.
I end up with a dataset with row corresponding to Site and columns are all the possibilities of 3 consecutive months.
After I took the min value for each row using the function apply() and min() and it gives me the coldest quarter for each Site.
I am looking for a way to generalize it, like creating a loop over the possibilities.
The structure of data_example is as follows:
structure(list(Site = c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 14L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 15L, 15L, 15L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 17L,
17L, 17L, 17L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 18L, 18L,
18L, 18L, 18L, 18L, 18L, 18L, 25L, 25L, 25L, 25L, 25L, 25L, 25L,
25L, 25L, 25L, 25L, 25L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L,
26L, 26L, 26L, 26L), Month = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L), t_q = c(9.67754848470332, -6.74555496540183,
5.67969761460384, 12.537207581471, -9.4899105618945, 21.0747672424502,
15.2643039243614, -3.62839910494421, 11.3919028351455, 1.69988257436554,
4.22015024307287, 11.7045830784212, 8.91437673833493, 0.579081429509138,
-10.8207481229903, 7.05356868592628, 13.0911580912516, 17.2032089167605,
-2.47642708849114, -11.2105599344486, 33.986736305027, 17.8578689773214,
-14.9114468266335, 14.4681380389141, 0.568074240873411, 7.65458408777801,
1.91368344556659, 6.01571556896127, 11.4858297513536, 2.2608458985328,
-2.08200762781776, 12.1540989284163, 20.9941815285413, 0.375777604316208,
-2.7137027317614, -6.17690210400591, 11.2549857164403, 17.447156776654,
-6.96565197389579, -5.41542361226991, 11.1680111873065, 16.2266522778922,
-11.4503938582433, 5.93300314835716, -18.2818398656237, 16.2930210946949,
9.80219192652316, -0.48237356523527, 7.72680942503686, 5.84113084181759,
9.66129413490096, -4.53018262186904, 7.42187509892118, 9.2559478576895,
8.25120948667013, 8.18182063263247, 16.3703081943971, 19.5469951420341,
3.71888263185773, -0.150179891749435, 1.32057298670562, -5.63556532224354,
21.3918542474341, 4.58752188336035, 5.49430262894033, 5.99587512047837,
-3.76459024109216, -8.53522098071824, 8.01805680562232, 26.2227490426066,
8.90822434139878, 5.04259034084471, 6.89740304247746, 11.9484584922927,
-11.5085102739471, 30.4526759119379, 21.878533782357, -5.39936677076962,
-9.83965056853816, 19.3083455159472, 7.90653548036154, 3.11876660277767,
-8.85027083180008, -9.9225496831988, 5.97307112581907, -2.83528336599284,
-2.75758002814396, 4.68388181004449, 6.61649031537118, -6.65988084338133,
-0.981075313384259, 5.84898952305179, -5.20962191660178, 0.416662319713158,
-10.5336993269853, 19.5350642296553, 26.9696625385792, 15.3291059661081,
15.0799591208354, 13.2310653499033, 7.2053382722482, -7.87288386491102,
20.8083797469715, 6.16664220270041, 8.3360949793043, -14.4000921795463,
-10.5503025782944, 14.3185205291177, 5.83802399796341, 2.49660818997943,
15.7399297014092, -0.834086173817971, 12.4883230222372, 6.73548467376379,
7.7988835803825, -5.13583355913738, 7.51054162811707, 11.6610602814336,
-11.8864185954223, 4.2704440943851)), row.names = c(NA, -120L
), groups = structure(list(Site = c(4L, 5L, 13L, 14L, 15L, 16L,
17L, 18L, 25L, 26L), .rows = structure(list(1:12, 13:24, 25:36,
37:48, 49:60, 61:72, 73:84, 85:96, 97:108, 109:120), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -10L), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
You can use raster::movingFun to do a moving average with circular data, then use slice_min to get the minimum value per group.
library(dplyr)
# Wrap `x` onto the cycle 1..by: values that are a multiple of `by` map to
# `by` itself instead of 0 (e.g. month 0 -> 12 and month 13 -> 1 for by = 12).
circ <- function(x, by) {
  rem <- x %% by
  ifelse(rem == 0, by, rem)
}
# Per Site: 3-month circular rolling mean of t_q centred on each month, keep
# the row(s) with the lowest mean, and label them with the three months of
# that window (wrapped into 1..12 by circ()).
df |>
  group_by(Site) |>
  mutate(
    rolmean = raster::movingFun(t_q, n = 3, fun = mean, circular = TRUE)
  ) |>
  slice_min(rolmean) |>
  mutate(
    coldest = paste(circ(c(Month - 1, Month, Month + 1), 12), collapse = ", ")
  )
output
# A tibble: 10 × 5
# Groups: Site [10]
Site Month t_q rolmean coldest
<int> <int> <dbl> <dbl> <chr>
1 4 2 -6.75 2.87 1, 2, 3
2 5 3 -10.8 -1.06 2, 3, 4
3 13 11 -2.71 -2.84 10, 11, 12
4 14 8 5.93 -7.93 7, 8, 9
5 15 3 9.66 3.66 2, 3, 4
6 16 7 -3.76 -2.10 6, 7, 8
7 17 11 -8.85 -5.22 10, 11, 12
8 18 10 0.417 -5.11 9, 10, 11
9 25 10 -14.4 -5.54 9, 10, 11
10 26 12 4.27 -0.593 11, 12, 1
Using which.min in aggregate on a moving average window.
# For each Site, report the 3 consecutive months with the lowest mean t_q.
# Windows wrap around the end of the year, so (11, 12, 1) and (12, 1, 2) are
# candidates as well. The reported numbers are row positions within each Site,
# which equal the months only if the rows are ordered by Month 1..12
# (NOTE(review): confirm `dat` is sorted that way).
aggregate(t_q ~ Site, dat, \(s) {
  win <- 3L ## window length
  n <- length(s)
  ## one circular window per start position i: i, i + 1, ..., i + win - 1,
  ## wrapped back into 1..n
  sq <- lapply(seq_len(n), \(i) (i + seq_len(win) - 2L) %% n + 1L)
  win_means <- vapply(sq, \(idx) mean(s[idx]), numeric(1))
  toString(sq[[which.min(win_means)]])
})
#    Site        t_q
# 1     4    1, 2, 3
# 2     5    2, 3, 4
# 3    13 10, 11, 12
# 4    14    7, 8, 9
# 5    15    2, 3, 4
# 6    16    6, 7, 8
# 7    17 10, 11, 12
# 8    18  9, 10, 11
# 9    25  9, 10, 11
# 10   26  11, 12, 1
I have a data set which I wish to produce a heat map for and export the heat map as a raster so that I can use in another GIS software. I have the following data set;
structure(list(Longitude = c(83.01902, 83.04428, 83.07211, 83.08611,
83.11934, 83.12175, 83.13665, 83.14032, 83.15158, 83.15324, 83.17485,
83.17823, 83.19479, 83.21212, 83.22364, 83.24, 83.24301, 83.24766,
83.24869, 83.25096, 83.26048, 83.26767, 83.27696, 83.28758, 83.28842,
83.29257, 83.29861, 83.30751, 83.31339, 83.31459, 83.31735, 83.32446,
83.35895, 83.3939, 83.39481, 83.41, 83.49965, 83.5265, 83.54585,
83.54642, 83.56225, 83.58743, 83.6327, 83.67486), Latitude = c(27.49143,
27.53033, 27.54134, 27.589, 27.60669, 27.53022, 27.46776, 27.56822,
27.50877, 27.63619, 27.67205, 27.54466, 27.51068, 27.69448, 27.41738,
27.49342, 27.53017, 27.63331, 27.44174, 27.56867, 27.60454, 27.47598,
27.57293, 27.55558, 27.63096, 27.6531, 27.55854, 27.67777, 27.52301,
27.55364, 27.4924, 27.59005, 27.60943, 27.62901, 27.67632, 27.51055,
27.51166, 27.53252, 27.57101, 27.51175, 27.60436, 27.51426, 27.53022,
27.53555), Total = c(11L, 5L, 8L, 16L, 13L, 5L, 10L, 25L, 4L,
5L, 1L, 7L, 11L, 2L, 2L, 10L, 4L, 3L, 4L, 6L, 1L, 12L, 4L, 3L,
7L, 1L, 1L, 6L, 3L, 2L, 3L, 4L, 12L, 19L, 11L, 9L, 16L, 17L,
9L, 9L, 18L, 7L, 7L, 5L)), row.names = c(NA, -44L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), groups = structure(list(Longitude = c(83.01902,
83.04428, 83.07211, 83.08611, 83.11934, 83.12175, 83.13665, 83.14032,
83.15158, 83.15324, 83.17485, 83.17823, 83.19479, 83.21212, 83.22364,
83.24, 83.24301, 83.24766, 83.24869, 83.25096, 83.26048, 83.26767,
83.27696, 83.28758, 83.28842, 83.29257, 83.29861, 83.30751, 83.31339,
83.31459, 83.31735, 83.32446, 83.35895, 83.3939, 83.39481, 83.41,
83.49965, 83.5265, 83.54585, 83.54642, 83.56225, 83.58743, 83.6327,
83.67486), .rows = list(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L,
23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L,
35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L)), row.names = c(NA,
-44L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))
I have produced a heat map using ggplot with the following code
# Base plot: observation points sized by Total, with fixed longitude/latitude
# limits. Layers inherit the data and the x/y mapping from ggplot().
Data <- ggplot(
  data = Raptor_Fixed_Table_Sum,
  mapping = aes(x = Longitude, y = Latitude)
) +
  geom_point(mapping = aes(size = Total)) +
  xlim(82.8, 83.8) +
  ylim(27.35, 27.8)

# Heat map: 2D density contour lines plus filled density polygons, with the
# observation points drawn again on top in blue.
Data +
  geom_density2d() +
  stat_density2d(
    mapping = aes(fill = ..level..),
    geom = "polygon",
    alpha = 0.5
  ) +
  scale_fill_gradient(low = "green", high = "red") +
  scale_alpha(range = c(0, 0.3), guide = FALSE) +
  geom_point(mapping = aes(size = Total), colour = "blue")
I don't know how to go about extracting just the heat map. Ideally I would like it as a raster so that I can save it and use in QGIS to overlay a map of the area.
Any help would be greatly appreciated.
I need some help regarding transforming a geom_bar into a geom_area plot. This is my df:
dput(df)
df <- structure(list(new_day = c(-25L, 3L, 7L, -7L, 3L, 7L, -7L, 0L,
-25L, 3L, 7L, -7L, 0L, -25L, 3L, 7L, -7L, 0L, -25L, 3L, 7L, -7L,
0L, -25L, 3L, 7L, -7L, 0L, -25L, 3L, 7L, -7L, 0L, 3L, 7L, -7L,
0L, -25L, 3L, 7L, -7L, 0L, -25L, 3L, 7L, -7L, 0L, -25L, 3L, 7L,
-25L, 3L, 7L, -7L, 0L, 3L, -7L, 0L, -25L, 7L, 3L, 7L, -7L, 0L,
-25L, 3L, 7L, -7L, 0L, -25L, 3L, 7L, 3L, 7L, -7L, 0L, -25L, 3L,
7L, -7L, 0L, 7L, -25L, 3L, 7L, -7L, 0L, 3L, 7L, -25L, -25L, -25L,
-25L, -25L, -25L, -25L), order = structure(c(8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 11L, 11L, 11L, 11L, 11L, 13L, 13L, 13L, 13L,
13L, 10L, 10L, 10L, 10L, 10L, 7L, 7L, 7L, 7L, 7L, 2L, 2L, 2L,
2L, 2L, 7L, 7L, 7L, 7L, 9L, 9L, 9L, 9L, 9L, 1L, 1L, 1L, 1L, 1L,
9L, 9L, 9L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 13L, 13L, 14L, 14L,
14L, 14L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 13L, 13L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 2L, 2L, 2L, 2L, 2L, 6L, 6L, 1L, 7L, 5L, 2L,
12L, 2L, 2L), .Label = c("Alteromonadales", "Betaproteobacteriales",
"Caulobacterales", "Chitinophagales", "Flavobacteriales", "Parvibaculales",
"Pseudomonadales", "Rhizobiales", "Rhodobacterales", "Rhodospirillales",
"Sneathiellales", "Sphingobacteriales", "Sphingomonadales", "Thalassobaculales"
), class = "factor"), family = structure(c(13L, 13L, 13L, 13L,
12L, 12L, 12L, 12L, 15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L,
16L, 7L, 7L, 7L, 7L, 7L, 11L, 11L, 11L, 11L, 11L, 1L, 1L, 1L,
1L, 1L, 11L, 11L, 11L, 11L, 14L, 14L, 14L, 14L, 14L, 4L, 4L,
4L, 4L, 4L, 14L, 14L, 14L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 16L,
16L, 17L, 17L, 17L, 17L, 8L, 8L, 8L, 8L, 8L, 5L, 5L, 5L, 16L,
16L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 8L, 8L, 8L, 8L,
8L, 10L, 10L, 6L, 11L, 3L, 1L, 9L, 1L, 1L), .Label = c("Burkholderiaceae",
"Chitinophagaceae", "Flavobacteriaceae", "Gallaecimonadaceae",
"Hyphomonadaceae", "Idiomarinaceae", "Magnetospiraceae", "Methylophilaceae",
"NS11-12_marine_group", "Parvibaculaceae", "Pseudomonadaceae",
"Rhizobiaceae", "Rhizobiales_unclassified", "Rhodobacteraceae",
"Sneathiellaceae", "Sphingomonadaceae", "Thalassobaculaceae"), class = "factor"),
genus = structure(c(16L, 16L, 16L, 16L, 7L, 7L, 7L, 7L, 3L,
3L, 3L, 3L, 3L, 19L, 19L, 19L, 19L, 19L, 24L, 24L, 24L, 24L,
24L, 14L, 14L, 14L, 14L, 14L, 17L, 17L, 17L, 17L, 17L, 14L,
14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L, 5L, 5L, 5L, 5L, 5L,
10L, 10L, 10L, 2L, 2L, 2L, 2L, 2L, 22L, 22L, 22L, 20L, 20L,
23L, 23L, 23L, 23L, 11L, 11L, 11L, 11L, 11L, 8L, 8L, 8L,
21L, 21L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 11L, 11L,
11L, 11L, 11L, 13L, 13L, 9L, 14L, 4L, 6L, 12L, 1L, 18L), .Label = c("Burkholderiaceae_unclassified",
"Cupriavidus", "Ferrovibrio", "Flavobacteriaceae_unclassified",
"Gallaecimonas", "GKS98_freshwater_group", "Hoeflea", "Hyphomonas",
"Idiomarina", "Marivivens", "Methylotenera", "NS11-12_marine_group_ge",
"Parvibaculum", "Pseudomonas", "Pseudorhodobacter", "Rhizobiales_unclassified",
"Rhodoferax", "RS62_marine_group", "Sphingomonadaceae_unclassified",
"Sphingopyxis", "Sphingorhabdus", "Terrimonas", "Thalassobaculum",
"uncultured"), class = "factor"), Abundance = c(0.758296593899054,
0.728046713738242, 0.421798852637834, 0.185971692147469,
7.36584152568739, 11.0004160226707, 1.93134577450352, 19.7144376530921,
46.2350237547082, 25.8715062086956, 22.1549641486618, 34.4112477828867,
20.4937613394223, 3.73518219692229, 15.9295990367068, 13.8490383262387,
13.3481723220855, 20.3866145291388, 0.165618346100574, 8.86991024549668,
8.5330814375361, 6.86819004205197, 5.72129192186814, 1.04512973253723,
3.77880217461655, 6.47871112880127, 1.12084852451492, 0.903754246093232,
19.0854333497858, 15.7152146349298, 12.3768753373503, 15.8790763239117,
10.2875187327705, 2.82159106304821, 4.22393981370602, 8.82452898193968,
4.8507226701533, 6.19619716749583, 8.28477594908417, 8.05201189383953,
9.7404731686272, 9.84535225459449, 1.7940554465653, 2.62276259756813,
2.74008811315788, 0.543937440677315, 0.55325167765205, 0.910457573040239,
0.451385497886567, 0.655661306732001, 6.59400178917785, 1.92570846362683,
2.62192443054515, 2.10049053655497, 2.13139299576524, 0.20799245164738,
0.324291631088576, 0.369492771993701, 1.52162438803598, 0.151864202275619,
0.420953084533189, 0.391517677365401, 0.29116200940885, 0.232440441774702,
4.21428798609281, 0.859779996836882, 1.33107018783728, 1.013155122065,
0.447286602320585, 0.165001492967355, 0.285983094976304,
0.377758692391269, 0.21556919104275, 0.314057858254493, 0.354649793637887,
0.338799824269294, 0.218027624939685, 0.914324162324944,
1.22932824654674, 0.731649603629864, 0.566393265064962, 0.247942012186621,
1.73171328618728, 0.636597714441988, 0.505393049999761, 0.491318560043637,
0.990988961717433, 0.195417142399681, 0.210412739808352,
0.476107780140271, 0.936663899397428, 0.251540964619117,
0.963667386912928, 0.504905545701818, 0.296220086916766,
0.240809811677774)), class = "data.frame", row.names = c(52L,
68L, 72L, 93L, 165L, 169L, 190L, 194L, 246L, 262L, 266L, 287L,
291L, 343L, 359L, 363L, 384L, 388L, 440L, 456L, 460L, 481L, 485L,
634L, 650L, 654L, 675L, 679L, 731L, 747L, 751L, 772L, 776L, 844L,
848L, 869L, 873L, 925L, 941L, 945L, 966L, 970L, 1022L, 1038L,
1042L, 1063L, 1067L, 1216L, 1232L, 1236L, 1313L, 1329L, 1333L,
1354L, 1358L, 1426L, 1451L, 1455L, 1507L, 1527L, 1717L, 1721L,
1742L, 1746L, 2186L, 2202L, 2206L, 2227L, 2231L, 2380L, 2396L,
2400L, 3075L, 3079L, 3294L, 3298L, 3350L, 3366L, 3370L, 3391L,
3395L, 3467L, 4223L, 4239L, 4243L, 4264L, 4268L, 4433L, 4437L,
4708L, 4805L, 4902L, 5193L, 5969L, 7909L, 8006L))
and this is the structure:
> str(df)
'data.frame': 96 obs. of 5 variables:
$ new_day : int -25 3 7 -7 3 7 -7 0 -25 3 ...
$ order : Factor w/ 14 levels "Alteromonadales",..: 8 8 8 8 8 8 8 8 11 11 ...
$ family : Factor w/ 17 levels "Burkholderiaceae",..: 13 13 13 13 12 12 12 12 15 15 ...
$ genus : Factor w/ 24 levels "Burkholderiaceae_unclassified",..: 16 16 16 16 7 7 7 7 3 3 ...
$ Abundance: num 0.758 0.728 0.422 0.186 7.366 ...
my data is about relative abundances of species over time, I removed rare species so it doesn't add up to 100 % anymore,
but that is fine, it is about 98 % per date. However, I get these weird free polygons and triangles which I recognize from incorrect grouping etc., but the group parameter did not change anything here. I also tried several position and stat arguments, which did not help. Maybe it is about the order of factors or something?
What I'm looking for is a stacked plot of the cumulative abundances per order, without empty spaces in between etc. (Related: "Create proportional geom_area plot directly in ggplot2".)
# Stacked area plot of Abundance over new_day, one fill colour per order;
# the dashed vertical line marks day 0.
ggplot(data = df, mapping = aes(new_day, Abundance, fill = order)) +
  geom_area(stat = "identity") +
  geom_vline(mapping = aes(xintercept = 0), size = 1.2, linetype = "dashed")
I get fewer weird shapes when going to a more detailed hierarchical level (genus instead of order)
# Same stacked area plot, but filled by genus instead of order.
ggplot(data = df, mapping = aes(new_day, Abundance, fill = genus)) +
  geom_area(position = "stack", stat = "identity") +
  geom_vline(mapping = aes(xintercept = 0), size = 1.2, linetype = "dashed")
but these are still more blank areas than there should be by the sum of abundances for a given time
# Sum of all Abundance values recorded on day -25
with(df, sum(Abundance[which(new_day == -25)]))
[1] 98.03997
Any suggestions on how to fix this?
The problem is that you sometimes have several abundance values for one new_day, even with more detailed hierarchical levels.
This is what creates discontinuities in the area plot. You need to have only one unique value for each new_day. In my example below, I just take the first abundance value after grouping by new_day and order, but it is probably not relevant for what you want to show. (You may want to take the mean or attribute these values to other new_day points in between, whatever you need).
The remaining little gaps are caused by the missing abundance values, since as you said, it does not add up to 100%. This is not a big deal, but you can probably fix it by replacing the missing values by 0.
EDIT : Now doing the sum of abundance values as you mentioned, and removing the small remaining gaps by replacing missing values by 0.
library(tidyverse)
# One abundance value per (new_day, order): several rows can share the same
# day and order, and those duplicates are what break the stacked areas.
data <- df %>%
  group_by(new_day, order) %>%
  summarise(abundance = sum(Abundance)) %>%
  ungroup() %>%
  # Add every (new_day, order) combination that is absent, with an abundance
  # of 0, so each order has a value at every day and the areas stay contiguous.
  complete(new_day, order, fill = list(abundance = 0))

# Stacked area plot of the summed abundances; the dashed line marks day 0.
ggplot(data, aes(x = new_day, y = abundance, fill = order)) +
  geom_area(stat = "identity") +
  geom_vline(aes(xintercept = 0), linetype = "dashed", size = 1.2)
I have a data set with 4 columns, 2 of which are numeric, 1 is categorical and 1 is the label. The label has 13 levels (A to M). I tried to use knncat package in R to do classification, but every time I ran the code, I got the following error message:
Error in `[<-.data.frame`(`*tmp*`, factor.vars, value = c("M", "J", "K", :
replacement has 45500 rows, data has 1
The following is the code I used:
# Read the data and count its rows
data <- read.csv("mosaic_data2.csv", header = TRUE)
num <- nrow(data)
library(sampling)
set.seed(1234)
# Random 70 % / 30 % train/test split on row indices. seq_len() is used
# because seq(1, num, 1) errors when num is 0.
train_index <- sample(seq_len(num), floor(num * 0.7), replace = FALSE)
test_index <- setdiff(seq_len(num), train_index)
train_data <- data[train_index, ]
test_data <- data[test_index, ]
library(knncat)
# classcol is the position of the column holding the class label. In the
# dput() output the label `mosaic_group` is the 3rd column; column 2 is
# `longitude`. NOTE(review): confirm the column order of the full CSV.
model <- knncat(train_data, classcol = 3)
Could anyone please take a look at the code and advise how I could eliminate this bug? Thank you very much!
The output of dput(head(data,100)) is as follows:
structure(list(latitude = c(52.7326028, 52.74287543, 52.82107841,
52.82025363, 52.81980596, 52.81721897, 52.81274172, 52.81274172,
52.8089586, 52.81424219, 52.8089586, 52.74007929, 52.77394023,
52.73659034, 52.73672518, 52.73764626, 52.73753744, 52.73659034,
52.73815233, 52.73679388, 52.73890319, 52.71697237, 52.63730282,
52.62720385, 52.63730282, 52.63543017, 52.63768035, 52.63510366,
52.6346578, 52.6346578, 52.6346578, 52.63447454, 52.63576418,
52.63447454, 52.6346578, 52.63447454, 52.69820719, 52.69603926,
52.68246919, 52.54600173, 52.54210198, 52.60628983, 52.61003275,
52.60278236, 52.60239604, 52.60348688, 52.60239604, 52.60382146,
52.60315644, 52.86047938, 52.86576353, 52.86954228, 52.81039471,
52.82094872, 52.82395073, 52.82444705, 52.88098384, 52.88469208,
52.88469208, 52.84979201, 52.84720159, 52.84831759, 52.82435938,
52.82319493, 52.82168337, 52.8230402, 52.8230402, 52.82513486,
52.82472379, 52.82756385, 52.82475438, 52.82434902, 52.82166611,
52.823712, 52.82401481, 52.82483489, 52.82103704, 52.82060763,
52.8208682, 52.82211317, 52.81868547, 52.8198332, 52.82023595,
52.81989134, 52.8196971, 52.82051066, 52.82463338, 52.82539131,
52.82580625, 52.82509199, 52.83759415, 52.83946254, 52.83946254,
52.83891871, 52.83821538, 52.84757879, 52.84663773, 52.8449371,
52.84592185, 52.84331619), longitude = c(-6.892397941, -6.915346343,
-6.922554014, -6.924997835, -6.926099967, -6.883340697, -6.897757597,
-6.897757597, -6.895500952, -6.883129556, -6.895500952, -6.703781864,
-6.680851783, -6.771845364, -6.773301282, -6.772958488, -6.77484647,
-6.771845364, -6.773422218, -6.772164896, -6.770622695, -6.784187251,
-6.901922588, -6.905109015, -6.901922588, -6.976679508, -6.973114498,
-6.974753462, -6.947990431, -6.947990431, -6.947990431, -6.976921427,
-6.958295227, -6.976921427, -6.947990431, -6.976921427, -6.902010609,
-6.915233457, -6.871160885, -6.832461149, -6.862126342, -6.943925285,
-6.93813643, -6.925128034, -6.932247524, -6.93461305, -6.932247524,
-6.934657053, -6.929283954, -6.845259603, -6.861188287, -6.866476268,
-6.940851164, -6.939203401, -6.930506188, -6.933317462, -6.929441954,
-6.922589037, -6.922589037, -6.926037258, -6.929423169, -6.917829279,
-6.938211918, -6.940658091, -6.940651748, -6.940107883, -6.940107883,
-6.938704642, -6.939084526, -6.933331264, -6.937496468, -6.937678962,
-6.940276221, -6.94018054, -6.939876475, -6.938983181, -6.934235666,
-6.93387209, -6.933134226, -6.934193569, -6.934383596, -6.933832641,
-6.937454656, -6.933818238, -6.93443811, -6.936913947, -6.920030341,
-6.920400963, -6.92215006, -6.910771124, -6.901500591, -6.899018998,
-6.899018998, -6.903007684, -6.90119821, -6.91063672, -6.909935672,
-6.90240965, -6.900066763, -6.901411136), mosaic_group = structure(c(10L,
10L, 8L, 8L, 8L, 7L, 7L, 7L, 7L, 7L, 7L, 10L, 10L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 10L, 10L, 10L, 13L, 13L, 13L, 13L,
9L, 6L, 6L, 6L, 6L, 6L, 10L, 8L, 8L, 9L, 9L, 9L, 9L, 7L, 7L,
7L, 9L, 9L, 9L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 8L, 8L, 8L, 8L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 8L,
6L, 6L, 6L, 6L, 6L, 8L, 8L, 10L, 10L, 10L), .Label = c("A", "B",
"C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M"), class = "factor"),
small_code = c(1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 8L, 8L, 8L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 11L,
11L, 12L, 12L, 13L, 14L, 14L, 14L, 14L, 14L, 15L, 16L, 16L,
17L, 17L, 18L, 18L, 19L, 19L, 19L, 20L, 20L, 20L, 21L, 21L,
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L,
22L, 22L, 22L, 22L, 23L, 23L, 23L, 23L, 23L, 23L, 24L, 24L,
24L, 25L, 26L, 26L, 26L, 26L, 26L, 27L, 27L, 28L, 28L, 28L
)), .Names = c("latitude", "longitude", "mosaic_group", "small_code"
), row.names = c(NA, 100L), class = "data.frame")
The function knncat::knncat accepts the argument classcol which is defined as:
Column with classification in it. Default: 1.
You have a data set with structure:
latitude longitude mosaic_group small_code
1 52.73260 -6.892398 J 1
2 52.74288 -6.915346 J 1
3 52.82108 -6.922554 H 2
4 52.82025 -6.924998 H 2
5 52.81981 -6.926100 H 2
6 52.81722 -6.883341 G 3
Therefore, I assume your argument should be classcol = 3 (or 4); it certainly shouldn't be classcol = 2, because column 2 is longitude, not a classification.
I want the x-axis to run from 1 to 20 and the y-axis from 1 to 6.
My data:
structure(list(HEI.ID = structure(c(12L, 9L, 14L, 19L, 20L, 1L,
7L, 5L, 11L, 3L, 10L, 18L, 2L, 8L, 6L, 15L, 13L, 17L, 4L, 16L
), .Label = c("BF", "CC", "DC", "ER", "IM", "MC", "ME ",
"MM", "MO", "OC", "OM", "OP", "SB", "SD", "SH", "SL", "SN", "TH",
"UN", "WS"), class = "factor"), X2007 = c(18L, 14L, 15L, 20L,
12L, 6L, 17L, 2L, 4L, 11L, 16L, 1L, 9L, 8L, 13L, 4L, 10L, 6L,
3L, 19L), X2008 = c(20L, 9L, 16L, 18L, 8L, 17L, 15L, 6L, 3L,
14L, 19L, 1L, 2L, 12L, 5L, 13L, 11L, 7L, 4L, 10L), X2009 = c(20L,
13L, 17L, 8L, 4L, 9L, 19L, 12L, 2L, 11L, 16L, 1L, 2L, 7L, 6L,
18L, 5L, 15L, 9L, 14L), X2010 = c(20L, 13L, 16L, 13L, 7L, 15L,
19L, 8L, 3L, 9L, 18L, 1L, 5L, 11L, 12L, 6L, 10L, 4L, 2L, 17L),
X2011 = c(20L, 2L, 16L, 14L, 6L, 10L, 17L, 8L, 3L, 15L, 19L,
1L, 4L, 18L, 13L, 11L, 8L, 12L, 4L, 7L), X2012 = c(20L, 12L,
19L, 13L, 8L, 14L, 15L, 10L, 11L, 9L, 17L, 2L, 7L, 18L, 5L,
16L, 3L, 4L, 6L, 1L)), .Names = c("HEI.ID", "X2007", "X2008",
"X2009", "X2010", "X2011", "X2012"), row.names = c(NA, -20L), class = "data.frame")
I use the following commands to draw histograms:
# Lay the plots out on a 3 x 4 grid, then draw one histogram per row of
# HEIrank11 (its first column is dropped before converting to numeric).
par(mfrow = c(3, 4))
for (row_idx in seq_len(20)) {
  print(row_idx)
  row_values <- as.numeric(HEIrank11[row_idx, -1])
  # The x-axis label comes from the first column of STOF for the same row.
  panel_label <- STOF[row_idx, 1]
  hist(row_values, nclass = 12, main = "students/faculty",
       xlab = panel_label, cex.lab = 1, cex.axis = 1, cex.main = 1,
       cex.sub = 1)
}
But after running the commands above, I get different numbers on the x-axis and y-axis.
I don't understand what your plot should look like. It's not clear from your question and the data provided.
I've tried to plot it. Please comment if you think it's the way to go.
Assuming dt is your data.frame:
# Reshape dt with melt(); the plot below relies on the resulting `variable`
# and `value` columns alongside HEI.ID.
library(reshape)
dt <- melt(dt)
library(ggplot2)
# One bar per HEI.ID, with bar height taken from `value` and fill colour
# taken from `variable`.
ggplot(data = dt, mapping = aes(x = HEI.ID, y = value, fill = variable)) +
  geom_bar(stat = "identity")
or
# Same bar plot, split into one row of panels per level of `variable`.
# `dt` is the melted data frame produced by melt(dt) in the previous snippet.
ggplot(data = dt, mapping = aes(x = HEI.ID, y = value, fill = variable)) +
  geom_bar(stat = "identity") +
  facet_grid(variable ~ .)
You could use the xlim and ylim parameters of the hist function and control the
axis ticks using axis():
# Draw the first 12 rows of HEIrank11 as histograms on a 3 x 4 grid, using
# the same axis limits and tick positions in every panel.
par(mfrow = c(3, 4))
for (i in seq_len(12)) {
  print(i)
  # xaxt/yaxt = "n" suppress the default axes so that the axis() calls below
  # decide where the ticks go. `breaks = 12` replaces the S-compatibility
  # argument `nclass = 12`; for a single number the two are equivalent.
  hist(as.numeric(HEIrank11[i, -1]), breaks = 12, main = "students/faculty",
       xlim = c(0, 21), ylim = c(0, 6), xaxt = "n", yaxt = "n")
  axis(1, at = c(0, 10, 20))  # x-axis ticks at 0, 10 and 20
  axis(2, at = 0:6)           # y-axis ticks at every integer from 0 to 6
}
Do you really want your y-axis to go from 1 to 6? This will cut off parts of the bars.
Also, you iterate over all 20 rows for a grid with 12 plots. The code above gives the following plot: