#!/usr/bin/Rscript

## IMPORT

##############################
# INPUT

#' Transform a molten data.frame into a squared data.frame
#'
#' Intended for lists of pairwise comparisons.
#'
#' @param df Molten data.frame w/ three columns (two w/ labels and one w/ values)
#' @param col1 Column name containing labels (1); its values become the row names
#' @param col2 Column name containing labels (2); its values become the column names
#' @return a data.frame with labels from two label columns as row and column names
dcast_sq <- function(df, col1, col2){
    # make sure both label columns hold the same labels (compared after sorting)
    testit::assert(all( sort(df[,col1]) == sort(df[,col2]) ))
    # reshape given data.frame using the formula "<col1> ~ <col2>"
    df <- reshape2::dcast(df, as.formula(sprintf("%s ~ %s", col1, col2)))
    # use the col1 labels as rownames and remove that column from the table
    rownames(df) <- df[,col1]
    # drop=FALSE: with a single remaining column the result must stay a
    # data.frame (otherwise it collapses to a bare vector and loses its names)
    df <- df[,setdiff(colnames(df), col1), drop=FALSE]
    return(df)
}

# Readers: each function below loads one tab-separated table
#' Read a tab-separated nanostats table and rename its columns
#'
#' Header names are translated through a fixed lookup ("stat" -> "Statistic",
#' "value" -> "Value"); a header that is not a key of the lookup becomes NA.
#'
#' @param fname Path to the tab-separated file (with header line)
#' @return data.frame with the renamed columns
read_nanostats <- function(fname){
    print(sprintf("Reading: %s", fname))
    # read.delim == read.csv with sep="\t"
    df <- read.delim(fname, header=TRUE, check.names=FALSE, stringsAsFactors=FALSE)
    # old header -> display header
    df_cols <- c("stat"="Statistic", "value"="Value")
    colnames(df) <- df_cols[colnames(df)]
    df
}

39
#' Read a tab-separated fastp summary and keep the four reported metrics
#'
#' The first column of the file is used as row names.
#'
#' @param fname Path to the tab-separated file (with header line)
#' @return data.frame with columns total_reads, total_bases, q20_rate, q30_rate
read_fastp <- function(fname){
    print(sprintf("Reading: %s", fname))
    # metrics kept from the table, in this order
    kept_cols <- c("total_reads", "total_bases", "q20_rate", "q30_rate")
    # read.delim == read.csv with sep="\t"
    report <- read.delim(fname, header=TRUE, row.names=1, check.names=FALSE, stringsAsFactors=FALSE)
    report[,kept_cols]
}

53
#' Read a tab-separated mappability table and translate tool keys
#'
#' @param fname Path to the tab-separated file (with header line); must have a "tool" column
#' @return data.frame whose "tool" column holds the ASM_TOOL_NAMES entries for the original keys
read_mappability <- function(fname){
    print(sprintf("Reading: %s", fname))
    # read.delim == read.csv with sep="\t"
    df <- read.delim(fname, header=TRUE, check.names=FALSE, stringsAsFactors=FALSE)
    # every tool key must be known before it is translated
    testit::assert(all(df$tool %in% names(ASM_TOOL_NAMES)))
    df$tool <- ASM_TOOL_NAMES[df$tool]
    df
}

67
#' Read a tab-separated prodigal table and translate tool keys
#'
#' @param fname Path to the tab-separated file (with header line); must have a "tool" column
#' @return data.frame whose "tool" column holds the ASM_TOOL_NAMES entries for the original keys
read_prodigal <- function(fname){
    print(sprintf("Reading: %s", fname))
    # read.delim == read.csv with sep="\t"
    df <- read.delim(fname, header=TRUE, check.names=FALSE, stringsAsFactors=FALSE)
    # abort on tool keys that have no entry in ASM_TOOL_NAMES
    testit::assert(all(df$tool %in% names(ASM_TOOL_NAMES)))
    df$tool <- ASM_TOOL_NAMES[df$tool]
    df
}

81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#' Read prodigal gene counts and add the percentage of partial genes
#'
#' @param fname Path passed on to read_prodigal(); table needs "partial" and "total" columns
#' @return data.frame from read_prodigal() plus column "partial_pct" (100 * partial / total)
read_prodigal_gcounts <- function(fname){
    gene_counts <- read_prodigal(fname)
    gene_counts$partial_pct <- 100 * gene_counts$partial / gene_counts$total
    gene_counts
}

#' Read prodigal gene lengths
#'
#' Thin wrapper: the table is returned exactly as read_prodigal() produces it.
#'
#' @param fname Path passed on to read_prodigal()
#' @return data.frame from read_prodigal()
read_prodigal_glength <- function(fname){
    read_prodigal(fname)
}

#' Read a tab-separated QUAST report (statistics in rows, tools in columns)
#'
#' The first file column is used as row names. Column headers must be keys of
#' ASM_TOOL_NAMES and are replaced by the corresponding entries; the table is
#' then restricted to the rows in QUAST_VARS and the columns in ASM_TOOL_NAMES.
#'
#' @param fname Path to the tab-separated file (with header line)
#' @return data.frame with rows QUAST_VARS and columns ASM_TOOL_NAMES
read_quast <- function(fname){
    print(sprintf("Reading: %s", fname))
    df <- read.csv(
        file=fname,
        sep="\t",
        header=TRUE,
        row.names=1,
        check.names=FALSE,
        stringsAsFactors=FALSE
    )
    # all column headers must be known tool keys before renaming
    testit::assert(all(colnames(df) %in% names(ASM_TOOL_NAMES)))
    colnames(df) <- ASM_TOOL_NAMES[colnames(df)]
    # drop=FALSE: keep a data.frame (with row/column names) even when only a
    # single tool or a single statistic is selected
    df <- df[QUAST_VARS, ASM_TOOL_NAMES, drop=FALSE]
    return(df)
}

#' Read a tab-separated PlasFlow table, translate tool keys and labels
#'
#' Rows whose "label" is not a key of PLASFLOW_NAMES$labels are removed.
#'
#' @param fname Path to the tab-separated file (with header line); needs "tool" and "label" columns
#' @return data.frame with translated "tool" and "label" columns
read_plasflow <- function(fname){
    print(sprintf("Reading: %s", fname))
    # read.delim == read.csv with sep="\t"
    df <- read.delim(fname, header=TRUE, stringsAsFactors=FALSE, check.names=FALSE)
    # tool keys -> tool names (unknown keys abort)
    testit::assert(all(df$tool %in% names(ASM_TOOL_NAMES)))
    df$tool <- ASM_TOOL_NAMES[df$tool]
    # keep only labels that have a translation, then translate them
    has_label_name <- df$label %in% names(PLASFLOW_NAMES$labels)
    df <- df[has_label_name,]
    df$label <- PLASFLOW_NAMES$labels[df$label]
    df
}

124
#' Read a tab-separated RGI table, rename tool columns and translate "col"
#'
#' Column headers that are keys of ASM_TOOL_NAMES are replaced by the
#' corresponding entries; all other headers are kept. Values of the "col"
#' column must be keys of RGI_NAMES$col and are replaced by their entries.
#'
#' @param fname Path to the tab-separated file (with header line)
#' @return data.frame with renamed tool columns and translated "col" column
read_rgi <- function(fname){
    print(sprintf("Reading: %s", fname))
    df <- read.csv(
        file=fname,
        sep="\t",
        header=TRUE,
        stringsAsFactors=FALSE,
        check.names=FALSE
    )
    # vectorised header translation (replaces a scalar ifelse() inside sapply(),
    # which is type-unstable and returns a list for a table without columns)
    cols <- colnames(df)
    is_tool <- cols %in% names(ASM_TOOL_NAMES)
    cols[is_tool] <- as.character(ASM_TOOL_NAMES[cols[is_tool]])
    colnames(df) <- cols
    # every "col" value must have a translation
    testit::assert(all(df$col %in% names(RGI_NAMES$col)))
    df$col <- RGI_NAMES$col[df$col]
    return(df)
}

139
140
141
142
143
144
145
#' Reshape an RGI table to long format and sum the "ARO" counts
#'
#' @param df data.frame with id columns "label", "col", "type"; every other column is melted
#' @return list with "melted" (long table with columns tool/count) and "aggr"
#'         (sum of count per tool and type over rows where col == "ARO"; sum column is "x")
proc_rgi <- function(df){
    long_tbl <- reshape2::melt(
        df,
        id.vars=c("label", "col", "type"),
        variable.name="tool",
        value.name="count"
    )
    # only the "ARO" rows enter the per-tool/per-type totals
    aro_rows <- long_tbl[long_tbl$col == "ARO",]
    aro_totals <- aggregate(
        aro_rows$count,
        by=list(tool=aro_rows$tool, type=aro_rows$type),
        FUN=sum
    )
    list(melted=long_tbl, aggr=aro_totals)
}

146
#' Read a tab-separated barrnap table, translate tool keys and kingdoms
#'
#' @param fname Path to the tab-separated file (with header line); needs "tool" and "kingdom" columns
#' @return data.frame with "tool" translated via ASM_TOOL_NAMES and "kingdom" via BARRNAP_KINGDOM_NAMES
read_barrnap <- function(fname){
    print(sprintf("Reading: %s", fname))
    # read.delim == read.csv with sep="\t"
    df <- read.delim(fname, header=TRUE, stringsAsFactors=FALSE, check.names=FALSE)
    # tool keys -> tool names (unknown keys abort)
    testit::assert(all(df$tool %in% names(ASM_TOOL_NAMES)))
    df$tool <- ASM_TOOL_NAMES[df$tool]
    # kingdom keys -> kingdom names (unknown keys abort)
    testit::assert(all(df$kingdom %in% names(BARRNAP_KINGDOM_NAMES)))
    df$kingdom <- BARRNAP_KINGDOM_NAMES[df$kingdom]
    df
}

162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#' Sum barrnap hit counts per tool and kingdom, split into complete/partial/total
#'
#' A row counts as partial when its "gene" value contains "partial".
#'
#' @param df data.frame with columns "tool", "kingdom", "gene" and "count"
#' @return data.frame with columns tool, kingdom, partial ("complete", "partial"
#'         or "total") and x (summed count)
proc_barrnap <- function(df){
    # sums per tool, kingdom and partial flag (FALSE/TRUE)
    is_partial <- grepl("partial", df$gene)
    per_class <- aggregate(
        df$count,
        by=list(tool=df$tool, kingdom=df$kingdom, partial=is_partial),
        FUN=sum
    )
    # FALSE -> "complete", TRUE -> "partial"
    class_labels <- c("complete", "partial")
    per_class$partial <- class_labels[per_class$partial + 1]
    # complete + partial per tool and kingdom, labelled "total"
    overall <- aggregate(
        per_class$x,
        by=list(
            tool=per_class$tool,
            kingdom=per_class$kingdom,
            partial=rep("total", nrow(per_class))
        ),
        FUN=sum
    )
    rbind(per_class, overall)
}

#' Read a tab-separated CRISPR table and translate tool keys
#'
#' @param fname Path to the tab-separated file (with header line); must have a "tool" column
#' @return data.frame whose "tool" column holds the ASM_TOOL_NAMES entries for the original keys
read_crispr <- function(fname){
    print(sprintf("Reading: %s", fname))
    # read.delim == read.csv with sep="\t"
    df <- read.delim(fname, header=TRUE, stringsAsFactors=FALSE, check.names=FALSE)
    # translation of the "crispr_tool" column is disabled:
    # testit::assert(all(df$crispr_tool %in% names(CRISPR_TOOL_NAMES)))
    # df$crispr_tool <- CRISPR_TOOL_NAMES[df$crispr_tool]
    testit::assert(all(df$tool    %in% names(ASM_TOOL_NAMES)))
    df$tool <- ASM_TOOL_NAMES[df$tool]
    df
}

199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
#' Aggregate a CRISPR table per tool
#'
#' @param df data.frame with columns "tool", "spacers" and "seq_id"
#' @return list with "spacers" (sum of spacers per tool) and "arrays" (number of
#'         rows per tool); both are data.frames with columns tool/x and the tool
#'         as row name
aggr_crispr <- function(df){
    # one aggregate per statistic, with the tool as row name
    per_tool <- function(values, stat){
        out <- aggregate(x=values, by=list(tool=df$tool), FUN=stat)
        rownames(out) <- out$tool
        out
    }
    list(
        spacers=per_tool(df$spacers, sum),
        arrays=per_tool(df$seq_id, length)
    )
}

215
#' Read a tab-separated DIAMOND table and translate tool keys
#'
#' @param fname Path to the tab-separated file (with header line); must have a "tool" column
#' @return data.frame whose "tool" column holds the ASM_TOOL_NAMES entries for the original keys
read_diamondDB <- function(fname){
    print(sprintf("Reading: %s", fname))
    # read.delim == read.csv with sep="\t"
    df <- read.delim(fname, header=TRUE, check.names=FALSE, stringsAsFactors=FALSE)
    # tool keys without an ASM_TOOL_NAMES entry abort the import
    testit::assert(all(df$tool %in% names(ASM_TOOL_NAMES)))
    df$tool <- ASM_TOOL_NAMES[df$tool]
    df
}

229
#' Read a tab-separated unique-genes table, add percentages, translate tool keys
#'
#' Added columns:
#'   uniq_pct              = 100 * uniq / total
#'   highcovuniq_pct_total = 100 * highcovuniq / total
#'   highcovuniq_pct_uniq  = 100 * highcovuniq / uniq
#'
#' @param fname Path to the tab-separated file (with header line); needs columns
#'        "uniq", "highcovuniq", "total", "tool1" and "tool2"
#' @return data.frame with the added percentages and translated tool1/tool2
read_ugenes <- function(fname){
    print(sprintf("Reading: %s", fname))
    # read.delim == read.csv with sep="\t"
    df <- read.delim(fname, header=TRUE, check.names=FALSE, stringsAsFactors=FALSE)
    # percentages
    df$uniq_pct <- 100 * df$uniq / df$total
    df$highcovuniq_pct_total <- 100 * df$highcovuniq / df$total
    df$highcovuniq_pct_uniq <- 100 * df$highcovuniq / df$uniq
    # both tool columns must hold known keys before they are translated
    testit::assert(all(df$tool1 %in% names(ASM_TOOL_NAMES)))
    testit::assert(all(df$tool2 %in% names(ASM_TOOL_NAMES)))
    df$tool1 <- ASM_TOOL_NAMES[df$tool1]
    df$tool2 <- ASM_TOOL_NAMES[df$tool2]
    df
}

248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
#' Read a headerless, tab-separated FastANI many-to-many table as a square ANI table
#'
#' The five file columns are named tool1, tool2, ani, mappings, queries; only
#' the first three are used. Row/column labels are file paths whose parent
#' directory name must be a key of ASM_TOOL_NAMES.
#'
#' @param fname Path to the tab-separated file (no header line)
#' @return data.frame from dcast_sq() with row and column names translated via ASM_TOOL_NAMES
read_fastani_many2many <- function(fname){
    print(sprintf("Reading: %s", fname))
    # path -> name of its parent directory -> tool name
    proc_name <- function(x){
        x <- basename(dirname(x))
        testit::assert(x %in% names(ASM_TOOL_NAMES))
        ASM_TOOL_NAMES[x]
    }
    # read.delim == read.csv with sep="\t"
    pair_tbl <- read.delim(
        fname,
        header=FALSE,
        check.names=FALSE,
        stringsAsFactors=FALSE,
        col.names=c("tool1", "tool2", "ani", "mappings", "queries")
    )
    dm <- dcast_sq(df=pair_tbl[,c("tool1", "tool2", "ani")], col1="tool1", col2="tool2")
    # translate labels on both axes
    colnames(dm) <- sapply(colnames(dm), proc_name)
    rownames(dm) <- sapply(rownames(dm), proc_name)
    dm
}

#' Read a tab-separated MUMmer dnadiff table, add percentages, translate tool keys
#'
#' Added columns: seqs_pct = 100 * seqs_aligned / seqs_total and
#' bases_pct = 100 * bases_aligned / bases_total.
#'
#' @param fname Path to the tab-separated file (with header line)
#' @return data.frame with the added percentages and translated tool1/tool2
read_mummer_dnadiff <- function(fname){
    print(sprintf("Reading: %s", fname))
    # read.delim == read.csv with sep="\t"
    df <- read.delim(fname, header=TRUE, check.names=FALSE, stringsAsFactors=FALSE)
    # aligned fraction in percent
    df$seqs_pct <- 100 * df$seqs_aligned / df$seqs_total
    df$bases_pct <- 100 * df$bases_aligned / df$bases_total
    # both tool columns must hold known keys before they are translated
    testit::assert(all(df$tool1 %in% names(ASM_TOOL_NAMES)))
    testit::assert(all(df$tool2 %in% names(ASM_TOOL_NAMES)))
    df$tool1 <- ASM_TOOL_NAMES[df$tool1]
    df$tool2 <- ASM_TOOL_NAMES[df$tool2]
    df
}

275
#' Read a tab-separated Mash distance matrix of read sets
#'
#' Row/column labels are file paths: the parent directory name must be a key of
#' READ_TYPES and the grandparent directory name a key of META_TYPES. Labels
#' are replaced by "<META_TYPES entry> <READ_TYPES entry>".
#'
#' @param fname Path to the tab-separated matrix (header line, first column = row names)
#' @return data.frame with relabelled rows and columns
read_mash_dist_reads <- function(fname){
    print(sprintf("Reading: %s", fname))
    # path -> "<meta type> <read type>" taken from its two enclosing directories
    proc_name <- function(x){
        rtype <- basename(dirname(x))
        mtype <- basename(dirname(dirname(x)))
        testit::assert(rtype %in% names(READ_TYPES))
        testit::assert(mtype %in% names(META_TYPES))
        sprintf("%s %s", META_TYPES[mtype], READ_TYPES[rtype])
    }
    # read.delim == read.csv with sep="\t"
    dm <- read.delim(fname, header=TRUE, row.names=1, check.names=FALSE)
    colnames(dm) <- sapply(colnames(dm), proc_name)
    rownames(dm) <- sapply(rownames(dm), proc_name)
    dm
}

290
#' Read a tab-separated Mash distance matrix of assemblies
#'
#' Row/column labels are file paths whose parent directory name must be a key
#' of ASM_TOOL_NAMES; labels are replaced by the corresponding entries.
#'
#' @param fname Path to the tab-separated matrix (header line, first column = row names)
#' @return data.frame with relabelled rows and columns
read_mash_dist_asm <- function(fname){
    print(sprintf("Reading: %s", fname))
    # path -> name of its parent directory -> tool name
    proc_name <- function(x){
        x <- basename(dirname(x))
        testit::assert(x %in% names(ASM_TOOL_NAMES))
        ASM_TOOL_NAMES[x]
    }
    # read.delim == read.csv with sep="\t"
    dm <- read.delim(fname, header=TRUE, row.names=1, check.names=FALSE)
    colnames(dm) <- sapply(colnames(dm), proc_name)
    rownames(dm) <- sapply(rownames(dm), proc_name)
    dm
}

##############################
# PLOTS

#' Convert tool columns to ordered factors
#'
#' Factor levels (and their order) are the entries of ASM_TOOL_NAMES; values
#' that are not among them become NA.
#'
#' @param df data.frame
#' @param cols Names of the columns to convert
#' @return df with the given columns replaced by ordered factors
set_tool_order <- function(df, cols){
    df[cols] <- lapply(df[cols], factor, ordered=TRUE, levels=ASM_TOOL_NAMES)
    df
}

313
#' Bar plot of "value" per tool, one panel per "minlength"
#'
#' Bars are labelled with their value (rotated text at half bar height); the
#' y axis is titled "Mapped reads [%]".
#'
#' @param df data.frame with columns "tool", "value" and "minlength"
#' @return ggplot object
plot_mappability <- function(df){
    df <- set_tool_order(df, c("tool"))
    layers <- list(
        geom_col(),
        # value printed inside the bar, at half its height
        geom_text(aes(label=value, y=0.5*value), color="black", size=4, angle=90),
        scale_fill_manual(values=ASM_TOOL_COLORS, guide=NULL),
        facet_wrap(vars(minlength), ncol=2, scales="fixed"),
        labs(x="", y="Mapped reads [%]"),
        mappability_theme
    )
    ggplot(data=df, aes(x=tool, y=value, fill=tool)) + layers
}

#' Prodigal summary plots: gene counts, partial-gene percentage, gene lengths
#'
#' @param df_gc Gene-count data.frame with columns "tool", "total", "partial" and "partial_pct"
#' @param df_gl Gene-length data.frame with columns "tool" and "gene_length"
#' @return list of ggplot objects:
#'         gcounts   - dodged bars of total/partial gene counts per tool
#'         gpct      - bars of partial_pct per tool
#'         glen      - violins of gene_length per tool (quartile lines drawn)
#'         glen_zoom - glen with the y axis limited to 0-2000 via coord_cartesian
plot_prodigal <- function(df_gc, df_gl){
    # long format (tool, variable, value) for the total/partial count bars
    df_gcm <- reshape2::melt(df_gc[,c("tool", "total", "partial")], id.vars=c("tool"))
    df_gc  <- set_tool_order(df_gc,  c("tool"))
    df_gl  <- set_tool_order(df_gl,  c("tool"))
    df_gcm <- set_tool_order(df_gcm, c("tool"))

    pp1 <-
        ggplot(data=df_gcm, aes(x=variable, y=value, fill=tool)) +
        geom_col(position="dodge") +
        scale_fill_manual(values=ASM_TOOL_COLORS) +
        labs(
            x="",
            y="Gene count"
        ) +
        prodigal_theme

    pp2 <-
        ggplot(data=df_gc, aes(x=tool, y=partial_pct, fill=tool)) +
        geom_col() +
        scale_fill_manual(values=ASM_TOOL_COLORS, guide=NULL) +
        labs(
            x="",
            y="Percentage of partial genes"
        ) +
        prodigal_theme

    pp3 <-
        ggplot(data=df_gl, aes(x=tool, y=gene_length, fill=tool)) +
        geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
        scale_fill_manual(values=ASM_TOOL_COLORS, guide=NULL) +
        labs(
            x="",
            y="Gene length [bp]"
        ) +
        # same theme as the other prodigal plots (was diamondDB_theme)
        prodigal_theme

    # zoom without dropping data points
    pp4 <- pp3 + coord_cartesian(ylim=c(0, 2000))

    return(list(
        gcounts=pp1,
        gpct=pp2,
        glen=pp3,
        glen_zoom=pp4
    ))
}

#' Bar plot of summed rRNA gene hits per tool
#'
#' Panels form a grid: rows = "partial", columns = "kingdom".
#'
#' @param df data.frame with columns "tool", "x", "partial" and "kingdom"
#' @return ggplot object
plot_barrnap <- function(df){
    df <- set_tool_order(df, c("tool"))
    layers <- list(
        geom_col(),
        scale_fill_manual(values=ASM_TOOL_COLORS, guide=NULL),
        facet_grid(vars(partial), vars(kingdom), scales="fixed"),
        labs(x="", y="rRNA gene hits"),
        barrnap_theme
    )
    ggplot(data=df, aes(x=tool, y=x, fill=tool)) + layers
}

#' Bar plot of rRNA gene hits per tool, one panel per gene
#'
#' Panels are stacked in a single column with independent y scales.
#'
#' @param df data.frame with columns "tool", "count" and "gene"
#' @param subtitle Plot sub-title
#' @return ggplot object
plot_barrnap_genes <- function(df, subtitle){
    df <- set_tool_order(df, c("tool"))
    plot_labels <- labs(subtitle=subtitle, x="", y="rRNA gene hits")
    ggplot(data=df, aes(x=tool, y=count, fill=tool)) +
        geom_col() +
        scale_fill_manual(values=ASM_TOOL_COLORS, guide=NULL) +
        facet_wrap(vars(gene), ncol=1, scales="free_y") +
        plot_labels +
        barrnap_theme
}

405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
#' Bar plot of total RGI hits per tool, one panel per hit type
#'
#' @param df data.frame with columns "tool", "x" and "type"
#' @return ggplot object
plot_rgi_counts_total <- function(df){
    df <- set_tool_order(df, "tool")
    layers <- list(
        geom_col(),
        scale_fill_manual(values=ASM_TOOL_COLORS, guide=NULL),
        facet_wrap(vars(type), ncol=3),
        labs(x="", y="Total hits"),
        rgi_theme
    )
    ggplot(data=df, aes(x=tool, y=x, fill=tool)) + layers
}

#' Bar plot of RGI hit counts per tool for one hit type and one "col" value
#'
#' One panel per label with independent y scales; the number of panel columns
#' is the rounded square root of the number of distinct labels.
#'
#' @param df Long data.frame with columns "tool", "count", "label", "type" and "col"
#' @param ctype Value of "type" to plot
#' @param col Value of "col" to plot
#' @return ggplot object
plot_rgi_counts <- function(df, ctype, col){
    selected <- df[df$type == ctype & df$col == col,]
    selected <- set_tool_order(selected, c("tool"))
    # roughly square panel layout
    panel_cols <- round(sqrt(length(unique(selected$label))))
    ggplot(data=selected, aes(x=tool, y=count, fill=tool)) +
        geom_col() +
        scale_fill_manual(values=ASM_TOOL_COLORS, guide=NULL) +
        facet_wrap(vars(label), ncol=panel_cols, scales="free_y") +
        labs(x="", y=sprintf("%s (%s hits)", col, ctype)) +
        rgi_theme
}

436
437
438
439
440
441
442
443
444
#' UpSet plot of the RGI labels found per tool
#'
#' For each entry of ASM_TOOL_NAMES, the labels of rows whose column of that
#' name is > 0 form one set.
#'
#' @param df Wide data.frame with columns "label", "type", "col" and one column
#'        per ASM_TOOL_NAMES entry
#' @param ctype Value of "type" to plot
#' @param col Value of "col" to plot
#' @return value of UpSetR::upset()
plot_rgi_overlap <- function(df, ctype, col){
    hits <- df[df$type == ctype & df$col == col,]
    # one label set per tool column
    label_sets <- lapply(ASM_TOOL_NAMES, function(tool_col){
        hits[hits[,tool_col] > 0,"label"]
    })
    names(label_sets) <- ASM_TOOL_NAMES[names(label_sets)]
    UpSetR::upset(
        data=UpSetR::fromList(label_sets),
        # overlap order
        order.by="degree",
        decreasing=FALSE,
        # number of sets to plot
        nsets=length(ASM_TOOL_NAMES),
        # y-label title
        mainbar.y.label=sprintf("Intersection size (%s hits, %s)", ctype, col),
        # text size
        text.scale = c(1.2, 1.2, 1.2, 1.2, 1.2, 1.2)
        # colors (disabled):
        # , set.metadata=list(
        #     data=data.frame(
        #         sets=names(label_sets),
        #         Tool=names(label_sets),
        #         stringsAsFactors=FALSE
        #     ),
        #     plots=list(list(type="matrix_rows", column="Tool", colors=ASM_TOOL_COLORS, alpha=0.7))
        # )
    )
}

463
464
#' Bar plot of CRISPR feature counts per tool, one panel per feature
#'
#' All columns except "tool" are melted into (variable, value); panels are
#' stacked in one column with independent y scales.
#'
#' @param df data.frame with a "tool" column plus one column per feature
#' @return ggplot object
plot_crispr <- function(df){
    long_tbl <- reshape2::melt(df, id.vars="tool")
    long_tbl <- set_tool_order(long_tbl, c("tool"))
    layers <- list(
        geom_col(position="dodge"),
        scale_fill_manual(values=ASM_TOOL_COLORS, guide=NULL),
        facet_wrap(vars(variable), ncol=1, scales="free_y"),
        labs(x="", y="Number of features"),
        crispr_theme
    )
    ggplot(data=long_tbl, aes(x=tool, y=value, fill=tool)) + layers
}

#' Dodged bar plot of PlasFlow values per tool, filled by label
#'
#' @param df data.frame with columns "tool", "value" and "label"
#' @param ylab y-axis title
#' @return ggplot object
plot_plasflow <- function(df, ylab=""){
    df <- set_tool_order(df, c("tool"))
    pp <-
        # plain aes(): the mapped columns are fixed, so the deprecated
        # aes_string() is not needed
        ggplot(data=df, aes(x=tool, y=value, fill=label)) +
        geom_col(position="dodge") +
        scale_fill_manual(values=PLASFLOW_COLORS$labels, guide="legend") +
        labs(
            x="",
            y=ylab
        ) +
        plasflow_theme
    return(pp)
}

#' Bar plot of QUAST statistics per tool, one panel per statistic
#'
#' Row names of the input become the "stat_vars" column before melting; panels
#' are laid out in two columns with independent y scales.
#'
#' @param df data.frame with statistics as rows and tools as columns
#' @return ggplot object
plot_quast <- function(df){
    # statistic names as a regular column, then long format (stat_vars, variable, value)
    with_stat <- cbind(stat_vars=rownames(df), df)
    long_tbl <- reshape2::melt(with_stat, id.vars="stat_vars")
    long_tbl <- set_tool_order(long_tbl, c("variable"))
    ggplot(data=long_tbl, aes(x=variable, y=value)) +
        geom_col(aes(fill=variable)) +
        scale_fill_manual(values=ASM_TOOL_COLORS, guide=NULL) +
        facet_wrap(vars(stat_vars), ncol=2, scales="free_y") +
        labs(x="", y="QUAST statistic") +
        quast_theme
}

512
#' Bar plot of one unique-genes statistic per "tool2", one panel per "tool1"
#'
#' @param df data.frame with columns "tool1", "tool2" and the column named by ycol
#' @param ycol Name of the column mapped to y (string, handed to aes_string())
#' @param ylab y-axis title
#' @param subtitle Plot sub-title
#' @return ggplot object
plot_ugenes_barplots <- function(df, ycol, ylab="", subtitle=""){
    df <- set_tool_order(df, c("tool1", "tool2"))
    # NOTE(review): aes_string() is deprecated in ggplot2; kept because it parses
    # ycol as an expression - confirm callers only pass column names before replacing
    mapping <- aes_string(x="tool2", y=ycol, fill="tool2")
    ggplot(data=df, mapping) +
        geom_col() +
        facet_wrap(vars(tool1), ncol=1) +
        scale_fill_manual(values=ASM_TOOL_COLORS, guide=NULL) +
        labs(subtitle=subtitle, x="Assembly 2", y=ylab) +
        ugenes_theme
}

528
#' Scatter plot of uniq_pct vs. highcovuniq_pct_uniq
#'
#' Point fill and shape encode "tool2"; the legend is placed horizontally at
#' the bottom.
#'
#' @param df data.frame with columns "tool1", "tool2", "uniq_pct" and "highcovuniq_pct_uniq"
#' @param subtitle Plot sub-title
#' @return ggplot object
plot_ugenes_scatterplot <- function(df, subtitle=""){
    df <- set_tool_order(df, c("tool1", "tool2"))
    axis_labels <- labs(
        subtitle=subtitle,
        x="Unique proteins [%, total]",
        y="Unique proteins w/ high mean cov. [% of unique]"
    )
    legend_at_bottom <- theme(
        legend.position="bottom",
        legend.direction="horizontal"
    )
    ggplot(data=df, aes(x=uniq_pct, y=highcovuniq_pct_uniq, fill=tool2, shape=tool2)) +
        geom_point(colour="white", size=6) +
        scale_fill_manual(values=ASM_TOOL_COLORS, name="") +
        scale_shape_manual(values=ASM_TOOL_SHAPES, name="") +
        axis_labels +
        default_theme +
        legend_at_bottom
}

548
#' Density plot of one DIAMOND variable, one panel per tool
#'
#' @param df data.frame with a "tool" column and the column named by col
#' @param col Name of the column mapped to x (string, handed to aes_string());
#'        also passed to DIAMOND_VAR_LABELLER() for the x-axis title
#' @param xlim Optional x range; when given the plot is zoomed with
#'        coord_cartesian() and sub-titled "zoomed in"
#' @return ggplot object
plot_diamondDB_density <- function(df, col, xlim=NULL){
    df <- set_tool_order(df, c("tool"))
    zoomed <- !is.null(xlim)
    # NOTE(review): aes_string() is deprecated in ggplot2; kept because it parses
    # col as an expression - confirm callers only pass column names before replacing
    pp <-
        ggplot(data=df, aes_string(x=col, colour="tool", fill="tool")) +
        geom_density(alpha=0.2) +
        scale_colour_manual(values=ASM_TOOL_COLORS, guide=NULL) +
        scale_fill_manual(values=ASM_TOOL_COLORS, guide=NULL) +
        facet_wrap(vars(tool), nrow=4, scales="fixed") +
        labs(
            subtitle=if(zoomed) "zoomed in" else "",
            x=DIAMOND_VAR_LABELLER(col),
            y="Density"
        ) +
        diamondDB_theme
    if(!zoomed){
        return(pp)
    }
    # zoom without dropping data points
    pp + coord_cartesian(xlim=xlim)
}

#' 2D-binned plot of qcov vs. scov, one panel per tool
#'
#' Uses 25 bins and a viridis fill scale; panels are arranged in a 3 x 3 grid.
#' Axis titles come from DIAMOND_VAR_LABELLER().
#'
#' @param df data.frame with columns "tool", "qcov" and "scov"
#' @return ggplot object
plot_diamondDB_density2d <- function(df){
    df <- set_tool_order(df, c("tool"))
    layers <- list(
        geom_bin2d(bins=25),
        scale_fill_continuous(type="viridis"),
        facet_wrap(vars(tool), ncol=3, nrow=3),
        labs(
            x=DIAMOND_VAR_LABELLER("qcov"),
            y=DIAMOND_VAR_LABELLER("scov")
        ),
        diamondDB_theme
    )
    ggplot(data=df, aes(x=qcov, y=scov)) + layers
}

#' Contig coverage and segmentation plot
#'
#' Draws the per-base coverage of one contig (grey line) and, on top, the
#' median coverage of each state (blue line). The y axis is log10-scaled and
#' x-axis breaks are placed every 5000 bases.
#'
#' @param df_cov Coverage data.frame with columns "contig", "base", "cov" and "state"
#' @param cid Contig ID
#' @return ggplot2 object
plot_contig_cov_seg <- function(df_cov, cid){
    # coverage of the requested contig only
    df_cov <- df_cov[df_cov$contig==cid,]
    # median coverage of each state, stored on every row of that state
    df_cov$state_median <- NA
    for(state_id in unique(df_cov$state)){
        in_state <- df_cov$state == state_id
        df_cov$state_median[in_state] <- median(df_cov[in_state, "cov"])
    }
    # axis scales
    x_scale <- scale_x_continuous(breaks=seq(0, max(df_cov$base), by=5000))
    y_scale <- scale_y_log10(
        breaks=trans_breaks("log10", function(x) 10^x),
        labels=trans_format("log10", math_format(10^.x))
    )
    # plot
    ggplot(data=df_cov, aes(x=base, y=cov)) +
        geom_line(colour="#666666") +
        geom_line(aes(x=base, y=state_median), colour="#0066CC", size=2) +
        x_scale +
        y_scale +
        labs(title=cid, x="base", y="coverage") +
        theme_bw() +
        theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=1))
}

#' Coverage and segmentation scatter plot
#'
#' Contig length vs. mean coverage on log10 axes. Contigs with fewer than two
#' states are drawn as small grey crosses; contigs with more than one state as
#' filled circles sized by "states_median_sd" and coloured by number of states.
#'
#' @param df_states Coverage segmentation summary data.frame with columns
#'        "length", "mean", "states" and "states_median_sd"
#' @param title Title
#' @param subtitle Sub-title
#' @return ggplot object
plot_contig_cov_seg_scatterplot <- function(df_states, title="", subtitle=""){
    multi_state  <- df_states[df_states$states > 1,]
    single_state <- df_states[df_states$states < 2,]
    # powers of ten as breaks, shown as 10^x
    log10_breaks <- trans_breaks("log10", function(x) 10^x)
    log10_labels <- trans_format("log10", math_format(10^.x))
    ggplot(data=multi_state, aes(x=length, y=mean)) +
        geom_point(data=single_state, aes(x=length, y=mean), shape=4, color="#CCCCCC", size=0.5) +
        geom_point(aes(size=states_median_sd, fill=as.factor(states), color=as.factor(states)), shape=21, alpha=0.75) +
        scale_x_log10(breaks=log10_breaks, labels=log10_labels) +
        scale_y_log10(breaks=log10_breaks, labels=log10_labels) +
        labs(
            title=title,       # e.g. "Contig coverage segmentation"
            subtitle=subtitle, # e.g. "Flye + Racon(metaG SR)"
            fill="Number of states",
            color="Number of states",
            size="State median SD",
            x="Contig length [bp]",
            y="Mean contig coverage"
        ) +
        theme_bw()
}

# THEMES
# Base theme shared by all plots: theme_minimal (theme_bw was used before) with
# 12pt text, a bold 14pt title, no legend title, no grid and white facet strips.
default_theme <- local({
    # 12pt text element; extra arguments are passed on to element_text()
    text12 <- function(...) element_text(size=12, ...)
    theme_minimal(base_size=12) +
    theme(
        plot.title=element_text(size=14, face="bold"),
        plot.subtitle=text12(face="italic"),
        # legend
        legend.title=element_blank(),
        legend.text=text12(),
        # grid
        panel.grid=element_blank(),
        # strip
        strip.background=element_rect(fill="white"),
        strip.text=text12(),
        # axes
        axis.title=text12(color="black"),
        axis.text.y=text12(color="black"),
        axis.text.x=text12(color="black")
    )
})

668
# Theme add-on: x-axis tick labels rotated by 90 degrees (12pt, black)
default_theme_axis_text_x <- theme(
    axis.text.x=element_text(size=12, color="black", angle=90, vjust=0.5, hjust=1)
)

673
# Per-plot themes: each one is the base theme plus rotated x-axis labels.
# They are kept as separate names so a single plot type can be restyled on its own.
barrnap_theme     <- default_theme + default_theme_axis_text_x
crispr_theme      <- default_theme + default_theme_axis_text_x
diamondDB_theme   <- default_theme + default_theme_axis_text_x
mappability_theme <- default_theme + default_theme_axis_text_x
plasflow_theme    <- default_theme + default_theme_axis_text_x
prodigal_theme    <- default_theme + default_theme_axis_text_x
quast_theme       <- default_theme + default_theme_axis_text_x
rgi_theme         <- default_theme + default_theme_axis_text_x
ugenes_theme      <- default_theme + default_theme_axis_text_x