WORKFLOW
sarscov2_nextstrain
File Path
pipes/WDL/workflows/sarscov2_nextstrain.wdl
WDL Version
1.0
Type
workflow
Imports
Namespace
Path
nextstrain
../tasks/tasks_nextstrain.wdl
utils
../tasks/tasks_utils.wdl
sarscov2_nextstrain_aligned_input
sarscov2_nextstrain_aligned_input.wdl
Workflow: sarscov2_nextstrain
Align assemblies, build trees, and convert to json representation suitable for Nextstrain visualization. See https://nextstrain.org/docs/getting-started/ and https://nextstrain-augur.readthedocs.io/en/stable/
Author: Broad Viral Genomics
Name
Type
Description
Default
ref_fasta
File?
A reference assembly (not included in assembly_fastas) to align assembly_fastas against. Typically from NCBI RefSeq or similar.
-
build_name
String
-
-
builds_yaml
File
-
-
ancestral_traits_to_infer
Array[String]?
-
-
auspice_config
File?
-
-
clades_tsv
File?
-
-
lat_longs_tsv
File?
-
-
lab_highlight_loc
String?
-
-
parameters_yaml
File?
-
-
keep_list
File?
-
-
drop_list
File?
-
-
mask_bed
File?
-
-
exclude_sites
File?
-
-
vcf_reference
File?
-
-
tree_builder_args
String?
-
-
gen_per_year
Int?
-
-
clock_rate
Float?
-
-
clock_std_dev
Float?
-
-
covariance
Boolean?
-
-
precision
Int?
-
-
branch_length_inference
String?
-
-
coalescent
String?
-
-
vcf_reference
File?
-
-
weights
File?
-
-
sampling_bias_correction
Float?
-
-
max_date
Float?
-
-
wide_bandwidth
Float?
-
-
minimal_frequency
Float?
-
-
stiffness
Float?
-
-
inertia
Float?
-
-
vcf_reference
File?
-
-
root_sequence
File?
-
-
output_vcf
File?
-
-
genes
File?
-
-
vcf_reference_output
File?
-
-
vcf_reference
File?
-
-
colors_tsv
File?
-
-
geo_resolutions
Array[String]?
-
-
color_by_metadata
Array[String]?
-
-
description_md
File?
-
-
maintainers
Array[String]?
-
-
title
String?
-
-
75 optional inputs with default values
assembly_fastas
Array[File]+
Set of assembled genomes to align and build trees. These must represent a single chromosome/segment of a genome only. Fastas may be one-sequence-per-individual or a concatenated multi-fasta (unaligned) or a mixture of the two. They may be compressed (gz, bz2, zst, lz4), uncompressed, or a mixture.
["gs://nextstrain-data/files/ncov/open/sequences.fasta.zst"]
sample_metadata_tsvs
Array[File]+
Tab-separated metadata files that contain binning variables and values. Must contain all samples: output will be filtered to the IDs present in these files.
["gs://nextstrain-data/files/ncov/open/metadata.tsv.gz"]
min_unambig_genome
Int
Minimum number of called bases in genome to pass prefilter.
27000
tree_root_seq_id
String
-
"Wuhan-Hu-1/2019"
nextstrain_ncov_repo_commit
String
-
"30435fb9ec8de2f045167fb90adfec12f123e80a"
docker
String
-
"docker.io/nextstrain/base:build-20240318T173028Z"
disk_size
Int
-
50
cpus
Int
-
4
error_on_seq_diff
Boolean
-
false
nextstrain_ncov_repo_commit
String
-
"30435fb9ec8de2f045167fb90adfec12f123e80a"
docker
String
-
"docker.io/nextstrain/base:build-20240318T173028Z"
disk_size
Int
-
750
docker
String
-
"quay.io/broadinstitute/viral-core:2.5.1"
disk_size
Int
-
750
remove_reference
Boolean
-
false
batch_chunk_size
Int
-
2000
threads_per_job
Int
-
2
docker
String
-
"quay.io/broadinstitute/viral-phylo:2.5.1.0"
mem_size
Int
-
32
cpus
Int
-
64
disk_size
Int
-
750
out_suffix
String
-
".txt"
prefer_first
Boolean
-
true
table_map
Array[File]
-
[]
docker
String
-
"quay.io/broadinstitute/viral-core:2.5.1"
disk_size
Int
-
50
machine_mem_gb
Int
-
50
docker
String
-
"docker.io/nextstrain/base:build-20240318T173028Z"
nextstrain_ncov_repo_commit
String
-
"30435fb9ec8de2f045167fb90adfec12f123e80a"
disk_size
Int
-
750
allow_wildcard_bases
Boolean
-
true
docker
String
-
"quay.io/biocontainers/snp-sites:2.5.1--hed695b0_0"
disk_size
Int
-
750
docker
String
-
"docker.io/nextstrain/base:build-20240318T173028Z"
disk_size
Int
-
750
method
String
-
"iqtree"
substitution_model
String
-
"GTR"
cpus
Int
-
64
machine_mem_gb
Int
-
32
docker
String
-
"docker.io/nextstrain/base:build-20240318T173028Z"
disk_size
Int
-
1250
generate_timetree
Boolean
-
true
keep_root
Boolean
-
true
keep_polytomies
Boolean
-
false
date_confidence
Boolean
-
true
date_inference
String?
-
"marginal"
clock_filter_iqd
Int?
-
4
divergence_units
String?
-
"mutations"
docker
String
-
"docker.io/nextstrain/base:build-20240318T173028Z"
disk_size
Int
-
750
machine_mem_gb
Int
-
75
confidence
Boolean
-
true
machine_mem_gb
Int
-
32
docker
String
-
"docker.io/nextstrain/base:build-20240318T173028Z"
disk_size
Int
-
750
method
String
-
"kde"
censored
Boolean
-
false
include_internal_nodes
Boolean
-
false
machine_mem_gb
Int
-
64
docker
String
-
"docker.io/nextstrain/base:build-20240318T173028Z"
disk_size
Int
-
200
inference
String
-
"joint"
keep_ambiguous
Boolean
-
false
infer_ambiguous
Boolean
-
false
keep_overhangs
Boolean
-
false
docker
String
-
"docker.io/nextstrain/base:build-20240318T173028Z"
disk_size
Int
-
300
docker
String
-
"docker.io/nextstrain/base:build-20240318T173028Z"
disk_size
Int
-
300
docker
String
-
"docker.io/nextstrain/base:build-20240318T173028Z"
disk_size
Int
-
300
include_root_sequence
Boolean
-
true
machine_mem_gb
Int
-
64
docker
String
-
"docker.io/nextstrain/base:build-20240318T173028Z"
disk_size
Int
-
300
Outputs
Name
Type
Expression
combined_assemblies
File
zcat.combined
multiple_alignment
File
mafft.aligned_sequences
unmasked_snps
File
snp_sites.snps_vcf
metadata_merged
File
derived_cols.derived_metadata
keep_list
File
fasta_to_ids.ids_txt
subsampled_sequences
File
subsample.subsampled_msa
masked_alignment
File
augur_mask_sites.masked_sequences
sequences_kept
Int
subsample.sequences_out
counts_by_group
Map[String,Int]
subsample.counts_by_group
ml_tree
File
draft_augur_tree.aligned_tree
time_tree
File
refine_augur_tree.tree_refined
node_data_jsons
Array[File]
select_all([refine_augur_tree.branch_lengths, ancestral_traits.node_data_json, ancestral_tree.nt_muts_json, translate_augur_tree.aa_muts_json, assign_clades_to_nodes.node_clade_data_json])
tip_frequencies_json
File
tip_frequencies.node_data_json
root_sequence_json
File
export_auspice_json.root_sequence_json
auspice_input_json
File
export_auspice_json.virus_json
Calls
This workflow calls the following tasks or subworkflows:
No explicit input mappings
Input Mappings (2)
Input
Value
infiles
assembly_fastas
output_name
"all_samples_combined_assembly.fasta.zst"
CALL
TASKS
dedup_seqs
↗
→ nextstrain_deduplicate_sequences
Input Mappings (1)
Input
Value
sequences_fasta
zcat.combined
Input Mappings (2)
Input
Value
sequences_fasta
dedup_seqs.sequences_deduplicated_fasta
min_non_N
min_unambig_genome
CALL
TASKS
mafft
↗
→ mafft_one_chr_chunked
Input Mappings (3)
Input
Value
sequences
filter_sequences_by_length.filtered_fasta
ref_fasta
select_first([ref_fasta, nextstrain_ncov_defaults.reference_fasta])
basename
"all_samples_aligned.fasta"
Input Mappings (4)
Input
Value
input_tsvs
sample_metadata_tsvs
id_col
'strain'
out_basename
"metadata-merged"
machine_mem_gb
30
Input Mappings (1)
Input
Value
metadata_tsv
select_first(flatten([[tsv_join.out_tsv], sample_metadata_tsvs]))
CALL
TASKS
subsample
↗
→ nextstrain_build_subsample
Input Mappings (4)
Input
Value
alignment_msa_fasta
mafft.aligned_sequences
sample_metadata_tsv
derived_cols.derived_metadata
build_name
build_name
builds_yaml
builds_yaml
Input Mappings (1)
Input
Value
sequences_fasta
subsample.subsampled_msa
Input Mappings (1)
Input
Value
msa_fasta
subsample.subsampled_msa
Input Mappings (1)
Input
Value
sequences
subsample.subsampled_msa
Input Mappings (1)
Input
Value
msa_or_vcf
augur_mask_sites.masked_sequences
Input Mappings (4)
Input
Value
raw_tree
draft_augur_tree.aligned_tree
msa_or_vcf
augur_mask_sites.masked_sequences
metadata
derived_cols.derived_metadata
root
tree_root_seq_id
Input Mappings (3)
Input
Value
tree
refine_augur_tree.tree_refined
metadata
derived_cols.derived_metadata
columns
select_first([ancestral_traits_to_infer, []])
Input Mappings (8)
Input
Value
tree
refine_augur_tree.tree_refined
metadata
derived_cols.derived_metadata
min_date
2020.0
pivot_interval
1
pivot_interval_units
"weeks"
narrow_bandwidth
0.05
proportion_wide
0.0
out_basename
"auspice-~{build_name}"
Input Mappings (2)
Input
Value
tree
refine_augur_tree.tree_refined
msa_or_vcf
augur_mask_sites.masked_sequences
Input Mappings (3)
Input
Value
tree
refine_augur_tree.tree_refined
nt_muts
ancestral_tree.nt_muts_json
genbank_gb
nextstrain_ncov_defaults.reference_gb
Input Mappings (5)
Input
Value
tree_nwk
refine_augur_tree.tree_refined
nt_muts_json
ancestral_tree.nt_muts_json
aa_muts_json
translate_augur_tree.aa_muts_json
ref_fasta
select_first([ref_fasta, nextstrain_ncov_defaults.reference_fasta])
clades_tsv
select_first([clades_tsv, nextstrain_ncov_defaults.clades_tsv])
Input Mappings (6)
Input
Value
tree
refine_augur_tree.tree_refined
sample_metadata
derived_cols.derived_metadata
lat_longs_tsv
select_first([lat_longs_tsv, nextstrain_ncov_defaults.lat_longs_tsv])
node_data_jsons
select_all([refine_augur_tree.branch_lengths, ancestral_traits.node_data_json, ancestral_tree.nt_muts_json, translate_augur_tree.aa_muts_json, assign_clades_to_nodes.node_clade_data_json])
auspice_config
select_first([auspice_config, nextstrain_ncov_defaults.auspice_config])
out_basename
"auspice-~{build_name}"
Images
Container images used by tasks in this workflow:
⚙️ Parameterized
Configured via input:
docker
Used by 12 tasks:
nextstrain_ncov_defaults
dedup_seqs
subsample
augur_mask_sites
draft_augur_tree
refine_augur_tree
tip_frequencies
ancestral_tree
translate_augur_tree
assign_clades_to_nodes
export_auspice_json
ancestral_traits
quay.io/broadinstitute/viral-core:2.5.1
Used by 4 tasks:
zcat
filter_sequences_by_length
derived_cols
tsv_join
⚙️ Parameterized
Configured via input:
docker
⚙️ Parameterized
Configured via input:
docker
Zoom In
Zoom Out
Fit
Reset
🖱️ Scroll to zoom • Drag to pan • Double-click to reset • ESC to close
flowchart TD
    %% Call graph of the sarscov2_nextstrain workflow.
    %% Each N* node is one call; an edge A --> B means B consumes an output of A.
    %% Calls made under an alias are labelled "alias (task name)".
    Start([sarscov2_nextstrain])
    N1["nextstrain_ncov_defaults"]
    N2["zcat"]
    N3["dedup_seqs (nextstrain_deduplicate_sequences)"]
    N4["filter_sequences_by_length"]
    N5["mafft (mafft_one_chr_chunked)"]
    %% Conditional call: metadata tables are joined only when more than one is given.
    subgraph C1 ["↔️ if length(sample_metadata_tsvs) > 1"]
        direction TB
        N6["tsv_join"]
    end
    N7["derived_cols"]
    N8["subsample (nextstrain_build_subsample)"]
    N9["fasta_to_ids"]
    N10["snp_sites"]
    N11["augur_mask_sites"]
    N12["draft_augur_tree"]
    N13["refine_augur_tree"]
    %% Conditional call: runs only when a non-empty list of traits is requested.
    subgraph C2 ["↔️ if defined(ancestral_traits_to_infer) && length(select_first([ancestral_traits_to_infer, []])) > 0"]
        direction TB
        N14["ancestral_traits"]
    end
    N15["tip_frequencies"]
    N16["ancestral_tree"]
    N17["translate_augur_tree"]
    N18["assign_clades_to_nodes"]
    N19["export_auspice_json"]

    %% Data dependencies between calls.
    N2 --> N3
    N3 --> N4
    N1 --> N5
    N4 --> N5
    N6 --> N7
    N7 --> N8
    N5 --> N8
    N8 --> N9
    N8 --> N10
    N8 --> N11
    N11 --> N12
    N7 --> N13
    N11 --> N13
    N12 --> N13
    N7 --> N14
    N13 --> N14
    N7 --> N15
    N13 --> N15
    N11 --> N16
    N13 --> N16
    N16 --> N17
    N13 --> N17
    N1 --> N17
    N13 --> N18
    N17 --> N18
    N16 --> N18
    N1 --> N18
    N7 --> N19
    N18 --> N19
    N1 --> N19
    N14 --> N19
    N13 --> N19
    N17 --> N19
    N16 --> N19

    %% Entry points: calls with no upstream call in this graph.
    Start --> N1
    Start --> N2
    Start --> N6

    %% Terminal calls: nothing downstream consumes their outputs.
    N19 --> End([End])
    N15 --> End([End])
    N9 --> End([End])
    N10 --> End([End])

    %% Style classes; no node in this chart is assigned to either of them.
    classDef taskNode fill:#a371f7,stroke:#8b5cf6,stroke-width:2px,color:#fff
    classDef workflowNode fill:#58a6ff,stroke:#1f6feb,stroke-width:2px,color:#fff
version 1.0
#DX_SKIP_WORKFLOW
import "../tasks/tasks_nextstrain.wdl" as nextstrain
import "../tasks/tasks_utils.wdl" as utils
import "sarscov2_nextstrain_aligned_input.wdl"
workflow sarscov2_nextstrain {
    # Pipeline overview, in call order:
    #   1. combine, deduplicate and prefilter the input assemblies, then align them;
    #   2. merge the metadata tables (when more than one) and compute derived columns;
    #   3. subsample the alignment according to builds_yaml / build_name;
    #   4. mask sites, build a draft tree, refine it, and annotate it
    #      (optional ancestral traits, tip frequencies, nt/aa mutations, clades);
    #   5. export the Auspice JSON.
    meta {
        description: "Align assemblies, build trees, and convert to json representation suitable for Nextstrain visualization. See https://nextstrain.org/docs/getting-started/ and https://nextstrain-augur.readthedocs.io/en/stable/"
        author: "Broad Viral Genomics"
        email:  "viral-ngs@broadinstitute.org"
        allowNestedInputs: true
    }

    input {
        Array[File]+   assembly_fastas = ["gs://nextstrain-data/files/ncov/open/sequences.fasta.zst"]
        Array[File]+   sample_metadata_tsvs = ["gs://nextstrain-data/files/ncov/open/metadata.tsv.gz"]
        File?          ref_fasta

        Int            min_unambig_genome = 27000
        String         tree_root_seq_id = "Wuhan-Hu-1/2019"

        String         build_name
        File           builds_yaml

        Array[String]? ancestral_traits_to_infer

        File?          auspice_config
        File?          clades_tsv
        File?          lat_longs_tsv
    }

    parameter_meta {
        assembly_fastas: {
            description: "Set of assembled genomes to align and build trees. These must represent a single chromosome/segment of a genome only. Fastas may be one-sequence-per-individual or a concatenated multi-fasta (unaligned) or a mixture of the two. They may be compressed (gz, bz2, zst, lz4), uncompressed, or a mixture.",
            patterns: ["*.fasta", "*.fa", "*.fasta.gz", "*.fasta.zst"]
        }
        sample_metadata_tsvs: {
            description: "Tab-separated metadata file that contain binning variables and values. Must contain all samples: output will be filtered to the IDs present in this file.",
            patterns: ["*.txt", "*.tsv"]
        }
        ref_fasta: {
            description: "A reference assembly (not included in assembly_fastas) to align assembly_fastas against. Typically from NCBI RefSeq or similar.",
            patterns: ["*.fasta", "*.fa"]
        }
        min_unambig_genome: {
            description: "Minimum number of called bases in genome to pass prefilter."
        }
        tree_root_seq_id: {
            description: "Sequence ID passed to refine_augur_tree as the tree root."
        }
        build_name: {
            description: "Build name passed to nextstrain_build_subsample together with builds_yaml; also used to name the Auspice outputs (auspice-<build_name>)."
        }
        builds_yaml: {
            description: "Builds YAML file passed to nextstrain_build_subsample to drive subsampling."
        }
        ancestral_traits_to_infer: {
            description: "Columns passed to ancestral_traits. The ancestral_traits step is skipped when this is unset or empty."
        }
        auspice_config: {
            description: "Optional Auspice config passed to export_auspice_json. Defaults to the auspice_config produced by nextstrain_ncov_defaults."
        }
        clades_tsv: {
            description: "Optional clades table passed to assign_clades_to_nodes. Defaults to the clades_tsv produced by nextstrain_ncov_defaults."
        }
        lat_longs_tsv: {
            description: "Optional lat/long table passed to export_auspice_json. Defaults to the lat_longs_tsv produced by nextstrain_ncov_defaults."
        }
    }

    # Produces the default reference/config files referenced below
    # (reference_fasta, reference_gb, clades_tsv, lat_longs_tsv, auspice_config).
    call nextstrain.nextstrain_ncov_defaults

    # Reference used for both alignment and clade assignment: the caller's
    # ref_fasta when given, otherwise the default one. Resolved once so the two
    # consumers always agree.
    File reference_fasta = select_first([ref_fasta, nextstrain_ncov_defaults.reference_fasta])

    # Requested trait columns, with "unset" normalized to an empty list.
    Array[String] traits_to_infer = select_first([ancestral_traits_to_infer, []])

    #### mafft_and_snp
    call utils.zcat {
        input:
            infiles     = assembly_fastas,
            output_name = "all_samples_combined_assembly.fasta.zst"
    }
    call nextstrain.nextstrain_deduplicate_sequences as dedup_seqs {
        input:
            sequences_fasta = zcat.combined
    }
    call utils.filter_sequences_by_length {
        input:
            sequences_fasta = dedup_seqs.sequences_deduplicated_fasta,
            min_non_N       = min_unambig_genome
    }
    call nextstrain.mafft_one_chr_chunked as mafft {
        input:
            sequences = filter_sequences_by_length.filtered_fasta,
            ref_fasta = reference_fasta,
            basename  = "all_samples_aligned.fasta"
    }

    #### merge metadata, compute derived cols
    # Join only when there is more than one table to join.
    if (length(sample_metadata_tsvs) > 1) {
        call utils.tsv_join {
            input:
                input_tsvs     = sample_metadata_tsvs,
                id_col         = 'strain',
                out_basename   = "metadata-merged",
                machine_mem_gb = 30
        }
    }
    call nextstrain.derived_cols {
        input:
            # tsv_join.out_tsv is optional here: use it when the join ran,
            # otherwise fall through to the first (only) input table.
            metadata_tsv = select_first(flatten([[tsv_join.out_tsv], sample_metadata_tsvs]))
    }

    #### subsample sequences with nextstrain yaml file
    call nextstrain.nextstrain_build_subsample as subsample {
        input:
            alignment_msa_fasta = mafft.aligned_sequences,
            sample_metadata_tsv = derived_cols.derived_metadata,
            build_name          = build_name,
            builds_yaml         = builds_yaml
    }
    # IDs of the sequences that survived subsampling (exposed as keep_list).
    call utils.fasta_to_ids {
        input:
            sequences_fasta = subsample.subsampled_msa
    }
    # Runs on the subsampled alignment before masking (exposed as unmasked_snps).
    call nextstrain.snp_sites {
        input:
            msa_fasta = subsample.subsampled_msa
    }

    #### augur_from_msa
    call nextstrain.augur_mask_sites {
        input:
            sequences = subsample.subsampled_msa
    }
    call nextstrain.draft_augur_tree {
        input:
            msa_or_vcf = augur_mask_sites.masked_sequences
    }
    call nextstrain.refine_augur_tree {
        input:
            raw_tree   = draft_augur_tree.aligned_tree,
            msa_or_vcf = augur_mask_sites.masked_sequences,
            metadata   = derived_cols.derived_metadata,
            root       = tree_root_seq_id
    }

    # Optional step: only runs when at least one trait column was requested.
    if (length(traits_to_infer) > 0) {
        call nextstrain.ancestral_traits {
            input:
                tree     = refine_augur_tree.tree_refined,
                metadata = derived_cols.derived_metadata,
                columns  = traits_to_infer
        }
    }

    # Output goes to tip_frequencies_json; it is not part of the node-data list.
    call nextstrain.tip_frequencies {
        input:
            tree                 = refine_augur_tree.tree_refined,
            metadata             = derived_cols.derived_metadata,
            min_date             = 2020.0,
            pivot_interval       = 1,
            pivot_interval_units = "weeks",
            narrow_bandwidth     = 0.05,
            proportion_wide      = 0.0,
            out_basename         = "auspice-~{build_name}"
    }
    call nextstrain.ancestral_tree {
        input:
            tree       = refine_augur_tree.tree_refined,
            msa_or_vcf = augur_mask_sites.masked_sequences
    }
    call nextstrain.translate_augur_tree {
        input:
            tree       = refine_augur_tree.tree_refined,
            nt_muts    = ancestral_tree.nt_muts_json,
            genbank_gb = nextstrain_ncov_defaults.reference_gb
    }
    call nextstrain.assign_clades_to_nodes {
        input:
            tree_nwk     = refine_augur_tree.tree_refined,
            nt_muts_json = ancestral_tree.nt_muts_json,
            aa_muts_json = translate_augur_tree.aa_muts_json,
            ref_fasta    = reference_fasta,
            clades_tsv   = select_first([clades_tsv, nextstrain_ncov_defaults.clades_tsv])
    }

    # Node-data JSONs handed to export_auspice_json and also returned as a
    # workflow output. Declared once so the two uses cannot drift apart.
    # ancestral_traits.node_data_json is optional (conditional call above);
    # select_all drops it when that call was skipped.
    Array[File] all_node_data_jsons = select_all([
        refine_augur_tree.branch_lengths,
        ancestral_traits.node_data_json,
        ancestral_tree.nt_muts_json,
        translate_augur_tree.aa_muts_json,
        assign_clades_to_nodes.node_clade_data_json
    ])

    call nextstrain.export_auspice_json {
        input:
            tree            = refine_augur_tree.tree_refined,
            sample_metadata = derived_cols.derived_metadata,
            lat_longs_tsv   = select_first([lat_longs_tsv, nextstrain_ncov_defaults.lat_longs_tsv]),
            node_data_jsons = all_node_data_jsons,
            auspice_config  = select_first([auspice_config, nextstrain_ncov_defaults.auspice_config]),
            out_basename    = "auspice-~{build_name}"
    }

    output {
        # Sequence-level intermediates
        File             combined_assemblies  = zcat.combined
        File             multiple_alignment   = mafft.aligned_sequences
        File             unmasked_snps        = snp_sites.snps_vcf

        # Metadata after joining and derived-column computation
        File             metadata_merged      = derived_cols.derived_metadata

        # Subsampling results
        File             keep_list            = fasta_to_ids.ids_txt
        File             subsampled_sequences = subsample.subsampled_msa
        File             masked_alignment     = augur_mask_sites.masked_sequences
        Int              sequences_kept       = subsample.sequences_out
        Map[String, Int] counts_by_group      = subsample.counts_by_group

        # Trees
        File             ml_tree              = draft_augur_tree.aligned_tree
        File             time_tree            = refine_augur_tree.tree_refined

        # Annotations and Auspice exports
        Array[File]      node_data_jsons      = all_node_data_jsons
        File             tip_frequencies_json = tip_frequencies.node_data_json
        File             root_sequence_json   = export_auspice_json.root_sequence_json
        File             auspice_input_json   = export_auspice_json.virus_json
    }
}
version 1.0
#DX_SKIP_WORKFLOW
import "../tasks/tasks_nextstrain.wdl" as nextstrain
import "../tasks/tasks_utils.wdl" as utils
import "sarscov2_nextstrain_aligned_input.wdl"
workflow sarscov2_nextstrain {
    # Pipeline overview, in call order:
    #   1. combine, deduplicate and prefilter the input assemblies, then align them;
    #   2. merge the metadata tables (when more than one) and compute derived columns;
    #   3. subsample the alignment according to builds_yaml / build_name;
    #   4. mask sites, build a draft tree, refine it, and annotate it
    #      (optional ancestral traits, tip frequencies, nt/aa mutations, clades);
    #   5. export the Auspice JSON.
    meta {
        description: "Align assemblies, build trees, and convert to json representation suitable for Nextstrain visualization. See https://nextstrain.org/docs/getting-started/ and https://nextstrain-augur.readthedocs.io/en/stable/"
        author: "Broad Viral Genomics"
        email:  "viral-ngs@broadinstitute.org"
        allowNestedInputs: true
    }

    input {
        Array[File]+   assembly_fastas = ["gs://nextstrain-data/files/ncov/open/sequences.fasta.zst"]
        Array[File]+   sample_metadata_tsvs = ["gs://nextstrain-data/files/ncov/open/metadata.tsv.gz"]
        File?          ref_fasta

        Int            min_unambig_genome = 27000
        String         tree_root_seq_id = "Wuhan-Hu-1/2019"

        String         build_name
        File           builds_yaml

        Array[String]? ancestral_traits_to_infer

        File?          auspice_config
        File?          clades_tsv
        File?          lat_longs_tsv
    }

    parameter_meta {
        assembly_fastas: {
            description: "Set of assembled genomes to align and build trees. These must represent a single chromosome/segment of a genome only. Fastas may be one-sequence-per-individual or a concatenated multi-fasta (unaligned) or a mixture of the two. They may be compressed (gz, bz2, zst, lz4), uncompressed, or a mixture.",
            patterns: ["*.fasta", "*.fa", "*.fasta.gz", "*.fasta.zst"]
        }
        sample_metadata_tsvs: {
            description: "Tab-separated metadata file that contain binning variables and values. Must contain all samples: output will be filtered to the IDs present in this file.",
            patterns: ["*.txt", "*.tsv"]
        }
        ref_fasta: {
            description: "A reference assembly (not included in assembly_fastas) to align assembly_fastas against. Typically from NCBI RefSeq or similar.",
            patterns: ["*.fasta", "*.fa"]
        }
        min_unambig_genome: {
            description: "Minimum number of called bases in genome to pass prefilter."
        }
        tree_root_seq_id: {
            description: "Sequence ID passed to refine_augur_tree as the tree root."
        }
        build_name: {
            description: "Build name passed to nextstrain_build_subsample together with builds_yaml; also used to name the Auspice outputs (auspice-<build_name>)."
        }
        builds_yaml: {
            description: "Builds YAML file passed to nextstrain_build_subsample to drive subsampling."
        }
        ancestral_traits_to_infer: {
            description: "Columns passed to ancestral_traits. The ancestral_traits step is skipped when this is unset or empty."
        }
        auspice_config: {
            description: "Optional Auspice config passed to export_auspice_json. Defaults to the auspice_config produced by nextstrain_ncov_defaults."
        }
        clades_tsv: {
            description: "Optional clades table passed to assign_clades_to_nodes. Defaults to the clades_tsv produced by nextstrain_ncov_defaults."
        }
        lat_longs_tsv: {
            description: "Optional lat/long table passed to export_auspice_json. Defaults to the lat_longs_tsv produced by nextstrain_ncov_defaults."
        }
    }

    # Produces the default reference/config files referenced below
    # (reference_fasta, reference_gb, clades_tsv, lat_longs_tsv, auspice_config).
    call nextstrain.nextstrain_ncov_defaults

    # Reference used for both alignment and clade assignment: the caller's
    # ref_fasta when given, otherwise the default one. Resolved once so the two
    # consumers always agree.
    File reference_fasta = select_first([ref_fasta, nextstrain_ncov_defaults.reference_fasta])

    # Requested trait columns, with "unset" normalized to an empty list.
    Array[String] traits_to_infer = select_first([ancestral_traits_to_infer, []])

    #### mafft_and_snp
    call utils.zcat {
        input:
            infiles     = assembly_fastas,
            output_name = "all_samples_combined_assembly.fasta.zst"
    }
    call nextstrain.nextstrain_deduplicate_sequences as dedup_seqs {
        input:
            sequences_fasta = zcat.combined
    }
    call utils.filter_sequences_by_length {
        input:
            sequences_fasta = dedup_seqs.sequences_deduplicated_fasta,
            min_non_N       = min_unambig_genome
    }
    call nextstrain.mafft_one_chr_chunked as mafft {
        input:
            sequences = filter_sequences_by_length.filtered_fasta,
            ref_fasta = reference_fasta,
            basename  = "all_samples_aligned.fasta"
    }

    #### merge metadata, compute derived cols
    # Join only when there is more than one table to join.
    if (length(sample_metadata_tsvs) > 1) {
        call utils.tsv_join {
            input:
                input_tsvs     = sample_metadata_tsvs,
                id_col         = 'strain',
                out_basename   = "metadata-merged",
                machine_mem_gb = 30
        }
    }
    call nextstrain.derived_cols {
        input:
            # tsv_join.out_tsv is optional here: use it when the join ran,
            # otherwise fall through to the first (only) input table.
            metadata_tsv = select_first(flatten([[tsv_join.out_tsv], sample_metadata_tsvs]))
    }

    #### subsample sequences with nextstrain yaml file
    call nextstrain.nextstrain_build_subsample as subsample {
        input:
            alignment_msa_fasta = mafft.aligned_sequences,
            sample_metadata_tsv = derived_cols.derived_metadata,
            build_name          = build_name,
            builds_yaml         = builds_yaml
    }
    # IDs of the sequences that survived subsampling (exposed as keep_list).
    call utils.fasta_to_ids {
        input:
            sequences_fasta = subsample.subsampled_msa
    }
    # Runs on the subsampled alignment before masking (exposed as unmasked_snps).
    call nextstrain.snp_sites {
        input:
            msa_fasta = subsample.subsampled_msa
    }

    #### augur_from_msa
    call nextstrain.augur_mask_sites {
        input:
            sequences = subsample.subsampled_msa
    }
    call nextstrain.draft_augur_tree {
        input:
            msa_or_vcf = augur_mask_sites.masked_sequences
    }
    call nextstrain.refine_augur_tree {
        input:
            raw_tree   = draft_augur_tree.aligned_tree,
            msa_or_vcf = augur_mask_sites.masked_sequences,
            metadata   = derived_cols.derived_metadata,
            root       = tree_root_seq_id
    }

    # Optional step: only runs when at least one trait column was requested.
    if (length(traits_to_infer) > 0) {
        call nextstrain.ancestral_traits {
            input:
                tree     = refine_augur_tree.tree_refined,
                metadata = derived_cols.derived_metadata,
                columns  = traits_to_infer
        }
    }

    # Output goes to tip_frequencies_json; it is not part of the node-data list.
    call nextstrain.tip_frequencies {
        input:
            tree                 = refine_augur_tree.tree_refined,
            metadata             = derived_cols.derived_metadata,
            min_date             = 2020.0,
            pivot_interval       = 1,
            pivot_interval_units = "weeks",
            narrow_bandwidth     = 0.05,
            proportion_wide      = 0.0,
            out_basename         = "auspice-~{build_name}"
    }
    call nextstrain.ancestral_tree {
        input:
            tree       = refine_augur_tree.tree_refined,
            msa_or_vcf = augur_mask_sites.masked_sequences
    }
    call nextstrain.translate_augur_tree {
        input:
            tree       = refine_augur_tree.tree_refined,
            nt_muts    = ancestral_tree.nt_muts_json,
            genbank_gb = nextstrain_ncov_defaults.reference_gb
    }
    call nextstrain.assign_clades_to_nodes {
        input:
            tree_nwk     = refine_augur_tree.tree_refined,
            nt_muts_json = ancestral_tree.nt_muts_json,
            aa_muts_json = translate_augur_tree.aa_muts_json,
            ref_fasta    = reference_fasta,
            clades_tsv   = select_first([clades_tsv, nextstrain_ncov_defaults.clades_tsv])
    }

    # Node-data JSONs handed to export_auspice_json and also returned as a
    # workflow output. Declared once so the two uses cannot drift apart.
    # ancestral_traits.node_data_json is optional (conditional call above);
    # select_all drops it when that call was skipped.
    Array[File] all_node_data_jsons = select_all([
        refine_augur_tree.branch_lengths,
        ancestral_traits.node_data_json,
        ancestral_tree.nt_muts_json,
        translate_augur_tree.aa_muts_json,
        assign_clades_to_nodes.node_clade_data_json
    ])

    call nextstrain.export_auspice_json {
        input:
            tree            = refine_augur_tree.tree_refined,
            sample_metadata = derived_cols.derived_metadata,
            lat_longs_tsv   = select_first([lat_longs_tsv, nextstrain_ncov_defaults.lat_longs_tsv]),
            node_data_jsons = all_node_data_jsons,
            auspice_config  = select_first([auspice_config, nextstrain_ncov_defaults.auspice_config]),
            out_basename    = "auspice-~{build_name}"
    }

    output {
        # Sequence-level intermediates
        File             combined_assemblies  = zcat.combined
        File             multiple_alignment   = mafft.aligned_sequences
        File             unmasked_snps        = snp_sites.snps_vcf

        # Metadata after joining and derived-column computation
        File             metadata_merged      = derived_cols.derived_metadata

        # Subsampling results
        File             keep_list            = fasta_to_ids.ids_txt
        File             subsampled_sequences = subsample.subsampled_msa
        File             masked_alignment     = augur_mask_sites.masked_sequences
        Int              sequences_kept       = subsample.sequences_out
        Map[String, Int] counts_by_group      = subsample.counts_by_group

        # Trees
        File             ml_tree              = draft_augur_tree.aligned_tree
        File             time_tree            = refine_augur_tree.tree_refined

        # Annotations and Auspice exports
        Array[File]      node_data_jsons      = all_node_data_jsons
        File             tip_frequencies_json = tip_frequencies.node_data_json
        File             root_sequence_json   = export_auspice_json.root_sequence_json
        File             auspice_input_json   = export_auspice_json.virus_json
    }
}