#
# Copyright (c) 2019 10x Genomics, Inc. All rights reserved.
#
filetype json;
filetype bam;
filetype bam.bai;
filetype tsv;
filetype tsv.gz;
filetype tsv.gz.tbi;
filetype csv;
filetype bed;
filetype bedgraph;
filetype pickle;
filetype h5;
filetype npy.gz;
filetype txt;
filetype gz;
filetype html;
filetype cloupe;
#
# @include "_aligner_stages.mro"
#
# SETUP_CHUNKS chunks up the input fastq data into sets of matched R1, R2, SI, and BC fastq files.
# input_mode specifies how FASTQs were generated. There are two modes:
#
# 1. "BCL_PROCESSOR"
#
# FASTQs produced by the 10X BCL_PROCESSOR pipeline. This mode assumes the FASTQ files obey the internal
# naming conventions and the reads have been interleaved into RA FASTQ files.
#
# 2. "ILMN_BCL2FASTQ"
#
# FASTQs produced directly by Illumina BCL2FASTQ v1.8.4. For this mode, BCL2FASTQ must be configured to emit the
# index2 read, rather than using it for dual-index demultiplexing:
#
# configureBclToFastq.pl --no-eamss --use-bases-mask=Y100,I8,Y14,Y100 --input-dir=<basecalls_dir> \
# --output-dir=<output_dir> --sample-sheet=<sample_sheet.csv>
#
# The sample sheet must be formatted as per the BCL2FASTQ documentation (10 column csv), and must contain a row for
# each sample index used. The sequencer must have been run in dual index mode, with the second index read (used to
# read the 10X barcode) emitted as the R2 output file. The --use-bases-mask argument should be set to the read
# length used.
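#
# As an illustration only (paths and names below are placeholders; a concrete
# call appears at the bottom of this file), a sample_def entry for the
# "ILMN_BCL2FASTQ" mode might look like:
#
#   sample_def = [{
#       "bc_in_read":     1,
#       "bc_length":      16,
#       "gem_group":      null,
#       "lanes":          null,
#       "library":        "LibraryNotSpecified",
#       "read_path":      "/path/to/bcl2fastq/output",
#       "sample_indices": ["any"],
#       "sample_names":   ["sample_name"],
#   }]
#
# The downsample map, when provided, contains either {"subsample_rate": <float>}
# or {"gigabases": <int>}.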
stage SETUP_CHUNKS(
in string sample_id "id of the sample",
in map[] sample_def "list of dictionaries specifying input data",
in string input_mode "configuration of the input fastqs",
in map downsample "map specifies either subsample_rate (float) or gigabases (int)",
out map[] chunks "each map has barcode, barcode_reverse_complement, sample_index, read1, read2, gem_group, and read_group fields",
out string[] read_groups "list of strings representing read groups",
out json downsample_info "info about downsampling result",
src py "stages/processing/setup_chunks",
)
# Trims adapter sequences from reads and massages fastq output into a fixed format (interleaved R1 file, etc.)
stage TRIM_READS(
in map[] chunks,
in string barcode_whitelist,
in int max_read_num,
in map trim_def,
in map adapters,
out map[] chunks,
out json bc_counts,
out json lot_info,
out json read_counts,
src py "stages/processing/trim_reads",
) split (
in map chunk,
) using (
volatile = strict,
)
# Aligns the reads to the input reference, producing chunked bam files
stage ALIGN_READS(
in map[] chunks,
in string aligner,
in string aligner_method,
in string reference_path,
in string read_group_sample,
in int num_threads,
out bam[] output,
src py "stages/processing/align_reads",
) split (
in map chunk,
) using (
# N.B. No index files are generated for the bam
volatile = strict,
)
#
# @include "_aligner.mro"
#
# Takes input fastqs and chunks them, trims them, and aligns the trimmed reads to a reference
pipeline _ALIGNER(
in string sample_id,
in string fastq_mode "configuration of the input fastqs",
in map[] sample_def,
in string reference_path "path to the 10x reference package",
in string barcode_whitelist "name of barcode whitelist file",
in map trim_def,
in map adapters,
in string read_group_sample "sample header for BAM file",
in map downsample,
out bam[] align,
out map[] chunks,
out json bc_counts,
out json lot_info "gel bead lot detected",
out json read_counts "total # of read pairs before and after adapter trimming",
out json downsample_info "info on downsampling",
)
{
call SETUP_CHUNKS(
sample_id = self.sample_id,
input_mode = self.fastq_mode,
sample_def = self.sample_def,
downsample = self.downsample,
) using (
volatile = true,
)
call TRIM_READS(
chunks = SETUP_CHUNKS.chunks,
max_read_num = 5000000,
trim_def = self.trim_def,
adapters = self.adapters,
barcode_whitelist = self.barcode_whitelist,
) using (
volatile = true,
)
call ALIGN_READS(
chunks = TRIM_READS.chunks,
aligner = "bwa",
aligner_method = "MEM",
reference_path = self.reference_path,
read_group_sample = self.read_group_sample,
num_threads = 4,
) using (
volatile = true,
)
return (
align = ALIGN_READS.output,
chunks = TRIM_READS.chunks,
bc_counts = TRIM_READS.bc_counts,
lot_info = TRIM_READS.lot_info,
read_counts = TRIM_READS.read_counts,
downsample_info = SETUP_CHUNKS.downsample_info,
)
}
#
# @include "_sort_and_mark_dups_stages.mro"
#
# Attaches raw and corrected barcode sequences to the aligned reads
stage ATTACH_BCS(
in string barcode_whitelist,
in bam[] align,
in map[] chunks,
in bool paired_end,
in bool exclude_non_bc_reads,
in float bc_confidence_threshold,
in json bc_counts,
out bam[] output,
out int perfect_read_count,
src py "stages/processing/attach_bcs",
) split (
in bam align_chunk,
in map chunk,
) using (
# N.B. No index files are generated for the bam
volatile = strict,
)
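# Sorts the barcode-attached reads by genomic position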
stage SORT_READS_BY_POS(
in bam[] input,
out bam tagsorted_bam,
src py "stages/processing/sort_reads_by_pos",
) split (
in bam chunk_input,
) using (
# N.B. No index files are generated for the bam
volatile = strict,
)
# Marks duplicates in the reads using barcodes and fragment alignments to detect PCR and optical/diffusion duplicates
stage MARK_DUPLICATES(
in bam input,
in string reference_path,
in json raw_barcode_counts,
in string barcode_whitelist,
out bam output,
out bam.bai index,
out csv singlecell_mapping,
out tsv.gz fragments,
out tsv.gz.tbi fragments_index,
src py "stages/processing/mark_duplicates",
) split (
in map lane_map,
in string chunk_start,
in string chunk_end,
in int chunk_num,
) using (
# N.B. BAM/BED index files are explicitly bound where used
volatile = strict,
)
#
# @include "_sort_and_mark_dups.mro"
#
# Attaches barcodes to the aligned reads, marks duplicate reads, and produces a barcode-sorted and position-sorted
# output BAM
pipeline _SORT_AND_MARK_DUPS(
in bam[] align,
in map[] chunks,
in string barcode_whitelist,
in json bc_counts,
in string reference_path,
out bam possorted_bam "bam file sorted by position",
out bam.bai possorted_bam_index "position-sorted bam index",
out tsv.gz fragments,
out tsv.gz.tbi fragments_index,
out csv singlecell_mapping,
out bam[] read_paired_bam,
)
{
call ATTACH_BCS(
align = self.align,
chunks = self.chunks,
paired_end = true,
barcode_whitelist = self.barcode_whitelist,
exclude_non_bc_reads = false,
bc_confidence_threshold = 0.975,
bc_counts = self.bc_counts,
) using (
volatile = true,
)
call SORT_READS_BY_POS(
input = ATTACH_BCS.output,
) using (
volatile = true,
)
call MARK_DUPLICATES(
input = SORT_READS_BY_POS.tagsorted_bam,
reference_path = self.reference_path,
barcode_whitelist = self.barcode_whitelist,
raw_barcode_counts = self.bc_counts,
) using (
volatile = true,
)
return (
possorted_bam = MARK_DUPLICATES.output,
possorted_bam_index = MARK_DUPLICATES.index,
singlecell_mapping = MARK_DUPLICATES.singlecell_mapping,
fragments = MARK_DUPLICATES.fragments,
fragments_index = MARK_DUPLICATES.fragments_index,
read_paired_bam = ATTACH_BCS.output,
)
}
#
# @include "_peak_caller_stages.mro"
#
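# Counts transposition cut sites per position from the fragments file, producing a genome-wide bedgraph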
stage COUNT_CUT_SITES(
in path reference_path,
in tsv.gz fragments,
in tsv.gz.tbi fragments_index,
out bedgraph cut_sites,
out pickle count_dict,
src py "stages/processing/count_cut_sites",
) split (
in string contig,
) using (
# N.B. We explicitly bind the index file
volatile = strict,
)
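# Calls peaks from the genome-wide cut-site signal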
stage DETECT_PEAKS(
in bedgraph cut_sites,
in path reference_path,
in pickle count_dict,
out bed peaks,
out json peak_metrics,
src py "stages/processing/detect_peaks",
) split (
in string contig,
in float[] params,
in float threshold,
) using (
mem_gb = 6,
# N.B. We explicitly bind the index file
volatile = strict,
)
#
# @include "_peak_caller.mro"
#
pipeline _PEAK_CALLER(
in path reference_path,
in tsv.gz fragments,
in tsv.gz.tbi fragments_index,
out bedgraph cut_sites,
out bed peaks,
out json peak_metrics,
)
{
call COUNT_CUT_SITES(
reference_path = self.reference_path,
fragments = self.fragments,
fragments_index = self.fragments_index,
)
call DETECT_PEAKS(
reference_path = self.reference_path,
cut_sites = COUNT_CUT_SITES.cut_sites,
count_dict = COUNT_CUT_SITES.count_dict,
)
return (
cut_sites = COUNT_CUT_SITES.cut_sites,
peaks = DETECT_PEAKS.peaks,
peak_metrics = DETECT_PEAKS.peak_metrics,
)
}
#
# @include "_basic_sc_atac_counter_stages.mro"
#
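# Builds the raw peak x barcode count matrix from fragments overlapping called peaks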
stage GENERATE_PEAK_MATRIX(
in string reference_path,
in tsv.gz fragments,
in bed peaks,
out h5 raw_matrix,
out path raw_matrix_mex,
src py "stages/processing/generate_peak_matrix",
) split (
in file barcodes,
) using (
mem_gb = 4,
# N.B. we don't explicitly need the fragment index
volatile = strict,
)
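# Subsets the raw peak matrix to the called cell barcodes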
stage FILTER_PEAK_MATRIX(
in h5 raw_matrix,
in int num_analysis_bcs,
in int random_seed,
in csv cell_barcodes,
out h5 filtered_matrix,
out path filtered_matrix_mex,
src py "stages/processing/filter_peak_matrix",
) split (
) using (
volatile = strict,
)
#
# @include "_produce_cell_barcodes_stages.mro"
#
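# Flags barcodes whose fragments show low targeting of peaks; also reports fragment-length and genome-coverage distributions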
stage REMOVE_LOW_TARGETING_BARCODES(
in bed peaks,
in tsv.gz fragments,
in tsv.gz.tbi fragments_index,
in string reference_path,
out json barcode_counts,
out json low_targeting_barcodes,
out json low_targeting_summary,
out json fragment_lengths,
out json covered_bases,
src py "stages/processing/cell_calling/remove_low_targeting_barcodes",
) split (
in string contig,
out pickle fragment_counts,
out pickle targeted_counts,
out int peak_coverage,
) using (
mem_gb = 4,
volatile = strict,
)
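# Flags barcodes that appear to arise from GEMs containing more than one gel bead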
stage REMOVE_GEL_BEAD_DOUBLET_BARCODES(
in tsv.gz fragments,
in tsv.gz.tbi fragments_index,
in string reference_path,
in json barcode_counts,
out json gel_bead_doublet_barcodes,
out json gel_bead_doublet_summary,
out csv connect_matrix,
src py "stages/processing/cell_calling/remove_gel_bead_doublet_barcodes",
) split (
in string contig,
in file valid_barcodes,
) using (
mem_gb = 4,
volatile = strict,
)
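# Flags groups of whitelist barcodes that appear to originate from a single gel bead (barcode multiplets)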
stage REMOVE_BARCODE_MULTIPLETS(
in tsv.gz fragments,
in tsv.gz.tbi fragments_index,
in string reference_path,
in string barcode_whitelist,
in json barcode_counts,
out json barcode_multiplets,
out json barcode_multiplets_summary,
src py "stages/processing/cell_calling/remove_barcode_multiplets",
) split (
in string contig,
in string gem_group,
out npy.gz part_a_linkage_matrix,
out npy.gz part_b_linkage_matrix,
) using (
mem_gb = 4,
volatile = strict,
)
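# Merges the per-method exclusion lists into a single excluded-barcodes JSON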
stage MERGE_EXCLUDED_BARCODES(
in json[] barcode_exclusions,
out json excluded_barcodes,
src py "stages/processing/cell_calling/merge_excluded_barcodes",
)
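# Calls cell barcodes from the remaining barcodes based on fragment counts targeting peaks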
stage DETECT_CELL_BARCODES(
in tsv.gz fragments,
in tsv.gz.tbi fragments_index,
in string barcode_whitelist,
in json excluded_barcodes,
in map force_cells,
in string reference_path,
in bed peaks,
out csv cell_barcodes,
out csv singlecell,
out json cell_calling_summary,
src py "stages/processing/cell_calling/detect_cell_barcodes",
) split (
in string contig,
out pickle barcode_counts,
out pickle targeted_counts,
out int fragment_depth,
) using (
mem_gb = 4,
volatile = strict,
)
# TODO: This should be in mro/common for general use
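# Merges multiple summary metric JSONs into one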
stage MERGE_SUMMARY_METRICS(
in json[] summary_jsons,
out json merged_summary,
src py "stages/processing/cell_calling/merge_summary_metrics",
)
#
# @include "_produce_cell_barcodes.mro"
#
pipeline _PRODUCE_CELL_BARCODES(
in bed peaks,
in tsv.gz fragments,
in tsv.gz.tbi fragments_index,
in string reference_path,
in string barcode_whitelist,
in map force_cells,
out csv cell_barcodes,
out csv singlecell,
out json cell_calling_summary,
out json excluded_barcodes,
out json fragment_lengths,
out json covered_bases,
)
{
call REMOVE_LOW_TARGETING_BARCODES(
fragments = self.fragments,
fragments_index = self.fragments_index,
peaks = self.peaks,
reference_path = self.reference_path,
)
call REMOVE_GEL_BEAD_DOUBLET_BARCODES(
fragments = self.fragments,
fragments_index = self.fragments_index,
reference_path = self.reference_path,
barcode_counts = REMOVE_LOW_TARGETING_BARCODES.barcode_counts,
)
call REMOVE_BARCODE_MULTIPLETS(
fragments = self.fragments,
fragments_index = self.fragments_index,
reference_path = self.reference_path,
barcode_whitelist = self.barcode_whitelist,
barcode_counts = REMOVE_LOW_TARGETING_BARCODES.barcode_counts,
)
call MERGE_EXCLUDED_BARCODES(
barcode_exclusions = [
REMOVE_BARCODE_MULTIPLETS.barcode_multiplets,
REMOVE_GEL_BEAD_DOUBLET_BARCODES.gel_bead_doublet_barcodes,
REMOVE_LOW_TARGETING_BARCODES.low_targeting_barcodes,
],
)
call DETECT_CELL_BARCODES(
fragments = self.fragments,
fragments_index = self.fragments_index,
barcode_whitelist = self.barcode_whitelist,
force_cells = self.force_cells,
excluded_barcodes = MERGE_EXCLUDED_BARCODES.excluded_barcodes,
reference_path = self.reference_path,
peaks = self.peaks,
)
call MERGE_SUMMARY_METRICS as MERGE_CELL_METRICS(
summary_jsons = [
REMOVE_LOW_TARGETING_BARCODES.low_targeting_summary,
REMOVE_GEL_BEAD_DOUBLET_BARCODES.gel_bead_doublet_summary,
REMOVE_BARCODE_MULTIPLETS.barcode_multiplets_summary,
DETECT_CELL_BARCODES.cell_calling_summary,
],
)
return (
cell_barcodes = DETECT_CELL_BARCODES.cell_barcodes,
excluded_barcodes = MERGE_EXCLUDED_BARCODES.excluded_barcodes,
singlecell = DETECT_CELL_BARCODES.singlecell,
cell_calling_summary = MERGE_CELL_METRICS.merged_summary,
fragment_lengths = REMOVE_LOW_TARGETING_BARCODES.fragment_lengths,
covered_bases = REMOVE_LOW_TARGETING_BARCODES.covered_bases,
)
}
#
# @include "_basic_sc_atac_counter.mro"
#
pipeline _BASIC_SC_ATAC_COUNTER(
in string sample_id,
in string fastq_mode "configuration of the input fastqs",
in map[] sample_def,
in string reference_path "path to the 10x reference package",
in string barcode_whitelist "name of barcode whitelist file",
in map trim_def,
in map adapters,
in map downsample,
in map force_cells,
out bam possorted_bam "bam file sorted by position",
out bam.bai possorted_bam_index "position-sorted bam index",
out tsv.gz fragments,
out tsv.gz.tbi fragments_index,
out json lot_info "gel bead lot detected",
out json read_counts "total # of read pairs before and after adapter trimming",
out json downsample_info "info on downsampling",
out csv cell_barcodes,
out json excluded_barcodes,
out json cell_calling_summary,
out bed peaks,
out bedgraph cut_sites,
out csv singlecell_mapping,
out csv singlecell_cells,
out json peak_metrics,
out bam[] read_paired_bam,
out h5 raw_peak_bc_matrix,
out path raw_peak_bc_matrix_mex,
out h5 filtered_peak_bc_matrix,
out path filtered_peak_bc_matrix_mex,
)
{
call _ALIGNER(
sample_id = self.sample_id,
fastq_mode = self.fastq_mode,
sample_def = self.sample_def,
read_group_sample = self.sample_id,
trim_def = self.trim_def,
adapters = self.adapters,
reference_path = self.reference_path,
barcode_whitelist = self.barcode_whitelist,
downsample = self.downsample,
)
call _SORT_AND_MARK_DUPS(
align = _ALIGNER.align,
chunks = _ALIGNER.chunks,
reference_path = self.reference_path,
barcode_whitelist = self.barcode_whitelist,
bc_counts = _ALIGNER.bc_counts,
)
call _PEAK_CALLER(
fragments = _SORT_AND_MARK_DUPS.fragments,
fragments_index = _SORT_AND_MARK_DUPS.fragments_index,
reference_path = self.reference_path,
)
call _PRODUCE_CELL_BARCODES(
fragments = _SORT_AND_MARK_DUPS.fragments,
fragments_index = _SORT_AND_MARK_DUPS.fragments_index,
peaks = _PEAK_CALLER.peaks,
force_cells = self.force_cells,
reference_path = self.reference_path,
barcode_whitelist = self.barcode_whitelist,
)
call GENERATE_PEAK_MATRIX(
reference_path = self.reference_path,
fragments = _SORT_AND_MARK_DUPS.fragments,
peaks = _PEAK_CALLER.peaks,
)
call FILTER_PEAK_MATRIX(
num_analysis_bcs = null,
cell_barcodes = _PRODUCE_CELL_BARCODES.cell_barcodes,
raw_matrix = GENERATE_PEAK_MATRIX.raw_matrix,
random_seed = null,
)
return (
possorted_bam = _SORT_AND_MARK_DUPS.possorted_bam,
possorted_bam_index = _SORT_AND_MARK_DUPS.possorted_bam_index,
singlecell_mapping = _SORT_AND_MARK_DUPS.singlecell_mapping,
singlecell_cells = _PRODUCE_CELL_BARCODES.singlecell,
lot_info = _ALIGNER.lot_info,
read_counts = _ALIGNER.read_counts,
downsample_info = _ALIGNER.downsample_info,
cell_barcodes = _PRODUCE_CELL_BARCODES.cell_barcodes,
excluded_barcodes = _PRODUCE_CELL_BARCODES.excluded_barcodes,
cell_calling_summary = _PRODUCE_CELL_BARCODES.cell_calling_summary,
peak_metrics = _PEAK_CALLER.peak_metrics,
cut_sites = _PEAK_CALLER.cut_sites,
peaks = _PEAK_CALLER.peaks,
fragments = _SORT_AND_MARK_DUPS.fragments,
fragments_index = _SORT_AND_MARK_DUPS.fragments_index,
read_paired_bam = _SORT_AND_MARK_DUPS.read_paired_bam,
raw_peak_bc_matrix = GENERATE_PEAK_MATRIX.raw_matrix,
raw_peak_bc_matrix_mex = GENERATE_PEAK_MATRIX.raw_matrix_mex,
filtered_peak_bc_matrix = FILTER_PEAK_MATRIX.filtered_matrix,
filtered_peak_bc_matrix_mex = FILTER_PEAK_MATRIX.filtered_matrix_mex,
)
}
#
# @include "_sc_atac_metric_collector_stages.mro"
#
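# Estimates bulk and per-barcode library complexity from the fragments file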
stage ESTIMATE_LIBRARY_COMPLEXITY(
in json sequencing_summary,
in tsv.gz fragments,
in csv cell_barcodes,
out json bulk_complexity,
out json complexity_summary,
out json singlecell_complexity,
src py "stages/metrics/estimate_library_complexity",
) split (
in file barcodes,
) using (
mem_gb = 6,
volatile = strict,
)
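# Computes sequencing quality metrics from the read-paired BAM chunks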
stage GENERATE_SEQUENCING_METRICS(
in bam[] input,
out txt misc_sm,
out json summary,
src py "stages/metrics/generate_sequencing_metrics",
) split (
in bam chunk_bam,
) using (
volatile = strict,
)
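# Computes per-barcode targeting metrics and cut-site positioning relative to TSS and CTCF sites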
stage GENERATE_SINGLECELL_TARGETING(
in tsv.gz fragments,
in tsv.gz.tbi fragments_index,
in bed peaks,
in string reference_path,
out csv singlecell,
out json summary,
out csv tss_relpos,
out csv ctcf_relpos,
src py "stages/metrics/generate_singlecell_targeting",
) split (
in string contig,
out int read_count,
out pickle target_counts_by_barcode,
out pickle chunk_tss,
out pickle chunk_ctcf,
) using (
mem_gb = 6,
volatile = strict,
)
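# Merges per-barcode mapping, targeting, and cell-calling tables into a single per-barcode csv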
stage MERGE_SINGLECELL_METRICS(
in string reference_path,
in csv singlecell_mapping,
in csv singlecell_targets,
in csv singlecell_cells,
out csv singlecell,
out json summary,
src py "stages/metrics/merge_singlecell_metrics",
) using (
mem_gb = 8,
volatile = strict,
)
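# Computes the fragment insert-size distribution, optionally excluding non-nuclear (e.g. mitochondrial) fragments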
stage REPORT_INSERT_SIZES(
in tsv.gz fragments,
in bool exclude_non_nuclear,
in string reference_path,
out csv insert_sizes,
out json insert_summary,
src py "stages/metrics/report_insert_sizes",
) split (
in file barcode,
out file total,
) using (
volatile = strict,
)
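# Summarizes enrichment of cut sites around TSS and CTCF positions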
stage REPORT_TSS_CTCF(
in csv tss_relpos,
in csv ctcf_relpos,
out json summary_metrics,
src py "stages/metrics/report_tss_ctcf",
) using (
volatile = strict,
)
#
# @include "_sc_atac_metric_collector.mro"
#
pipeline _SC_ATAC_METRIC_COLLECTOR(
in bam[] read_paired_bam,
in tsv.gz fragments,
in tsv.gz.tbi fragments_index,
in bed peaks,
in string reference_path "path to the 10x reference package",
in csv cell_barcodes,
in csv singlecell_mapping,
in csv singlecell_cells,
out json singlecell_results,
out csv singlecell,
out json enrichment_results,
out json basic_summary,
out json insert_summary,
out csv insert_sizes,
out json bulk_complexity,
out json singlecell_complexity,
out json complexity_summary,
out csv tss_relpos,
out csv ctcf_relpos,
)
{
call GENERATE_SINGLECELL_TARGETING(
fragments = self.fragments,
fragments_index = self.fragments_index,
peaks = self.peaks,
reference_path = self.reference_path,
)
call MERGE_SINGLECELL_METRICS(
reference_path = self.reference_path,
singlecell_mapping = self.singlecell_mapping,
singlecell_cells = self.singlecell_cells,
singlecell_targets = GENERATE_SINGLECELL_TARGETING.singlecell,
)
call GENERATE_SEQUENCING_METRICS(
input = self.read_paired_bam,
)
call ESTIMATE_LIBRARY_COMPLEXITY(
sequencing_summary = GENERATE_SEQUENCING_METRICS.summary,
fragments = self.fragments,
cell_barcodes = self.cell_barcodes,
)
call REPORT_INSERT_SIZES(
fragments = self.fragments,
reference_path = self.reference_path,
exclude_non_nuclear = true,
)
call REPORT_TSS_CTCF(
tss_relpos = GENERATE_SINGLECELL_TARGETING.tss_relpos,
ctcf_relpos = GENERATE_SINGLECELL_TARGETING.ctcf_relpos,
)
return (
singlecell = MERGE_SINGLECELL_METRICS.singlecell,
singlecell_results = MERGE_SINGLECELL_METRICS.summary,
enrichment_results = REPORT_TSS_CTCF.summary_metrics,
basic_summary = GENERATE_SEQUENCING_METRICS.summary,
insert_summary = REPORT_INSERT_SIZES.insert_summary,
insert_sizes = REPORT_INSERT_SIZES.insert_sizes,
bulk_complexity = ESTIMATE_LIBRARY_COMPLEXITY.bulk_complexity,
singlecell_complexity = ESTIMATE_LIBRARY_COMPLEXITY.singlecell_complexity,
complexity_summary = ESTIMATE_LIBRARY_COMPLEXITY.complexity_summary,
tss_relpos = GENERATE_SINGLECELL_TARGETING.tss_relpos,
ctcf_relpos = GENERATE_SINGLECELL_TARGETING.ctcf_relpos,
)
}
#
# @include "_peak_annotator_stages.mro"
#
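# Annotates peaks with nearby genes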
stage ANNOTATE_PEAKS(
in bed peaks,
in string reference_path,
out tsv peak_annotation,
src py "stages/analysis/annotate_peaks",
) split (
in int chunk_start,
in int chunk_end,
) using (
mem_gb = 5,
volatile = strict,
)
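# Computes the GC content distribution of peak sequences (used downstream by SCAN_MOTIFS)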
stage COMPUTE_GC_DISTRIBUTION(
in bed peaks,
in string reference_path,
out pickle GCdict,
src py "stages/analysis/compute_gc_dist",
) split (
) using (
volatile = strict,
)
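# Scans peak sequences for transcription factor motif hits above the PWM threshold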
stage SCAN_MOTIFS(
in pickle globalGCdict,
in bed peaks,
in string reference_path,
in float pwm_threshold,
out bed peak_motif_hits,
src py "stages/analysis/scan_motifs",
) split (
in file GCdict,
) using (
volatile = strict,
)
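# Builds the TF motif x barcode matrix from peak motif hits and the filtered peak matrix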
stage GENERATE_TF_MATRIX(
in path reference_path,
in bed peaks,
in bed peak_motif_hits,
in h5 filtered_matrix,
out h5 filtered_tf_bc_matrix,
out path filtered_tf_bc_matrix_mex,
out gz tf_propZ_matrix,
src py "stages/analysis/generate_tf_matrix",
) split (
) using (
volatile = strict,
)
#
# @include "_peak_annotator.mro"
#
pipeline _PEAK_ANNOTATOR(
in string reference_path,
in bed peaks,
in h5 filtered_peak_bc_matrix,
in float pwm_threshold,
out h5 filtered_tf_bc_matrix,
out path filtered_tf_bc_matrix_mex,
out gz tf_propZ_matrix,
out tsv peak_annotation,
)
{
call ANNOTATE_PEAKS(
peaks = self.peaks,
reference_path = self.reference_path,
)
call COMPUTE_GC_DISTRIBUTION(
peaks = self.peaks,
reference_path = self.reference_path,
)
call SCAN_MOTIFS(
globalGCdict = COMPUTE_GC_DISTRIBUTION.GCdict,
peaks = self.peaks,
reference_path = self.reference_path,
pwm_threshold = self.pwm_threshold,
)
call GENERATE_TF_MATRIX(
reference_path = self.reference_path,
peaks = self.peaks,
filtered_matrix = self.filtered_peak_bc_matrix,
peak_motif_hits = SCAN_MOTIFS.peak_motif_hits,
)
return (
filtered_tf_bc_matrix = GENERATE_TF_MATRIX.filtered_tf_bc_matrix,
filtered_tf_bc_matrix_mex = GENERATE_TF_MATRIX.filtered_tf_bc_matrix_mex,
tf_propZ_matrix = GENERATE_TF_MATRIX.tf_propZ_matrix,
peak_annotation = ANNOTATE_PEAKS.peak_annotation,
)
}
#
# @include "_sc_atac_analyzer_stages.mro"
#
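# Validates analysis parameters before the analyzer pipeline runs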
stage ANALYZER_PREFLIGHT(
in bed peaks,
in h5 filtered_peak_bc_matrix,
in string[] factorization,
in int tsne_perplexity,
in int random_seed,
in float tsne_theta,
in int tsne_mom_switch_iter,
in int tsne_stop_lying_iter,
in int tsne_max_dims,
in int tsne_input_pcs,
in int tsne_max_iter,
in int max_clusters,
in int num_components,
in int num_dr_bcs,
in int num_dr_features,
in float neighbor_a,
in float neighbor_b,
in int graphclust_neighbors,
src py "stages/preflight/atac_analyzer",
)
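# Reduces the filtered matrix with the requested factorization method(s) (lsa, plsa, or pca)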
stage REDUCE_DIMENSIONS(
in h5 filtered_matrix,
in string[] factorization,
in int num_dims,
in int num_bcs,
in int num_features,
in int random_seed,
out path reduced_data,
out map reduction_summary,
src py "stages/analysis/reduce_dimensions",
) split (
in string method,
) using (
volatile = strict,
)
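# Clusters cells in the reduced space over a range of cluster counts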
stage CLUSTER_CELLS(
in h5 filtered_matrix,
in path reduced_data,
in map reduction_summary,
in string[] factorization,
in int minclusters,
in int maxclusters,
in int num_dims,
in int random_seed,
out path clustered_data,
out map clustering_summary,
src py "stages/analysis/cluster_cells",
) split (
in int n_clusters,
) using (
volatile = strict,
)
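# Projects cells to 2D via t-SNE from the reduced space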
stage PROJECT_TSNE(
in h5 filtered_matrix,
in path reduced_data,
in map reduction_summary,
in int tsne_perplexity,
in int tsne_max_dims,
in int tsne_input_pcs,
in float tsne_theta,
in int tsne_max_iter,
in int tsne_stop_lying_iter,
in int tsne_mom_switch_iter,
in int random_seed,
in string[] factorization,
out path tsne,
out map tsne_summary,
src py "stages/analysis/project_tsne",
) split (
in string method,
in int tsne_dims,
) using (
volatile = strict,
)
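# Performs graph-based clustering over a nearest-neighbor graph of cells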
stage RUN_GRAPH_CLUSTERING(
in h5 matrix_h5 "Processed matrix",
in string[] factorization,
in path reduced_data,
in map reduction_summary,
in int num_neighbors "Use this many neighbors",
in float neighbor_a "Use larger of (a+b*log10(n_cells)) neighbors or num_neighbors",
in float neighbor_b "Use larger of (a+b*log10(n_cells)) neighbors or num_neighbors",
in int balltree_leaf_size,
in string similarity_type "Type of similarity to use (nn or snn)",
out h5 chunked_neighbors,
out path knn_clusters,
out map graph_clustering_summary,
src py "stages/analysis/run_graph_clustering",
) split (
in string method,
in pickle neighbor_index,
in h5 submatrix,
in int row_start,
in int total_rows,
in int k_nearest,
in h5 use_bcs,
) using (
volatile = strict,
)
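# N.B. Per the neighbor_a/neighbor_b docstrings above, the neighbor count used
# is max(num_neighbors, neighbor_a + neighbor_b * log10(n_cells)) (rounded);
# e.g. with illustrative values neighbor_a = 5 and neighbor_b = 10, a run with
# 10,000 cells would use max(num_neighbors, 45) neighbors.
# Combines the CLUSTER_CELLS and RUN_GRAPH_CLUSTERING results into a single
# clustering output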
stage COMBINE_CLUSTERING(
in h5 filtered_matrix,
in map clustering_summary,
in path clustered_data,
in map graph_clustering_summary,
in path knn_clusters,
out path clustering,
out map clustering_summary,
src py "stages/analysis/combine_clustering",
) using (
volatile = strict,
)
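# Packages reduction, clustering, t-SNE, and enrichment results into the analysis h5 and csv outputs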
stage SUMMARIZE_ANALYSIS(
in tsv peak_annotation,
in h5 filtered_peak_bc_matrix,
in h5 filtered_tf_bc_matrix,
in gz tf_propZ_matrix,
in path reduced_data,
in map reduction_summary,
in path clustering,
in map clustering_summary,
in path tsne,
in map tsne_summary,
in path enrichment_analysis,
in map enrichment_analysis_summary,
out h5 analysis,
out path analysis_csv,
out h5 feature_bc_matrix,
src py "stages/analysis/summarize_analysis",
) split (
) using (
volatile = strict,
)
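# Computes differential accessibility (peak- and TF-level enrichment) between clusters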
stage PERFORM_DIFFERENTIAL_ANALYSIS(
in bed peaks,
in string reference_path,
in h5 filtered_peak_bc_matrix,
in h5 filtered_tf_bc_matrix,
in string[] factorization,
in path clustering,
in map clustering_summary,
out path enrichment_analysis,
out map enrichment_analysis_summary,
src py "stages/analysis/perform_differential_analysis",
) split (
in string method,
in string clustering_key,
in int cluster,
out csv tmp_diffexp,
) using (
volatile = strict,
)
#
# @include "_sc_atac_analyzer.mro"
#
pipeline _SC_ATAC_ANALYZER(
in string reference_path,
in bed peaks,
in h5 filtered_peak_bc_matrix,
in string[] factorization,
in int tsne_perplexity,
in int random_seed,
in float tsne_theta,
in int tsne_mom_switch_iter,
in int tsne_stop_lying_iter,
in int tsne_max_dims,
in int tsne_input_pcs,
in int tsne_max_iter,
in int max_clusters,
in int num_components,
in int num_dr_bcs,
in int num_dr_features,
in float neighbor_a,
in float neighbor_b,
in int graphclust_neighbors,
out h5 analysis,
out path analysis_csv,
out h5 filtered_tf_bc_matrix,
out path filtered_tf_bc_matrix_mex,
out h5 feature_bc_matrix,
out tsv peak_annotation,
)
{
call ANALYZER_PREFLIGHT(
peaks = self.peaks,
filtered_peak_bc_matrix = self.filtered_peak_bc_matrix,
factorization = self.factorization,
tsne_perplexity = self.tsne_perplexity,
random_seed = self.random_seed,
tsne_theta = self.tsne_theta,
tsne_mom_switch_iter = self.tsne_mom_switch_iter,
tsne_stop_lying_iter = self.tsne_stop_lying_iter,
tsne_max_dims = self.tsne_max_dims,
tsne_input_pcs = self.tsne_input_pcs,
tsne_max_iter = self.tsne_max_iter,
max_clusters = self.max_clusters,
num_components = self.num_components,
num_dr_bcs = self.num_dr_bcs,
num_dr_features = self.num_dr_features,
neighbor_a = self.neighbor_a,
neighbor_b = self.neighbor_b,
graphclust_neighbors = self.graphclust_neighbors,
) using (
volatile = true,
)
call _PEAK_ANNOTATOR(
reference_path = self.reference_path,
peaks = self.peaks,
filtered_peak_bc_matrix = self.filtered_peak_bc_matrix,
pwm_threshold = null,
)
call REDUCE_DIMENSIONS(
filtered_matrix = self.filtered_peak_bc_matrix,
factorization = self.factorization,
num_dims = self.num_components,
num_bcs = self.num_dr_bcs,
num_features = self.num_dr_features,
random_seed = self.random_seed,
)
call CLUSTER_CELLS(
filtered_matrix = self.filtered_peak_bc_matrix,
reduced_data = REDUCE_DIMENSIONS.reduced_data,
reduction_summary = REDUCE_DIMENSIONS.reduction_summary,
factorization = self.factorization,
minclusters = 2,
maxclusters = self.max_clusters,
num_dims = null,
random_seed = self.random_seed,
)
call PROJECT_TSNE(
filtered_matrix = self.filtered_peak_bc_matrix,
reduced_data = REDUCE_DIMENSIONS.reduced_data,
reduction_summary = REDUCE_DIMENSIONS.reduction_summary,
tsne_perplexity = self.tsne_perplexity,
tsne_max_dims = self.tsne_max_dims,
tsne_input_pcs = self.tsne_input_pcs,
tsne_theta = self.tsne_theta,
tsne_max_iter = self.tsne_max_iter,
tsne_stop_lying_iter = self.tsne_stop_lying_iter,
tsne_mom_switch_iter = self.tsne_mom_switch_iter,
random_seed = self.random_seed,
factorization = self.factorization,
)
call RUN_GRAPH_CLUSTERING(
matrix_h5 = self.filtered_peak_bc_matrix,
factorization = self.factorization,
reduced_data = REDUCE_DIMENSIONS.reduced_data,
reduction_summary = REDUCE_DIMENSIONS.reduction_summary,
num_neighbors = self.graphclust_neighbors,
neighbor_a = self.neighbor_a,
neighbor_b = self.neighbor_b,
balltree_leaf_size = null,
similarity_type = "nn",
)
call COMBINE_CLUSTERING(
filtered_matrix = self.filtered_peak_bc_matrix,
clustering_summary = CLUSTER_CELLS.clustering_summary,
clustered_data = CLUSTER_CELLS.clustered_data,
graph_clustering_summary = RUN_GRAPH_CLUSTERING.graph_clustering_summary,
knn_clusters = RUN_GRAPH_CLUSTERING.knn_clusters,
)
call PERFORM_DIFFERENTIAL_ANALYSIS(
reference_path = self.reference_path,
peaks = self.peaks,
filtered_peak_bc_matrix = self.filtered_peak_bc_matrix,
filtered_tf_bc_matrix = _PEAK_ANNOTATOR.filtered_tf_bc_matrix,
factorization = self.factorization,
clustering = COMBINE_CLUSTERING.clustering,
clustering_summary = COMBINE_CLUSTERING.clustering_summary,
)
call SUMMARIZE_ANALYSIS(
peak_annotation = _PEAK_ANNOTATOR.peak_annotation,
filtered_peak_bc_matrix = self.filtered_peak_bc_matrix,
filtered_tf_bc_matrix = _PEAK_ANNOTATOR.filtered_tf_bc_matrix,
tf_propZ_matrix = _PEAK_ANNOTATOR.tf_propZ_matrix,
reduced_data = REDUCE_DIMENSIONS.reduced_data,
reduction_summary = REDUCE_DIMENSIONS.reduction_summary,
clustering = COMBINE_CLUSTERING.clustering,
clustering_summary = COMBINE_CLUSTERING.clustering_summary,
tsne = PROJECT_TSNE.tsne,
tsne_summary = PROJECT_TSNE.tsne_summary,
enrichment_analysis = PERFORM_DIFFERENTIAL_ANALYSIS.enrichment_analysis,
enrichment_analysis_summary = PERFORM_DIFFERENTIAL_ANALYSIS.enrichment_analysis_summary,
)
return (
analysis = SUMMARIZE_ANALYSIS.analysis,
analysis_csv = SUMMARIZE_ANALYSIS.analysis_csv,
filtered_tf_bc_matrix = _PEAK_ANNOTATOR.filtered_tf_bc_matrix,
filtered_tf_bc_matrix_mex = _PEAK_ANNOTATOR.filtered_tf_bc_matrix_mex,
feature_bc_matrix = SUMMARIZE_ANALYSIS.feature_bc_matrix,
peak_annotation = _PEAK_ANNOTATOR.peak_annotation,
)
}
#
# @include "_sc_atac_reporter_stages.mro"
#
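# Aggregates stage-level metric JSONs into the final summary json and csv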
stage SUMMARIZE_REPORTS_SINGLECELL(
in string reference_path,
in json complexity_summary,
in json cell_calling_summary,
in json peak_results,
in json basic_results,
in json error_results_summary,
in json insert_summary,
in json singlecell_results,
in json contam_results,
in json downsample_info,
in json enrichment_results,
out json analysis_params,
out json summary,
out csv summary_csv,
src py "stages/reporter/summarize_reports_singlecell",
) using (
mem_gb = 4,
)
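# Renders the HTML web summary from the collected metrics and analysis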
stage CREATE_WEBSUMMARY(
in string reference_path,
in string barcode_whitelist,
in json summary_results,
in json bulk_complexity,
in json singlecell_complexity,
in string sample_id,
in string sample_desc,
in map[] sample_def,
in bool debug,
in csv singlecell,
in csv insert_sizes,
in csv tss_relpos,
in csv ctcf_relpos,
in h5 filtered_peak_bc_matrix,
in h5 analysis,
in json excluded_barcodes,
out html web_summary,
src py "stages/reporter/create_websummary",
) using (
mem_gb = 16,
)
#
# @include "_sc_atac_reporter.mro"
#
pipeline _SC_ATAC_REPORTER(
in string reference_path,
in string barcode_whitelist,
in json bulk_complexity,
in json cell_calling_summary,
in json complexity_summary,
in json basic_summary,
in json peak_summary,
in json singlecell_results,
in json insert_summary,
in json downsample_info,
in json singlecell_complexity,
in csv singlecell,
in csv tss_relpos,
in csv ctcf_relpos,
in string sample_id,
in string sample_desc,
in map[] sample_def,
in csv sc_insert_sizes,
in json enrichment_results,
in h5 filtered_peak_bc_matrix,
in h5 analysis,
in json excluded_barcodes,
#
out json summary,
out html web_summary,
out csv summary_csv,
)
{
call SUMMARIZE_REPORTS_SINGLECELL(
reference_path = self.reference_path,
complexity_summary = self.complexity_summary,
cell_calling_summary = self.cell_calling_summary,
peak_results = self.peak_summary,
basic_results = self.basic_summary,
error_results_summary = null,
insert_summary = self.insert_summary,
singlecell_results = self.singlecell_results,
contam_results = null,
downsample_info = self.downsample_info,
enrichment_results = self.enrichment_results,
)
call CREATE_WEBSUMMARY(
reference_path = self.reference_path,
barcode_whitelist = self.barcode_whitelist,
singlecell = self.singlecell,
tss_relpos = self.tss_relpos,
ctcf_relpos = self.ctcf_relpos,
sample_id = self.sample_id,
sample_desc = self.sample_desc,
sample_def = self.sample_def,
insert_sizes = self.sc_insert_sizes,
summary_results = SUMMARIZE_REPORTS_SINGLECELL.summary,
bulk_complexity = self.bulk_complexity,
singlecell_complexity = self.singlecell_complexity,
analysis = self.analysis,
filtered_peak_bc_matrix = self.filtered_peak_bc_matrix,
excluded_barcodes = self.excluded_barcodes,
debug = false,
)
return (
summary = SUMMARIZE_REPORTS_SINGLECELL.summary,
web_summary = CREATE_WEBSUMMARY.web_summary,
summary_csv = SUMMARIZE_REPORTS_SINGLECELL.summary_csv,
)
}
#
# @include "_atac_cloupe_stages.mro"
#
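# Packages analysis outputs into a .cloupe file for Loupe Cell Browser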
stage CLOUPE_PREPROCESS(
in string pipestance_type,
in string sample_id,
in string sample_desc,
in string reference_path,
in h5 analysis,
in h5 feature_barcode_matrix,
in bed peaks,
in tsv.gz.tbi fragments_index,
in json metrics_json,
in csv aggregation_csv,
in json gem_group_index_json,
in bool no_secondary_analysis,
out cloupe output_for_cloupe,
out json gem_group_index_json,
src py "stages/cloupe/atac_cloupe_preprocess",
) split (
)
#
# @include "_preflight_stages.mro"
#
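# Validates SC_ATAC_COUNTER inputs; check_executables additionally verifies that required external tools are available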
stage ATAC_COUNTER_PREFLIGHT(
in string sample_id,
in string fastq_mode,
in map[] sample_def,
in string reference_path,
in map force_cells,
in string[] factorization,
in map downsample,
in bool check_executables,
in map trim_def,
src py "stages/preflight/atac_counter",
) split (
)
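# Validates inputs to the aggr pipeline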
stage ATAC_AGGR_PREFLIGHT(
in string sample_id,
in string reference_path,
in csv aggr_csv,
in string normalization,
in string[] factorization,
in bool check_executables,
src py "stages/preflight/atac_aggr",
) split (
)
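# Validates inputs to the reanalyzer pipeline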
stage ATAC_REANALYZER_PREFLIGHT(
in string sample_id,
in string reference_path,
in string barcode_whitelist,
in bed peaks,
in csv parameters,
in map force_cells,
in csv cell_barcodes,
in tsv.gz fragments,
in tsv.gz.tbi fragments_index,
in csv aggregation_csv,
in bool check_executables,
src py "stages/preflight/atac_reanalyzer",
) split (
)
#
# @include "sc_atac_counter.mro"
#
pipeline SC_ATAC_COUNTER(
in string fastq_mode,
in string sample_id,
in map[] sample_def,
in map downsample,
in string sample_desc,
in string reference_path,
in map trim_def,
in string barcode_whitelist,
in map adapters,
in string[] factorization,
in map force_cells,
#
out csv singlecell,
out bam possorted_bam,
out bam.bai possorted_bam_index,
out json summary,
out html web_summary,
out bed peaks,
out h5 raw_peak_bc_matrix,
out path raw_peak_bc_matrix_mex,
out path analysis_csv,
out h5 filtered_peak_bc_matrix,
out path filtered_peak_bc_matrix_mex,
out tsv.gz fragments,
out tsv.gz.tbi fragments_index,
out h5 filtered_tf_bc_matrix,
out path filtered_tf_bc_matrix_mex,
out cloupe cloupe,
out csv summary_csv,
out tsv peak_annotation,
)
{
call ATAC_COUNTER_PREFLIGHT as ATAC_COUNTER_PREFLIGHT_LOCAL(
sample_id = self.sample_id,
fastq_mode = self.fastq_mode,
sample_def = self.sample_def,
reference_path = self.reference_path,
force_cells = self.force_cells,
factorization = self.factorization,
downsample = self.downsample,
trim_def = self.trim_def,
check_executables = false,
) using (
local = true,
preflight = true,
)
call ATAC_COUNTER_PREFLIGHT(
sample_id = self.sample_id,
fastq_mode = self.fastq_mode,
sample_def = self.sample_def,
reference_path = self.reference_path,
force_cells = self.force_cells,
factorization = self.factorization,
downsample = self.downsample,
trim_def = self.trim_def,
check_executables = true,
) using (
preflight = true,
)
call _BASIC_SC_ATAC_COUNTER(
sample_id = self.sample_id,
fastq_mode = self.fastq_mode,
sample_def = self.sample_def,
trim_def = self.trim_def,
adapters = self.adapters,
reference_path = self.reference_path,
barcode_whitelist = self.barcode_whitelist,
downsample = self.downsample,
force_cells = self.force_cells,
)
call _SC_ATAC_METRIC_COLLECTOR(
read_paired_bam = _BASIC_SC_ATAC_COUNTER.read_paired_bam,
fragments = _BASIC_SC_ATAC_COUNTER.fragments,
fragments_index = _BASIC_SC_ATAC_COUNTER.fragments_index,
peaks = _BASIC_SC_ATAC_COUNTER.peaks,
reference_path = self.reference_path,
cell_barcodes = _BASIC_SC_ATAC_COUNTER.cell_barcodes,
singlecell_cells = _BASIC_SC_ATAC_COUNTER.singlecell_cells,
singlecell_mapping = _BASIC_SC_ATAC_COUNTER.singlecell_mapping,
)
call _SC_ATAC_ANALYZER(
peaks = _BASIC_SC_ATAC_COUNTER.peaks,
filtered_peak_bc_matrix = _BASIC_SC_ATAC_COUNTER.filtered_peak_bc_matrix,
reference_path = self.reference_path,
factorization = self.factorization,
tsne_perplexity = 30,
tsne_max_dims = null,
tsne_input_pcs = null,
tsne_max_iter = null,
tsne_stop_lying_iter = null,
tsne_mom_switch_iter = null,
tsne_theta = null,
random_seed = null,
max_clusters = 10,
neighbor_a = null,
neighbor_b = null,
graphclust_neighbors = null,
num_components = 15,
num_dr_bcs = null,
num_dr_features = null,
)
call CLOUPE_PREPROCESS(
pipestance_type = "SC_ATAC_COUNTER_CS",
reference_path = self.reference_path,
sample_id = self.sample_id,
sample_desc = self.sample_desc,
analysis = _SC_ATAC_ANALYZER.analysis,
feature_barcode_matrix = _SC_ATAC_ANALYZER.feature_bc_matrix,
metrics_json = _SC_ATAC_METRIC_COLLECTOR.basic_summary,
peaks = _BASIC_SC_ATAC_COUNTER.peaks,
fragments_index = _BASIC_SC_ATAC_COUNTER.fragments_index,
aggregation_csv = null,
gem_group_index_json = null,
no_secondary_analysis = false,
)
call _SC_ATAC_REPORTER(
reference_path = self.reference_path,
barcode_whitelist = self.barcode_whitelist,
bulk_complexity = _SC_ATAC_METRIC_COLLECTOR.bulk_complexity,
singlecell_complexity = _SC_ATAC_METRIC_COLLECTOR.singlecell_complexity,
cell_calling_summary = _BASIC_SC_ATAC_COUNTER.cell_calling_summary,
complexity_summary = _SC_ATAC_METRIC_COLLECTOR.complexity_summary,
basic_summary = _SC_ATAC_METRIC_COLLECTOR.basic_summary,
peak_summary = _BASIC_SC_ATAC_COUNTER.peak_metrics,
singlecell_results = _SC_ATAC_METRIC_COLLECTOR.singlecell_results,
insert_summary = _SC_ATAC_METRIC_COLLECTOR.insert_summary,
downsample_info = _BASIC_SC_ATAC_COUNTER.downsample_info,
singlecell = _SC_ATAC_METRIC_COLLECTOR.singlecell,
tss_relpos = _SC_ATAC_METRIC_COLLECTOR.tss_relpos,
ctcf_relpos = _SC_ATAC_METRIC_COLLECTOR.ctcf_relpos,
sample_id = self.sample_id,
sample_desc = self.sample_desc,
sample_def = self.sample_def,
sc_insert_sizes = _SC_ATAC_METRIC_COLLECTOR.insert_sizes,
enrichment_results = _SC_ATAC_METRIC_COLLECTOR.enrichment_results,
filtered_peak_bc_matrix = _BASIC_SC_ATAC_COUNTER.filtered_peak_bc_matrix,
analysis = _SC_ATAC_ANALYZER.analysis,
excluded_barcodes = _BASIC_SC_ATAC_COUNTER.excluded_barcodes,
)
return (
singlecell = _SC_ATAC_METRIC_COLLECTOR.singlecell,
possorted_bam = _BASIC_SC_ATAC_COUNTER.possorted_bam,
possorted_bam_index = _BASIC_SC_ATAC_COUNTER.possorted_bam_index,
summary = _SC_ATAC_REPORTER.summary,
web_summary = _SC_ATAC_REPORTER.web_summary,
peaks = _BASIC_SC_ATAC_COUNTER.peaks,
raw_peak_bc_matrix = _BASIC_SC_ATAC_COUNTER.raw_peak_bc_matrix,
raw_peak_bc_matrix_mex = _BASIC_SC_ATAC_COUNTER.raw_peak_bc_matrix_mex,
analysis_csv = _SC_ATAC_ANALYZER.analysis_csv,
filtered_peak_bc_matrix = _BASIC_SC_ATAC_COUNTER.filtered_peak_bc_matrix,
filtered_peak_bc_matrix_mex = _BASIC_SC_ATAC_COUNTER.filtered_peak_bc_matrix_mex,
fragments = _BASIC_SC_ATAC_COUNTER.fragments,
fragments_index = _BASIC_SC_ATAC_COUNTER.fragments_index,
filtered_tf_bc_matrix = _SC_ATAC_ANALYZER.filtered_tf_bc_matrix,
filtered_tf_bc_matrix_mex = _SC_ATAC_ANALYZER.filtered_tf_bc_matrix_mex,
cloupe = CLOUPE_PREPROCESS.output_for_cloupe,
summary_csv = _SC_ATAC_REPORTER.summary_csv,
peak_annotation = _SC_ATAC_ANALYZER.peak_annotation,
)
}
#
# @include "sc_atac_counter_cs.mro"
#
# Customer-facing (CS) pipeline
pipeline SC_ATAC_COUNTER_CS(
in string fastq_mode "Input fastq configuration",
in string sample_id,
in map[] sample_def,
in map downsample,
in string sample_desc "Sample description",
in string reference_path "Path to 10X reference package",
in string[] factorization "Dimensionality reduction method (lsa, plsa, or pca)",
in map force_cells "Force cell calling to a fixed number",
#
out csv singlecell "Per-barcode fragment counts & metrics",
out bam possorted_bam "Position sorted BAM file" "possorted_bam.bam",
out bam.bai possorted_bam_index "Position sorted BAM index" "possorted_bam.bam.bai",
out json summary "Summary of all data metrics",
out html web_summary "HTML file summarizing data & analysis",
out bed peaks "Bed file of all called peak locations",
out h5 raw_peak_bc_matrix "Raw peak barcode matrix in hdf5 format",
out path raw_peak_bc_matrix_mex "Raw peak barcode matrix in mex format" "raw_peak_bc_matrix",
out path analysis_csv "Directory of analysis files" "analysis",
out h5 filtered_peak_bc_matrix "Filtered peak barcode matrix in hdf5 format",
out path filtered_peak_bc_matrix_mex "Filtered peak barcode matrix in mex format" "filtered_peak_bc_matrix",
out tsv.gz fragments "Barcoded and aligned fragment file" "fragments.tsv.gz",
out tsv.gz.tbi fragments_index "Fragment file index" "fragments.tsv.gz.tbi",
out h5 filtered_tf_bc_matrix "Filtered tf barcode matrix in hdf5 format",
out path filtered_tf_bc_matrix_mex "Filtered tf barcode matrix in mex format" "filtered_tf_bc_matrix",
out cloupe cloupe "Loupe Cell Browser input file",
out csv summary_csv "csv summarizing important metrics and values" "summary.csv",
out tsv peak_annotation "Annotation of peaks with genes",
)
{
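    # N.B. This CS wrapper pins trimming to the Tn5 mosaic end: "ME" is the
    # mosaic end sequence and "MErc" its reverse complement, trimmed from the
    # 3' ends of both R1 and R2 (see the trim_def and adapters bindings below).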
call SC_ATAC_COUNTER(
fastq_mode = self.fastq_mode,
sample_id = self.sample_id,
sample_def = self.sample_def,
downsample = self.downsample,
sample_desc = self.sample_desc,
reference_path = self.reference_path,
trim_def = {
"R1": {
"3prime": ["MErc"],
},
"R2": {
"3prime": ["MErc"],
},
"discard_untrimmed": false,
},
barcode_whitelist = "737K-cratac-v1",
adapters = {
"ME": "AGATGTGTATAAGAGACAG",
"MErc": "CTGTCTCTTATACACATCT",
},
factorization = self.factorization,
force_cells = self.force_cells,
)
return (
singlecell = SC_ATAC_COUNTER.singlecell,
possorted_bam = SC_ATAC_COUNTER.possorted_bam,
possorted_bam_index = SC_ATAC_COUNTER.possorted_bam_index,
summary = SC_ATAC_COUNTER.summary,
web_summary = SC_ATAC_COUNTER.web_summary,
peaks = SC_ATAC_COUNTER.peaks,
raw_peak_bc_matrix = SC_ATAC_COUNTER.raw_peak_bc_matrix,
raw_peak_bc_matrix_mex = SC_ATAC_COUNTER.raw_peak_bc_matrix_mex,
analysis_csv = SC_ATAC_COUNTER.analysis_csv,
filtered_peak_bc_matrix = SC_ATAC_COUNTER.filtered_peak_bc_matrix,
filtered_peak_bc_matrix_mex = SC_ATAC_COUNTER.filtered_peak_bc_matrix_mex,
fragments = SC_ATAC_COUNTER.fragments,
fragments_index = SC_ATAC_COUNTER.fragments_index,
filtered_tf_bc_matrix = SC_ATAC_COUNTER.filtered_tf_bc_matrix,
filtered_tf_bc_matrix_mex = SC_ATAC_COUNTER.filtered_tf_bc_matrix_mex,
cloupe = SC_ATAC_COUNTER.cloupe,
summary_csv = SC_ATAC_COUNTER.summary_csv,
peak_annotation = SC_ATAC_COUNTER.peak_annotation,
)
}
#
# @include "__PBMC_P.mro"
#
call SC_ATAC_COUNTER_CS(
fastq_mode = "ILMN_BCL2FASTQ",
sample_id = "PBMC_P",
sample_def = [{
"bc_in_read": 1,
"bc_length": 16,
"gem_group": null,
"lanes": null,
"library": "LibraryNotSpecified",
"read_path": "/data/isshamie/dropbox/ATACseq/2020_11_18_Croker/igm-storage2.ucsd.edu/201113_A00953_0185_AHN7TMDSXY",
"sample_indices": ["any"],
"sample_names": ["BC_10xATAC_PMBC_P"],
}],
reference_path = "/data/isshamie/mito_lineage/data/external/GRCh38_MT_blacklist",
downsample = null,
sample_desc = "",
factorization = ["lsa"],
force_cells = null,
)