Source code for counting.__main__

from typing import Annotated

import typer

from wasp2.cli import create_version_callback, verbosity_callback

from .run_counting import run_count_variants
from .run_counting_sc import run_count_variants_sc


def _get_counting_deps() -> dict[str, str]:
    """Report the versions of the libraries the counting CLI depends on.

    Returns:
        A mapping with the keys ``"polars"`` and ``"pysam"``, each holding
        the ``__version__`` attribute of the corresponding package.
    """
    # Function-scope imports: they execute when this function is called.
    import polars as _polars
    import pysam as _pysam

    versions: dict[str, str] = {}
    versions["polars"] = _polars.__version__
    versions["pysam"] = _pysam.__version__
    return versions


# Callback attached to the ``--version`` option of ``main``; built by the
# shared wasp2 CLI helper from the counting-specific dependency provider.
_version_callback = create_version_callback(_get_counting_deps)

# Root Typer application; the callback and commands below register on it.
app = typer.Typer(
    help="[bold]WASP2 Counting[/bold] - Count alleles at variant positions in BAM files.",
    epilog="[dim]Example: wasp2-count sample.bam variants.vcf.gz -o counts.tsv[/dim]",
    # Rich markup is what makes the [bold]/[dim] tags above render as styling.
    rich_markup_mode="rich",
    # Turn off Typer's shortened pretty-exception tracebacks.
    pretty_exceptions_short=False,
)


# ``invoke_without_command=True`` makes this callback run even when the
# program is started without a subcommand.
@app.callback(invoke_without_command=True)
def main(
    ctx: typer.Context,
    version: Annotated[
        bool,
        typer.Option(
            "--version",
            "-V",
            help="Show version and dependency information.",
            is_eager=True,
            callback=_version_callback,
        ),
    ] = False,
    verbose: Annotated[
        bool,
        typer.Option(
            "--verbose",
            "-v",
            help="Enable verbose output with detailed progress.",
        ),
    ] = False,
    quiet: Annotated[
        bool,
        typer.Option(
            "--quiet",
            "-q",
            help="Suppress all output except errors.",
        ),
    ] = False,
) -> None:
    """WASP2 allele counting commands."""
    # ``ctx`` and ``version`` are not read here: ``--version`` is served by
    # its eager option callback. Only the two verbosity flags are forwarded
    # to the shared wasp2 helper.
    verbosity_callback(verbose, quiet)


@app.command()
def count_variants(
    bam: Annotated[str, typer.Argument(help="BAM file")],
    variants: Annotated[str, typer.Argument(help="Variant file (VCF, VCF.GZ, BCF, or PGEN)")],
    samples: Annotated[
        list[str] | None,
        typer.Option(
            "--samples",
            "--sample",
            "--samps",
            "-s",
            help=(
                "One or more samples to use in variant file. "
                "Accepts comma delimited string "
                "or file with one sample per line"
            ),
        ),
    ] = None,
    region_file: Annotated[
        str | None,
        typer.Option(
            "--region",
            "--regions",
            "--region_file",
            "--regions_file",
            "-r",
            help=(
                "Only use variants overlapping regions in file. "
                "Accepts BED or MACS2 formatted .(narrow/broad)Peak files. "
            ),
        ),
    ] = None,
    out_file: Annotated[
        str | None,
        typer.Option(
            "--out_file",
            "--outfile",
            "--out",
            "-o",
            help=("Output file for counts. Defaults to counts.tsv"),
        ),
    ] = None,
    temp_loc: Annotated[
        str | None,
        typer.Option(
            "--temp_loc",
            "--temp",
            "-t",
            help=(
                "Directory for keeping intermediary files. "
                "Defaults to removing intermediary files using temp directory"
            ),
        ),
    ] = None,
    use_region_names: Annotated[
        bool,
        typer.Option(
            "--use_region_names",
            help=(
                "Use region names instead of coordinates. "
                "Names are denoted in fourth column of BED. "
                "Ignored if no name column in file. "
                "Defaults to using coordinates."
            ),
        ),
    ] = False,
    gene_feature: Annotated[
        str | None,
        typer.Option(
            "--gene_feature",
            "--feature",
            "--feat",
            help=(
                "Feature type in gtf/gff3 for counting intersecting SNPs. "
                "Defaults to 'exon' for snp counting"
            ),
        ),
    ] = None,
    gene_attribute: Annotated[
        str | None,
        typer.Option(
            "--gene_attribute",
            "--attribute",
            "--attributes",
            "--attrs",
            "--attr",
            help=(
                "Attribute name from gtf/gff3 attribute column to use as ID. \n"
                "Defaults to '<feature>_id' in gtf and 'ID' in gff3"
            ),
        ),
    ] = None,
    gene_parent: Annotated[
        str | None,
        typer.Option(
            "--gene_parent",
            "--parent",
            "--parent_feature",
            "--parent_attribute",
            help=(
                # The first sentence needs its own terminator; without it the
                # help rendered as "...used in countingDefaults to...".
                "Parent attribute in gtf/gff3 for feature used in counting. "
                "Defaults to 'transcript_id' in gtf and 'Parent' in gff3"
            ),
        ),
    ] = None,
    use_rust: Annotated[
        bool,
        typer.Option(
            "--use-rust/--no-rust",
            help=(
                "Use Rust acceleration for BAM counting (requires wasp2_rust extension). "
                "Defaults to True if extension is available."
            ),
        ),
    ] = True,
    vcf_bed: Annotated[
        str | None,
        typer.Option("--vcf-bed", help="Optional precomputed VCF bed file to skip vcf_to_bed."),
    ] = None,
    intersect_bed: Annotated[
        str | None,
        typer.Option(
            "--intersect-bed",
            help="Optional precomputed intersect bed file to skip bedtools intersect.",
        ),
    ] = None,
    include_indels: Annotated[
        bool,
        typer.Option(
            "--include-indels/--no-indels",
            help=(
                "Include indels in addition to SNPs for variant processing. Default is SNPs only."
            ),
        ),
    ] = False,
) -> None:
    """Count alleles at variant positions in a BAM file."""
    # Typer gathers every occurrence of --samples/-s into a list. Forward all
    # of them as a single comma-delimited string (the form the option's help
    # text says is accepted) instead of silently keeping only the first one.
    # A single occurrence is passed through unchanged.
    # NOTE(review): confirm run_count_variants splits `samples` on commas.
    sample_str = ",".join(samples) if samples else None

    # Each CLI value maps one-to-one onto a run_count_variants keyword.
    run_count_variants(
        bam_file=bam,
        variant_file=variants,
        region_file=region_file,
        samples=sample_str,
        use_region_names=use_region_names,
        out_file=out_file,
        temp_loc=temp_loc,
        gene_feature=gene_feature,
        gene_attribute=gene_attribute,
        gene_parent=gene_parent,
        use_rust=use_rust,
        precomputed_vcf_bed=vcf_bed,
        precomputed_intersect=intersect_bed,
        include_indels=include_indels,
    )


@app.command()
def count_variants_sc(
    bam: Annotated[str, typer.Argument(help="BAM file")],
    variants: Annotated[str, typer.Argument(help="Variant file (VCF, VCF.GZ, BCF, or PGEN)")],
    barcodes: Annotated[str, typer.Argument(help="File with one barcode per line. Used as index")],
    samples: Annotated[
        list[str] | None,
        typer.Option(
            "--samples",
            "--sample",
            "--samps",
            "-s",
            help=(
                "One or more samples to use in variant file. "
                "Accepts comma delimited string "
                "or file with one sample per line. "
                "RECOMMENDED TO USE ONE SAMPLE AT A TIME."
            ),
        ),
    ] = None,
    feature_file: Annotated[
        str | None,
        typer.Option(
            "--feature",
            "--features",
            "--feat",
            "-f",
            "--region",
            "--regions",
            "-r",
            help=(
                "Features used in single-cell experiment. "
                "Only use variants overlapping features in file. "
                "Accepts BED or MACS2 formatted .(narrow/broad)Peak files. "
                "TODO: Implement genes gtf/gff format"
            ),
        ),
    ] = None,
    out_file: Annotated[
        str | None,
        typer.Option(
            "--out_file",
            "--outfile",
            "--out",
            "-o",
            help=(
                "Output file to write Anndata allele counts. "
                "H5ad file format. "
                "Defaults to allele_counts.h5ad"
            ),
        ),
    ] = None,
    temp_loc: Annotated[
        str | None,
        typer.Option(
            "--temp_loc",
            "--temp",
            "-t",
            help=(
                "Directory for keeping intermediary files. "
                "Defaults to removing intermediary files using temp directory"
            ),
        ),
    ] = None,
) -> None:
    """Count alleles at variant positions in a single-cell BAM file."""
    # Typer gathers every occurrence of --samples/-s into a list. Forward all
    # of them as a single comma-delimited string (the form the option's help
    # text says is accepted) instead of silently keeping only the first one.
    # A single occurrence is passed through unchanged.
    # NOTE(review): confirm run_count_variants_sc splits `samples` on commas.
    sample_str = ",".join(samples) if samples else None

    # Each CLI value maps one-to-one onto a run_count_variants_sc keyword.
    run_count_variants_sc(
        bam_file=bam,
        variant_file=variants,
        barcode_file=barcodes,
        feature_file=feature_file,
        samples=sample_str,
        out_file=out_file,
        temp_loc=temp_loc,
    )