Task 1: Map Trimmed Reads Using BWA-MEM

Reference genome and indexes from: Zenodo Record

Install BWA

conda create -n bwa -c bioconda bwa
conda activate bwa

Single Sample Alignment

bwa mem GCA_021130815.1_PanTigT.MC.v3_genomic.fna \
BEN_NW10_sub_1_val_1.fq.gz BEN_NW10_sub_2_val_2.fq.gz \
> BEN_NW_10_aligned_reads.sam

Batch Alignment

for file1 in *_sub_1_val_1.fq.gz; do
    file2=${file1/_sub_1_val_1.fq.gz/_sub_2_val_2.fq.gz}
    sample_name=$(basename "$file1" _sub_1_val_1.fq.gz)
    bwa mem GCA_021130815.1_PanTigT.MC.v3_genomic.fna "$file1" "$file2" \
        > "${sample_name}_aligned_reads.sam"
done
conda deactivate

Task 2: Convert SAM to BAM

Install and Activate Samtools

conda create -n samtools -c bioconda samtools
conda activate samtools

Convert and Sort

samtools view -S -b BEN_NW_10_aligned_reads.sam | \
samtools sort -o BEN_NW_10_sorted_reads.bam

Batch SAM to Sorted BAM

for file in *.sam; do 
    samtools view -S -b "$file" | \
    samtools sort -o "${file%.sam}_sorted.bam"
done
conda deactivate

Task 4: Mark Duplicates using GATK4

Install and Activate GATK

conda create -n gatk4 -c bioconda gatk4
conda activate gatk4

Single Sample

gatk MarkDuplicates \
-I BEN_NW_10_sorted_reads.bam \
-O BEN_NW_10_deduplicated.bam \
-M BEN_NW_10_duplication_metrics.txt \
--REMOVE_DUPLICATES true

Parallel MarkDuplicates (Optional)

conda create -n parallel -c bioconda parallel
conda activate parallel

parallel 'source ~/miniconda3/etc/profile.d/conda.sh && \
conda activate bwa && \
gatk MarkDuplicates -I {} -O {.}_deduplicated.bam \
-M {.}_duplication_metrics.txt --REMOVE_DUPLICATES true' ::: *_sorted.bam

Loop-based MarkDuplicates

for file in *_sorted.bam; do
    base=${file%_sorted.bam}
    gatk MarkDuplicates \
        -I "$file" \
        -O "${base}_deduplicated.bam" \
        -M "${base}_duplication_metrics.txt" \
        --REMOVE_DUPLICATES true
done

Task 5: Index Deduplicated BAM Files

samtools index BEN_NW_10_deduplicated.bam

Batch Indexing

samtools index *_deduplicated.bam
# or
parallel 'samtools index {}' ::: *_deduplicated.bam

Generate Alignment Statistics

parallel 'samtools stats {} > {.}_stats.txt' ::: *_deduplicated.bam

Task 6: Generate Sequencing Statistics with Qualimap

Install and Activate Qualimap

conda create -n qualimap -c bioconda qualimap
conda activate qualimap

Run BAMQC for a Single Sample

qualimap bamqc \
-bam BEN_NW12_aligned_reads_sorted_deduplicated.bam \
-outdir qualimap_results \
-outformat HTML

Run for All Samples

qualimap bamqc \
-bam *_aligned_reads_sorted_deduplicated.bam \
-outdir qualimap_results \
-outformat HTML

View Coverage Summary

cd qualimap_results
cat genome_results.txt

Look for the section “Chromosome-wise coverage” or similar.