Task 1: Map Trimmed Reads Using BWA MEM

Install BWA

conda create -n bwa -c bioconda bwa
conda activate bwa

Map Reads

bwa mem GCF_018350215.1_P.leo_Ple1_pat1.1_genomic.fna Benin_1_subset_val_1.fastq.gz Benin_2_subset_val_2.fastq.gz > Benin_aligned_reads.sam

Loop for All Samples

for file1 in *subset_val_1.fastq.gz; do
  file2=${file1/subset_val_1.fastq.gz/subset_val_2.fastq.gz}
  sample_name=$(basename "$file1" _1_subset_val_1.fastq.gz)
  bwa mem GCF_018350215.1_P.leo_Ple1_pat1.1_genomic.fna "$file1" "$file2" > "${sample_name}_aligned_reads.sam"
done

Deactivate BWA Environment

conda deactivate

Task 2: Convert SAM to BAM and Sort

Install and Activate Samtools

conda create -n samtools -c bioconda samtools
conda activate samtools

Convert and Sort

samtools sort SRR10764405.sam -o SRR10764405_sorted.bam

Batch Conversion

for file in *.sam; do 
  samtools sort "$file" -o "${file%.sam}_sorted.bam"
done

Deactivate Samtools Environment

conda deactivate

Task 3: Mark Duplicates Using GATK4

Install and Activate GATK4


  conda create -n gatk4 -c bioconda gatk4
  conda activate gatk4
  

Add readgroup

 
  for file in *_sorted.bam
  do
  gatk AddOrReplaceReadGroups \
    I=${file} \
    O=${file/_sorted.bam/_sorted_RG.bam} \
    RGID=${file/_sorted.bam/} \
    RGPL=ILLUMINA \
    RGPU=${file/_sorted.bam/} \
    RGSM=${file/_sorted.bam/} \
    RGLB=${file/_sorted.bam/}
  done
  

Mark Duplicates

gatk MarkDuplicates -I Benin_sorted_reads.bam -O Benin_deduplicated.bam -M Benin_duplication_metrics.txt --REMOVE_DUPLICATES true

Loop Version

for file in *_sorted_reads.bam; do
  base=${file%_sorted_reads.bam}
  gatk MarkDuplicates \
    -I "$file" \
    -O "${base}_deduplicated.bam" \
    -M "${base}_duplication_metrics.txt" \
    --REMOVE_DUPLICATES true
done

Task 4: Index Deduplicated Files

samtools index Benin_deduplicated.bam

Batch Indexing

samtools index *_deduplicated.bam


  

Task 5: Estimate Coverage using Qualimap

conda create -n qualimap -c bioconda qualimap
conda activate qualimap

Run on Single File

qualimap bamqc -bam Benin_deduplicated.bam -outdir qualimap_results -outformat HTML

Run on All Files

qualimap bamqc -bam *_deduplicated.bam -outdir qualimap_results -outformat HTML

Check Output

cd qualimap_results
cat genome_results.txt