Install BWA
conda create -n bwa -c bioconda bwa
conda activate bwa
Map Reads
bwa mem GCF_018350215.1_P.leo_Ple1_pat1.1_genomic.fna Benin_1_subset_val_1.fastq.gz Benin_2_subset_val_2.fastq.gz > Benin_aligned_reads.sam
Loop for All Samples
for file1 in *subset_val_1.fastq.gz; do
file2=${file1/subset_val_1.fastq.gz/subset_val_2.fastq.gz}
sample_name=$(basename "$file1" _1_subset_val_1.fastq.gz)
bwa mem GCF_018350215.1_P.leo_Ple1_pat1.1_genomic.fna "$file1" "$file2" > "${sample_name}_aligned_reads.sam"
done
Deactivate BWA Environment
conda deactivate
Install and Activate Samtools
conda create -n samtools -c bioconda samtools
conda activate samtools
Convert and Sort
samtools sort SRR10764405.sam -o SRR10764405_sorted.bam
Batch Conversion
for file in *.sam; do
samtools sort "$file" -o "${file%.sam}_sorted.bam"
done
Deactivate Samtools Environment
conda deactivate
Install and Activate GATK4
conda create -n gatk4 -c bioconda gatk4
conda activate gatk4
Add readgroup
for file in *_sorted.bam
do
gatk AddOrReplaceReadGroups \
I=${file} \
O=${file/_sorted.bam/_sorted_RG.bam} \
RGID=${file/_sorted.bam/} \
RGPL=ILLUMINA \
RGPU=${file/_sorted.bam/} \
RGSM=${file/_sorted.bam/} \
RGLB=${file/_sorted.bam/}
done
Mark Duplicates
gatk MarkDuplicates -I Benin_sorted_reads.bam -O Benin_deduplicated.bam -M Benin_duplication_metrics.txt --REMOVE_DUPLICATES true
Loop Version
for file in *_sorted_reads.bam; do
base=${file%_sorted_reads.bam}
gatk MarkDuplicates \
-I "$file" \
-O "${base}_deduplicated.bam" \
-M "${base}_duplication_metrics.txt" \
--REMOVE_DUPLICATES true
done
samtools index Benin_deduplicated.bam
Batch Indexing
samtools index *_deduplicated.bam
Task 5: Estimate Coverage using Qualimap
conda create -n qualimap -c bioconda qualimap
conda activate qualimap
Run on Single File
qualimap bamqc -bam Benin_deduplicated.bam -outdir qualimap_results -outformat HTML
Run on All Files
qualimap bamqc -bam *_deduplicated.bam -outdir qualimap_results -outformat HTML
Check Output
cd qualimap_results
cat genome_results.txt