1. PCA: Principal Component Analysis
Convert VCF to PLINK Binary Format
conda activate plink
plink --vcf machali_...vcf \
--make-bed \
--double-id \
--allow-extra-chr \
--out machali_...
Edit .fam file to add region info
awk '{
id = $1
if (id ~ /_CI/) region = "CenIndia"
else if (id ~ /_SI/) region = "SouIndia"
else if (id ~ /_NE/) region = "NorEasIndia"
else if (id ~ /_NW/) region = "NorWesIndia"
else if (id ~ /_NOR/) region = "NorIndia"
else if (id ~ /_SU/) region = "Sunderban"
else if (id ~ /^LGS/) region = "CenIndia"
else region = "Unknown"
$2 = region
print
}' input.fam > tmp.fam && mv tmp.fam input.fam
Fix chromosome names in .bim
awk '{$1 = 1; print}' input.bim > tmp && mv tmp input.bim
Run PCA
plink --bfile input --pca 5 --allow-extra-chr --out input_pca
conda deactivate
Plot PCA in R
conda activate R_env
R
fam <- read.table("input.fam")
eigenvec <- read.table("input_pca.eigenvec")
colnames(eigenvec) <- c("FID", "IID", paste0("PC", 1:5))
colnames(fam)[2] <- "Region"
eigenvec$Region <- fam$V2
library(ggplot2)
ggplot(eigenvec, aes(x = PC1, y = PC2, color = Region)) +
geom_point(size = 3, alpha = 0.8) +
theme_minimal() +
labs(title = "PCA Plot by Region", x = "PC1", y = "PC2")
ggsave("pca_by_region.png")
ggsave("pca_by_region.pdf")
q()
2. ADMIXTURE Analysis
conda create -n admixture -c bioconda admixture
conda activate admixture
for K in {2..4}
do
admixture input.bed $K
done
Plot Admixture Results in R
conda activate R_env
R
library(ggplot2)
library(reshape2)
library(dplyr)
q3 <- read.table("input.3.Q")
fam <- read.table("input.fam")
q3$ID <- fam$V1
q3$Group <- gsub(".*_(.*)", "\\1", q3$ID)
q3_long <- melt(q3, id.vars = c("ID", "Group"))
q3_long <- q3_long %>%
arrange(Group, ID) %>%
mutate(ID = factor(ID, levels = unique(ID)))
p <- ggplot(q3_long, aes(x = ID, y = value, fill = variable)) +
geom_bar(stat = "identity", width = 1) +
facet_grid(~Group, scales = "free_x", space = "free_x") +
theme_minimal() +
labs(x = "Individuals", y = "Ancestry Proportion", title = "ADMIXTURE Plot (K=3)") +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
panel.spacing = unit(0.5, "lines"),
strip.text.x = element_text(angle = 0, face = "bold"),
legend.position = "right") +
scale_fill_brewer(palette = "Set1")
ggsave("admixture_K3_grouped.png", p, width = 12, height = 6, dpi = 300)
q()