######################################################################## # IMAF - Index of Major Allele Frequencies # # Naris Pojskic (naris.pojskic@ingeb.unsa.ba) # # University of Sarajevo # # Institute for Genetic Engineering and Biotechnology # # Laboratory for Bioinformatics and Biostatistics # # Bioinfo Research Group - www.bioinfo.ba # ######################################################################## ## This function calculates deviation Fst of loci from overall average accross loci or haplotype diversity from average values ## The input file should be named input.csv ## Results will be saved as output.csv ## Graphical results will be as JPEG files #File manipulation message("R script and CSV input data file must be in the same directory") message("Select CSV input data file") cfi <- file.choose(new = FALSE) cfd <-dirname(cfi) setwd(cfd) getwd() #Import data mydata <- read.csv2(cfi) a <- mydata[[1]] loc <- as.character(a) b <- mydata[[2]] bb <- factor(b) fM <- as.numeric(sub(",", ".", paste(bb), fixed = TRUE)) print(fM) c <- mydata[[3]] cc <- factor(c) An <- as.numeric(sub(",", ".", paste(cc), fixed = TRUE)) print(An) #iM <- round(fM/(1/An),3) #iM <- round(fM/(1/An),3)-1 k<-round(1/An,3) print(k) iM <- round(k/fM,3) print(iM) #Z value iMo<-rep(c(1),times=ncol(t(iM))) z<-(iM-iMo)/sqrt((iM*(1-iM)/2)) #P value pval <-pnorm(-abs(z)) pv <-round(pval,5) print(pv) #Graph and data tabela <- cbind(loc, fM, An, k, iM, pv) colnames(tabela) <- c("Loc","fM","An", "k", "iM", "P") print(tabela) write.table(format(tabela), file = "output.txt", dec = ",", sep = "\t", row.names = FALSE, quote=FALSE) tabgraf <- cbind(fM, iM) colnames(tabgraf) <- c("fM","iM") jpeg("plot1.jpg") barplot(as.matrix(tabgraf), beside=TRUE, legend.text=loc, args.legend=list(bty="n",horiz=FALSE, "topright"),border="white", ylim=c(0,1),ylab="Values") dev.off() #jpeg('plot2.jpg') #plot(An, Ae, col='blue',main='An vs Ae', pch=1, cex=0.9, type='p',xlab='Observed number of alleles',ylab='Effective number of alleles') #dev.off() message("Statistical significance level at P<0.01") h <- "File (output.txt) and graphs (plot1.jpg; plot2.jpg) are saved in" print(paste(h, cfd)) #tabgraf <- t(rbind(He, t(R), t(Rp))) #colnames(tabgraf) <- c("He","R", "R'") #barplot(tabgraf, ylim=c(0,1), legend =TRUE,axes=TRUE,beside=TRUE) #legend("bottom", loc); #jpeg("plot1.jpg") #barplot(as.matrix(tabgraf), beside=TRUE, legend.text=loc, args.legend=list(bty="n",horiz=FALSE, "topright"),border="white", ylim=c(0,1),ylab="Values", main="Expected heterozygosity, Ae/An ratio") #dev.off() #jpeg('plot2.jpg') #plot(An, Ae, col='blue',main='An vs Ae', pch=1, cex=0.9, type='p',xlab='Observed number of alleles',ylab='Effective number of alleles') #lines(x = c(0,100), y = c(0,100), col="red") #lines(lowess(An,Ae), col="red") # lowess line (x,y) #dev.off()