# MSAP_calc_session2.R   11.03.2015
# MSAP_calc_session2v3.R 07.02.2017
################################################################################
# MSAP_calc.r
# Functions to transform and analyse MSAP data
# Walter Durka - October/November/December 2012     v. 1.0
#                March 2014                         v. 1.1 correcting some bugs
#                March 2015                         v. 1.2 error in barplot
################################################################################
# Input file format to be read by function Extract_MSAP_epigenotypes:
# column 1 = population ID
# column 2 = sample ID
# column 3 = restriction enzyme "H" or "M"
# columns 4 to n = 0/1 for presence/absence of marker bands
# row 1 = table head containing column names (pop, sample, HM, locusIDs)
# rows 2 to 2*N + 1 = data
# --::
# pop sample HM marker1 marker2
# 1   1      H  1       0
# 1   1      M  1       0
# 1   2      H  1       0
# 1   2      M  0       1
# 2   1      H  0       1
# 2   1      M  1       0
# 2   2      H  0       1
# 2   2      M  0       1
#
################################################################################


################################################################################
#  BEGIN SESSION
################################################################################
# Start from an empty global environment.
rm(list = ls())

# Working directory: every file name used below is relative to this folder.
setwd("C:/A/MSAP")

# Load the MSAP_calc function definitions.
source("MSAP_calc_1_3.r")

################################################################################
# Data transformation + descriptive parameters
#
#                               (inputfile,      coding,   outputfile, Minimum polymorphism threshold, DeleteMonomorphicLoci?)
# For each scoring scheme: write the transformed epigenotype table, then
# compute its descriptive parameters and keep the result in r.<scheme>.

# Mixed scoring 1
Extract_MSAP_epigenotypes("MSAP_data.txt", "Mix1", "MSAP_Mix1.txt", 1, TRUE)
r.Mix1 <- descriptive_parameters("MSAP_Mix1.txt", "MSAP_Mix1_descr.txt")

# Mixed scoring 2
Extract_MSAP_epigenotypes("MSAP_data.txt", "Mix2", "MSAP_Mix2.txt", 1, TRUE)
r.Mix2 <- descriptive_parameters("MSAP_Mix2.txt", "MSAP_Mix2_descr.txt")

# Paun
Extract_MSAP_epigenotypes("MSAP_data.txt", "Paun", "MSAP_Paun.txt", 1, TRUE)
r.Paun <- descriptive_parameters("MSAP_Paun.txt", "MSAP_Paun_descr.txt")

# Herrera
Extract_MSAP_epigenotypes("MSAP_data.txt", "Herrera", "MSAP_Herrera.txt", 1, TRUE)
r.Herr <- descriptive_parameters("MSAP_Herrera.txt", "MSAP_Herrera_descr.txt")

# Vergeer
Extract_MSAP_epigenotypes("MSAP_data.txt", "Vergeer", "MSAP_Vergeer.txt", 1, TRUE)
r.Verg <- descriptive_parameters("MSAP_Vergeer.txt", "MSAP_Vergeer_descr.txt")

# Salmon
Extract_MSAP_epigenotypes("MSAP_data.txt", "Salmon", "MSAP_Salmon.txt", 1, TRUE)
r.Salm <- descriptive_parameters("MSAP_Salmon.txt", "MSAP_Salmon_descr.txt")

# Lira-Medeiros, method 1
Extract_MSAP_epigenotypes("MSAP_data.txt", "Lira-M1", "MSAP_Lira-M1.txt", 1, TRUE)
r.Lir1 <- descriptive_parameters("MSAP_Lira-M1.txt", "MSAP_Lira-M1_descr.txt")

# Lira-Medeiros, method 2
Extract_MSAP_epigenotypes("MSAP_data.txt", "Lira-M2", "MSAP_Lira-M2.txt", 1, TRUE)
r.Lir2 <- descriptive_parameters("MSAP_Lira-M2.txt", "MSAP_Lira-M2_descr.txt")

### Add descriptive parameters of the whole data set to the files generated
### above, so that you also get values for e.g. overall Shannon diversity.
### For every scoring scheme the epigenotype table is re-read, all samples are
### relabelled as one pseudo-population "Total", the table is written to a
### temporary MSAP_<scheme>_T.txt file and passed to descriptive_parameters()
### together with the existing MSAP_<scheme>_descr.txt.
### NOTE(review): the third argument TRUE presumably switches
### descriptive_parameters() to append mode - confirm in MSAP_calc_1_3.r.
for (coding in c("Mix1", "Mix2", "Paun", "Herrera",
                 "Vergeer", "Salmon", "Lira-M1", "Lira-M2")) {
  total_file <- paste0("MSAP_", coding, "_T.txt")

  # header/TRUE/FALSE are spelled out: T and F are ordinary variables that can
  # be reassigned, and 'head' only worked through partial argument matching.
  d <- read.table(paste0("MSAP_", coding, ".txt"), header = TRUE)
  d$pop <- "Total"
  write.table(d, total_file, sep = "\t", quote = FALSE, row.names = FALSE)

  # 'finally' removes the temporary file even when descriptive_parameters()
  # stops with an error; the error itself is still raised.
  tryCatch(
    descriptive_parameters(total_file, paste0("MSAP_", coding, "_descr.txt"), TRUE),
    finally = file.remove(total_file)
  )
}


################################################################################
# END OF DATA TRANSFORMATION SESSION
################################################################################

################################################################################
# Barplot of Shannon diversity
# Recompute the Mixed2 descriptive parameters used by both plots below.
r.Mix2 <- descriptive_parameters("MSAP_Mix2.txt", "MSAP_Mix2_descr.txt")

# Two plots stacked vertically.
par(mfrow = c(2, 1))

# Mean overall Shannon diversity: one bar per population.
barplot(r.Mix2$Shannon_diversity, beside = TRUE, names.arg = r.Mix2$PopID,
        ylab = "Mean Shannon diversity", xlab = "Population",
        ylim = c(0, max(r.Mix2$Shannon_diversity) * 1.2),
        main = "Mixed2 scoring")

# Separate estimates for the different types of epi-loci.
# With beside = TRUE, barplot() draws one group of bars per COLUMN of the
# height matrix (labelled by names.arg) and matches legend.text to its ROWS.
# The matrix therefore has to be 3 x n_pop: one row per epilocus type, one
# column per population. The former t(rbind(...)) produced the transpose, so
# the population names and the three-entry legend did not line up with the
# bars. as.numeric() drops any matrix shape so rbind() always stacks rows.
shannon_by_type <- rbind(
  as.numeric(r.Mix2$u_Shannon_diversity),
  as.numeric(r.Mix2$m_Shannon_diversity),
  as.numeric(r.Mix2$h_Shannon_diversity)
)
barplot(shannon_by_type, beside = TRUE, names.arg = r.Mix2$PopID,
        ylab = "Mean Shannon diversity", xlab = "Population",
        # extra headroom (factor 1.4) leaves space for the horizontal legend
        ylim = c(0, max(shannon_by_type) * 1.4),
        legend.text = c("unmeth.", "MeCpG", "HMe-CCG"),
        args.legend = list(horiz = TRUE))


################################################################################
# PCO comparison of MSAP transformations
# Paths below the Tinn-R temp folder in %APPDATA%; .trPaths is not referenced
# anywhere else in this script.
.trPaths <- paste0(
  Sys.getenv('APPDATA'), '\\Tinn-R\\tmp\\',
  c('', 'search.txt', 'objects.txt', 'file.r', 'selection.r', 'block.r', 'lines.r')
)
library(labdsv)

# Re-run the transformations, this time keeping the returned tables.
Mix2 <- Extract_MSAP_epigenotypes("MSAP_data.txt", "Mix2", "MSAP_Mix2.txt", 1, TRUE)
Paun <- Extract_MSAP_epigenotypes("MSAP_data.txt", "Paun", "MSAP_Paun.txt", 1, TRUE)
Mix1 <- Extract_MSAP_epigenotypes("MSAP_data.txt", "Mix1", "MSAP_Mix1.txt", 1, TRUE)
Herr <- Extract_MSAP_epigenotypes("MSAP_data.txt", "Herrera", "MSAP_Herrera.txt", 1, TRUE)
Verg <- Extract_MSAP_epigenotypes("MSAP_data.txt", "Vergeer", "MSAP_Vergeer.txt", 1, TRUE)
Salm <- Extract_MSAP_epigenotypes("MSAP_data.txt", "Salmon", "MSAP_Salmon.txt", 1, TRUE)
Lir1 <- Extract_MSAP_epigenotypes("MSAP_data.txt", "Lira-M1", "MSAP_Lira-M1.txt", 1, TRUE)
Lir2 <- Extract_MSAP_epigenotypes("MSAP_data.txt", "Lira-M2", "MSAP_Lira-M2.txt", 1, TRUE)

# Principal coordinates (10 axes) on Sorensen dissimilarities; the first two
# columns of each table are dropped before the dissimilarities are computed.
sorensen_pco <- function(epi) {
  marker_cols <- epi[, -(1:2)]
  pco(dsvdis(marker_cols, index = "sorensen"), k = 10)
}

# PCO is not possible for matrices containing NA, hence no ordination is
# computed for Herr and Verg.
Mix2.pco <- sorensen_pco(Mix2)
Mix1.pco <- sorensen_pco(Mix1)
Paun.pco <- sorensen_pco(Paun)
Salm.pco <- sorensen_pco(Salm)
Lir1.pco <- sorensen_pco(Lir1)
Lir2.pco <- sorensen_pco(Lir2)

# Scatter plot of the first two ordination axes, drawing a text label for
# every sample instead of a point symbol.
#   p     : list with a coordinate matrix `points` (axes in columns) and a
#           vector `eig`
#   Title : main title of the plot
#   sym   : labels drawn at the sample positions
# The axis labels give each axis' share of sum(p$eig) in percent.
plot_my_pco <- function(p, Title = "", sym) {
  axis_share <- p$eig[1:2] / sum(p$eig) * 100
  axis1 <- p$points[, 1]
  axis2 <- p$points[, 2]

  # type = "n" only sets up the axes; the samples are added as text below.
  plot(axis1, axis2,
       type = "n",
       xlab = paste("PCO1 ", format(axis_share[1], digits = 3), "%"),
       ylab = paste("PCO2 ", format(axis_share[2], digits = 3), "%"),
       main = Title)
  text(axis1, axis2, sym, cex = 0.8)
} # End of function plot_my_pco

# 4 x 2 grid of panels with tight margins and axis-label spacing.
par(mfrow = c(4, 2), mar = c(3.5, 3.5, 2, 2), mgp = c(1.5, 0.5, 0))

# One panel per scoring scheme; samples are labelled with the first column of
# the corresponding epigenotype table.
pco_panels <- list(
  list(ord = Mix1.pco, title = "Mixed1",              sym = Mix1[, 1]),
  list(ord = Mix2.pco, title = "Mixed2",              sym = Mix2[, 1]),
  list(ord = Paun.pco, title = "Paun",                sym = Paun[, 1]),
  list(ord = Salm.pco, title = "Salmon",              sym = Salm[, 1]),
  list(ord = Lir1.pco, title = "Lira-Medeiros",       sym = Lir1[, 1]),
  list(ord = Lir2.pco, title = "Lira-Medeiros+HpaII", sym = Lir2[, 1])
)
for (panel in pco_panels) {
  plot_my_pco(panel$ord, panel$title, panel$sym)
}


# compare eigenvalues
# Percentage of the eigenvalue sum carried by each axis of an ordination.
pct_explained <- function(ord) {
  ord$eig / sum(ord$eig) * 100
}

# Mixed1 sets up the plot; the other scorings are overlaid in colours 2 to 6.
plot(pct_explained(Mix1.pco),
     type = "b",
     xlab = "PCoA Axis Number",
     ylab = "Percent explained variation",
     xlim = c(1, 6),
     ylim = c(0, 80))

overlaid_pcos <- list(Mix2.pco, Paun.pco, Salm.pco, Lir1.pco, Lir2.pco)
for (i in seq_along(overlaid_pcos)) {
  lines(pct_explained(overlaid_pcos[[i]]), type = "b", col = i + 1)
}

# Hand-placed key: one label per curve, written in that curve's colour.
text(4, 75, "Mixed1")
text(4, c(65, 55, 45, 35, 25),
     c("Mixed2", "Paun", "Salmon", "Lira-Medeiros", "Lira-Medeiros+HpaII"),
     col = 2:6)
#################################################################################

