#include "includes.h" #include "grid.h" #include "vector.h" #include extern ofstream errorfs; //declarations #include "declare.h" void onehap(dvector &resultsone, const int &C, int &LE, int &RE, const igrid &data, const ivector &anchap, const dvector &x, const dvector &y, const igrid &freqallele, const dgrid &freqfreq, const dgrid &jointfreq2, const dgrid &condfreq2l, const dgrid &condfreq2r, const dgrid &condfreq2C, const dvector &condfreq3l, const dvector &condfreq3r, const ivector &allelecounts, dgrid &obsprobs, dgrid &obsprobsCnum, dgrid &obsprobsCden, const dvector &mut, dvector &mutmatch, dvector &mutnomatch, dgrid &initstore, dgrid &transstore, dgrid &transstoreC, dgrid &alpha, dgrid &beta, dgrid &gamma, dvector &cstar, dvector &cstarmut, dvector &loglikvec, const int &estp, const double &pinit, const double &m) { //this function maximizes the likelihood over 1/tau and p, given the //location of the gene and the ancestral haplotype, for haplotype //data int i,j,k; i=j=k=0; const int numhap=data.dimx(); //reset alpha,beta,gamma alpha.clear(0); beta.clear(0); gamma.clear(0); /*make parts of observation probabilities that do not depend on parameters*/ //store null sharing probabilities makenullshare(obsprobs, data, freqallele, freqfreq, condfreq2l, condfreq2r, condfreq2C, allelecounts, LE, RE, C); makenullshareCnum(obsprobsCnum, data, freqallele, freqfreq, allelecounts, jointfreq2, condfreq3l,condfreq3r, C, LE,RE,m, anchap); makenullshareCden(obsprobsCden, data, freqallele, freqfreq, C, LE, RE); //give initial values for parameters double tau=5.0; double tau1; double p=pinit; double p1; //compute likelihood at initial values of the parameters double modellik=0; double modellikgs= 0; makemutmatch(mutmatch, mutnomatch, mut, allelecounts, tau, C); makemodlshare(obsprobs,data,mutmatch,mutnomatch,anchap,C, LE, RE); makemodlshareCnum(obsprobsCnum, data, obsprobs, C, LE, RE); makemodlshareCden(obsprobsCden, data, obsprobs, C, LE, RE); makeinit(initstore,x,tau,p,C,LE,RE); maketrans(transstore, x, tau, p, C, LE, RE); maketransC(transstoreC, x, tau, p, C, LE, RE); makealpha(alpha, initstore, transstore, transstoreC, obsprobs, obsprobsCnum, obsprobsCden, C, LE, RE); makeloglik(loglikvec,alpha, initstore, C, RE); modellik=0; for (i=0;i0) { makemutmatch(mutmatch, mutnomatch, mut, allelecounts, taugs, C); makemodlshare(obsprobs,data,mutmatch,mutnomatch,anchap,C, LE, RE); makemodlshareCnum(obsprobsCnum, data, obsprobs, C, LE, RE); makemodlshareCden(obsprobsCden, data, obsprobs, C, LE, RE); } if (estp==0) { makeinit(initstore,x,taugs,p,C,LE,RE); maketrans(transstore, x, taugs, p, C, LE, RE); maketransC(transstoreC, x, taugs, p, C, LE, RE); makealpha(alpha, initstore, transstore, transstoreC, obsprobs, obsprobsCnum, obsprobsCden, C, LE, RE); makeloglik(loglikvec,alpha, initstore, C, RE); modellikgs=0; for (k=0;kmodellik) { modellik=modellikgs; tau=taugs; } } else { for (j=0;j<3;j++) { pgs=0.05+0.45*j; makeinit(initstore,x,taugs,pgs,C,LE,RE); maketrans(transstore, x, taugs, pgs, C, LE, RE); maketransC(transstoreC, x, taugs, pgs, C, LE, RE); makealpha(alpha, initstore, transstore, transstoreC, obsprobs, obsprobsCnum, obsprobsCden, C, LE, RE); makeloglik(loglikvec,alpha, initstore, C, RE); modellikgs=0; for (k=0;kmodellik) { modellik=modellikgs; tau=taugs; p=pgs; } } } } //run EM algorithm to maximize likelihood over parameters int iter; int maxiter=200; for (iter=0;iter0 && iter>0) { //make mutation and no-mutation rates for each locus makemutmatch(mutmatch, mutnomatch, mut, allelecounts, tau, C); //fill in observation probabilities for ancestral state makemodlshare(obsprobs,data,mutmatch,mutnomatch,anchap,C, LE, RE); makemodlshareCnum(obsprobsCnum, data, obsprobs, C, LE, RE); makemodlshareCden(obsprobsCden, data, obsprobs, C, LE, RE); } makeinit(initstore,x,tau,p,C,LE,RE); //initstore.dump(); //for (i=0;i0) makecstarmut(cstarmut, cstar,gamma,data,anchap,mutmatch,C,LE,RE); //for (i=0;i=maxiter) { errorfs << "Warning: Algorithm did not converge for anc. hap" << endl; for (i=LE;i<=RE;i++) cout << anchap[i] << " "; cout << endl; } */ //make model loglikelihood makemutmatch(mutmatch, mutnomatch, mut, allelecounts, tau, C); makemodlshare(obsprobs,data,mutmatch,mutnomatch,anchap,C, LE, RE); makemodlshareCnum(obsprobsCnum, data, obsprobs, C, LE, RE); makemodlshareCden(obsprobsCden, data, obsprobs, C, LE, RE); makeinit(initstore,x,tau,p,C,LE,RE); maketrans(transstore, x, tau, p, C, LE, RE); maketransC(transstoreC, x, tau, p, C, LE, RE); makealpha(alpha, initstore, transstore, transstoreC, obsprobs, obsprobsCnum, obsprobsCden, C, LE, RE); makeloglik(loglikvec,alpha, initstore, C, RE); modellik=0; for (i=0;i0) { makemutmatch(mutmatch, mutnomatch, mut, allelecounts, taugs, C); makemodlsharegen(obsprobs,nullprobs, data,mutmatch,mutnomatch,anchap,C,LE,RE); makemodlshareCnumgen(obsprobsCnum, nullprobsCnum, data, anchap, mutmatch, mutnomatch, C, LE, RE); makemodlshareCdengen(obsprobsCden, nullprobsCden, data, anchap, mutmatch, mutnomatch, C, LE, RE); makemodlshareCltREgen(obsprobsCltRE, nullprobsCltRE, data,anchap, mutmatch, mutnomatch, C, RE); } if (estp==0) { makeinitgen(initstore,scratchvec,x,taugs,p,freqC,C,LE,RE); maketransgen(transstore, scratchvec,x, taugs, p, freqC, C, LE, RE); maketransCgen(transstoreC, scratchvec,x, taugs, p, C, LE, RE); makealphagen(alpha, initstore, transstore, transstoreC, obsprobs, obsprobsCnum, obsprobsCden, C, LE, RE); makeloglikgen(loglikvec,alpha, initstore, C, RE); modellikgs=0; for (g=0;gmodellik) { modellik=modellikgs; tau=taugs; } } else { for (j=0;j<3;j++) { pgs=0.05+0.45*j; makeinitgen(initstore,scratchvec,x,taugs,pgs,freqC,C,LE,RE); maketransgen(transstore, scratchvec,x, taugs, pgs, freqC, C, LE, RE); maketransCgen(transstoreC, scratchvec,x, taugs, pgs, C, LE, RE); makealphagen(alpha, initstore, transstore, transstoreC, obsprobs, obsprobsCnum, obsprobsCden, C, LE, RE); makeloglikgen(loglikvec,alpha, initstore, C, RE); modellikgs=0; for (g=0;gmodellik) { modellik=modellikgs; tau=taugs; p=pgs; } } } } //cout << tau << " " << p << " " << modellik << endl; //run EM algorithm to maximize likelihood over parameters int iter; int maxiter=100; for (iter=0;iter0 && iter>0) { //make mutation and no-mutation rates for each locus makemutmatch(mutmatch, mutnomatch, mut, allelecounts, tau, C); //fill in observation probabilities for ancestral state makemodlsharegen(obsprobs,nullprobs, data,mutmatch,mutnomatch,anchap,C,LE,RE); makemodlshareCnumgen(obsprobsCnum, nullprobsCnum, data, anchap, mutmatch, mutnomatch, C, LE, RE); makemodlshareCdengen(obsprobsCden, nullprobsCden, data, anchap, mutmatch, mutnomatch, C, LE, RE); makemodlshareCltREgen(obsprobsCltRE, nullprobsCltRE, data,anchap, mutmatch, mutnomatch, C, RE); } makeinitgen(initstore,scratchvec,x,tau,p,freqC,C,LE,RE); //initstore.dump(); // for (i=0;i0) makecstarmutgen(cstarmut, cstar,gamma,data,anchap, mutmatch,C,LE,RE); //for (i=0;i=maxiter) { errorfs << "Warning: Algorithm did not converge for anc. hap" << endl; for (i=LE;i<=RE;i++) cout << anchap[i] << " "; cout << endl; } */ //make model loglikelihood makemutmatch(mutmatch, mutnomatch, mut, allelecounts, tau, C); makemodlsharegen(obsprobs,nullprobs, data,mutmatch,mutnomatch,anchap,C,LE,RE); makemodlshareCnumgen(obsprobsCnum, nullprobsCnum, data, anchap, mutmatch, mutnomatch, C, LE, RE); makemodlshareCdengen(obsprobsCden, nullprobsCden, data, anchap, mutmatch, mutnomatch, C, LE, RE); makemodlshareCltREgen(obsprobsCltRE, nullprobsCltRE, data,anchap, mutmatch, mutnomatch, C, RE); makeinitgen(initstore,scratchvec,x,tau,p,freqC,C,LE,RE); maketransgen(transstore, scratchvec,x, tau, p, freqC, C, LE, RE); maketransCgen(transstoreC, scratchvec,x, tau, p, C, LE, RE); makealphagen(alpha, initstore, transstore, transstoreC, obsprobs, obsprobsCnum, obsprobsCden, C, LE, RE); makeloglikgen(loglikvec,alpha, initstore, C, RE); modellik=0; for (g=0;g0) //cout << C << " " << E_copy << " " << LE << " " << RE << " " // << d2LE << " " << d2RE << " (updated values)" << endl; /*we now estimate parameters for each candidate ancestral haplotype*/ for (k=0; kmapest && mapest>0) numkeep=mapest; else if (mapest>0 && numkeep==0) { if (numcand<=mapest) numkeep=numcand; else numkeep=mapest; } //when not mapping, the ancestral haplotype may not be completely //reconstructed if (numkeep==0) { //cout << "extension?" << endl; numkeep=findnumkeep(candres, -dirjump, threshfix, threshvar, numcand); if (numkeep==0 || LE-dirjump<0 || RE-dirjump>=data.dimy()) { //cout << "No" << endl; break; } else { //cout << "Yes" << endl; dirjump=-dirjump; } } cleantemps(candanc, candres, numcand, numkeep, LE, RE); } for (i=LE;i<=RE;i++) anchap[i]=candanc(0,i); for (j=0;j<10;j++) resultsmax[j]=candres(0,j); // resultsmax[10]=0; // resultsmax[11]=0; }//max void onehapmkv2(dvector &resultsone, const int &C, int &LE, int &RE, const igrid &data, const ivector &anchap, const dvector &x, const dvector &y, const igrid &freqallele, const dgrid &freqfreq, const dgrid &jointfreq2, const dgrid &jointfreq3mkv2, const dgrid &jointfreq3fmkv2, const dgrid &jointfreq4mkv2, const dgrid &jointfreq4fmkv2, const dgrid &condfreq2l, const dgrid &condfreq2r, const dgrid &condfreq3fmkv2, const dgrid &condfreq3lmkv2, const dgrid &condfreq3rmkv2, const ivector &allelecounts, dgrid &obsprobs, dvector &obsprobsAstore, const dvector &mut, dvector &mutmatch, dvector &mutnomatch, dgrid &initstore, dgrid &transstore, dgrid &transstoreC, dgrid &alpha, dgrid &beta, dgrid &gamma, dvector &cstar, dvector &cstarmut, dvector &loglikvec, const int &estp, const double &pinit, const double &m) { //this function maximizes the likelihood over 1/tau and p, given the //location of the gene and the ancestral haplotype, for haplotype //data int i,j,k; i=j=k=0; const int numhap=data.dimx(); //reset alpha,beta,gamma alpha.clear(0); beta.clear(0); gamma.clear(0); /*make parts of observation probabilities that do not depend on parameters*/ // for (i=0;i0) { makemutmatch(mutmatch, mutnomatch, mut, allelecounts, taugs, C); makemodlshare(obsprobs,data,mutmatch,mutnomatch,anchap,C, LE, RE); } if (estp==0) { makeinit(initstore,x,taugs,p,C,LE,RE); maketrans(transstore, x, taugs, p, C, LE, RE); maketransC(transstoreC, x, taugs, p, C, LE, RE); makeloglik(loglikvec,alpha, initstore, C, RE); modellikgs=0; for (k=0;kmodellik) { modellik=modellikgs; tau=taugs; } } else { for (j=0;j<3;j++) { pgs=0.05+0.45*j; makeinit(initstore,x,taugs,pgs,C,LE,RE); maketrans(transstore, x, taugs, pgs, C, LE, RE); maketransC(transstoreC, x, taugs, pgs, C, LE, RE); // makealpha(alpha, initstore, transstore, transstoreC, // obsprobs, obsprobsCnum, obsprobsCden, // C, LE, RE); makeloglik(loglikvec,alpha, initstore, C, RE); modellikgs=0; for (k=0;kmodellik) { modellik=modellikgs; tau=taugs; p=pgs; } } } } //run EM algorithm to maximize likelihood over parameters int iter; int maxiter=2000;//was 200 for (iter=0;iter0 && iter>0) { //make mutation and no-mutation rates for each locus makemutmatch(mutmatch, mutnomatch, mut, allelecounts, tau, C); //fill in observation probabilities for ancestral state makemodlshare(obsprobs,data,mutmatch,mutnomatch,anchap,C, LE, RE); //makemodlshareCnum(obsprobsCnum, data, obsprobs, C, LE, RE); //makemodlshareCden(obsprobsCden, data, obsprobs, C, LE, RE); } makeinit(initstore,x,tau,p,C,LE,RE); //initstore.dump(); //for (i=0;i0) makecstarmut(cstarmut, cstar,gamma,data,anchap,mutmatch,C,LE,RE); //for (i=0;i=maxiter) { errorfs << "Warning: Algorithm did not converge for anc. hap" << endl; for (i=LE;i<=RE;i++) cout << anchap[i] << " "; cout << endl; } */ //make model loglikelihood makemutmatch(mutmatch, mutnomatch, mut, allelecounts, tau, C); makemodlshare(obsprobs,data,mutmatch,mutnomatch,anchap,C, LE, RE); //makemodlshareCnum(obsprobsCnum, data, obsprobs, C, LE, RE); //makemodlshareCden(obsprobsCden, data, obsprobs, C, LE, RE); makeinit(initstore,x,tau,p,C,LE,RE); maketrans(transstore, x, tau, p, C, LE, RE); maketransC(transstoreC, x, tau, p, C, LE, RE); makealphamkv2(alpha, initstore, transstore, obsprobs, obsprobsAstore, C, LE, RE); makeloglik(loglikvec,alpha, initstore, C, RE); modellik=0; for (i=0;i0) { makemutmatch(mutmatch, mutnomatch, mut, allelecounts, taugs, C); makeobsgenmkv2(obsprobs,nullprobs, data,mutmatch,mutnomatch,anchap,C,LE,RE,0); makeobsCnumgenmkv2(obsCnum, nullCnum, data, mutmatch, mutnomatch, anchap, C, LE, RE,0); makeobsCdengenmkv2(obsCden, nullCden, data, mutmatch, mutnomatch, anchap, C, LE, RE,0); } if (estp==0) { makeinitgenmkv2(initstore,scratchvec,x,tau,p,C,LE,RE,0); maketransgenmkv2(transstore, scratchvec,x, tau, p, C, LE, RE,0); maketransCgenmkv2(transstoreC, scratchvec,y, tau, p, C, LE, RE,0); makealphagenmkv2(alpha, initstore, transstore, transstoreC, obsprobs, obsCnum, obsCden, C, LE, RE); makeloglikgenmkv2(loglikvec,alpha, initstore, obsprobs, C, RE); modellikgs=0; for (g=0;gmodellik) { modellik=modellikgs; tau=taugs; } } else { for (j=0;j<3;j++) { pgs=0.05+0.45*j; makeinitgenmkv2(initstore,scratchvec,x,tau,pgs,C,LE,RE,0); maketransgenmkv2(transstore, scratchvec,x, tau, pgs, C, LE, RE,0); maketransCgenmkv2(transstoreC, scratchvec,y, tau, pgs, C, LE, RE,0); makealphagenmkv2(alpha, initstore, transstore, transstoreC, obsprobs, obsCnum, obsCden, C, LE, RE); makeloglikgenmkv2(loglikvec,alpha, initstore, obsprobs, C, RE); modellikgs=0; for (g=0;gmodellik) { modellik=modellikgs; tau=taugs; p=pgs; } } } } // cout << "gs " <0 && iter>0) { //make mutation and no-mutation rates for each locus makemutmatch(mutmatch, mutnomatch, mut, allelecounts, tau, C); //fill in observation probabilities for ancestral state makeobsgenmkv2(obsprobs,nullprobs, data,mutmatch,mutnomatch,anchap,C,LE,RE,0); makeobsCnumgenmkv2(obsCnum, nullCnum, data, mutmatch, mutnomatch, anchap, C, LE, RE,0); makeobsCdengenmkv2(obsCden, nullCden, data, mutmatch, mutnomatch, anchap, C, LE, RE,0); } makeinitgenmkv2(initstore,scratchvec,x,tau,p,C,LE,RE,0); //initstore.dump(); // for (i=0;i0) makecstarmutgenmkv2(cstarmut, cstar,gamma,data,anchap, mutmatch,C,LE,RE); //for (i=0;i=maxiter) { //errorfs << "Warning: Algorithm did not converge for anc. hap" //<< endl; //for (i=LE;i<=RE;i++) cout << anchap[i] << " "; cout << endl; } //make model loglikelihood makemutmatch(mutmatch, mutnomatch, mut, allelecounts, tau, C); makeobsgenmkv2(obsprobs,nullprobs, data,mutmatch,mutnomatch,anchap,C,LE,RE,0); makeobsCnumgenmkv2(obsCnum, nullCnum, data, mutmatch, mutnomatch, anchap, C, LE, RE,0); makeobsCdengenmkv2(obsCden, nullCden, data, mutmatch, mutnomatch, anchap, C, LE, RE,0); makeinitgenmkv2(initstore,scratchvec,x,tau,p,C,LE,RE,0); maketransgenmkv2(transstore, scratchvec,x, tau, p, C, LE, RE,0); maketransCgenmkv2(transstoreC, scratchvec,y, tau, p, C, LE, RE,0); makealphagenmkv2(alpha, initstore, transstore, transstoreC, obsprobs, obsCnum, obsCden, C, LE, RE); makeloglikgenmkv2(loglikvec,alpha, initstore, obsprobs, C, RE); modellik=0; for (g=0;g0) //cout << C << " " << E_copy << " " << LE << " " << RE << " " // << d2LE << " " << d2RE << " (updated values)" << endl; /*we now estimate parameters for each candidate ancestral haplotype*/ for (k=0; kmapest && mapest>0) numkeep=mapest; else if (mapest>0 && numkeep==0) { if (numcand<=mapest) numkeep=numcand; else numkeep=mapest; } //when not mapping, the ancestral haplotype may not be completely //reconstructed if (numkeep==0) { //cout << "extension?" << endl; numkeep=findnumkeep(candres, -dirjump, threshfix, threshvar, numcand); if (numkeep==0 || LE-dirjump<0 || RE-dirjump>=data.dimy()) { //cout << "No" << endl; break; } else { //cout << "Yes" << endl; dirjump=-dirjump; } } cleantemps(candanc, candres, numcand, numkeep, LE, RE); } for (i=LE;i<=RE;i++) anchap[i]=candanc(0,i); for (j=0;j<10;j++) resultsmax[j]=candres(0,j); // resultsmax[10]=0; // resultsmax[11]=0; }//maxmkv2