/* MQLS.c C-program for the MQLS test, the WQLS test, and the corrected chi-squared test for case-control association testing in samples with related individuals * * Version 1.5 * * *********************************************** 12-03-2010 Timothy Thornton a) All three association statistics are now calculated using the variance estimator of Equation (3) of Thornton and McPeek (2010), which is a robust variance estimator that relaxes the Hardy-Weinberg Equilibrium (HWE) assumption under the null hypothesis. The previous versions use a variance estimator that assumes HWE in the founders. b) The p-values for every SNP are printed to a file named "MQLStest.pvalues" ************************************************* 09-18-2009 Timothy Thornton Fixed major bug that previously crashed the program when there are no members of a family with available genotyped data at a SNP. ****************************** 09-18-2009 Mark Kvale (kvalem@humgen.ucsf.edu) a) Fixed memory problem where family struct is now allocated only once for each family instead of for every marker b) Deallocated memory for temporary matrices in the makeFreqMat subroutine ************************************************* 02-23-2009 Mary Sara McPeek a) Fixed another memory allocation problem. b) Changed printing of p-values so that they will be in either scientific or floating point notation, whichever is shorter. This is intended to eliminate the problem of very small p-values being reported as .000000 in floating point notation. c) Changed the way MZ twins are identified. *************************************************************** 03-18-2008 Daniel E. Weeks (weeks@pitt.edu) -- a) Memory allocation has been corrected, so that MQLS should not as easily run out of memory when analyzing datasets with a lot of people b) Better handling of files with too many markers: It now prints an error message and exits if the maximum number of markers (MAXMARK) has not been set large enough. ************************************************************ 07-20-2007 Timothy Thornton - Added the MQLS test to the program **************************************************************** 09-13-2002 Catherine Bourgain 07-31-2003 Catherine Bourgain - correct small bug on lines 341 and 354 08-6-2003 Catherine Bourgain - add s.d. for allele frequency estimates based on QL and print S_allsample=2*(the sum of all entries in the inverse of the covariance matrix) - add the output of the score value for each allele when the p-value is smaller than 5%. The sign of this score allows to know which allele is associated with the cases 10-22-2003 Catherine Bourgain -name of data and kinship files must be entered in the command line. File name chosen by the user. 10-24-2003 Catherine Bourgain -each independent family is treated separatly. Kinship coefficients should be computed within each family only 11-05-2003 Catherine Bourgain -may analyze several markers at the same time. -creates an CC-QLStest.err file listing the individuals from the marker data file and from the kinship coefficient file that cannot be used in the analysis. -kinship coefficients do not need to be ordered following the order of the individuals in the marker data file. 01-07-04 Catherine Bourgain - correct a bug in the readkincoef function - able to compute the statistics when an allele numbered between 1 and M, has frequency zero 06-02-04 Catherine Bourgain - correct another bug in the readkincoef function 06-11-04 Catherine Bourgain - change several memory allocations to allow the analysis of larger family sets 06-13-04 Catherine Bourgain - add a parameter to check that QL-frequency estimates are non negative. Skip QL computation for such markers 08-09-05 Catherine Bourgain -correct memory allocation procedures and add a test that the number of families does not exceed MAXFAM 03-14-06 Catherine Bourgain - correct a bug in the makeFreqMat function - correct memory allocation problems throughout the program Two files are required : a linkage data file a file with the kinship coefficient of all ind of the markid file. Notes : marker alleles should be named from 1 to M Output is a CC-QLStest.out file with - allele frequency estimates in cases, controls, overall sample of cases and controls obtained by both naive counting and using the allele-based quasi-likelihood estimator - the statistic value of the corrected chi2 test for case-control association and the corresponding p-value - the statistic value of the Case-control quasi-likelihood score test and and the corresponding p-value Note that both p-values are computed using the chi2 distribution as the null distribution of the statistics, which is true as long as the allele frequency are not too small. In pratice, the chi2 approximation might hold as long as non of the observed counts of alleles are greater than 5 in cases and in controls. A warning message is printed in the output file if such a situation occurs */ #include #include #include #include "nrutil.h" #include #include "nrutil.c" #include "cholesky.c" #include "chisq.c" #define MAXFAM 500 #define MAXMARK 6500 #define MAXTOP 20 struct FAM { int N; //total number of individuals in a family int Pheno; //number of individuals phenotyped in a family int NoPheno; // number of individuals not phenotyped in a family int Unknown; // number of individuals removed from the analysis double **cholent; double **chol; int *MissingVec; // list of individuals with missing genotype int *descri; // array of correspondance between rank in the family and Id's double **MZ; //THIS IS FOR INDIVIDUALS THAT ARE MZ TWINS double **AVEC; //THIS IS THE A PHENOTYPE VECTOR USING EVERYONE }; typedef struct Nsize { int N; //total number of ind int Nc; // number of cases int Nt; //number of controls }Nsize; struct MARKER { int Nball; //number of alleles int Nc; //number of cases with known genotype int Nt; //number of controls with known genotype int Nu; //number of unkown controls with known genotype int ***mark; //3-dim array. 1st dimension: family number; 2nd dim: indiv rank; 3rd dim has 3 components [1]:allele1,[2]:allele2 and [3]:status Nsize *typed; }; /* M is the number of alleles N is the number of individuals in the marker data file NPheno is the number of phenotyped individuals NnoPheno the number of not phenotyped individuals NnoUnknown is the number of not individuals that were removed from the anlysis (this is for option 2 since individuals that are not phenotyped are removed) F number of independent families outfile is the result file, genofile is the marker data file and idfile is the kinshipcoef file famdata is an array of FAM with particular information regarding each family Mark is an array of MARKER with particular information for each marker Storekin is a 3-dim array storing the inbreeding and kinship coefficients in each family. */ int M=0,N,F,NbMark,NPheno=0,NnoPheno=0,NUnknown=0,Naffected=0,NUnaffected=0; FILE *outfile,*genofile,*idfile,*errfile,*penfile,*sigfile,*pvfile; struct FAM famdata[MAXFAM]; struct MARKER Mark[MAXMARK]; double ***Storekin; double Kp; int TOTALMZCOUNT=0; int Option; int BIGMARKER; char **TOPSNPNAME,**TOPSNPNAME2,**TOPSNPNAME3; void readpen(char *name); void readcar(char *name); void readdata (struct MARKER *Mark,struct FAM *famdata,FILE *errfile); void readGenotypes(int fam,int m,int length,double **cholaug,double **cholaugCase,double **cholaugControl,int *MissingVec,int miss,double **kincoefmatrix,double **kincoefmatrixCase,double **kincoefmatrixControl); void readkincoef(char *name,FILE *errfile,struct FAM *famdata,double ***Storekin); void alleleFreq(double **cholaug,double *frequency,int size, double *denominator); void naiveCount(double **cholaug,double *Naive,double *Naivefreq,int size); void getfrequency(double *Naivefreq,double *NaivefreqCase,double *NaivefreqControl,double *frequency,double *frequencyCase,double *frequencyControl,double denominator,double denomcases,double denomcontrols,int Nall,int Ncase,int Ncontrol); void modifcholaug(double **cholent,double **cholaug,double **chol,double *frequency,int size); int makeFreqMat(double **freqMatrix,double *frequency); void comput_info_score(double **cholaug,double **freqMatrix,double *infoQL_rr,double *infoQL_rf,double *infoQL_ff,double *Rvector,int size); void from_info2_chi2(double info_rr,double info_rf,double info_ff,double *Naive,double *Naivefreq,double **freqNaive,double *chi2val,int Nall,int Ncase); void from_info2_score(double **freqMatrix,double infoQL_rr,double infoQL_rf,double infoQL_ff,double *Rvector,double *testval); void comput_info_chi2(double **kincoefMatrix,double **cholaug,double *info_rr,double *info_rf,double *info_ff,int size); int findInd(int Id1,int family); void comput_info_scoreMQLS(double **cholaug,double **freqMatrix,double *infoQL_rr,double *infoQL_rf,double *infoQL_ff,double *Rvector,int size); void GetMZtwins(int fam,int length); void vecsrt(double *d, int *M,int n); void robustvar(double **cholaug,double *ROBUST_11,double *YYvector,double *Y1vector,int size,int *Nstu); void from_info2_scoreROBUST(double **freqMatrix,double infoQL_rr,double infoQL_rf,double infoQL_ff,double *Rvector,double *testval,double ROBUST_11,double *YYvector,double *Y1vector, int Nstu,double *ROBUSTVAR); void from_info2_chi2_ROBUST(double info_rr, double info_rf, double info_ff, double *Naive,double *Naivefreq,double **freqNaive,double *chi2val,int Nall,int Ncase,double ROBUST_11,double *YYvector,double *Y1vector, int Nstu,double *ROBUSTVAR); void vecsrt2(double *d, int *M,char **NAME,int n); int main (int argc, char *argv[]) { /* cholent stores the allelic information in the M first columns. Column M+1 contains 2, column M+2 contains 2 for cases and 0 for controls cholaug stores cholent times the inverse of the cholesky decompo of the covariance matrix chol stores the cholesky decomposition of kincoefMatrix kincoefMatrix stores the kinship based covariance Matrix frequency stores the frequency estimates freqMatrix is a matrix that is a function of the M-1 allele frequencies MissingVec lists all individual number for whom genotype or phenotype info is missing dmatrix and dvector are available from nrutil.c */ double **cholaug,**cholentCase,**cholentControl; double **kincoefMatrix,**kincoefMatrixCase,**kincoefMatrixControl; double *frequency,*frequencyCase,*frequencyControl; double **freqMatrix; double testval=0,chi2val=0, denominator=0, denomcases=0, denomcontrols=0; double *Naive,*Naivefreq,**freqNaive,*NaiveCase,*NaiveControl,*NaivefreqCase,*NaivefreqControl,*Rvector; double info_rr=0,info_rf=0,info_ff=0,infoQL_rr=0,infoQL_rf=0,infoQL_ff=0; int i,j,fam,m,followcase=0,followcontrol=0,followall=0,df=0,Npas,negfreq=0; double MQLSval=0,MQLSvalrobust=0,chi2valrobust=0,testvalrobust=0; double *Pvalues,*Pvalues2,*Pvalues3; int *Porder,*Porder2,*Porder3,top; double *YYvector,*Y1vector,ROBUST_11,*ROBUSTVAR; int Nstu; int *TOPPorder,*TOPPorder2,*TOPPorder3; double *TOPPvalues,*TOPPvalues2,*TOPPvalues3,pval1,pval2,pval3; char tempname[100]; if (argc!=5) {printf("Wrong usage. Should be ./MQLStest markerdata_file kinshipcoef_file prevelance_file 1 or ./MQLStest markerdata_file kinshipcoef_file prevelance_file 2 \n"); exit(1); } Kp=0; readpen(argv[3]); Option = atoi( argv[4] ); if ( Option != 1 && Option != 2 ) { printf("\nIncorrect input: %s\n\n", argv[4]); printf("Wrong usage. Should be ./MQLStest markerdata_file kinshipcoef_file prevelance_file 1 (or 2)\n\n"); printf(" use 1, if you want individuals who are not phenotyped to be included in the analysis\n"); printf(" use 2, if you want to exclude individuals who are not phenotyped\n\n"); exit(0); } readcar(argv[1]); if (NbMark > MAXMARK) {printf("ERROR: The number of markers (%d) exceeds MAXMARK (%d)\n",NbMark,MAXMARK); exit(2); } for (m=1;m<=NbMark;m++) { Mark[NbMark].Nball=0; Mark[NbMark].Nc=0; Mark[NbMark].Nt=0; Mark[NbMark].Nu=0; Mark[m].typed = (Nsize *)malloc((size_t) ((MAXFAM+1)*sizeof(Nsize))); if (!Mark[m].typed) {printf("Error in memory allocation 1\n"); exit(1);} Mark[m].mark = (int ***)malloc((size_t) ((MAXFAM+1)*sizeof(int**))); if (!Mark[m].mark) {printf("Error in memory allocation 2\n"); exit(1);} for (i=0;i<=F;i++) { Mark[m].typed[i].N=0; Mark[m].typed[i].Nc=0; Mark[m].typed[i].Nt=0; Mark[m].mark[i] = (int **)malloc((size_t)(((famdata[i].N)+1)*sizeof(int*))); if (!Mark[m].mark[i]) {printf("Error in memory allocation 3\n"); exit(1);} for (j=0;j<=famdata[i].N;j++) {Mark[m].mark[i][j]= (int *)malloc((size_t) ((5)*sizeof(int))); if (!Mark[m].mark[i][j]) {printf("Error in memory allocation 4 %d %d %d\n",famdata[i].N,i,j); exit(1);} } /* ONLY NEED THIS MEMORY ALLOCATION ONCE FOR EACH FAMILY */ if(m==1) { famdata[i].NoPheno=0; famdata[i].Pheno=0; famdata[i].Unknown=0; famdata[i].descri=(int*)malloc(((famdata[i].N)+1)*sizeof(int)); if (!famdata[i].descri) {printf("Error in memory allocation 5\n"); exit(1);} famdata[i].MZ=(double **)malloc((size_t)(((famdata[i].N)+1)*sizeof(double*))); for (j=0;j<=famdata[i].N;j++) {famdata[i].MZ[j]=(double *)malloc((size_t) ((4)*sizeof(double)));} famdata[i].AVEC=(double **)malloc((size_t)(((famdata[i].N)+1)*sizeof(double*))); for (j=0;j<=famdata[i].N;j++) {famdata[i].AVEC[j]=(double *)malloc((size_t) ((4)*sizeof(double)));} } } } if((errfile=fopen("MQLStest.err", "w"))==NULL) { printf("Can't open MQLStest.err .\n"); exit(1); } readdata(Mark,famdata,errfile); Storekin = (double ***)malloc((size_t) ((F+1)*sizeof(double**))); for (i=0;i<=F;i++) { if (i==0) Npas=N; else Npas=famdata[i].Pheno+famdata[i].NoPheno; Storekin[i] = (double **)malloc((size_t) ((Npas+1)*sizeof(double*))); if (!Storekin[i]) {printf("Error in memory allocation for kinship coefficient storage\n"); exit(1);} for (j=0;j<=Npas;j++) { Storekin[i][j]= (double *)malloc((size_t) ((Npas+1)*sizeof(double))); if (!Storekin[i][j]) {printf("Error in memory allocation for kinship coefficient storage\n"); exit(1);} } } readkincoef(argv[2],errfile,famdata,Storekin); TOTALMZCOUNT=0; for (fam=1;fam<=F;fam++) {GetMZtwins(fam,famdata[fam].Pheno);} if((sigfile=fopen("MQLStest.top", "w"))==NULL) { printf("Can't open MQLStest.top.\n"); exit(1); } if((outfile=fopen("MQLStest.out", "w"))==NULL) { printf("Can't open MQLStest.out .\n"); exit(1); } fprintf(outfile,"******Results of the Case-control Association tests ******\n\n"); if(Option==1) {fprintf(outfile,"There are %d individuals from %d independent families. %d of the individuals are affected, %d of the individuals are unaffected, and %d of the individuals are of unknown phenotype.\n\n",NPheno,F,Naffected,NUnaffected,NUnknown); } if(Option==2) {fprintf(outfile,"There are %d individuals from %d independent families. %d of the individuals are affected, %d of the individuals are unaffected, and %d are of unknown phenotype. Option 2 was chosen by the user so individuals with unknown phenotypes were not used in the analysis.\n\n",NPheno,F,Naffected,NUnaffected,NUnknown); } fprintf(outfile,"The prevalence value used in the MQLS calculations is %lf \n\n",Kp); if(TOTALMZCOUNT>0) {fprintf(outfile,"There are %d MZ twin pairs \n\n",TOTALMZCOUNT);} TOPPvalues=dvector(0,MAXTOP); TOPPvalues2=dvector(0,MAXTOP); TOPPvalues3=dvector(0,MAXTOP); TOPPorder=ivector(0,MAXTOP); TOPPorder2=ivector(0,MAXTOP); TOPPorder3=ivector(0,MAXTOP); TOPSNPNAME=(char **)malloc((size_t) ((MAXTOP+1)*sizeof(char*))); TOPSNPNAME2=(char **)malloc((size_t) ((MAXTOP+1)*sizeof(char*))); TOPSNPNAME3=(char **)malloc((size_t) ((MAXTOP+1)*sizeof(char*))); for(j=1;j<=MAXTOP;j++) {TOPSNPNAME[j]=(char *)malloc((size_t) ((100)*sizeof(char))); TOPSNPNAME2[j]=(char *)malloc((size_t) ((100)*sizeof(char))); TOPSNPNAME3[j]=(char *)malloc((size_t) ((100)*sizeof(char))); } if((pvfile=fopen("MQLStest.pvalues", "w"))==NULL) { printf("Can't open MQLStest.pvalue.\n"); exit(1); } fprintf(pvfile,"Marker_Number \t MQLS_Robust \t CCHI_Robust \t WQLS_Robust\n"); for (m=1;m<=NbMark;m++) { BIGMARKER=m; negfreq=0; M=Mark[m].Nball; freqMatrix=dmatrix(1,M,1,M); frequency=dvector(1,M); frequencyCase=dvector(1,M); frequencyControl=dvector(1,M); Naive=dvector(1,M); NaiveCase=dvector(1,M); NaiveControl=dvector(1,M); Naivefreq=dvector(1,M); Rvector=dvector(1,M); NaivefreqCase=dvector(1,M); NaivefreqControl=dvector(1,M); freqNaive=dmatrix(1,M,1,M); testval=0; chi2val=0; denominator=0; denomcases=0; denomcontrols=0; info_rr=0; info_rf=0; info_ff=0; infoQL_rr=0; infoQL_rf=0; infoQL_ff=0; MQLSval=0; ROBUSTVAR=dvector(1,M); YYvector=dvector(1,M); Y1vector=dvector(1,M); ROBUST_11=0; Nstu=0; for (i=1;i<=M;i++) { frequency[i]=0; frequencyCase[i]=0; frequencyControl[i]=0; Naive[i]=0; NaiveCase[i]=0; NaiveControl[i]=0; Naivefreq[i]=0; NaivefreqCase[i]=0; NaivefreqControl[i]=0; YYvector[i]=0; Y1vector[i]=0; ROBUSTVAR[i]=0; if (i0) { followall=1; famdata[fam].cholent=dmatrix(1,Mark[m].typed[fam].N,1,M+3); famdata[fam].chol=dmatrix(1,Mark[m].typed[fam].N,1,Mark[m].typed[fam].N); cholaug=dmatrix(1,Mark[m].typed[fam].N,1,M+3); kincoefMatrix=dmatrix(1,Mark[m].typed[fam].N,1,Mark[m].typed[fam].N); } if ((famdata[fam].Pheno-Mark[m].typed[fam].N)>0) famdata[fam].MissingVec=ivector(1,famdata[fam].Pheno-Mark[m].typed[fam].N); if (Mark[m].typed[fam].Nc>0){ followcase=1; cholentCase=dmatrix(1,Mark[m].typed[fam].Nc,1,M+2); kincoefMatrixCase=dmatrix(1,Mark[m].typed[fam].Nc,1,Mark[m].typed[fam].Nc); } if (Mark[m].typed[fam].Nt>0){ followcontrol=1; cholentControl=dmatrix(1,Mark[m].typed[fam].Nt,1,M+2); kincoefMatrixControl=dmatrix(1,Mark[m].typed[fam].Nt,1,Mark[m].typed[fam].Nt); } readGenotypes(fam,m,famdata[fam].Pheno,famdata[fam].cholent,cholentCase,cholentControl,famdata[fam].MissingVec,famdata[fam].Pheno-Mark[m].typed[fam].N,kincoefMatrix,kincoefMatrixCase,kincoefMatrixControl); /*computing allele frequencies in the cases... */ if (followcase==1) { naiveCount(cholentCase,NaiveCase,NaivefreqCase,Mark[m].typed[fam].Nc); if (cholesky(kincoefMatrixCase,Mark[m].typed[fam].Nc,cholentCase,M+1,kincoefMatrixCase,cholentCase,1)!=1) {printf("cholesky decomposition of the case cov matrix failed for family %d. Might be due to inconsistent kinship coefficient values...\n",fam); exit(1); } alleleFreq(cholentCase,frequencyCase,Mark[m].typed[fam].Nc, &denomcases); } /*computing allele frequencies in the controls... */ if (followcontrol==1) { naiveCount(cholentControl,NaiveControl,NaivefreqControl,Mark[m].typed[fam].Nt); if (cholesky(kincoefMatrixControl,Mark[m].typed[fam].Nt,cholentControl,M+1,kincoefMatrixControl,cholentControl,1)!=1) { printf("cholesky decomposition of the control cov matrix failed for family %d. Might be due to inconsistent kinship coefficient values...\n",fam); exit(1); } alleleFreq(cholentControl,frequencyControl,Mark[m].typed[fam].Nt, &denomcontrols); } /*computing overall allele frequencies and statistics...*/ if (followall==1) { naiveCount(famdata[fam].cholent,Naive,Naivefreq,Mark[m].typed[fam].N); comput_info_chi2(kincoefMatrix,famdata[fam].cholent,&info_rr,&info_rf,&info_ff,Mark[m].typed[fam].N); if (cholesky(kincoefMatrix,Mark[m].typed[fam].N,famdata[fam].cholent,M+2,famdata[fam].chol,cholaug,1)!=1) {printf("cholesky decomposition of the cov matrix failed for family %d. Might be due to inconsistent kinship coefficient values...\n",fam); exit(1); } alleleFreq(cholaug,frequency,Mark[m].typed[fam].N, &denominator); robustvar(cholaug,&ROBUST_11,YYvector,Y1vector,Mark[m].typed[fam].N,&Nstu); } if (Mark[m].typed[fam].N>0) free_dmatrix(cholaug,1,Mark[m].typed[fam].N,1,M+2); if (Mark[m].typed[fam].N>0) free_dmatrix(kincoefMatrix,1,Mark[m].typed[fam].N,1,Mark[m].typed[fam].N); if (Mark[m].typed[fam].Nc>0) free_dmatrix(kincoefMatrixCase,1,Mark[m].typed[fam].Nc,1,Mark[m].typed[fam].Nc); if (Mark[m].typed[fam].Nt>0) free_dmatrix(kincoefMatrixControl,1,Mark[m].typed[fam].Nt,1,Mark[m].typed[fam].Nt); if (Mark[m].typed[fam].Nc>0) free_dmatrix(cholentCase,1,Mark[m].typed[fam].Nc,1,M+1); if (Mark[m].typed[fam].Nt>0) free_dmatrix(cholentControl,1,Mark[m].typed[fam].Nt,1,M+1); } getfrequency(Naivefreq,NaivefreqCase,NaivefreqControl,frequency, frequencyCase,frequencyControl,denominator,denomcases,denomcontrols,Mark[m].Nc+Mark[m].Nt,Mark[m].Nc,Mark[m].Nt); makeFreqMat(freqNaive,Naivefreq); /* Remove the below comment to calculate the orignial Corrected chi-squared that assumes HWE in the variance calculation */ /* from_info2_chi2(info_rr,info_rf,info_ff,Naive,Naivefreq,freqNaive,&chi2val,Mark[m].Nc+Mark[m].Nt,Mark[m].Nc); */ if (makeFreqMat(freqMatrix,frequency)==1) {negfreq=1;} if (negfreq==0) { for (fam=1;fam<=F;fam++) { if (Mark[m].typed[fam].N>0) { cholaug=dmatrix(1,Mark[m].typed[fam].N,1,M+3); modifcholaug(famdata[fam].cholent,cholaug,famdata[fam].chol,frequency,Mark[m].typed[fam].N); comput_info_scoreMQLS(cholaug,freqMatrix,&infoQL_rr,&infoQL_rf,&infoQL_ff,Rvector,Mark[m].typed[fam].N); free_dmatrix(cholaug,1,Mark[m].typed[fam].N,1,M+3); } } /* Remove the below comment to calculate the orignal MQLS that assumes HWE in the variance calculation */ /* from_info2_score(freqMatrix,infoQL_rr,infoQL_rf,infoQL_ff,Rvector,&MQLSval); */ from_info2_scoreROBUST(freqMatrix,infoQL_rr,infoQL_rf,infoQL_ff,Rvector,&MQLSvalrobust,ROBUST_11,YYvector,Y1vector,Nstu,ROBUSTVAR); from_info2_chi2_ROBUST(info_rr,info_rf,info_ff,Naive,Naivefreq,freqNaive,&chi2valrobust,Mark[m].Nc+Mark[m].Nt,Mark[m].Nc,ROBUST_11,YYvector,Y1vector,Nstu,ROBUSTVAR); } /*output printing ... */ sprintf(tempname,"%d",BIGMARKER); fprintf(outfile,"****************************************"); fprintf(outfile,"\n\nAnalysis of Marker %d \n\n",m); fprintf(outfile,"****************************************\n"); if(Option==1) {fprintf(outfile,"There are %d affected individuals, %d unaffected individuals, and %d individuals of unknown phenotype available. \n\n",Mark[m].Nc,Mark[m].Nt-Mark[m].Nu,Mark[m].Nu); } if(Option==2) {fprintf(outfile,"%d affected individuals and %d unaffected individuals available.\n\n",Mark[m].Nc,Mark[m].Nt); } fprintf(outfile,"*****************************************\n"); pval1=1; pval2=1; pval3=1; if (Mark[m].Nt!=0 && Mark[m].Nc!=0) {df=0; for (i=1;i<=M-1;i++) if (frequency[i]!=0) df++; fprintf(outfile,"MQLS test using robust variance estimate\n\n"); if (negfreq==0) { /* Remove the below comment to print the orignal MQLS output that assumes HWE in the variance calculation */ /* fprintf(outfile,"MQLS statistic value = %f\t pvalue = %g df = %d\n\n",MQLSval,pochisq(MQLSval,df),df); */ fprintf(outfile,"MQLS statistic value = %f\t pvalue = %g df = %d\n\n",MQLSvalrobust,pochisq(MQLSvalrobust,df),df); pval1=pochisq(MQLSvalrobust,df); if (pochisq(MQLSvalrobust,df)<=0.05) { for (i=1;i<=M-1;i++) { if (Rvector[i]>0) fprintf(outfile,"Frequency of allele %d is increased in the cases (quasi-score associated to this allele is %.4f)\n\n",i,Rvector[i]); if (Rvector[i]<0) fprintf(outfile,"Frequency of allele %d is increased in the controls (quasi-score associated to this allele is %.4f)\n\n",i,Rvector[i]); if (Rvector[i]==0) fprintf(outfile,"Frequency of allele %d is the same in cases and controls (quasi-score associated to this allele is 0)\n\n",i); } } for (i=1;i<=M;i++) {if (((Mark[m].Nc)*frequencyCase[i])<5) fprintf(outfile,"The p-value might not be exact because of the small number of type %d alleles in cases\n",i); if (((Mark[m].Nt)*frequencyControl[i])<5) fprintf(outfile,"The p-value might not be exact because of the small number of type %d alleles in controls\n",i); } } else printf("Computation of the MQLS statistic is not possible\n\n"); df=0; for (i=1;i<=M-1;i++) if (frequency[i]!=0) df++; fprintf(outfile,"\n*****************************************\n"); fprintf(outfile,"Case-control corrected chi-squared test using robust variance estimate \n\n"); /* Remove the below comment to print the orignal Corrected chi-squared output that assumes HWE in the variance calculation */ /* fprintf(outfile,"Corrected chi2 statistic value = %f\t pvalue = %g df = %d\n\n",chi2val,pochisq(chi2val,df),df); */ fprintf(outfile,"Corrected chi-squared statistic value = %f\t pvalue = %g df = %d \n\n",chi2valrobust,pochisq(chi2valrobust,df),df); pval2=pochisq(chi2valrobust,df); for (i=1;i<=M;i++) {if (((Mark[m].Nc)*NaivefreqCase[i])<5) fprintf(outfile,"The p-value might not be exact because of the small number of allele %d in cases\n\n",i); if (((Mark[m].Nt)*NaivefreqControl[i])<5) fprintf(outfile,"The p-value might not be exact because of the small number of allele %d in controls\ni\n",i); } /********* WQLS CALCULATIONS *********/ info_rr=0; info_rf=0; info_ff=0; infoQL_rr=0; infoQL_rf=0; infoQL_ff=0; testval=0; for(j=1;j<=M;j++) {Rvector[j]=0;} for (fam=1;fam<=F;fam++) { if (Mark[m].typed[fam].N>0) { cholaug=dmatrix(1,Mark[m].typed[fam].N,1,M+3); modifcholaug(famdata[fam].cholent,cholaug,famdata[fam].chol,frequency,Mark[m].typed[fam].N); comput_info_score(cholaug,freqMatrix,&infoQL_rr,&infoQL_rf,&infoQL_ff,Rvector,Mark[m].typed[fam].N); free_dmatrix(cholaug,1,Mark[m].typed[fam].N,1,M+3); free_dmatrix(famdata[fam].cholent,1,Mark[m].typed[fam].N,1,M+3); free_dmatrix(famdata[fam].chol,1,Mark[m].typed[fam].N,1,Mark[m].typed[fam].N); } } /* Remove the below comment to calculate the orignal WQLS that assumes HWE in the variance calculation */ /* from_info2_score(freqMatrix,infoQL_rr,infoQL_rf,infoQL_ff,Rvector,&testval); */ from_info2_scoreROBUST(freqMatrix,infoQL_rr,infoQL_rf,infoQL_ff,Rvector,&testvalrobust,ROBUST_11,YYvector,Y1vector,Nstu,ROBUSTVAR); fprintf(outfile,"*****************************************\n"); fprintf(outfile,"WQLS test using robust variance estimate \n\n"); if (negfreq==0) { /* Remove the below comment to print the orignal WQLS output that assumes HWE in the variance calculation */ /* fprintf(outfile,"WQLS statistic value = %f\t pvalue = %g df = %d\n\n",testval,pochisq(testval,df),df); */ fprintf(outfile,"WQLS statistic value = %f\t pvalue = %g df = %d\n\n",testvalrobust,pochisq(testvalrobust,df),df); pval3=pochisq(testvalrobust,df); if (pochisq(testvalrobust,df)<=0.05) { for (i=1;i<=M-1;i++) { if (Rvector[i]>0) fprintf(outfile,"Frequency of allele %d is increased in the cases (quasi-score associated to this allele is %.4f)\n\n",i,Rvector[i]); if (Rvector[i]<0) fprintf(outfile,"Frequency of allele %d is increased in the controls (quasi-score associated to this allele is %.4f)\n\n",i,Rvector[i]); if (Rvector[i]==0) fprintf(outfile,"Frequency of allele %d is the same in cases and controls (quasi-score associated to this allele is 0)\n\n",i); } } for (i=1;i<=M;i++) {if (((Mark[m].Nc)*frequencyCase[i])<5) fprintf(outfile,"The p-value might not be exact because of the small number of type %d alleles in cases\n",i); if (((Mark[m].Nt)*frequencyControl[i])<5) fprintf(outfile,"The p-value might not be exact because of the small number of type %d alleles in controls\n",i); } } else printf("Computation of the WQLS statistic is not possible\n\n"); } else { if ((Mark[m].Nc)==0) fprintf(outfile,"\n\nTest statistics not computed because there are no cases in the sample \n\n"); else fprintf(outfile,"\n\nTest statistics not computed because there are no controls in the sample \n\n"); } fprintf(outfile,"\n*****************************************\n"); if (negfreq==0) { fprintf(outfile,"allele frequency estimates using the Best Linear Unbiased Estimator (BLUE) in \n"); fprintf(outfile,"\t\t\t cases \t\t\t controls \t\t\t all sample \n"); for (i=1;i<=M;i++) fprintf(outfile,"allele %d : freq = %.4f sd = %.4f\t freq = %.4f sd = %.4f\t\t freq = %.4f sd = %.4f\n",i,frequencyCase[i],sqrt(2*frequencyCase[i]*(1-frequencyCase[i])/denomcases),frequencyControl[i],sqrt(2*frequencyControl[i]*(1-frequencyControl[i])/denomcontrols),frequency[i],sqrt(2*frequency[i]*(1-frequency[i])/denominator)); } else fprintf(outfile,"QL computation of allele frequencies gives negative values....\n\nskipped... \n\nUse naive estimates\n\n"); fprintf(outfile,"*****************************************\n"); fprintf(outfile,"allele frequency estimates using naive counting in\n"); fprintf(outfile,"\t\t cases \t\t controls \t\t all sample \n"); for (i=1;i<=M;i++) fprintf(outfile,"allele %d : freq = %.4f\t freq = %.4f\t\t freq = %.4f\n",i,NaivefreqCase[i],NaivefreqControl[i],Naivefreq[i]); fprintf(outfile,"*****************************************\n"); fprintf(outfile,"\n\n\n\n"); fprintf(pvfile,"%d \t \t %g \t %g \t %g \n",BIGMARKER,pval1,pval2,pval3); if(BIGMARKER==1) {TOPPvalues[BIGMARKER]=pval1; TOPPorder[BIGMARKER]=BIGMARKER; strncpy(TOPSNPNAME[BIGMARKER],tempname,100); TOPPvalues2[BIGMARKER]=pval2; TOPPorder2[BIGMARKER]=BIGMARKER; strncpy(TOPSNPNAME2[BIGMARKER],tempname,100); TOPPvalues3[BIGMARKER]=pval3; TOPPorder3[BIGMARKER]=BIGMARKER; strncpy(TOPSNPNAME3[BIGMARKER],tempname,100); } else {if(BIGMARKER<=MAXTOP) { TOPPvalues[BIGMARKER]=pval1; TOPPorder[BIGMARKER]=BIGMARKER; strncpy(TOPSNPNAME[BIGMARKER],tempname,100); vecsrt2(TOPPvalues,TOPPorder,TOPSNPNAME,BIGMARKER); TOPPvalues2[BIGMARKER]=pval2; TOPPorder2[BIGMARKER]=BIGMARKER; strncpy(TOPSNPNAME2[BIGMARKER],tempname,100); vecsrt2(TOPPvalues2,TOPPorder2,TOPSNPNAME2,BIGMARKER); TOPPvalues3[BIGMARKER]=pval3; TOPPorder3[BIGMARKER]=BIGMARKER; strncpy(TOPSNPNAME3[BIGMARKER],tempname,100); vecsrt2(TOPPvalues3,TOPPorder3,TOPSNPNAME3,BIGMARKER); } else {if(pval1=BIGMARKER) {top=BIGMARKER;} fprintf(sigfile,"Below is a list of the top %d markers with the smallest p-values using the MQLS Robust test statistic \n\n",top); fprintf(sigfile,"**************************************************\n\n"); for (m=1;m<=top;m++) {fprintf(sigfile,"MARKER %d: %s has a p-value of %g\n",TOPPorder[m],TOPSNPNAME[m],TOPPvalues[m]);} fprintf(sigfile,"\n\n############################################### \n\n"); fprintf(sigfile,"Below is a list of the top %d markers with the smallest p-values using the CCHI Robust test statistic \n\n",top); fprintf(sigfile,"**************************************************\n\n"); for (m=1;m<=top;m++) {fprintf(sigfile,"MARKER %d: %s has a p-value of %g\n",TOPPorder2[m],TOPSNPNAME2[m],TOPPvalues2[m]);} fprintf(sigfile,"\n\n############################################### \n\n"); fprintf(sigfile,"Below is a list of top %d markers with the smallest p-values using the WQLS Robust test statistic \n\n",top); fprintf(sigfile,"**************************************************\n\n"); for (m=1;m<=top;m++) {fprintf(sigfile,"MARKER %d: %s has a p-value of %g\n",TOPPorder3[m],TOPSNPNAME3[m],TOPPvalues3[m]);} fclose(sigfile); fclose(outfile); return 0; } /****************************************************** Reads the number of markers (NbMark), of families (F) and the total number of individuals listed in the linkage data file (N). The number of markers is calculated from the number of columns in the first line. *****************************************************/ void readcar (char *name) { int all1=0, all2=0,fam=0,famold=1,n=0; int indnumber=0,ind=0; char line[4*(MAXMARK+3)]; int length=4*(MAXMARK+3); if((genofile=fopen(name, "r"))==NULL) { printf("Can't open %s\n",name); exit(1); } NbMark=0; fscanf(genofile,"%*d %*d %*d %*d %*d %*d "); fgets(line,length,genofile); n=sscanf(line,"%d %d %[^\n]",&all1,&all2,line); if (n<2) { printf("No marker to test. Please check first line in marker data file\n\n"); exit(1); } else { NbMark++; while (n==3) { n=sscanf(line,"%d %d %[^\n]",&all1,&all2,line); if (n>=2) { NbMark++; } } } rewind(genofile); famdata[1].N=0; while((fscanf(genofile, "%d %d %*d %*d %*d %*[^\n]",&fam,&ind))==2) { if (fam==famold) famdata[fam].N++; else famdata[fam].N=1; if (fam!=famold && fam!=(famold+1)) {printf("Problems with family %d.\n Family should have following Id numbers from 1 to N\n",fam); exit(1);} indnumber++; famold=fam; } N=indnumber; F=fam; if (F>MAXFAM) { printf("Number of families exceeds the maximum number of families allowed\n. You should try to change the value of MAXFAM in the CC-QLStest.c file and recompile.\n\n"); exit(1);} rewind(genofile); } /****************************************************** Reads the marker data file. Family and individual information is stored in famdata[fam] and Mark[m].mark[fam] ranking the individuals in each family. famdata[fam].descri[rank]=Id gives the correspondence between rank and Id. *****************************************************/ void readdata (struct MARKER *Mark,struct FAM *famdata,FILE *errfile) { int all1=0, all2=0,i=0,fam=0,famold=1,n=0; int status=0,indnumber=0,ind=0,m=0,j,indiv=0; char line[4*(MAXMARK+3)]; int length=4*(MAXMARK+3); for (i=1;i<=NbMark;i++) Mark[i].Nball=0; while((fscanf(genofile, "%d %d %*d %*d %*d %d ",&fam,&ind,&status))==3) { if (fam!=famold && fam!=(famold+1)) { printf("Problems with family %d.\n Family should have following Id numbers from 1 to N\n ",fam); exit(1); } if (fam!=famold) {indnumber=0; NPheno+=famdata[famold].Pheno; NnoPheno+=famdata[famold].NoPheno; } if(Option==1) { if ((status<0) ||(status>2)) { fprintf(errfile,"individual %d from family %d does not have a phenotype value of 0, 1, or 2 ...skipped\n",ind,fam); fscanf(genofile,"%[^\n]",line); } else if (status==0 || status==1 || status==2) { if(status==0) {famdata[fam].Unknown++; NUnknown=NUnknown+1; } if(status==1) {NUnaffected++; } if(status==2) {Naffected++; } indnumber++; famdata[fam].Pheno++; famdata[fam].descri[indnumber]=ind; m=0; while (mMark[m].Nball) Mark[m].Nball=all1; if (all2>Mark[m].Nball) Mark[m].Nball=all2; } } } } /*** if then ***/ if(Option==2) { if (status ==0) { fprintf(errfile,"individual %d from family %d is not phenotyped...skipped\n",ind,fam); famdata[fam].NoPheno++; famdata[fam].Unknown++; fscanf(genofile,"%[^\n]",line); NUnknown=NUnknown+1; } else if (status==1 || status==2) { if(status==1) {NUnaffected++; } if(status==2) {Naffected++; } indnumber++; famdata[fam].Pheno++; famdata[fam].descri[indnumber]=ind; m=0; while (mMark[m].Nball) Mark[m].Nball=all1; if (all2>Mark[m].Nball) Mark[m].Nball=all2; } } } } /*** if then ***/ fscanf(genofile,"\n"); famold=fam; } NPheno+=famdata[fam].Pheno; NnoPheno+=famdata[fam].NoPheno; } /****************************************************** Construct the genotype and kinship coef matrices for the global sample, the case sample and the control sample. *****************************************************/ void readGenotypes(int fam,int m,int length,double **cholaug,double **cholaugCase,double **cholaugControl,int *MissingVec,int miss,double **kincoefmatrix,double **kincoefMatrixCase,double **kincoefmatrixControl) { int all1=0, all2=0,j=0,i=0,family=0,follow=0; int a1=0,tot=1,controls=1,cases=1; int nbc1=0,nbc2=0,nbt1=0,nbt2=0,nb1=0,nb2=0; double val1=0; for (i=1;i<=length;i++) //length is famdata[fam].NPheno { val1=0; if (Mark[m].mark[fam][i][1]!=0 && Mark[m].mark[fam][i][2]!=0) { nb1++; nb2=nb1; if (Storekin[fam][i][i]==-1) { printf("No inbreeding coefficient for individual %d from family %d. Please check...\n\n",i,fam); exit(1); } kincoefmatrix[nb1][nb1]=Storekin[fam][i][i]+1; if (Mark[m].mark[fam][i][3]==2) { nbc1++; nbc2=nbc1; kincoefMatrixCase[nbc1][nbc1]=kincoefmatrix[nb2][nb1]; for (j=i+1;j<=length;j++) { if (Storekin[fam][i][j]==-1) { printf("No kinship coefficient between individual %d and individual %d from family %d. Please check...\n\n",famdata[fam].descri[i],famdata[fam].descri[j],fam); exit(1); } if (Mark[m].mark[fam][j][1]!=0 && Mark[m].mark[fam][j][2]!=0) { nb2++; kincoefmatrix[nb1][nb2]=2*Storekin[fam][i][j]; kincoefmatrix[nb2][nb1]=kincoefmatrix[nb1][nb2]; if (Mark[m].mark[fam][j][3]==2) { nbc2++; kincoefMatrixCase[nbc1][nbc2]=kincoefmatrix[nb1][nb2]; kincoefMatrixCase[nbc2][nbc1]=kincoefMatrixCase[nbc1][nbc2]; } } } cholaug[tot][M+1]=2; cholaugCase[cases][M+1]=2; cholaug[tot][M+2]=2; cholaugCase[cases][M+2]=2; if(famdata[fam].MZ[i][1]!=-10) {cholaug[tot][M+2]=2*famdata[fam].MZ[i][2]; cholaugCase[cases][M+2]=2*famdata[fam].MZ[i][2]; /* printf("allocated the MZ case\n"); */ } for (a1=1;a1<=M;a1++) cholaug[tot][a1]=0; for(a1=1;a1<=M;a1++) { if (Mark[m].mark[fam][i][1]==a1) cholaug[tot][a1]++; if (Mark[m].mark[fam][i][2]==a1) cholaug[tot][a1]++; cholaugCase[cases][a1]=cholaug[tot][a1]; } cases++; tot++; } else if(Mark[m].mark[fam][i][3]==1||Mark[m].mark[fam][i][3]==0) { nbt1++; nbt2=nbt1; kincoefmatrixControl[nbt1][nbt1]=kincoefmatrix[nb2][nb1]; for (j=i+1;j<=length;j++) { if (Mark[m].mark[fam][j][1]!=0 && Mark[m].mark[fam][j][2]!=0) { nb2++; kincoefmatrix[nb1][nb2]=2*Storekin[fam][i][j]; kincoefmatrix[nb2][nb1]=kincoefmatrix[nb1][nb2]; if (Mark[m].mark[fam][j][3]==1||Mark[m].mark[fam][j][3]==0) { nbt2++; kincoefmatrixControl[nbt1][nbt2]=kincoefmatrix[nb1][nb2]; kincoefmatrixControl[nbt2][nbt1]=kincoefmatrixControl[nbt1][nbt2]; } } } cholaug[tot][M+1]=2; cholaugControl[controls][M+1]=2; cholaug[tot][M+2]=0; cholaugControl[controls][M+2]=0; if(famdata[fam].MZ[i][1]!=-10) { cholaug[tot][M+2]=2*famdata[fam].MZ[i][2]; cholaugControl[controls][M+2]=2*famdata[fam].MZ[i][2]; /* printf("allocated the MZ control\n"); */ } for (a1=1;a1<=M;a1++) cholaug[tot][a1]=0; for (a1=1;a1<=M;a1++) { if (Mark[m].mark[fam][i][1]==a1) cholaug[tot][a1]++; if (Mark[m].mark[fam][i][2]==a1) cholaug[tot][a1]++; cholaugControl[controls][a1]=cholaug[tot][a1]; } controls++; tot++; } } else { follow++; /* MissingVec[follow]=i; */ } } /* if ((cases-1)!=Mark[m].typed[fam].Nc || (controls-1)!=Mark[m].typed[fam].Nt) {printf("Problems while rereading the data file...\n"); exit(1); } if (follow!=miss) {printf("problem with missing data in family %d follow=%d miss=%d\n",fam,follow,miss); exit(1);} */ /** This is to get the phenotype vector for MQLS statistic **/ tot=1; nb1=0; for (i=1;i<=length;i++) //length is famdata[fam].NPheno {val1=0; if (Mark[m].mark[fam][i][1]!=0 && Mark[m].mark[fam][i][2]!=0) {nb1++; if(Option==1) {val1=famdata[fam].AVEC[i][1];} else { if(famdata[fam].MZ[i][1]!=-10) { val1+=famdata[fam].MZ[i][3]; } else { if (Mark[m].mark[fam][i][3]==2) { val1+=kincoefmatrix[nb1][nb1];} if (Mark[m].mark[fam][i][3]==1) { val1+=kincoefmatrix[nb1][nb1]*(-Kp/(1-Kp));} } nb2=0; for(j=1;j<=length;j++) { if (Mark[m].mark[fam][j][1]!=0 && Mark[m].mark[fam][j][2]!=0) {nb2++; if(j!=i) { if(famdata[fam].MZ[j][1]!=-10) { val1+=kincoefmatrix[nb1][nb2]*famdata[fam].MZ[j][3]; } else { if (Mark[m].mark[fam][j][3]==2) {val1+=kincoefmatrix[nb1][nb2]; } if (Mark[m].mark[fam][j][3]==1) {val1+=kincoefmatrix[nb1][nb2]*(-Kp/(1-Kp));} } } } } } /* if(tot==1&&fam==2) {val1=2; printf("in total\n");} if(tot==2&&fam==2) {val1=1+(-Kp/(1-Kp)); printf("in total\n");} printf("fam is %d and person counter is %d and MQLS val is %f and WQLS val is %f \n",fam,tot,val1,cholaug[tot][M+2]/2); */ cholaug[tot][M+3]=2*val1; /* printf("Weight value for individual %d from fam %d using genotyped individuals only is %lf and using all individuals is %lf \n",i,fam,val1,famdata[fam].AVEC[i][1]); */ /* printf("family is %d, person is %d and affected is %d and value is %.3lf, status is %.2lf and allelecount is %.2lf and total alleles is %.2lf \n",fam,i,Mark[m].mark[fam][i][3],cholaug[tot][M+3],cholaug[tot][M+2],cholaug[tot][1],cholaug[tot][M+1]); */ tot++; } } /* printf("\n\n"); */ } /****************************************************** Reads the kinship coefficients of the individual pairs from the kinshipcoef file and stores the values in the Storekin array. Storekin returns value -1 for pairs of individuals for which no coefficient is available *****************************************************/ void readkincoef(char *name,FILE *errfile, struct FAM *famdata,double ***Storekin) { double coef=0; int follow=0,j,i,n,famold=0; long int Id1,Id2; int family=0,ind1=0,ind2=0,indold1=0,indold2=0,Idold1=0,Idold2=0; int err_array[N]; int test=0,lim=0; for (i=1;i<=N;i++) err_array[i]=0; if((idfile=fopen(name, "r"))==NULL) { printf("Can't open %s\n",name); exit(1); } for (follow=1;follow<=F;follow++) for (i=1;i<=famdata[follow].Pheno;i++) for (j=i;j<=famdata[follow].Pheno;j++) { Storekin[follow][i][j]=-1; if (i!=j) Storekin[follow][j][i]=-1; } while (fscanf(idfile, "%d %ld %ld %lf\n",&family,&Id1,&Id2,&coef)==4) { if (family>F) { printf("Problem with family number %d. Family should have following Id numbers from 1 to N\n",family); exit(1); } if (family==famold) { if (Id1==Idold1) ind1=indold1; else if (Id1==Idold2) ind1=indold2; else ind1=findInd(Id1,family); if (Id2==Idold1) ind2=indold1; else if (Id2==Idold2) ind2=indold2; else ind2=findInd(Id2,family); } else { ind1=findInd(Id1,family); ind2=findInd(Id2,family); } famold=family; if (ind1!=0 && ind2!=0) { Storekin[family][ind1][ind2]=coef; Storekin[family][ind2][ind1]=coef; } if (ind1==0) { test=0; for (j=1;j<=lim;j++) if (Id1==err_array[j]) { test=1; j=lim+1; } if (test==0) { fprintf(errfile,"individual %ld from family %d is in the kinshipcoef file but is not phenotyped or not available from marker data file\n",Id1,family); lim++; err_array[lim]=Id1; } } if (ind2==0) { test=0; for (j=1;j<=lim;j++) if (Id2==err_array[j]) { test=1; j=lim+1; } if (test==0) { fprintf(errfile,"individual %ld from family %d is in the kinshipcoef file but is not phenotyped or not available from marker data file\n",Id2,family); lim++; err_array[lim]=Id2; } } Idold1=Id1; Idold2=Id2; indold1=ind1; indold2=ind2; } } /****************************************************** returns the rank of an individual in his family from its family number and Id *****************************************************/ int findInd(Id1,family) { int indpas=1; while (indpas<=famdata[family].Pheno) { if (famdata[family].descri[indpas]==Id1) return(indpas); indpas++; } return(0); } /****************************************************** Computes the frequency estimates using the quasilikelihood approach *****************************************************/ void alleleFreq(double **cholaug,double *frequency,int size, double *denominator) { int i=0, j=0; double pasden=0,numerator=0; for(i=1; i<=size; i++){ pasden=pasden+(cholaug[i][M+1])*(cholaug[i][M+1]); } for(j=1; j<=M; j++) { for(i=1; i<=size; i++) { numerator=numerator+cholaug[i][M+1]*cholaug[i][j]; } frequency[j]+=numerator; numerator=0; } *denominator+=pasden; } /****************************************************** Computes the frequency estimates using the naive counting *****************************************************/ void naiveCount(double **cholaug,double *Naive,double *Naivefreq,int size) { int i=0, j=0; double sum=0, contC=0; for(i=1; i<=M; i++){ contC=0; for(j=1; j<=size; j++) { sum+=cholaug[j][i]; if (cholaug[j][M+2]==2) contC+=cholaug[j][i]; } Naive[i]+=contC; Naivefreq[i]+=sum; sum=0; } } /****************************************************** To get from counts to frequencies *****************************************************/ void getfrequency(double *Naivefreq,double *NaivefreqCase,double *NaivefreqControl,double *frequency,double *frequencyCase,double *frequencyControl,double denominator,double denomcases,double denomcontrols,int Nall,int Ncase,int Ncontrol) { int i; for (i=1;i<=M;i++) { Naivefreq[i]/=(2*Nall); NaivefreqCase[i]/=(2*Ncase); NaivefreqControl[i]/=(2*Ncontrol); frequency[i]/=denominator; frequencyCase[i]/=denomcases; frequencyControl[i]/=denomcontrols; } } /****************************************************** Modify the cholaug matrix such as the M-1 first columns are equal to C-t*(Y-mu) instead of C-t*Y *****************************************************/ void modifcholaug(double **cholent,double **cholaug,double **chol,double *frequency,int size) { int i,j,k; for (i=1;i<=size;i++) for (j=1;j<=M+3;j++) { if (j0;i--) {invcholm[i][i]=1/cholm[i][i]; for (j=i-1;j>0;j--) {for (k=j+1;k<=i;k++) invcholm[j][i]-=cholm[j][k]*invcholm[k][i]; invcholm[j][i]/=cholm[j][j]; invcholm[i][j]=0; } } //inverse of transMat for (i=1;i<=Mpas-1;i++) for (j=1;j<=Mpas-1;j++) for (k=1;k<=Mpas-1;k++) { freqPasMat[i][j]+=invcholm[i][k]*invcholm[j][k]; } k=0; for (i=1;i<=M-1;i++) { if (Followfreq[i]==1) { k++; freqMatrix[i][i]=freqPasMat[k][k]; l=k; for (j=i+1;j<=M-1;j++) { if (Followfreq[j]==1) { l++; freqMatrix[i][j]=freqPasMat[k][l]; freqMatrix[j][i]=freqMatrix[i][j]; } else { freqMatrix[i][j]=0; freqMatrix[j][i]=0; } } } else { freqMatrix[i][i]=0; for (j=i+1;j<=M-1;j++) { freqMatrix[i][j]=0; freqMatrix[j][i]=0; } } } /* printf(""); */ /* Free the temp matrices */ free_dmatrix( transMat, 1, Mpas - 1, 1, Mpas - 1); free_dmatrix( cholm, 1, Mpas - 1, 1, Mpas - 1); free_dmatrix( invcholm, 1, Mpas - 1, 1, Mpas - 1); free_dmatrix( freqPasMat, 1, Mpas - 1, 1, Mpas - 1); return 0; } /****************************************************** Computes the information components required for the pseudo-score test *****************************************************/ void comput_info_score(double **cholaug,double **freqMatrix,double *infoQL_rr,double *infoQL_rf,double *infoQL_ff,double *Rvector,int size) { int i,j; double info_rr=0,info_rf=0,info=0,info_ff=0; for (i=1;i<=size;i++) {info_rr+=cholaug[i][M+2]*cholaug[i][M+2]; info_rf+=cholaug[i][M+2]*cholaug[i][M+1]; info_ff+=cholaug[i][M+1]*cholaug[i][M+1]; for (j=1;j<=M-1;j++) Rvector[j]+=cholaug[i][M+2]*cholaug[i][j]; } *infoQL_rr+=info_rr; *infoQL_rf+=info_rf; *infoQL_ff+=info_ff; } /****************************************************** Computes the pseudo-score test *****************************************************/ void from_info2_score(double **freqMatrix,double infoQL_rr,double infoQL_rf,double infoQL_ff,double *Rvector,double *testval) { int i,j; double info=0,score=0; info=infoQL_rr - infoQL_rf*infoQL_rf/infoQL_ff; for (i=1;i<=M-1;i++) for (j=1;j<=M-1;j++) score+=Rvector[i]*Rvector[j]/info*freqMatrix[i][j]/2; *testval=score; } /****************************************************** Computes the corrected chi2 test *****************************************************/ void from_info2_chi2(double info_rr, double info_rf, double info_ff, double *Naive,double *Naivefreq,double **freqNaive,double *chi2val,int Nall,int Ncase) { int i,j; double chi2old=0; double freqpas=0,corrfactor=0; corrfactor=2/((info_rr-2*Ncase*(double)info_rf/Nall+(double)Ncase/Nall*(double)Ncase/Nall*info_ff)); for (i=1;i<=M-1;i++) for (j=1;j<=M-1;j++) chi2old+=(Naive[i]-2*Ncase*Naivefreq[i])*(Naive[j]-2*Ncase*Naivefreq[j])*freqNaive[i][j]; *chi2val=chi2old*corrfactor; } /****************************************************** Computes the information components required to derive the correction factor for the chi2 test *****************************************************/ void comput_info_chi2(double **kincoefMatrix,double **cholaug,double *info_rr,double *info_rf,double *info_ff,int size) { int i,j; double infopas_rr=0,infopas_rf=0,infopas_ff=0; for (i=1;i<=size;i++) { for (j=1;j<=size;j++) { infopas_rr+=cholaug[i][M+2]*kincoefMatrix[i][j]*cholaug[j][M+2]; infopas_rf+=cholaug[i][M+2]*kincoefMatrix[i][j]*cholaug[j][M+1]; infopas_ff+=cholaug[i][M+1]*kincoefMatrix[i][j]*cholaug[j][M+1]; } } *info_rr+=infopas_rr; *info_rf+=infopas_rf; *info_ff+=infopas_ff; } /****************************************************** Computes the information components required for the pseudo-score test for MQLS *****************************************************/ void comput_info_scoreMQLS(double **cholaug,double **freqMatrix,double *infoQL_rr,double *infoQL_rf,double *infoQL_ff,double *Rvector,int size) { int i,j; double info_rr=0,info_rf=0,info=0,info_ff=0; for (i=1;i<=size;i++) {info_rr+=cholaug[i][M+3]*cholaug[i][M+3]; info_rf+=cholaug[i][M+3]*cholaug[i][M+1]; info_ff+=cholaug[i][M+1]*cholaug[i][M+1]; for (j=1;j<=M-1;j++) Rvector[j]+=cholaug[i][M+3]*cholaug[i][j]; } *infoQL_rr+=info_rr; *infoQL_rf+=info_rf; *infoQL_ff+=info_ff; } void readpen (char *name) { int all1=0, all2=0,fam=0,famold=1,n=0; int indnumber=0,ind=0; char line[2*(MAXMARK+3)]; int length=2*(MAXMARK+3); if((penfile=fopen(name, "r"))==NULL) { printf("Can't open %s\n",name); exit(1); } fscanf(penfile,"%lf",&Kp); /* printf("read in penetrance and value is %lf \n",Kp); */ } void GetMZtwins(int fam,int length) { int j=0,i=0,k=0; int m=1; double val1=0,val2=0; int MZCOUNT=0,totalcount=0; int numcases=0,numcontrols=0,totalnum=0; for (i=1;i<=length;i++) //length is famdata[fam].NPheno {famdata[fam].MZ[i][1]=-10; if(Mark[m].mark[fam][i][3]==2) {numcases=numcases+1;} if(Mark[m].mark[fam][i][3]==1) {numcontrols=numcontrols+1;} totalnum=totalnum+1; } for (i=1;i<=length;i++) //length is famdata[fam].NPheno { for (j=i+1;j<=length;j++) {totalcount=totalcount+1; if((Storekin[fam][i][j]>=.5) && ((Storekin[fam][i][i]<1. && Storekin[fam][j][j] < 1.) || Storekin[fam][i][j]==1.)) { val1=(1.0+Storekin[fam][i][i])/2; val2=(1.0+Storekin[fam][j][j])/2; if( (val1==Storekin[fam][i][j]) && (val2==Storekin[fam][i][j]) &&(val1==val2)) { famdata[fam].MZ[i][1]=1; famdata[fam].MZ[j][1]=1; MZCOUNT=MZCOUNT+1; TOTALMZCOUNT=TOTALMZCOUNT+1; /*** FOR WQLS OF MZ TWINS ***/ val1=0; if(Mark[m].mark[fam][i][3]==2) {val1=1;} val2=0; if(Mark[m].mark[fam][j][3]==2) {val2=1;} famdata[fam].MZ[i][2]=(val1+val2)/2; /** FOR MQLS OF MZ TWINS **/ val1=0; if(Mark[m].mark[fam][i][3]==1) {val1=-Kp/(1-Kp);} if(Mark[m].mark[fam][i][3]==2) {val1=1;} val2=0; if(Mark[m].mark[fam][j][3]==1) {val2=-Kp/(1-Kp);} if(Mark[m].mark[fam][j][3]==2) {val2=1;} famdata[fam].MZ[i][3]=val1+val2; for (k=1;k<=NbMark;k++) { if(Mark[k].mark[fam][j][3]==2&&Mark[k].mark[fam][j][1]>0&& Mark[k].mark[fam][j][2]>0) {Mark[k].typed[fam].Nc--; Mark[k].Nc--;} if(Mark[k].mark[fam][j][3]<2&&Mark[k].mark[fam][j][1]>0&& Mark[k].mark[fam][j][2]>0) {Mark[k].typed[fam].Nt--; Mark[k].Nt--; } if(Mark[k].mark[fam][j][1]>0&& Mark[k].mark[fam][j][2]>0) {Mark[k].typed[fam].N--;} Mark[k].mark[fam][j][1]=0; Mark[k].mark[fam][j][2]=0; } }}} } m=1; for (i=1;i<=length;i++) //length is famdata[fam].NPheno { val1=0; if(famdata[fam].MZ[i][1]!=-10) { val1+=famdata[fam].MZ[i][3]; } else { if (Mark[m].mark[fam][i][3]==2) { val1+=Storekin[fam][i][i]+1;} if (Mark[m].mark[fam][i][3]==1) { val1+=(Storekin[fam][i][i]+1)*(-Kp/(1-Kp));} } for(j=1;j<=length;j++) { if(j!=i) { if(famdata[fam].MZ[j][1]!=-10) { val1+=2*Storekin[fam][i][j]*famdata[fam].MZ[j][3]; } else { if (Mark[m].mark[fam][j][3]==2) { val1+=2*Storekin[fam][i][j]; } if (Mark[m].mark[fam][j][3]==1) {val1+=2*Storekin[fam][i][j]*(-Kp/(1-Kp));} /* printf("fam is %d and individual is %d with relative %d with affection status %d (and kinship coef %lf), and new value is %lf\n",fam,i,j,Mark[m].mark[fam][j][3],Storekin[fam][i][j],val1); */ } } } famdata[fam].AVEC[i][1]=val1; } } void vecsrt(double *d, int *M,int n) { int k,j,i; double p; int place; for (i=1;i