#include #include #include #include "nrutil.h" #include #include "nrutil.c" #include "cholesky.c" #include "pnorms2.c" #define MAXFAM 10000 #define MAXTOP 20 #define MAX_SNPS_FOR_MATRIX 500000 #define MAXLEN 1000 struct FAM { int N; //total number of individuals in a family int Pheno; //number of individuals phenotyped in a family int NoPheno; // number of individuals not phenotyped in a family int Unknown; // number of individuals removed from the analysis int INCORRECTPheno; double **cholent; double **chol; int *MissingVec; // list of individuals with missing genotype int *descri; // array of correspondance between rank in the family and Id's double **MZ; //THIS IS FOR INDIVIDUALS THAT ARE MZ TWINS double **AVEC; //THIS IS THE A PHENOTYPE VECTOR USING EVERYONE int *AFFEC; //THIS IS TO RECORD THE AFFECTION STATUS int *SEX ; // THIS IS TO RECORD THE SEX int *STUDYID; }; typedef struct Nsize { int N; //total number of ind int Nc; // number of cases int Nt; //number of controls int Nu; //number of unknown controls }Nsize; struct MARKER { int Nball; //number of alleles int Nc; //number of cases with known genotype int Nt; //number of controls with known genotype int Nu; //number of unkown controls with known genotype int ***mark; //3-dim array. 1st dimension: family number; 2nd dim: indiv rank; 3rd dim has 3 components [1]:allele1,[2]:allele2 and [3]:status Nsize *typed; }; /* M is the number of alleles N is the number of individuals in the marker data file NPheno is the number of phenotyped individuals NnoPheno the number of not phenotyped individuals NnoUnknown is the number of not individuals that were removed from the anlysis (this is for option 2 since individuals that are not phenotyped are removed) F number of independent families outfile is the result file, genofile is the marker data file and idfile is the kinshipcoef file famdata is an array of FAM with particular information regarding each family Mark is an array of MARKER with particular information for each marker Storekin is a 3-dim array storing the inbreeding and kinship coefficients in each family. */ int M=0,N,F,NbMark,NPheno=0,NnoPheno=0,NUnknown=0,Naffected=0,NUnaffected=0,X=0; int Nmale=0,Nfemale=0, PREVCOUNT=0,ERROR; FILE *outfile,*genofile,*idfile,*errfile,*penfile,*sigfile,*pvfile,*famfile,*chivfile; struct FAM famdata[MAXFAM]; struct MARKER Mark[2]; double ***Storekin; double Kp, *KP; int TOTALMZCOUNT=0; int MAXLINE,MAXMARK; int Option,Missing; int BIGMARKER; int **NMARKERS,*FINALTYPED,TYPEDNUM,MARKERCOUNT,NAMECOUNT; double **ZMAT,**COVMAT,**ESTCOVMAT,**KINALLMATRIX; int *GENOTYPED,*D,*Place,FINALCOUNTED; FILE *fsnpname; char **SNPNAME; char **TOPSNPNAME,**TOPSNPNAME2,**TOPSNPNAME3,CURRENTSNPNAME[200]; void readpen(char *name); void readcar(char *name); void readdata (struct MARKER *Mark,struct FAM *famdata,FILE *errfile); void readGenotypes(int fam,int m,int length,double **cholaug,double **cholaugCase,double **cholaugControl,double **cholaugUnknown,int *MissingVec,int miss,double **kincoefmatrix,double **kincoefMatrixCase,double **kincoefmatrixControl,double **kincoefmatrixUnknown); void readkincoef(char *name,FILE *errfile,struct FAM *famdata,double ***Storekin); void alleleFreq(double **cholaug,double *frequency,int size, double *denominator); void naiveCount(double **cholaug,double *Naive,double *Naivefreq,int size); void getfrequency(double *Naivefreq,double *NaivefreqCase,double *NaivefreqControl,double *NaivefreqUnknown,double *frequency,double *frequencyCase,double *frequencyControl,double *frequencyUnknown,double denominator,double denomcases,double denomcontrols,double denomunknown,int Nall,int Ncase,int Ncontrol,int Nunknown); void modifcholaug(double **cholent,double **cholaug,double **chol,double *frequency,int size); int makeFreqMat(double **freqMatrix,double *frequency); void comput_info_score(double **cholaug,double **freqMatrix,double *infoQL_rr,double *infoQL_rf,double *infoQL_ff,double *Rvector,int size); void from_info2_chi2(double info_rr,double info_rf,double info_ff,double *Naive,double *Naivefreq,double **freqNaive,double *chi2val,int Nall,int Ncase); void from_info2_score(double **freqMatrix,double infoQL_rr,double infoQL_rf,double infoQL_ff,double *Rvector,double *testval); void comput_info_chi2(double **kincoefMatrix,double **cholaug,double *info_rr,double *info_rf,double *info_ff,int size); int findInd(int Id1,int family); void comput_info_scoreMQLS(double **cholaug,double **freqMatrix,double *infoQL_rr,double *infoQL_rf,double *infoQL_ff,double *Rvector,int size); void GET_PHENOVECTOR_AND_MZtwins(int fam,int length); void vecsrt(double *d, int *M,int n); void readFAM(char *name); void readdataFAM (struct FAM *famdata,FILE *errfile); int getgenoline_PLINK(struct MARKER *Mark,struct FAM *famdata,FILE *errfile); int getgenoline_LINKAGE(struct MARKER *Mark,struct FAM *famdata,FILE *errfile); int getgenoline_ADDITIVE(struct MARKER *Mark,struct FAM *famdata,FILE *errfile); void get_EMPIRICAL_MATRIX(FILE *errfile); void get_FINAL_MATRIX(int finaltyped, double **ESTCOVMAT,int *D,double **ZMAT); void readsnpnames (char *name,int MARKERCOUNT,char** SNPNAME); void vecsrt2(double *d, int *M,char **NAME,int n); void getsnpnamefile (char *name); void robustvar(double **cholaug,double *ROBUST_11,double *YYvector,double *Y1vector,int size,int *Nstu); void from_info2_scoreROBUST(double **freqMatrix,double infoQL_rr,double infoQL_rf,double infoQL_ff,double *Rvector,double *testval,double ROBUST_11,double *YYvector,double *Y1vector, int Nstu,double *ROBUSTVAR); void from_info2_chi2_ROBUST(double info_rr, double info_rf, double info_ff, double *Naive,double *Naivefreq,double **freqNaive,double *chi2val,int Nall,int Ncase,double ROBUST_11,double *YYvector,double *Y1vector, int Nstu,double *ROBUSTVAR); void GET_KIN_MATRIX(int fam, int length); int main (int argc, char *argv[]) { /* cholent stores the allelic information in the M first columns. Column M+1 contains 2, column M+2 contains 2 for cases and 0 for controls cholaug stores cholent times the inverse of the cholesky decompo of the covariance matrix chol stores the cholesky decomposition of kincoefMatrix kincoefMatrix stores the kinship based covariance Matrix frequency stores the frequency estimates freqMatrix is a matrix that is a function of the M-1 allele frequencies MissingVec lists all individual number for whom genotype or phenotype info is missing dmatrix and dvector are available from nrutil.c */ double **cholaug,**cholentCase,**cholentControl,**cholentUnknown; double **kincoefMatrix,**kincoefMatrixCase,**kincoefMatrixControl,**kincoefMatrixUnknown; double *frequency,*frequencyCase,*frequencyControl,*frequencyUnknown; double **freqMatrix; double testval=0,chi2val=0, denominator=0, denomcases=0, denomcontrols=0,denomunknown=0; double *Naive,*Naivefreq,**freqNaive,*NaiveCase,*NaiveControl,*NaiveUnknown,*NaivefreqCase,*NaivefreqControl,*NaivefreqUnknown,*Rvector; double info_rr=0,info_rf=0,info_ff=0,infoQL_rr=0,infoQL_rf=0,infoQL_ff=0; int i,j,k,fam,m,followcase=0,followcontrol=0,followunknown,followall=0,df=0,Npas,negfreq=0; double MQLSval=0,MQLSvalrobust=0,chi2valrobust=0,testvalrobust=0; double *Pvalues,*Pvalues2,*Pvalues3; double *TOPPvalues,*TOPPvalues2,*TOPPvalues3,pval1,pval2,pval3; int *Porder,*Porder2,*Porder3,top; int *TOPPorder,*TOPPorder2,*TOPPorder3; double *YYvector,*Y1vector,ROBUST_11,*ROBUSTVAR; int Nstu; double **CINVT,**cholaug2; double **A1,**A2,sum,*tempV1,*tempV2,*tempV1A,*V1,val1,val2,val3,*DRVEC,*DPVEC,*AVEC, DRPART,DPPART,APART,**COVMAT,*YVEC,*V,*TERM1,RW,RM,SBLUE,VARBLUE; double *DR,*DP,RCHI; int k1,l,row,col; int mycount,numtyped,pers1,pers2,oldnumtyped,nonpolycount=0; char tempname[100]; char str1[MAXLEN]; char pedfile[MAXLEN] = "phenofile"; char kinshipfile[MAXLEN] = "kinfile"; char prevfile[MAXLEN] = "prevalence"; char samplefile[MAXLEN] = "study.sample"; char output[MAXLEN] = "output"; char typedfile[MAXLEN]="genofile"; char input[MAXLEN]; char controlfile[MAXLEN]; char markerfile[MAXLEN]; char *ch; int arg; int gfile = 0; /* Change to 1 if there is a genotyped file */ int pfile = 0; int ffile = 0; int sfile = 0; int ofile = 0; int dfile = 0; int cfile = 0; int rfile =0; int kfile =0; int nfile=0; int HWE,POLYMORPHCOUNT=0; Option=1; Missing=1; X=0; HWE=0; /*printf("argc = %d\n", argc);*/ if (argc > 1) { for (arg=1; arg < argc && argv[arg][0] == '-'; arg++) { switch (argv[arg][1]) {case 'p': strncpy(pedfile, argv[++arg], MAXLEN); printf("user specified phenotype file: %s\n", pedfile); pfile = 1; break; case 'g': strncpy(typedfile, argv[++arg], MAXLEN); printf("user specified genotype file: %s\n", typedfile); gfile = 1; break; case 'k': strncpy(kinshipfile, argv[++arg], MAXLEN); printf("user specified pedigree information file: %s\n", kinshipfile); kfile = 1; break; case 'r': strncpy(prevfile, argv[++arg], MAXLEN); printf("user specified prevalence file: %s\n", prevfile); rfile = 1; break; case 'n': strncpy(markerfile, argv[++arg], MAXLEN); printf("user specified SNP names file: %s\n", markerfile); nfile = 1; break; case 'u': Option = 2; printf("Will remove unknown controls from the analysis \n"); break; case 'm': Missing = 2; printf("Will exclude phenotypes of individuals with missing genotypes for the MQLS and the XM test (if the -x option is used).\n"); break; case 'x': X = 1; printf("User specified X Chromosome analysis.\n"); break; case 'X': X = 1; printf("User specified X Chromosome analysis.\n"); break; case 'h': HWE = 1; printf("User specified association tests calculated assuming Hardy-Weinberg equilibrium. \n"); break; default: printf ("Unknown option \"%s\"\n", argv[arg]); exit(1); }} } if (!pfile) { printf("phenotype file: %s\n", pedfile); } if (!gfile) { printf("genotype file: %s\n", typedfile); } if (!kfile) { printf("pedigree information file: %s\n", kinshipfile); } if (!rfile) {printf("prevalence file: %s\n", prevfile); } /* if (!pfile) { printf("Enter pedigree file (-p): "); fgets(pedfile, MAXLEN, stdin); ch = strchr(pedfile, '\n'); if (ch != NULL) *ch = '\0'; printf("\npedigree file: %s\n", pedfile); } if (!gfile) { printf("Enter genotype file (-g): "); fgets(typedfile, MAXLEN, stdin); ch = strchr(typedfile, '\n'); if (ch != NULL) *ch = '\0'; printf("\ngenotype file: %s\n", typedfile); } if (!kfile) { printf("Enter kinship coefficient file (-k): "); fgets(kinshipfile, MAXLEN, stdin); ch = strchr(kinshipfile, '\n'); if (ch != NULL) *ch = '\0'; printf("\n kinship file: %s\n", kinshipfile); } if (!rfile) { printf("Enter kinship coefficient file (-k): "); fgets(prevfile, MAXLEN, stdin); ch = strchr(prevfile, '\n'); if (ch != NULL) *ch = '\0'; printf("\n prevalence file: %s\n", prevfile); } */ MAXMARK=2; Kp=0; KP=dvector(0,3); /* readpen(argv[4]); */ readpen(prevfile); /* readFAM(argv[1]); */ readFAM(pedfile); for (i=0;i<=F;i++) { famdata[i].NoPheno=0; famdata[i].Pheno=0; famdata[i].Unknown=0; famdata[i].INCORRECTPheno=0; famdata[i].descri=(int*)malloc(((famdata[i].N)+1)*sizeof(int)); if (!famdata[i].descri) {printf("Error in memory allocation 5\n"); exit(1);} famdata[i].MZ=(double **)malloc((size_t)(((famdata[i].N)+1)*sizeof(double*))); for (j=0;j<=famdata[i].N;j++) {famdata[i].MZ[j]=(double *)malloc((size_t) ((4)*sizeof(double)));} famdata[i].AVEC=(double **)malloc((size_t)(((famdata[i].N)+1)*sizeof(double*))); for (j=0;j<=famdata[i].N;j++) {famdata[i].AVEC[j]=(double *)malloc((size_t) ((4)*sizeof(double)));} famdata[i].AFFEC=(int *)malloc((size_t)(((famdata[i].N)+1)*sizeof(int))); famdata[i].SEX=(int *)malloc((size_t)(((famdata[i].N)+1)*sizeof(int))); famdata[i].STUDYID=(int *)malloc((size_t)(((famdata[i].N)+1)*sizeof(int))); } if((errfile=fopen("MQLS_XM_Software.err", "w"))==NULL) { printf("Can't open MQLS_XM_Software.err .\n"); exit(1); } readdataFAM(famdata,errfile); /* THIS IS THE MAXIMUM LENGTH FOR EACH ROW IN THE GENOTYPE FILE */ MAXLINE=NPheno*8; Storekin = (double ***)malloc((size_t) ((F+1)*sizeof(double**))); for (i=0;i<=F;i++) { if (i==0) Npas=N; else Npas=famdata[i].Pheno+famdata[i].NoPheno; Storekin[i] = (double **)malloc((size_t) ((Npas+1)*sizeof(double*))); if (!Storekin[i]) {printf("Error in memory allocation for kinship coefficient storage\n"); exit(1);} for (j=0;j<=Npas;j++) { Storekin[i][j]= (double *)malloc((size_t) ((Npas+1)*sizeof(double))); if (!Storekin[i][j]) {printf("Error in memory allocation for kinship coefficient storage\n"); exit(1);} } } /* readkincoef(argv[2],errfile,famdata,Storekin); */ readkincoef(kinshipfile,errfile,famdata,Storekin); printf("Read in kinship coefficients and inbreeding coefficients. \n"); for (fam=1;fam<=F;fam++) {GET_PHENOVECTOR_AND_MZtwins(fam,famdata[fam].Pheno);} printf("Read in phenotype information. \n"); NbMark=1; for (m=1;m<=NbMark;m++) { Mark[NbMark].Nball=0; Mark[NbMark].Nc=0; Mark[NbMark].Nt=0; Mark[NbMark].Nu=0; Mark[m].typed = (Nsize *)malloc((size_t) ((MAXFAM+1)*sizeof(Nsize))); if (!Mark[m].typed) {printf("Error in memory allocation 1\n"); exit(1);} Mark[m].mark = (int ***)malloc((size_t) ((MAXFAM+1)*sizeof(int**))); if (!Mark[m].mark) {printf("Error in memory allocation 2\n"); exit(1);} for (i=0;i<=F;i++) { Mark[m].typed[i].N=0; Mark[m].typed[i].Nc=0; Mark[m].typed[i].Nt=0; Mark[m].typed[i].Nu=0; Mark[m].mark[i] = (int **)malloc((size_t)(((famdata[i].N)+1)*sizeof(int*))); if (!Mark[m].mark[i]) {printf("Error in memory allocation 3\n"); exit(1);} for (j=0;j<=famdata[i].N;j++) {Mark[m].mark[i][j]= (int *)malloc((size_t) ((6)*sizeof(int))); if (!Mark[m].mark[i][j]) {printf("Error in memory allocation 4 %d %d %d\n",famdata[i].N,i,j); exit(1);} } } } /* if((genofile=fopen(argv[3], "r"))==NULL) { printf("Can't open %s\n",argv[3]); exit(1); }*/ if((genofile=fopen(typedfile, "r"))==NULL) { printf("Can't open %s\n",typedfile); exit(1); } /* printf("Calculating The Empirical Covariance Matrix. \n"); get_EMPIRICAL_MATRIX(errfile); rewind(genofile); printf("Empirical Covariance Matrix has been calculated. Now testing each SNP for association.\n"); */ if(X==0) { if((sigfile=fopen("MQLStest.top", "w"))==NULL) { printf("Can't open MQLStest.top.\n"); exit(1); } if((pvfile=fopen("MQLStest.pvalues", "w"))==NULL) { printf("Can't open MQLStest.pvalues.\n"); exit(1); } if((chivfile=fopen("MQLStest.testvalues", "w"))==NULL) { printf("Can't open MQLStest.testvalues.\n"); exit(1); } if((outfile=fopen("MQLStest.out", "w"))==NULL) { printf("Can't open MQLStest.out .\n"); exit(1); } } if(X==1) { if((sigfile=fopen("XMtest.top", "w"))==NULL) { printf("Can't open XMtest.top.\n"); exit(1); } if((pvfile=fopen("XMtest.pvalues", "w"))==NULL) { printf("Can't open XMtest.pvalue.\n"); exit(1); } if((chivfile=fopen("XMtest.testvalues", "w"))==NULL) { printf("Can't open XMtest.testvalues.\n"); exit(1); } if((outfile=fopen("XMtest.out", "w"))==NULL) { printf("Can't open XMtest.out .\n"); exit(1); } } fprintf(outfile,"******Results of the Case-control Association tests ******\n\n"); if(Option==1) {fprintf(outfile,"There are %d individuals from %d independent families. %d of the individuals are affected, %d of the individuals are unaffected, and %d of the individuals are of unknown phenotype.\n\n",NPheno,F,Naffected,NUnaffected,NUnknown); } if(Option==2) {fprintf(outfile,"There are %d individuals from %d independent families. %d of the individuals are affected, %d of the individuals are unaffected, and %d are of unknown phenotype. The -u option was chosen by the user so individuals with unknown phenotypes were not used in the analysis.\n\n",NPheno,F,Naffected,NUnaffected,NUnknown); } if(X==0) {fprintf(outfile,"There are %d males and %d females. The prevalence values used in the MQLS test statistic for males and females are %lf and %lf, respectively. \n",Nmale,Nfemale,KP[1],KP[2]);} if(X==1) {fprintf(outfile,"There are %d males and %d females. The prevalence values used in the XM test statistic for males and females are %lf and %lf, respectively. \n",Nmale,Nfemale,KP[1],KP[2]);} if(TOTALMZCOUNT>0) {fprintf(outfile,"There are %d MZ twin pairs \n\n",TOTALMZCOUNT);} TOPPvalues=dvector(0,MAXTOP); TOPPvalues2=dvector(0,MAXTOP); TOPPvalues3=dvector(0,MAXTOP); TOPPorder=ivector(0,MAXTOP); TOPPorder2=ivector(0,MAXTOP); TOPPorder3=ivector(0,MAXTOP); /* if(nfile==1) { getsnpnamefile(markerfile); } */ TOPSNPNAME=(char **)malloc((size_t) ((MAXTOP+1)*sizeof(char*))); TOPSNPNAME2=(char **)malloc((size_t) ((MAXTOP+1)*sizeof(char*))); TOPSNPNAME3=(char **)malloc((size_t) ((MAXTOP+1)*sizeof(char*))); for(j=1;j<=MAXTOP;j++) {TOPSNPNAME[j]=(char *)malloc((size_t) ((100)*sizeof(char))); TOPSNPNAME2[j]=(char *)malloc((size_t) ((100)*sizeof(char))); TOPSNPNAME3[j]=(char *)malloc((size_t) ((100)*sizeof(char))); } /* fprintf(pvfile,"MARKER \t SNP_NAME \t RM \t RCHI \t RW \n"); */ if(X==0) { if(HWE==0) {fprintf(pvfile,"SNP \t NAME \t\t \t MQLS_Robust \t CCHI_Robust \t WQLS_Robust \n"); fprintf(chivfile,"SNP \t NAME \t\t \t MQLS_Robust \t CCHI_Robust \t WQLS_Robust \n"); } else {fprintf(pvfile,"SNP \t NAME \t\t \t MQLS_HWE \t CCHI_HWE \t WQLS_HWE \n"); fprintf(chivfile,"SNP \t NAME \t\t \t MQLS_HWE \t CCHI_HWE \t WQLS_HWE \n"); } } else{ if(HWE==0) {fprintf(pvfile,"SNP \t NAME \t\t \t XM_Robust \t XCHI_Robust \t XW_Robust \n"); fprintf(chivfile,"SNP \t NAME \t\t \t XM_Robust \t XCHI_Robust \t XW_Robust \n"); } else {fprintf(pvfile,"SNP \t NAME \t\t \t XM_HWE \t XCHI_HWE \t XW_HWE \n"); fprintf(chivfile,"SNP \t NAME \t\t \t XM_HWE \t XCHI_HWE \t XW_HWE \n"); } } BIGMARKER=0; printf("Calculating association test statistics for every marker. \n"); while(getgenoline_PLINK(Mark,famdata,errfile)>0) { BIGMARKER++; if(BIGMARKER%1000==0) {printf("Testing marker %d for association\n",BIGMARKER);} m=1; negfreq=0; M=Mark[m].Nball; freqMatrix=dmatrix(1,M,1,M); frequency=dvector(1,M); frequencyCase=dvector(1,M); frequencyControl=dvector(1,M); frequencyUnknown=dvector(1,M); Naive=dvector(1,M); NaiveCase=dvector(1,M); NaiveControl=dvector(1,M); NaiveUnknown=dvector(1,M); Naivefreq=dvector(1,M); Rvector=dvector(1,M); NaivefreqCase=dvector(1,M); NaivefreqControl=dvector(1,M); NaivefreqUnknown=dvector(1,M); freqNaive=dmatrix(1,M,1,M); testval=0; chi2val=0; denominator=0; denomcases=0; denomcontrols=0; denomunknown=0; info_rr=0; info_rf=0; info_ff=0; infoQL_rr=0; infoQL_rf=0; infoQL_ff=0; MQLSval=0; ROBUSTVAR=dvector(1,M); YYvector=dvector(1,M); Y1vector=dvector(1,M); ROBUST_11=0; Nstu=0; mycount=0; FINALCOUNTED=0; for (i=1;i<=M;i++) { frequency[i]=0; frequencyCase[i]=0; frequencyControl[i]=0; frequencyUnknown[i]=0; Naive[i]=0; NaiveCase[i]=0; NaiveControl[i]=0; NaiveUnknown[i]=0; Naivefreq[i]=0; NaivefreqCase[i]=0; NaivefreqControl[i]=0; NaivefreqUnknown[i]=0; YYvector[i]=0; Y1vector[i]=0; if (i0) { followall=1; famdata[fam].cholent=dmatrix(1,Mark[m].typed[fam].N,1,M+3); famdata[fam].chol=dmatrix(1,Mark[m].typed[fam].N,1,Mark[m].typed[fam].N); cholaug=dmatrix(1,Mark[m].typed[fam].N,1,M+3); kincoefMatrix=dmatrix(1,Mark[m].typed[fam].N,1,Mark[m].typed[fam].N); } if ((famdata[fam].Pheno-Mark[m].typed[fam].N)>0) famdata[fam].MissingVec=ivector(1,famdata[fam].Pheno-Mark[m].typed[fam].N); if (Mark[m].typed[fam].Nc>0){ followcase=1; cholentCase=dmatrix(1,Mark[m].typed[fam].Nc,1,M+2); kincoefMatrixCase=dmatrix(1,Mark[m].typed[fam].Nc,1,Mark[m].typed[fam].Nc); } if (Mark[m].typed[fam].Nt>0){ followcontrol=1; cholentControl=dmatrix(1,Mark[m].typed[fam].Nt,1,M+2); kincoefMatrixControl=dmatrix(1,Mark[m].typed[fam].Nt,1,Mark[m].typed[fam].Nt); } if (Mark[m].typed[fam].Nu>0&&Option!=2){ followunknown=1; cholentUnknown=dmatrix(1,Mark[m].typed[fam].Nu,1,M+2); kincoefMatrixUnknown=dmatrix(1,Mark[m].typed[fam].Nu,1,Mark[m].typed[fam].Nu); } readGenotypes(fam,m,famdata[fam].Pheno,famdata[fam].cholent,cholentCase,cholentControl,cholentUnknown,famdata[fam].MissingVec,famdata[fam].Pheno-Mark[m].typed[fam].N,kincoefMatrix,kincoefMatrixCase,kincoefMatrixControl,kincoefMatrixUnknown); /*computing allele frequencies in the cases... */ if (followcase==1) { naiveCount(cholentCase,NaiveCase,NaivefreqCase,Mark[m].typed[fam].Nc); if (cholesky(kincoefMatrixCase,Mark[m].typed[fam].Nc,cholentCase,M+1,kincoefMatrixCase,cholentCase,1)!=1) {printf("cholesky decomposition of the case cov matrix failed for family %d. Might be due to inconsistent kinship coefficient values...\n",fam); exit(1); } alleleFreq(cholentCase,frequencyCase,Mark[m].typed[fam].Nc, &denomcases); } /*computing allele frequencies in the controls... */ if (followcontrol==1) { naiveCount(cholentControl,NaiveControl,NaivefreqControl,Mark[m].typed[fam].Nt); if (cholesky(kincoefMatrixControl,Mark[m].typed[fam].Nt,cholentControl,M+1,kincoefMatrixControl,cholentControl,1)!=1) { printf("cholesky decomposition of the control cov matrix failed for family %d. Might be due to inconsistent kinship coefficient values...\n",fam); exit(1); } alleleFreq(cholentControl,frequencyControl,Mark[m].typed[fam].Nt, &denomcontrols); } /*computing allele frequencies in unknowns... */ if (followunknown==1) { naiveCount(cholentUnknown,NaiveUnknown,NaivefreqUnknown,Mark[m].typed[fam].Nu); if (cholesky(kincoefMatrixUnknown,Mark[m].typed[fam].Nu,cholentUnknown,M+1,kincoefMatrixUnknown,cholentUnknown,1)!=1) { printf("cholesky decomposition of the unknown phenotyped cov matrix failed for family %d. Might be due to inconsistent kinship coefficient values...\n",fam); exit(1); } alleleFreq(cholentUnknown,frequencyUnknown,Mark[m].typed[fam].Nu, &denomunknown); } /*computing overall allele frequencies */ if (followall==1) { naiveCount(famdata[fam].cholent,Naive,Naivefreq,Mark[m].typed[fam].N); comput_info_chi2(kincoefMatrix,famdata[fam].cholent,&info_rr,&info_rf,&info_ff,Mark[m].typed[fam].N); if (cholesky(kincoefMatrix,Mark[m].typed[fam].N,famdata[fam].cholent,M+2,famdata[fam].chol,cholaug,1)!=1) {printf("cholesky decomposition of the cov matrix failed for family %d. Might be due to inconsistent kinship coefficient values...\n",fam); exit(1); } alleleFreq(cholaug,frequency,Mark[m].typed[fam].N, &denominator); if(HWE==0) { robustvar(cholaug,&ROBUST_11,YYvector,Y1vector,Mark[m].typed[fam].N,&Nstu); } } if (Mark[m].typed[fam].N>0) free_dmatrix(cholaug,1,Mark[m].typed[fam].N,1,M+3); if (Mark[m].typed[fam].N>0) free_dmatrix(kincoefMatrix,1,Mark[m].typed[fam].N,1,Mark[m].typed[fam].N); if (Mark[m].typed[fam].Nc>0) free_dmatrix(kincoefMatrixCase,1,Mark[m].typed[fam].Nc,1,Mark[m].typed[fam].Nc); if (Mark[m].typed[fam].Nt>0) free_dmatrix(kincoefMatrixControl,1,Mark[m].typed[fam].Nt,1,Mark[m].typed[fam].Nt); if (Mark[m].typed[fam].Nc>0) free_dmatrix(cholentCase,1,Mark[m].typed[fam].Nc,1,M+2); if (Mark[m].typed[fam].Nt>0) free_dmatrix(cholentControl,1,Mark[m].typed[fam].Nt,1,M+2); if (followunknown==1) { if (Mark[m].typed[fam].Nu>0) free_dmatrix(kincoefMatrixUnknown,1,Mark[m].typed[fam].Nu,1,Mark[m].typed[fam].Nu); if (Mark[m].typed[fam].Nu>0) free_dmatrix(cholentUnknown,1,Mark[m].typed[fam].Nu,1,M+2); } } getfrequency(Naivefreq,NaivefreqCase,NaivefreqControl,NaivefreqUnknown,frequency, frequencyCase,frequencyControl,frequencyUnknown,denominator,denomcases,denomcontrols,denomunknown,Mark[m].Nc+Mark[m].Nt+Mark[m].Nu,Mark[m].Nc,Mark[m].Nt,Mark[m].Nu); makeFreqMat(freqNaive,Naivefreq); /* Remove the below comment to calculate the orignial Corrected chi-squared that assumes HWE in the variance calculation */ if(HWE==1) { if(Option==1) {from_info2_chi2(info_rr,info_rf,info_ff,Naive,Naivefreq,freqNaive,&chi2val,Mark[m].Nc+Mark[m].Nu+Mark[m].Nt,Mark[m].Nc);} if(Option==2) {from_info2_chi2(info_rr,info_rf,info_ff,Naive,Naivefreq,freqNaive,&chi2val,Mark[m].Nc+Mark[m].Nt,Mark[m].Nc);} } if (makeFreqMat(freqMatrix,frequency)==1) {negfreq=1;} if (negfreq==0) { for (fam=1;fam<=F;fam++) { if (Mark[m].typed[fam].N>0) { cholaug=dmatrix(1,Mark[m].typed[fam].N,1,M+3); modifcholaug(famdata[fam].cholent,cholaug,famdata[fam].chol,frequency,Mark[m].typed[fam].N); comput_info_scoreMQLS(cholaug,freqMatrix,&infoQL_rr,&infoQL_rf,&infoQL_ff,Rvector,Mark[m].typed[fam].N); free_dmatrix(cholaug,1,Mark[m].typed[fam].N,1,M+3); } } if(HWE==1) { from_info2_score(freqMatrix,infoQL_rr,infoQL_rf,infoQL_ff,Rvector,&MQLSval); } if(HWE==0) { from_info2_scoreROBUST(freqMatrix,infoQL_rr,infoQL_rf,infoQL_ff,Rvector,&MQLSvalrobust,ROBUST_11,YYvector,Y1vector,Nstu,ROBUSTVAR); if(Option==1) { from_info2_chi2_ROBUST(info_rr,info_rf,info_ff,Naive,Naivefreq,freqNaive,&chi2valrobust,Mark[m].Nc+Mark[m].Nt+Mark[m].Nu,Mark[m].Nc,ROBUST_11,YYvector,Y1vector,Nstu,ROBUSTVAR); } if(Option==2) { from_info2_chi2_ROBUST(info_rr,info_rf,info_ff,Naive,Naivefreq,freqNaive,&chi2valrobust,Mark[m].Nc+Mark[m].Nt,Mark[m].Nc,ROBUST_11,YYvector,Y1vector,Nstu,ROBUSTVAR); } } } /* if(nfile==1&&BIGMARKER<=NAMECOUNT) {fscanf(fsnpname,"%s ",tempname); } else {sprintf(tempname,"SNP_%d",BIGMARKER); } */ /*output printing ... */ /* MAKE SURE NO HETEROZYGOTE MAKERS FOR FIRST 10 POLYMORPHIC MARKERS WITH MINOR ALLELE FREQUENCY GREATER THAN .1 */ if(X==1 &&POLYMORPHCOUNT<=100&&Naivefreq[1]>.1&&Naivefreq[1]<.9) {POLYMORPHCOUNT++; ERROR=0; for (i=1;i<=F;i++) {for(j=1;j<=famdata[i].N;j++) if(famdata[i].SEX[j]==1 && Mark[m].mark[i][j][1]>0 && Mark[m].mark[i][j][1]!=Mark[m].mark[i][j][2]) {ERROR=1; fprintf(errfile,"Error in the X-chromosome analysis. Individual %d from family %d is a male with a heterozygous genotype (%d,%d) for marker %d \n",famdata[i].descri[j],i, Mark[m].mark[i][j][1], Mark[m].mark[i][j][2],BIGMARKER); } } if(ERROR==1) {printf("ERROR! User specified X Chromosome analysis. Checking the first 100 markers with minor allele frequency greater than .1 to identify any heterozygous males. Marker %d has at least one heterozygous male. Please make sure that all markers in the genotype input file include only X-linked markers (and no autosomal markers). \n",BIGMARKER); } } fprintf(outfile,"****************************************"); fprintf(outfile,"\n\nAnalysis of Marker %d: %s \n\n",BIGMARKER,CURRENTSNPNAME); fprintf(outfile,"****************************************\n"); if(Option==1) {fprintf(outfile,"There are %d affected individuals, %d unaffected individuals, and %d individuals of unknown phenotype available. \n\n",Mark[m].Nc,Mark[m].Nt,Mark[m].Nu); } if(Option==2) {fprintf(outfile,"%d affected individuals and %d unaffected individuals available.\n\n",Mark[m].Nc,Mark[m].Nt); } fprintf(outfile,"*****************************************\n"); pval1=1; pval2=1; pval3=1; if ( (Mark[m].Nt!=0 ||Mark[m].Nu!=0) && Mark[m].Nc!=0&&Naivefreq[1]>.01&&Naivefreq[1]<.99) {df=0; for (i=1;i<=M-1;i++) if (frequency[i]!=0) df++; /******* MQLS and XM TESTS ************/ if(X==0) { if(HWE==0) {fprintf(outfile,"MQLS test using robust variance estimator\n\n");} if(HWE==1) fprintf(outfile,"MQLS test using HWE variance estimator\n\n"); } else{if(HWE==0) {fprintf(outfile,"XM test using robust variance estimator\n\n");} if(HWE==1) fprintf(outfile,"XM test using HWE variance estimator\n\n"); } if (Naivefreq[1]>0&&Naivefreq[1]<1) { /* fprintf(outfile,"RM statistic value = %f\t pvalue = %g df = %d\n\n",RM,pochisq(RM,df),df); */ if(HWE==1) { if(X==0) { fprintf(outfile,"MQLS statistic value = %f\t pvalue = %g df = %d\n\n",MQLSval,2*pnorms(-1*sqrt(MQLSval)),df); /*fprintf(outfile,"MQLS statistic value = %f\t pvalue = %g df = %d\n\n",MQLSval,pochisq(MQLSval,df),df);*/ } else { /*fprintf(outfile,"XM statistic value = %f\t pvalue = %g df = %d\n\n",MQLSval,pochisq(MQLSval,df),df);*/ fprintf(outfile,"XM statistic value = %f\t pvalue = %g df = %d\n\n",MQLSval,2*pnorms(-1*sqrt(MQLSval)),df); } RM=MQLSval; } if(HWE==0) { if(X==0) { /*fprintf(outfile,"MQLS statistic value = %f\t pvalue = %g df = %d\n\n",MQLSvalrobust,pochisq(MQLSvalrobust,df),df);*/ fprintf(outfile,"MQLS statistic value = %f\t pvalue = %g df = %d\n\n",MQLSvalrobust,2*pnorms(-1*sqrt(MQLSvalrobust)),df); } else { /* fprintf(outfile,"XM statistic value = %f\t pvalue = %g df = %d\n\n",MQLSvalrobust,pochisq(MQLSvalrobust,df),df); */ fprintf(outfile,"XM statistic value = %f\t pvalue = %g df = %d\n\n",MQLSvalrobust,2*pnorms(-1*sqrt(MQLSvalrobust)),df); } RM=MQLSvalrobust; } pval1=2*pnorms(-1*sqrt(RM)); /* printf("Value is %lf and old p-value is %g and new p-value is %g \n",MQLSvalrobust,pochisq(MQLSvalrobust,df),2*pnorms(-1*sqrt(MQLSvalrobust))); */ if ( pval1<=0.05) { if (Rvector[1]>0) fprintf(outfile,"Frequency of allele %d is increased in the cases (quasi-score associated to this allele is %.4f)\n\n",1,Rvector[1]); if (Rvector[1]<0) fprintf(outfile,"Frequency of allele %d is increased in the controls (quasi-score associated to this allele is %.4f)\n\n",1,Rvector[1]); if (Rvector[1]==0) fprintf(outfile,"Frequency of allele %d is the same in cases and controls (quasi-score associated to this allele is 0)\n\n",1); } for (i=1;i<=M;i++) {if (((Mark[m].Nc)*frequencyCase[i])<5) fprintf(outfile,"The p-value might not be exact because of the small number of type %d alleles in cases\n",i); if(Option==2) { if (((Mark[m].Nt)*frequencyControl[i])<5) fprintf(outfile,"The p-value might not be exact because of the small number of type %d alleles in controls\n",i);} if(Option==1) { if ( ( (Mark[m].Nt)*frequencyControl[i] + (Mark[m].Nu)*frequencyUnknown[i] )<5) fprintf(outfile,"The p-value might not be exact because of the small number of type %d alleles in controls\n",i);} } } else {if(X==0) {printf("Computation of the MQLS statistic is not possible\n\n");} else { printf("Computation of the XM statistic is not possible\n\n");} } /******** Corrected Chi-Squared and X-Chi TESTS ************/ df=0; for (i=1;i<=M-1;i++) if (frequency[i]!=0) df++; fprintf(outfile,"\n*****************************************\n"); /* fprintf(outfile,"RCHI test \n\n"); */ if(X==0) { if(HWE==0) {fprintf(outfile,"Corrected chi-squared test using robust variance estimator\n\n");} if(HWE==1) fprintf(outfile,"Corrected chi-squared test using HWE variance estimator\n\n"); } else{if(HWE==0) {fprintf(outfile,"XCHI test using robust variance estimator\n\n");} if(HWE==1) fprintf(outfile,"XCHI test using HWE variance estimator\n\n"); } if (Naivefreq[1]>0&&Naivefreq[1]<1) { /* fprintf(outfile,"RCHI statistic value = %f\t pvalue = %g df = %d\n\n",RCHI,pochisq(RCHI,df),df); */ if(HWE==1) { if(X==0) { /*fprintf(outfile,"Corrected chi-squared statistic value = %f\t pvalue = %g df = %d \n\n",chi2val,pochisq(chi2val,df),df);*/ fprintf(outfile,"Corrected chi-squared statistic value = %f\t pvalue = %g df = %d \n\n",chi2val,2*pnorms(-1*sqrt(chi2val)),df); } else { /*fprintf(outfile,"XCHI statistic value = %f\t pvalue = %g df = %d \n\n",chi2val,pochisq(chi2val,df),df); */ fprintf(outfile,"XCHI statistic value = %f\t pvalue = %g df = %d \n\n",chi2val,2*pnorms(-1*sqrt(chi2val)),df); } RCHI=chi2val; } if(HWE==0) { if(X==0) { /*fprintf(outfile,"Corrected chi-squared statistic value = %f\t pvalue = %g df = %d \n\n",chi2valrobust,pochisq(chi2valrobust,df),df);*/ fprintf(outfile,"Corrected chi-squared statistic value = %f\t pvalue = %g df = %d \n\n",chi2valrobust,2*pnorms(-1*sqrt(chi2valrobust)),df); } else { /* fprintf(outfile,"XCHI statistic value = %f\t pvalue = %g df = %d \n\n",chi2valrobust,pochisq(chi2valrobust,df),df); */ fprintf(outfile,"XCHI statistic value = %f\t pvalue = %g df = %d \n\n",chi2valrobust,2*pnorms(-1*sqrt(chi2valrobust)),df); } RCHI=chi2valrobust; } pval2=2*pnorms(-1*sqrt(RCHI)); for (i=1;i<=M;i++) {if (((Mark[m].Nc)*NaivefreqCase[i])<5) fprintf(outfile,"The p-value might not be exact because of the small number of allele %d in cases\n\n",i); if(Option==2) { if (((Mark[m].Nt)*NaivefreqControl[i])<5) fprintf(outfile,"The p-value might not be exact because of the small number of type %d alleles in controls\n",i);} if(Option==1) { if ( ( (Mark[m].Nt)*NaivefreqControl[i] + (Mark[m].Nu)*NaivefreqUnknown[i] )<5) fprintf(outfile,"The p-value might not be exact because of the small number of type %d alleles in controls\n",i);} if (((Mark[m].Nt)*NaivefreqControl[i])<5) fprintf(outfile,"The p-value might not be exact because of the small number of allele %d in controls\ni\n",i); } } else {if(X==0) {printf("Computation of the Corrected chi-squared statistic is not possible\n\n");} else{printf("Computation of the XCHI statistic is not possible\n\n");} } /********* WQLS and XW TESTS *********/ info_rr=0; info_rf=0; info_ff=0; infoQL_rr=0; infoQL_rf=0; infoQL_ff=0; testval=0; for(j=1;j<=M;j++) {Rvector[j]=0;} for (fam=1;fam<=F;fam++) { if (Mark[m].typed[fam].N>0) { cholaug=dmatrix(1,Mark[m].typed[fam].N,1,M+3); modifcholaug(famdata[fam].cholent,cholaug,famdata[fam].chol,frequency,Mark[m].typed[fam].N); comput_info_score(cholaug,freqMatrix,&infoQL_rr,&infoQL_rf,&infoQL_ff,Rvector,Mark[m].typed[fam].N); free_dmatrix(cholaug,1,Mark[m].typed[fam].N,1,M+3); free_dmatrix(famdata[fam].cholent,1,Mark[m].typed[fam].N,1,M+3); free_dmatrix(famdata[fam].chol,1,Mark[m].typed[fam].N,1,Mark[m].typed[fam].N); } } /* Remove the below comment to calculate the orignal WQLS that assumes HWE in the variance calculation */ if(HWE==1) { from_info2_score(freqMatrix,infoQL_rr,infoQL_rf,infoQL_ff,Rvector,&testval);} if(HWE==0) { from_info2_scoreROBUST(freqMatrix,infoQL_rr,infoQL_rf,infoQL_ff,Rvector,&testvalrobust,ROBUST_11,YYvector,Y1vector,Nstu,ROBUSTVAR); } df=0; for (i=1;i<=M-1;i++) if (frequency[i]!=0) df++; fprintf(outfile,"*****************************************\n"); if(X==0) { if(HWE==0) {fprintf(outfile,"WQLS test using robust variance estimator\n\n");} if(HWE==1) fprintf(outfile,"WQLS test using HWE variance estimator\n\n"); } else{if(HWE==0) {fprintf(outfile,"XW test using robust variance estimator\n\n");} if(HWE==1) fprintf(outfile,"XW test using HWE variance estimator\n\n"); } if (Naivefreq[1]>0&&Naivefreq[1]<1) { /* fprintf(outfile,"RW statistic value = %f\t pvalue = %g df = %d\n\n",RW,pochisq(RW,df),df); */ if(HWE==1) { if(X==0) { /* fprintf(outfile,"WQLS statistic value = %f\t pvalue = %g df = %d\n\n",testval,pochisq(testval,df),df); */ fprintf(outfile,"WQLS statistic value = %f\t pvalue = %g df = %d\n\n",testval,2*pnorms(-1*sqrt(testval)),df); } else { /* fprintf(outfile,"XW statistic value = %f\t pvalue = %g df = %d\n\n",testval,pochisq(testval,df),df); */ fprintf(outfile,"XW statistic value = %f\t pvalue = %g df = %d\n\n",testval,2*pnorms(-1*sqrt(testval)),df); } RW=testval; } if(HWE==0) { if(X==0) { /*fprintf(outfile,"WQLS statistic value = %f\t pvalue = %g df = %d\n\n",testvalrobust,pochisq(testvalrobust,df),df); */ fprintf(outfile,"WQLS statistic value = %f\t pvalue = %g df = %d\n\n",testvalrobust,2*pnorms(-1*sqrt(testvalrobust)),df); } else { /*fprintf(outfile,"XW statistic value = %f\t pvalue = %g df = %d\n\n",testvalrobust,pochisq(testvalrobust,df),df); */ fprintf(outfile,"XW statistic value = %f\t pvalue = %g df = %d\n\n",testvalrobust,2*pnorms(-1*sqrt(testvalrobust)),df); } RW=testvalrobust; } /* pval3=pochisq(RW,df); */ pval3=2*pnorms(-1*sqrt(RW)); if (pval3<=0.05) { if (Rvector[1]>0) fprintf(outfile,"Frequency of allele %d is increased in the cases (quasi-score associated to this allele is %.4f)\n\n",1,Rvector[1]); if (Rvector[1]<0) fprintf(outfile,"Frequency of allele %d is increased in the controls (quasi-score associated to this allele is %.4f)\n\n",1,Rvector[1]); if (Rvector[1]==0) fprintf(outfile,"Frequency of allele %d is the same in cases and controls (quasi-score associated to this allele is 0)\n\n",1); } for (i=1;i<=M;i++) {if (((Mark[m].Nc)*frequencyCase[i])<5) fprintf(outfile,"The p-value might not be exact because of the small number of type %d alleles in cases\n",i); if(Option==2) { if (((Mark[m].Nt)*frequencyControl[i])<5) fprintf(outfile,"The p-value might not be exact because of the small number of type %d alleles in controls\n",i);} if(Option==1) { if ( ( (Mark[m].Nt)*frequencyControl[i] + (Mark[m].Nu)*frequencyUnknown[i] )<5) fprintf(outfile,"The p-value might not be exact because of the small number of type %d alleles in controls\n",i);} } } else{ if(X==0) {printf("Computation of the WQLS statistic is not possible\n\n");} else { printf("Computation of the XW statistic is not possible\n\n");} } } else { if ((Mark[m].Nc)==0) fprintf(outfile,"\n\nTest statistics are not computed because there are no genotyped cases in the sample for this SNP\n\n"); else if ((Mark[m].Nt)==0&&(Mark[m].Nu)==0) fprintf(outfile,"\n\nTest statistics are not computed because there are no genotyped controls in the sample for this SNP\n\n"); else {nonpolycount++; fprintf(outfile,"\n\n Test statistics are not computed because the SNP is not polymorphic: minor allele frequency is less than .01 \n\n");} } fprintf(outfile,"\n*****************************************\n"); if (negfreq==0) { if(Option==2) { if(frequencyCase[1]==0&&frequencyCase[2]==0) {denomcases=1;} if(frequencyControl[1]==0&&frequencyControl[2]==0) {denomcontrols=1;} fprintf(outfile,"allele frequency estimates using the Best Linear Unbiased Estimator (BLUE) in \n"); fprintf(outfile,"\t\t cases\t\t\t unaffected controls \t\t\t all sample \n"); for (i=1;i<=M;i++) fprintf(outfile,"allele %d : freq = %.4f sd = %.4f\t freq = %.4f sd = %.4f\t\t freq = %.4f sd = %.4f\n",i,frequencyCase[i],sqrt(2*frequencyCase[i]*(1-frequencyCase[i])/denomcases),frequencyControl[i],sqrt(2*frequencyControl[i]*(1-frequencyControl[i])/denomcontrols),frequency[i],sqrt(2*frequency[i]*(1-frequency[i])/denominator)); } if(Option!=2) { if(frequencyCase[1]==0&&frequencyCase[2]==0) {denomcases=1;} if(frequencyControl[1]==0&&frequencyControl[2]==0) {denomcontrols=1;} if(frequencyUnknown[1]==0&&frequencyUnknown[2]==0) {denomunknown=1;} fprintf(outfile,"allele frequency estimates using the Best Linear Unbiased Estimator (BLUE) in \n"); fprintf(outfile,"\t\t\t cases \t\t unaffected controls \t\t unknown controls \t\t all sample \n"); for (i=1;i<=M;i++) fprintf(outfile,"allele %d : freq = %.4f sd = %.4f \t freq = %.4f sd = %.4f \t freq = %.4f sd = %.4f \t freq = %.4f sd = %.4f\n",i,frequencyCase[i],sqrt(2*frequencyCase[i]*(1-frequencyCase[i])/denomcases),frequencyControl[i],sqrt(2*frequencyControl[i]*(1-frequencyControl[i])/denomcontrols),frequencyUnknown[i],sqrt(2*frequencyUnknown[i]*(1-frequencyUnknown[i])/denomunknown),frequency[i],sqrt(2*frequency[i]*(1-frequency[i])/denominator)); } } else fprintf(outfile,"QL computation of allele frequencies gives negative values....\n\nskipped... \n\nUse naive estimates\n\n"); fprintf(outfile,"*****************************************\n"); if(Option==2) { fprintf(outfile,"allele frequency estimates using naive counting in\n"); fprintf(outfile,"\t\t cases \t\t unaffected controls \t\t all sample \n"); for (i=1;i<=M;i++) fprintf(outfile,"allele %d : freq = %.4f \t\t freq = %.4f \t\t freq = %.4f\n",i,NaivefreqCase[i],NaivefreqControl[i],Naivefreq[i]); fprintf(outfile,"*****************************************\n"); } if(Option!=2) { fprintf(outfile,"allele frequency estimates using naive counting in\n"); fprintf(outfile,"\t\tcases \t\t unaffected controls \t\tunknown controls \t all sample \n"); for (i=1;i<=M;i++) fprintf(outfile,"allele %d : freq = %.4f \t\t freq = %.4f \t\t freq = %.4f \t\t freq = %.4f\n",i,NaivefreqCase[i],NaivefreqControl[i],NaivefreqUnknown[i],Naivefreq[i]); fprintf(outfile,"*****************************************\n"); } fprintf(outfile,"\n\n\n\n"); free_dmatrix(freqMatrix,1,M,1,M); free_dvector(frequency,1,M); free_dvector(frequencyCase,1,M); free_dvector(frequencyControl,1,M); free_dvector(frequencyUnknown,1,M); free_dvector(Naive,1,M); free_dvector(NaiveCase,1,M); free_dvector(NaiveControl,1,M); free_dvector(NaiveUnknown,1,M); free_dvector(Naivefreq,1,M); free_dvector(Rvector,1,M); free_dvector(NaivefreqCase,1,M); free_dvector(NaivefreqControl,1,M); free_dvector(NaivefreqUnknown,1,M); free_dmatrix(freqNaive,1,M,1,M); free_dvector(ROBUSTVAR,1,M); free_dvector(YYvector,1,M); free_dvector(Y1vector,1,M); fprintf(pvfile,"%d \t %s \t %g \t %g \t %g \n",BIGMARKER,CURRENTSNPNAME,pval1,pval2,pval3); fprintf(chivfile,"%d \t %s \t %g \t %g \t %g \n",BIGMARKER,CURRENTSNPNAME,RM,RCHI,RW); if(BIGMARKER==1) {TOPPvalues[BIGMARKER]=pval1; TOPPorder[BIGMARKER]=BIGMARKER; strncpy(TOPSNPNAME[BIGMARKER],CURRENTSNPNAME,100); TOPPvalues2[BIGMARKER]=pval2; TOPPorder2[BIGMARKER]=BIGMARKER; strncpy(TOPSNPNAME2[BIGMARKER],CURRENTSNPNAME,100); TOPPvalues3[BIGMARKER]=pval3; TOPPorder3[BIGMARKER]=BIGMARKER; strncpy(TOPSNPNAME3[BIGMARKER],CURRENTSNPNAME,100); } else {if(BIGMARKER<=MAXTOP) { TOPPvalues[BIGMARKER]=pval1; TOPPorder[BIGMARKER]=BIGMARKER; strncpy(TOPSNPNAME[BIGMARKER],CURRENTSNPNAME,100); vecsrt2(TOPPvalues,TOPPorder,TOPSNPNAME,BIGMARKER); TOPPvalues2[BIGMARKER]=pval2; TOPPorder2[BIGMARKER]=BIGMARKER; strncpy(TOPSNPNAME2[BIGMARKER],CURRENTSNPNAME,100); vecsrt2(TOPPvalues2,TOPPorder2,TOPSNPNAME2,BIGMARKER); TOPPvalues3[BIGMARKER]=pval3; TOPPorder3[BIGMARKER]=BIGMARKER; strncpy(TOPSNPNAME3[BIGMARKER],CURRENTSNPNAME,100); vecsrt2(TOPPvalues3,TOPPorder3,TOPSNPNAME3,BIGMARKER); } else {if(pval1=BIGMARKER) {top=BIGMARKER;} if(X==0 & HWE==0) {fprintf(sigfile,"Below is a list of the top %d markers with the smallest p-values using the MQLS robust test statistic \n\n",top); fprintf(sigfile,"**************************************************\n\n"); for (m=1;m<=top;m++) {fprintf(sigfile,"MARKER %d: %s has a p-value of %g\n",TOPPorder[m],TOPSNPNAME[m],TOPPvalues[m]);} fprintf(sigfile,"\n\n############################################### \n\n"); fprintf(sigfile,"Below is a list of the top %d markers with the smallest p-values using the Corrected Chi-Squred robust test statistic \n\n",top); fprintf(sigfile,"**************************************************\n\n"); for (m=1;m<=top;m++) {fprintf(sigfile,"MARKER %d: %s has a p-value of %g\n",TOPPorder2[m],TOPSNPNAME2[m],TOPPvalues2[m]);} fprintf(sigfile,"\n\n############################################### \n\n"); fprintf(sigfile,"Below is a list of top %d markers with the smallest p-values using the WQLS robust test statistic \n\n",top); fprintf(sigfile,"**************************************************\n\n"); for (m=1;m<=top;m++) {fprintf(sigfile,"MARKER %d: %s has a p-value of %g\n",TOPPorder3[m],TOPSNPNAME3[m],TOPPvalues3[m]);} } if(X==0 & HWE==1) {fprintf(sigfile,"Below is a list of the top %d markers with the smallest p-values using the MQLS HWE test statistic \n\n",top); fprintf(sigfile,"**************************************************\n\n"); for (m=1;m<=top;m++) {fprintf(sigfile,"MARKER %d: %s has a p-value of %g\n",TOPPorder[m],TOPSNPNAME[m],TOPPvalues[m]);} fprintf(sigfile,"\n\n############################################### \n\n"); fprintf(sigfile,"Below is a list of the top %d markers with the smallest p-values using the Corrected Chi-Squred HWE test statistic \n\n",top); fprintf(sigfile,"**************************************************\n\n"); for (m=1;m<=top;m++) {fprintf(sigfile,"MARKER %d: %s has a p-value of %g\n",TOPPorder2[m],TOPSNPNAME2[m],TOPPvalues2[m]);} fprintf(sigfile,"\n\n############################################### \n\n"); fprintf(sigfile,"Below is a list of top %d markers with the smallest p-values using the WQLS HWE test statistic \n\n",top); fprintf(sigfile,"**************************************************\n\n"); for (m=1;m<=top;m++) {fprintf(sigfile,"MARKER %d: %s has a p-value of %g\n",TOPPorder3[m],TOPSNPNAME3[m],TOPPvalues3[m]);} } if(X==1 & HWE==0) {fprintf(sigfile,"Below is a list of the top %d markers with the smallest p-values using the XM robust test statistic \n\n",top); fprintf(sigfile,"**************************************************\n\n"); for (m=1;m<=top;m++) {fprintf(sigfile,"MARKER %d: %s has a p-value of %g\n",TOPPorder[m],TOPSNPNAME[m],TOPPvalues[m]);} fprintf(sigfile,"\n\n############################################### \n\n"); fprintf(sigfile,"Below is a list of the top %d markers with the smallest p-values using the XCHI robust test statistic \n\n",top); fprintf(sigfile,"**************************************************\n\n"); for (m=1;m<=top;m++) {fprintf(sigfile,"MARKER %d: %s has a p-value of %g\n",TOPPorder2[m],TOPSNPNAME2[m],TOPPvalues2[m]);} fprintf(sigfile,"\n\n############################################### \n\n"); fprintf(sigfile,"Below is a list of top %d markers with the smallest p-values using the XW robust test statistic \n\n",top); fprintf(sigfile,"**************************************************\n\n"); for (m=1;m<=top;m++) {fprintf(sigfile,"MARKER %d: %s has a p-value of %g\n",TOPPorder3[m],TOPSNPNAME3[m],TOPPvalues3[m]);} } if(X==1 & HWE==1) {fprintf(sigfile,"Below is a list of the top %d markers with the smallest p-values using the XM HWE test statistic \n\n",top); fprintf(sigfile,"**************************************************\n\n"); for (m=1;m<=top;m++) {fprintf(sigfile,"MARKER %d: %s has a p-value of %g\n",TOPPorder[m],TOPSNPNAME[m],TOPPvalues[m]);} fprintf(sigfile,"\n\n############################################### \n\n"); fprintf(sigfile,"Below is a list of the top %d markers with the smallest p-values using the XCHI HWE test statistic \n\n",top); fprintf(sigfile,"**************************************************\n\n"); for (m=1;m<=top;m++) {fprintf(sigfile,"MARKER %d: %s has a p-value of %g\n",TOPPorder2[m],TOPSNPNAME2[m],TOPPvalues2[m]);} fprintf(sigfile,"\n\n############################################### \n\n"); fprintf(sigfile,"Below is a list of top %d markers with the smallest p-values using the XW HWE test statistic \n\n",top); fprintf(sigfile,"**************************************************\n\n"); for (m=1;m<=top;m++) {fprintf(sigfile,"MARKER %d: %s has a p-value of %g\n",TOPPorder3[m],TOPSNPNAME3[m],TOPPvalues3[m]);} } fclose(sigfile); fclose(outfile); return 0; } void readFAM (char *name) { int all1=0, all2=0,fam=0,famold=1,n=0; int indnumber=0,ind=0,par1,par2,sex,aff; /* char line[MAXLINE]; int length=MAXLINE; */ if((famfile=fopen(name, "r"))==NULL) { printf("Can't open %s\n",name); exit(1); } famdata[1].N=0; while((fscanf(famfile, "%d %d %*d %*d %d %d",&fam,&ind,&sex,&aff))==4) { if (fam==famold) famdata[fam].N++; else famdata[fam].N=1; if (fam!=famold && fam!=(famold+1)) {printf("Problems with family %d.\n Family should have following Id numbers from 1 to N\n",fam); exit(1);} indnumber++; famold=fam; } N=indnumber; F=fam; if (F>MAXFAM) { printf("Number of families exceeds the maximum number of families allowed\n. You should try to change the value of MAXFAM in the CC-QLStest.c file and recompile.\n\n"); exit(1);} rewind(famfile); /* printf("There are %d families and %d study individuals \n",F,N); */ } void readdataFAM (struct FAM *famdata,FILE *errfile) { int all1=0, all2=0,i=0,fam=0,famold=1,n=0; int status=0,indnumber=0,ind=0,m=0,j,indiv=0,sex=0; int mycount=0; /* char line[MAXLINE]; int length=MAXLINE; */ while((fscanf(famfile, "%d %d %*d %*d %d %d",&fam,&ind,&sex,&status))==4) { if (fam!=famold && fam!=(famold+1)) { printf("Problems with family %d.\n Family should have following Id numbers from 1 to N\n ",fam); exit(1); } if (fam!=famold) {indnumber=0; NPheno+=famdata[famold].Pheno; NnoPheno+=famdata[famold].NoPheno; } if(sex==1) {Nmale++; } if(sex==2) {Nfemale++; } if ((status<0) ||(status>2)) { fprintf(errfile,"individual %d from family %d does not have a phenotype value of 0, 1, or 2 ..will be skipped\n",ind,fam); famdata[fam].INCORRECTPheno++; } else if (status==0 || status==1 || status==2) { if(status==0) {famdata[fam].Unknown++; NUnknown=NUnknown+1; } if(status==1) {NUnaffected++; } if(status==2) {Naffected++; } famdata[fam].Pheno++; } indnumber++; famdata[fam].descri[indnumber]=ind; famdata[fam].AFFEC[indnumber]=status; famdata[fam].SEX[indnumber]=sex; if ((status<0) ||(status>2)) {famdata[fam].AFFEC[indnumber]=-9;} mycount++; famdata[fam].STUDYID[indnumber]=mycount; fscanf(famfile,"\n"); famold=fam; } NPheno+=famdata[fam].Pheno; NnoPheno+=famdata[fam].NoPheno; /* printf("mycount in reading fam is %d \n",mycount); */ printf("There are %d individuals from %d independent families.\n%d of the individuals are affected, %d of the individuals are unaffected, and %d of the individuals are of unknown phenotype.\n\n",NPheno,F,Naffected,NUnaffected,NUnknown); if(X==0) {printf("There are %d males and %d females. The prevalence values used in the MQLS test statistic for males and females are %lf and %lf, respectively. \n",Nmale,Nfemale,KP[1],KP[2]);} if(X==1) {printf("There are %d males and %d females. The prevalence values used in the XM test statistic for males and females are %lf and %lf, respectively. \n",Nmale,Nfemale,KP[1],KP[2]);} } /****************************************************** Reads the kinship coefficients of the individual pairs from the kinshipcoef file and stores the values in the Storekin array. Storekin returns value -1 for pairs of individuals for which no coefficient is available *****************************************************/ void readkincoef(char *name,FILE *errfile, struct FAM *famdata,double ***Storekin) { double coef=0; int follow=0,j,i,n,famold=0; long int Id1,Id2; int family=0,ind1=0,ind2=0,indold1=0,indold2=0,Idold1=0,Idold2=0; int err_array[N]; int test=0,lim=0; for (i=1;i<=N;i++) err_array[i]=0; if((idfile=fopen(name, "r"))==NULL) { printf("Can't open %s\n",name); exit(1); } for (follow=1;follow<=F;follow++) for (i=1;i<=famdata[follow].Pheno;i++) for (j=i;j<=famdata[follow].Pheno;j++) { Storekin[follow][i][j]=-1; if (i!=j) Storekin[follow][j][i]=-1; } while (fscanf(idfile, "%d %ld %ld %lf\n",&family,&Id1,&Id2,&coef)==4) { if (family>F) { printf("Problem with family number %d. Family should have following Id numbers from 1 to N\n",family); exit(1); } if (family==famold) { if (Id1==Idold1) ind1=indold1; else if (Id1==Idold2) ind1=indold2; else ind1=findInd(Id1,family); if (Id2==Idold1) ind2=indold1; else if (Id2==Idold2) ind2=indold2; else ind2=findInd(Id2,family); } else { ind1=findInd(Id1,family); ind2=findInd(Id2,family); } famold=family; if (ind1!=0 && ind2!=0) { Storekin[family][ind1][ind2]=coef; Storekin[family][ind2][ind1]=coef; if(X==1) { if(ind1==ind2&& famdata[family].SEX[ind1]==1) {if(coef!=1) { printf("\nERROR! There is a problem with the kinship coefficient input file. User specfied option -x for an X-chromsome analysis. Individual %ld from family %d is a male and has an inbreeding coefficient value of %lf. All males must have an X-chromosome inbreeding coefficient set to 1 in this kinship coefficient file. Please check the kinship coefficient file and make sure that the file contains only X-chromsome kinship and inbreeding coefficients. \n",Id1,family,coef); exit(1); }}} } if (ind1==0) { test=0; for (j=1;j<=lim;j++) if (Id1==err_array[j]) { test=1; j=lim+1; } if (test==0) { fprintf(errfile,"individual %ld from family %d is in the kinshipcoef file but is not phenotyped or not available from marker data file\n",Id1,family); lim++; err_array[lim]=Id1; } } if (ind2==0) { test=0; for (j=1;j<=lim;j++) if (Id2==err_array[j]) { test=1; j=lim+1; } if (test==0) { fprintf(errfile,"individual %ld from family %d is in the kinshipcoef file but is not phenotyped or not available from marker data file\n",Id2,family); lim++; err_array[lim]=Id2; } } Idold1=Id1; Idold2=Id2; indold1=ind1; indold2=ind2; } } /****************************************************** returns the rank of an individual in his family from its family number and Id *****************************************************/ int findInd(Id1,family) { int indpas=1; while (indpas<=famdata[family].Pheno) { if (famdata[family].descri[indpas]==Id1) return(indpas); indpas++; } return(0); } int getgenoline_PLINK(struct MARKER *Mark,struct FAM *famdata,FILE *errfile) { int all1=0, all2=0,fam=0,famold=1,n=0; char line[MAXLINE]; int length=MAXLINE; int Ngenotypes=0; int status=0,indnumber=0,ind=0,m=0,indiv=0,sex=0; int i,j,k,l; int MYCONTINUE,genovalue,mytyped=0,pers1,pers2,num; char str1[200],str2[200],str3[200],str4[200]; FILE *ftemp; int **GENOVALUE; GENOVALUE=imatrix(0,NPheno,0,2); fgets(line,length,genofile); //strncpy(line2,line,MAXLINE); MYCONTINUE=0; Ngenotypes=0; n=sscanf(line,"%s %s %s %s %[^\n]",str1,CURRENTSNPNAME,str3,str4,line); n=sscanf(line,"%d %d %[^\n]",&all1,&all2,line); /* n=sscanf(line,"%d %[^\n]",&genovalue,line);*/ if (n<2) { if(BIGMARKER==0) { printf("No marker to test. Please check first line in marker data file\n\n"); exit(1); } } else { Ngenotypes++; GENOVALUE[Ngenotypes][1]=all1; GENOVALUE[Ngenotypes][2]=all2; while (n==3) { n=sscanf(line,"%d %d %[^\n]",&all1,&all2,line); if (n>=2) { Ngenotypes++; GENOVALUE[Ngenotypes][1]=all1; GENOVALUE[Ngenotypes][2]=all2; } } } if(Ngenotypes!=NPheno) {printf("Program has completed. Tested %d SNPs for association. \n",BIGMARKER);} if(Ngenotypes==NPheno) {MYCONTINUE=1; m=1; Mark[m].Nball=0; Mark[m].Nc=0; Mark[m].Nt=0; Mark[m].Nu=0; for(i=1;i<=F;i++) { Mark[m].typed[i].N=0; Mark[m].typed[i].Nc=0; Mark[m].typed[i].Nt=0; Mark[m].typed[i].Nu=0; } // n=sscanf(line2,"%s %s %s %s %[^\n]",str1,CURRENTSNPNAME,str3,str4,line2); num=0; for (i=1;i<=F;i++) {for(j=1;j<=famdata[i].N;j++) { //n=sscanf(line2,"%d %d %[^\n]",&all1,&all2,line2); num++; all1=GENOVALUE[num][1]; all2=GENOVALUE[num][2]; if( ( (all1==1 || all1==2) && (all2==1 ||all2==2)) && (famdata[i].MZ[j][1]==-10||(famdata[i].MZ[j][1]>0&&famdata[i].MZ[j][1]!=2))) { mytyped++; /* GENOTYPED[num]=1; */ /* printf("%d %d %d \n",i,j,FINALTYPED[mytyped]); */ } else{all1=0; all2=0; /* GENOTYPED[num]=0; */ } fam=i; indnumber=j; status=famdata[i].AFFEC[j]; ind=famdata[i].descri[j]; /* sex=famdata[i].SEX[j]; */ Mark[m].mark[fam][indnumber][1]=all1; Mark[m].mark[fam][indnumber][2]=all2; Mark[m].mark[fam][indnumber][3]=status; Mark[m].mark[fam][indnumber][4]=ind; /* Mark[m].mark[fam][indnumber][5]=sex;*/ if ( (status==0||status==1) && all1!=0 && all2!=0) { Mark[m].typed[fam].N++; if(status==1) {Mark[m].typed[fam].Nt++; Mark[m].Nt++; } if(status==0) {Mark[m].Nu++; Mark[m].typed[fam].Nu++; } } if (status==2 && all1!=0 && all2!=0) { Mark[m].typed[fam].Nc++; Mark[m].typed[fam].N++; Mark[m].Nc++; } if (all1>Mark[m].Nball) Mark[m].Nball=all1; if (all2>Mark[m].Nball) Mark[m].Nball=all2; } } } /* printf("There are %d total typed for marker %d \n",mytyped,BIGMARKER); */ if(MYCONTINUE==1) { TYPEDNUM=mytyped; /* printf("There are %d total typed for marker %d \n",mytyped,BIGMARKER); */ } free_imatrix(GENOVALUE,0,NPheno,0,2); return(MYCONTINUE); } void readpen (char *name) { int all1=0, all2=0,fam=0,famold=1,n=0,i=0; int indnumber=0,ind=0, count=0; double val; if((penfile=fopen(name, "r"))==NULL) { printf("Can't open %s\n",name); exit(1); } count=0; while(fscanf(penfile,"%lf",&val) ==1 && count<2) {count++; KP[count]=val; } PREVCOUNT=count; if(PREVCOUNT==0) {printf("Error: Input Prevelance file %s does not have prevelance values. \n",name); exit(1); } if(PREVCOUNT==1) {KP[2]=KP[1]; } } void GET_PHENOVECTOR_AND_MZtwins(int fam,int length) { int j=0,i=0,k=0; int m=1; double val1=0,val2=0; int MZCOUNT=0,totalcount=0; int numcases=0,numcontrols=0,totalnum=0; for (i=1;i<=length;i++) //length is famdata[fam].NPheno {famdata[fam].MZ[i][1]=-10; /* if(Mark[m].mark[fam][i][3]==2) {numcases=numcases+1;} if(Mark[m].mark[fam][i][3]==1) {numcontrols=numcontrols+1;} totalnum=totalnum+1; */ } for (i=1;i<=length;i++) //length is famdata[fam].NPheno { for (j=i+1;j<=length;j++) {totalcount=totalcount+1; if((Storekin[fam][i][j]>=.5) && ((Storekin[fam][i][i]<1. && Storekin[fam][j][j] < 1.) || Storekin[fam][i][j]==1.)) { val1=(1.0+Storekin[fam][i][i])/2; val2=(1.0+Storekin[fam][j][j])/2; if( (val1==Storekin[fam][i][j]) && (val2==Storekin[fam][i][j]) &&(val1==val2)) { famdata[fam].MZ[i][1]=1; famdata[fam].MZ[j][1]=2; MZCOUNT=MZCOUNT+1; TOTALMZCOUNT=TOTALMZCOUNT+1; /*** FOR WQLS OF MZ TWINS ***/ val1=0; if(famdata[fam].AFFEC[i]==2) {val1=1;} val2=0; if(famdata[fam].AFFEC[j]==2) {val2=1;} famdata[fam].MZ[i][2]=(val1+val2)/2; /** FOR MQLS OF MZ TWINS **/ val1=0; if(famdata[fam].AFFEC[i]==1) {val1=-KP[famdata[fam].SEX[i]];} if(famdata[fam].AFFEC[i]==2) {val1=1-KP[famdata[fam].SEX[i]];} val2=0; if(famdata[fam].AFFEC[j]==1) {val2=-KP[famdata[fam].SEX[j]];} if(famdata[fam].AFFEC[j]==2) {val2=1-KP[famdata[fam].SEX[j]];} famdata[fam].MZ[i][3]=val1+val2; }}} } m=1; for (i=1;i<=length;i++) //length is famdata[fam].NPheno { val1=0; if(famdata[fam].MZ[i][1]!=-10) { val1+=famdata[fam].MZ[i][3]; } else { if (famdata[fam].AFFEC[i]==2) { val1+=(Storekin[fam][i][i]+1)*(1-KP[famdata[fam].SEX[i]]);} if (famdata[fam].AFFEC[i]==1) { val1+=(Storekin[fam][i][i]+1)*(-KP[famdata[fam].SEX[i]]);} } for(j=1;j<=length;j++) { if(j!=i) { if(famdata[fam].MZ[j][1]!=-10) { val1+=2*Storekin[fam][i][j]*famdata[fam].MZ[j][3]; } else { if (famdata[fam].AFFEC[j]==2) { val1+=2*Storekin[fam][i][j]*(1-KP[famdata[fam].SEX[j]]); } if (famdata[fam].AFFEC[j]==1) {val1+=2*Storekin[fam][i][j]*(-KP[famdata[fam].SEX[j]]);} /* printf("fam is %d and individual is %d with relative %d with affection status %d (and kinship coef %lf), and new value is %lf\n",fam,i,j,Mark[m].mark[fam][j][3],Storekin[fam][i][j],val1); */ } } } famdata[fam].AVEC[i][1]=val1; } } void readGenotypes(int fam,int m,int length,double **cholaug,double **cholaugCase,double **cholaugControl,double **cholaugUnknown,int *MissingVec,int miss,double **kincoefmatrix,double **kincoefMatrixCase,double **kincoefmatrixControl,double **kincoefmatrixUnknown) { int all1=0, all2=0,j=0,i=0,family=0,follow=0; int a1=0,tot=1,controls=1,cases=1,unknown=1; int nbc1=0,nbc2=0,nbt1=0,nbt2=0,nb1=0,nb2=0,nbu1=0,nbu2=0; double val1=0; int k,num; k=m; /* IF THERE ARE MZ TWINS THEN REMOVING ONE OF THEM */ if(TOTALMZCOUNT>0) { for (j=1;j<=length;j++) //length is famdata[fam].NPheno { if(famdata[fam].MZ[j][1]>0&&famdata[fam].MZ[j][1]==2) { k=m; if(Mark[k].mark[fam][j][3]==2&&Mark[k].mark[fam][j][1]>0&& Mark[k].mark[fam][j][2]>0) {Mark[k].typed[fam].Nc--; Mark[k].Nc--;} if(Mark[k].mark[fam][j][3]==1&&Mark[k].mark[fam][j][1]>0&& Mark[k].mark[fam][j][2]>0) {Mark[k].typed[fam].Nt--; Mark[k].Nt--; } if(Mark[k].mark[fam][j][1]>0&& Mark[k].mark[fam][j][2]>0) {Mark[k].typed[fam].N--;} Mark[k].mark[fam][j][1]=0; Mark[k].mark[fam][j][2]=0; }}} for (i=1;i<=length;i++) //length is famdata[fam].NPheno { /* REMOVING INDIVIDUALS WITH UNRECOGNIZED PHENOTYPES */ if ((Mark[k].mark[fam][i][3]<0) ||(Mark[k].mark[fam][i][3]>2)) { printf("fam %d and individual %d has an unrecognized phenotype of %d \n",fam,i,Mark[k].mark[fam][i][3]); if(Mark[k].mark[fam][i][3]==2&&Mark[k].mark[fam][i][1]>0&& Mark[k].mark[fam][i][2]>0) {Mark[k].typed[fam].Nc--; Mark[k].Nc--;} if(Mark[k].mark[fam][i][3]==1&&Mark[k].mark[fam][i][1]>0&& Mark[k].mark[fam][i][2]>0) {Mark[k].typed[fam].Nt--; Mark[k].Nt--; } if(Mark[k].mark[fam][i][3]==0&&Mark[k].mark[fam][i][1]>0&& Mark[k].mark[fam][i][2]>0) {Mark[k].typed[fam].Nu--; Mark[k].Nu--; } if(Mark[k].mark[fam][i][1]>0&& Mark[k].mark[fam][i][2]>0) {Mark[k].typed[fam].N--;} Mark[m].mark[fam][i][1]=0; Mark[m].mark[fam][i][2]=0; } /* OPTION 2 REMOVING UNKNOWN PHENOTYPED INDIVIDUALS */ if( (Option==2) && (Mark[m].mark[fam][i][3]==0)) { if(Mark[k].mark[fam][i][1]>0&& Mark[k].mark[fam][i][2]>0) { Mark[k].typed[fam].Nu--; Mark[k].Nu--; Mark[k].typed[fam].N--;} Mark[m].mark[fam][i][1]=0; Mark[m].mark[fam][i][2]=0; } } for (i=1;i<=length;i++) //length is famdata[fam].NPheno { val1=0; if (Mark[m].mark[fam][i][1]!=0 && Mark[m].mark[fam][i][2]!=0) { nb1++; nb2=nb1; if (Storekin[fam][i][i]==-1) { printf("No inbreeding coefficient for individual %d from family %d. Please check...\n\n",i,fam); exit(1); } kincoefmatrix[nb1][nb1]=Storekin[fam][i][i]+1; if (Mark[m].mark[fam][i][3]==2) { nbc1++; nbc2=nbc1; kincoefMatrixCase[nbc1][nbc1]=kincoefmatrix[nb2][nb1]; for (j=i+1;j<=length;j++) { if (Storekin[fam][i][j]==-1) { printf("No kinship coefficient between individual %d and individual %d from family %d. Please check...\n\n",famdata[fam].descri[i],famdata[fam].descri[j],fam); exit(1); } if (Mark[m].mark[fam][j][1]!=0 && Mark[m].mark[fam][j][2]!=0) { nb2++; kincoefmatrix[nb1][nb2]=2*Storekin[fam][i][j]; kincoefmatrix[nb2][nb1]=kincoefmatrix[nb1][nb2]; if (Mark[m].mark[fam][j][3]==2) { nbc2++; kincoefMatrixCase[nbc1][nbc2]=kincoefmatrix[nb1][nb2]; kincoefMatrixCase[nbc2][nbc1]=kincoefMatrixCase[nbc1][nbc2]; } } } cholaug[tot][M+1]=2; cholaugCase[cases][M+1]=2; cholaug[tot][M+2]=2; cholaugCase[cases][M+2]=2; if(famdata[fam].MZ[i][1]!=-10) {cholaug[tot][M+2]=2*famdata[fam].MZ[i][2]; cholaugCase[cases][M+2]=2*famdata[fam].MZ[i][2]; /* printf("allocated the MZ case\n"); */ } for (a1=1;a1<=M;a1++) cholaug[tot][a1]=0; for(a1=1;a1<=M;a1++) { if (Mark[m].mark[fam][i][1]==a1) cholaug[tot][a1]++; if (Mark[m].mark[fam][i][2]==a1) cholaug[tot][a1]++; cholaugCase[cases][a1]=cholaug[tot][a1]; } cases++; tot++; } else if(Mark[m].mark[fam][i][3]==1) { nbt1++; nbt2=nbt1; kincoefmatrixControl[nbt1][nbt1]=kincoefmatrix[nb2][nb1]; for (j=i+1;j<=length;j++) { if (Mark[m].mark[fam][j][1]!=0 && Mark[m].mark[fam][j][2]!=0) { nb2++; kincoefmatrix[nb1][nb2]=2*Storekin[fam][i][j]; kincoefmatrix[nb2][nb1]=kincoefmatrix[nb1][nb2]; if (Mark[m].mark[fam][j][3]==1) { nbt2++; kincoefmatrixControl[nbt1][nbt2]=kincoefmatrix[nb1][nb2]; kincoefmatrixControl[nbt2][nbt1]=kincoefmatrixControl[nbt1][nbt2]; } } } cholaug[tot][M+1]=2; cholaugControl[controls][M+1]=2; cholaug[tot][M+2]=0; cholaugControl[controls][M+2]=0; if(famdata[fam].MZ[i][1]!=-10) { cholaug[tot][M+2]=2*famdata[fam].MZ[i][2]; cholaugControl[controls][M+2]=2*famdata[fam].MZ[i][2]; /* printf("allocated the MZ control\n"); */ } for (a1=1;a1<=M;a1++) cholaug[tot][a1]=0; for (a1=1;a1<=M;a1++) { if (Mark[m].mark[fam][i][1]==a1) cholaug[tot][a1]++; if (Mark[m].mark[fam][i][2]==a1) cholaug[tot][a1]++; cholaugControl[controls][a1]=cholaug[tot][a1]; } controls++; tot++; } else if(Mark[m].mark[fam][i][3]==0) { nbu1++; nbu2=nbu1; kincoefmatrixUnknown[nbu1][nbu1]=kincoefmatrix[nb2][nb1]; for (j=i+1;j<=length;j++) { if (Mark[m].mark[fam][j][1]!=0 && Mark[m].mark[fam][j][2]!=0) { nb2++; kincoefmatrix[nb1][nb2]=2*Storekin[fam][i][j]; kincoefmatrix[nb2][nb1]=kincoefmatrix[nb1][nb2]; if (Mark[m].mark[fam][j][3]==0) { nbu2++; kincoefmatrixUnknown[nbu1][nbu2]=kincoefmatrix[nb1][nb2]; kincoefmatrixUnknown[nbu2][nbu1]=kincoefmatrixUnknown[nbu1][nbu2]; } } } cholaug[tot][M+1]=2; cholaugUnknown[unknown][M+1]=2; cholaug[tot][M+2]=0; cholaugUnknown[unknown][M+2]=0; if(famdata[fam].MZ[i][1]!=-10) { cholaug[tot][M+2]=2*famdata[fam].MZ[i][2]; cholaugUnknown[unknown][M+2]=2*famdata[fam].MZ[i][2]; /* printf("allocated the MZ control\n"); */ } for (a1=1;a1<=M;a1++) cholaug[tot][a1]=0; for (a1=1;a1<=M;a1++) { if (Mark[m].mark[fam][i][1]==a1) cholaug[tot][a1]++; if (Mark[m].mark[fam][i][2]==a1) cholaug[tot][a1]++; cholaugUnknown[unknown][a1]=cholaug[tot][a1]; } unknown++; tot++; } } else { follow++; /* MissingVec[follow]=i; */ } } /* if ((cases-1)!=Mark[m].typed[fam].Nc || (controls-1)!=Mark[m].typed[fam].Nt) {printf("Problems while rereading the data file...\n"); exit(1); } if (follow!=miss) {printf("problem with missing data in family %d follow=%d miss=%d\n",fam,follow,miss); exit(1);} */ /** This is to get the phenotype vector for MQLS statistic **/ tot=1; nb1=0; for (i=1;i<=length;i++) //length is famdata[fam].NPheno {val1=0; if (Mark[m].mark[fam][i][1]!=0 && Mark[m].mark[fam][i][2]!=0) {nb1++; if(Missing==1) {val1=famdata[fam].AVEC[i][1];} else { if(famdata[fam].MZ[i][1]!=-10) { val1+=famdata[fam].MZ[i][3]; } else { if (Mark[m].mark[fam][i][3]==2) { val1+=kincoefmatrix[nb1][nb1]*(1-KP[famdata[fam].SEX[i]]);} if (Mark[m].mark[fam][i][3]==1) { val1+=kincoefmatrix[nb1][nb1]*(-KP[famdata[fam].SEX[i]]);} } nb2=0; for(j=1;j<=length;j++) { if (Mark[m].mark[fam][j][1]!=0 && Mark[m].mark[fam][j][2]!=0) {nb2++; if(j!=i) { if(famdata[fam].MZ[j][1]!=-10) { val1+=kincoefmatrix[nb1][nb2]*famdata[fam].MZ[j][3]; } else { if (Mark[m].mark[fam][j][3]==2) {val1+=kincoefmatrix[nb1][nb2]*(1-KP[famdata[fam].SEX[j]]); } if (Mark[m].mark[fam][j][3]==1) {val1+=kincoefmatrix[nb1][nb2]*(-KP[famdata[fam].SEX[j]]);} } } } } } /* if(tot==1&&fam==2) {val1=2; printf("in total\n");} if(tot==2&&fam==2) {val1=1+(-Kp/(1-Kp)); printf("in total\n");} printf("fam is %d and person counter is %d and MQLS val is %f and WQLS val is %f \n",fam,tot,val1,cholaug[tot][M+2]/2); */ cholaug[tot][M+3]=2*val1; /* printf("Weight value for individual %d from fam %d using genotyped individuals only is %lf and using all individuals is %lf \n",i,fam,val1,famdata[fam].AVEC[i][1]); */ /* printf("family is %d, person is %d and affected is %d and value is %.3lf, status is %.2lf and allelecount is %.2lf and total alleles is %.2lf \n",fam,i,Mark[m].mark[fam][i][3],cholaug[tot][M+3],cholaug[tot][M+2],cholaug[tot][1],cholaug[tot][M+1]); */ tot++; } } /* printf("\n\n"); */ } /****************************************************** Computes the frequency estimates using the quasilikelihood approach *****************************************************/ void alleleFreq(double **cholaug,double *frequency,int size, double *denominator) { int i=0, j=0; double pasden=0,numerator=0; for(i=1; i<=size; i++){ pasden=pasden+(cholaug[i][M+1])*(cholaug[i][M+1]); } for(j=1; j<=M; j++) { for(i=1; i<=size; i++) { numerator=numerator+cholaug[i][M+1]*cholaug[i][j]; } frequency[j]+=numerator; numerator=0; } *denominator+=pasden; } /****************************************************** Computes the frequency estimates using the naive counting *****************************************************/ void naiveCount(double **cholaug,double *Naive,double *Naivefreq,int size) { int i=0, j=0; double sum=0, contC=0; for(i=1; i<=M; i++){ contC=0; for(j=1; j<=size; j++) { sum+=cholaug[j][i]; if (cholaug[j][M+2]==2) contC+=cholaug[j][i]; } Naive[i]+=contC; Naivefreq[i]+=sum; sum=0; } } /****************************************************** To get from counts to frequencies *****************************************************/ void getfrequency(double *Naivefreq,double *NaivefreqCase,double *NaivefreqControl,double *NaivefreqUnknown,double *frequency,double *frequencyCase,double *frequencyControl,double *frequencyUnknown,double denominator,double denomcases,double denomcontrols,double denomunknown,int Nall,int Ncase,int Ncontrol,int Nunknown) { int i; for (i=1;i<=M;i++) { if(Nall>0) { Naivefreq[i]/=(2*Nall);} if(Ncase>0) {NaivefreqCase[i]/=(2*Ncase);} if(Ncontrol>0) {NaivefreqControl[i]/=(2*Ncontrol);} if(Nunknown>0) { NaivefreqUnknown[i]/=(2*Nunknown);} if(denominator>0) { frequency[i]/=denominator;} if(denomcases>0) { frequencyCase[i]/=denomcases; } if(denomcontrols>0) { frequencyControl[i]/=denomcontrols;} if(denomunknown>0) { frequencyUnknown[i]/=denomunknown; } } } /****************************************************** Modify the cholaug matrix such as the M-1 first columns are equal to C-t*(Y-mu) instead of C-t*Y *****************************************************/ void modifcholaug(double **cholent,double **cholaug,double **chol,double *frequency,int size) { int i,j,k; for (i=1;i<=size;i++) for (j=1;j<=M+3;j++) { if (j0;i--) {invcholm[i][i]=1/cholm[i][i]; for (j=i-1;j>0;j--) {for (k=j+1;k<=i;k++) invcholm[j][i]-=cholm[j][k]*invcholm[k][i]; invcholm[j][i]/=cholm[j][j]; invcholm[i][j]=0; } } //inverse of transMat for (i=1;i<=Mpas-1;i++) for (j=1;j<=Mpas-1;j++) for (k=1;k<=Mpas-1;k++) { freqPasMat[i][j]+=invcholm[i][k]*invcholm[j][k]; } k=0; for (i=1;i<=M-1;i++) { if (Followfreq[i]==1) { k++; freqMatrix[i][i]=freqPasMat[k][k]; l=k; for (j=i+1;j<=M-1;j++) { if (Followfreq[j]==1) { l++; freqMatrix[i][j]=freqPasMat[k][l]; freqMatrix[j][i]=freqMatrix[i][j]; } else { freqMatrix[i][j]=0; freqMatrix[j][i]=0; } } } else { freqMatrix[i][i]=0; for (j=i+1;j<=M-1;j++) { freqMatrix[i][j]=0; freqMatrix[j][i]=0; } } } /* printf(""); */ /* Free the temp matrices */ free_dmatrix( transMat, 1, Mpas - 1, 1, Mpas - 1); free_dmatrix( cholm, 1, Mpas - 1, 1, Mpas - 1); free_dmatrix( invcholm, 1, Mpas - 1, 1, Mpas - 1); free_dmatrix( freqPasMat, 1, Mpas - 1, 1, Mpas - 1); free_ivector(Followfreq,1,M-1); return 0; } /****************************************************** Computes the information components required for the pseudo-score test *****************************************************/ void comput_info_score(double **cholaug,double **freqMatrix,double *infoQL_rr,double *infoQL_rf,double *infoQL_ff,double *Rvector,int size) { int i,j; double info_rr=0,info_rf=0,info=0,info_ff=0; for (i=1;i<=size;i++) {info_rr+=cholaug[i][M+2]*cholaug[i][M+2]; info_rf+=cholaug[i][M+2]*cholaug[i][M+1]; info_ff+=cholaug[i][M+1]*cholaug[i][M+1]; for (j=1;j<=M-1;j++) Rvector[j]+=cholaug[i][M+2]*cholaug[i][j]; } *infoQL_rr+=info_rr; *infoQL_rf+=info_rf; *infoQL_ff+=info_ff; } /****************************************************** Computes the pseudo-score test *****************************************************/ void from_info2_score(double **freqMatrix,double infoQL_rr,double infoQL_rf,double infoQL_ff,double *Rvector,double *testval) { int i,j; double info=0,score=0; info=infoQL_rr - infoQL_rf*infoQL_rf/infoQL_ff; for (i=1;i<=M-1;i++) for (j=1;j<=M-1;j++) score+=Rvector[i]*Rvector[j]/info*freqMatrix[i][j]/2; *testval=score; } /****************************************************** Computes the corrected chi2 test *****************************************************/ void from_info2_chi2(double info_rr, double info_rf, double info_ff, double *Naive,double *Naivefreq,double **freqNaive,double *chi2val,int Nall,int Ncase) { int i,j; double chi2old=0; double freqpas=0,corrfactor=0; corrfactor=2/((info_rr-2*Ncase*(double)info_rf/Nall+(double)Ncase/Nall*(double)Ncase/Nall*info_ff)); for (i=1;i<=M-1;i++) for (j=1;j<=M-1;j++) chi2old+=(Naive[i]-2*Ncase*Naivefreq[i])*(Naive[j]-2*Ncase*Naivefreq[j])*freqNaive[i][j]; *chi2val=chi2old*corrfactor; } /****************************************************** Computes the information components required to derive the correction factor for the chi2 test *****************************************************/ void comput_info_chi2(double **kincoefMatrix,double **cholaug,double *info_rr,double *info_rf,double *info_ff,int size) { int i,j; double infopas_rr=0,infopas_rf=0,infopas_ff=0; for (i=1;i<=size;i++) { for (j=1;j<=size;j++) { infopas_rr+=cholaug[i][M+2]*kincoefMatrix[i][j]*cholaug[j][M+2]; infopas_rf+=cholaug[i][M+2]*kincoefMatrix[i][j]*cholaug[j][M+1]; infopas_ff+=cholaug[i][M+1]*kincoefMatrix[i][j]*cholaug[j][M+1]; } } *info_rr+=infopas_rr; *info_rf+=infopas_rf; *info_ff+=infopas_ff; } /****************************************************** Computes the information components required for the pseudo-score test for MQLS *****************************************************/ void comput_info_scoreMQLS(double **cholaug,double **freqMatrix,double *infoQL_rr,double *infoQL_rf,double *infoQL_ff,double *Rvector,int size) { int i,j; double info_rr=0,info_rf=0,info=0,info_ff=0; for (i=1;i<=size;i++) {info_rr+=cholaug[i][M+3]*cholaug[i][M+3]; info_rf+=cholaug[i][M+3]*cholaug[i][M+1]; info_ff+=cholaug[i][M+1]*cholaug[i][M+1]; for (j=1;j<=M-1;j++) Rvector[j]+=cholaug[i][M+3]*cholaug[i][j]; } *infoQL_rr+=info_rr; *infoQL_rf+=info_rf; *infoQL_ff+=info_ff; } void vecsrt(double *d, int *M,int n) { int k,j,i; double p; int place; for (i=1;i=2) { Ngenotypes++; } } } if(Ngenotypes==NPheno) {MYCONTINUE=1; m=1; Mark[m].Nball=0; Mark[m].Nc=0; Mark[m].Nt=0; Mark[m].Nu=0; for(i=1;i<=F;i++) { Mark[m].typed[i].N=0; Mark[m].typed[i].Nc=0; Mark[m].typed[i].Nt=0; Mark[m].typed[i].Nu=0; } num=0; for (i=1;i<=F;i++) {for(j=1;j<=famdata[i].N;j++) { n=sscanf(line2,"%d %d %[^\n]",&all1,&all2,line2); num++; if( ( (all1==1 || all1==2) && (all2==1 ||all2==2)) && (famdata[i].MZ[j][1]==-10||(famdata[i].MZ[j][1]>0&&famdata[i].MZ[j][1]!=2))) { mytyped++; /* GENOTYPED[num]=1; */ /* printf("%d %d %d \n",i,j,FINALTYPED[mytyped]); */ } else{all1=0; all2=0; /* GENOTYPED[num]=0; */ } fam=i; indnumber=j; status=famdata[i].AFFEC[j]; ind=famdata[i].descri[j]; /* sex=famdata[i].SEX[j]; */ Mark[m].mark[fam][indnumber][1]=all1; Mark[m].mark[fam][indnumber][2]=all2; Mark[m].mark[fam][indnumber][3]=status; Mark[m].mark[fam][indnumber][4]=ind; /* Mark[m].mark[fam][indnumber][5]=sex;*/ if ( (status==0||status==1) && all1!=0 && all2!=0) { Mark[m].typed[fam].N++; if(status==1) {Mark[m].typed[fam].Nt++; Mark[m].Nt++; } if(status==0) {Mark[m].Nu++; Mark[m].typed[fam].Nu++; } } if (status==2 && all1!=0 && all2!=0) { Mark[m].typed[fam].Nc++; Mark[m].typed[fam].N++; Mark[m].Nc++; } if (all1>Mark[m].Nball) Mark[m].Nball=all1; if (all2>Mark[m].Nball) Mark[m].Nball=all2; } } } /* printf("There are %d total typed for marker %d \n",mytyped,BIGMARKER); */ if(MYCONTINUE==1) { TYPEDNUM=mytyped; /* printf("There are %d total typed for marker %d \n",mytyped,BIGMARKER); */ } return(MYCONTINUE); } int getgenoline_ADDITIVE(struct MARKER *Mark,struct FAM *famdata,FILE *errfile) { int all1=0, all2=0,fam=0,famold=1,n=0; char line[MAXLINE],line2[MAXLINE]; int length=MAXLINE; int Ngenotypes=0; int status=0,indnumber=0,ind=0,m=0,indiv=0,sex=0; int i,j,k,l; int MYCONTINUE,genovalue,mytyped=0,pers1,pers2,num; fgets(line,length,genofile); strncpy(line2,line,MAXLINE); MYCONTINUE=0; Ngenotypes=0; /* n=sscanf(line,"%d %d %[^\n]",&all1,&all2,line); */ n=sscanf(line,"%d %[^\n]",&genovalue,line); if (n<1) { if(BIGMARKER==0) { printf("No marker to test. Please check first line in marker data file\n\n"); exit(1); } } else { Ngenotypes++; while (n==2) { n=sscanf(line,"%d %[^\n]",&genovalue,line); if (n>=1) { Ngenotypes++; } } } if(Ngenotypes==NPheno) {MYCONTINUE=1; m=1; Mark[m].Nball=0; Mark[m].Nc=0; Mark[m].Nt=0; Mark[m].Nu=0; for(i=1;i<=F;i++) { Mark[m].typed[i].N=0; Mark[m].typed[i].Nc=0; Mark[m].typed[i].Nt=0; Mark[m].typed[i].Nu=0; } num=0; for (i=1;i<=F;i++) {for(j=1;j<=famdata[i].N;j++) { n=sscanf(line2,"%d %[^\n]",&genovalue,line2); num++; if( (genovalue==0 || genovalue==1 || genovalue==2) && (famdata[i].MZ[j][1]==-10||(famdata[i].MZ[j][1]>0&&famdata[i].MZ[j][1]!=2))) { mytyped++; /* GENOTYPED[num]=1; */ /* printf("%d %d %d \n",i,j,FINALTYPED[mytyped]); */ if(genovalue==2) {all1=1; all2=1;} if(genovalue==1) {all1=1; all2=2;} if(genovalue==0) {all1=2; all2=2;} } else{all1=0; all2=0; /* GENOTYPED[num]=0; */ } fam=i; indnumber=j; status=famdata[i].AFFEC[j]; ind=famdata[i].descri[j]; /* sex=famdata[i].SEX[j]; */ Mark[m].mark[fam][indnumber][1]=all1; Mark[m].mark[fam][indnumber][2]=all2; Mark[m].mark[fam][indnumber][3]=status; Mark[m].mark[fam][indnumber][4]=ind; /* Mark[m].mark[fam][indnumber][5]=sex;*/ if ( (status==0||status==1) && all1!=0 && all2!=0) { Mark[m].typed[fam].N++; if(status==1) {Mark[m].typed[fam].Nt++; Mark[m].Nt++; } if(status==0) {Mark[m].Nu++; Mark[m].typed[fam].Nu++; } } if (status==2 && all1!=0 && all2!=0) { Mark[m].typed[fam].Nc++; Mark[m].typed[fam].N++; Mark[m].Nc++; } if (all1>Mark[m].Nball) Mark[m].Nball=all1; if (all2>Mark[m].Nball) Mark[m].Nball=all2; } } } if(MYCONTINUE==1) { TYPEDNUM=mytyped; /* printf("There are %d total typed for marker %d \n",mytyped,BIGMARKER); */ } return(MYCONTINUE); } /* TO RUN THE PROGRAM, USE COMMANDS ./MQLS-XM -p MQLS_XM_software_input.pedinfo -g NEW_PEDIGREE_ASSOCIATION_TRANSPOSE_20SNPs.tped -k KINSHIP_PEDIGREE.output -r pedigree_prevalence ./MQLS_XM -p example.ped -g example.geno -k example.kinship -r example.prev -n example.SNPnames gcc -lm -O3 ROADTRIPS_SOURCE_NEW_REVAMPEDMSM2.c -o ROADTRIPS ./ROADTRIPS -p example.ped -g example.geno -k example.kinship -r example.prev -n example.SNPnames gcc -lm -O3 NEW_MQLS_TEST6.c -o MQLS */