/****************************************************************************************** * Qian 5/23/2009 * Added function checkParentsSex to detect mismatch between parental status and sex value * (e.g. male mother, female father). Errors are printed to screen and to file sexErrorFile. * * Qian Zhang 2/25/2009 * X-kinship coefficients. * * Note: * - Individual ID's can be any integer. No order is assumed on the ID's. * * ----------------- * Catherine Bourgain 12/1/2003 * Computes inbreeding and kinship coefficients for pedigrees using KARIGL algorithm (1981,Ann. Hum Genet 45:299-305) * Usage : ./KinInbcoef pedigreefile listname outputfile * where * - pedigreefile has 4 col. Fam indId fatherId motherId * - listname is a file with the list of the individuals for whom computation of the coefficients is required. Should have 2 columns : Fam indId * - outputfile has four col: Fam Id1 Id2 kinship/inbreeding coef * for all possible pairs of indiv listed in the listname file * * Inbreeding and Kinship coefficients are computed only for individuals from the same family. * * MAXFAM is the maximum number of families to be analyzed at the same time. Can be changed * MAXIND is the maximum number of individuals in each family for whom inbreeding/kinship coefficients are to be computed. Can be changed. * * g++ should be used for compilation (to allow use of map class) * **************************************************************************************/ #include #include #include using namespace std; #define MAXFAM 100 // Maximum number of families #define MAXIND 1000 // Maximum number of individuals per family // Makes "parents" an alias for "struct data" typedef struct data { int father; int mother; int generation; }parents; // Information for a family // Maps are used instead of arrays because no order is assumed on the individual ID's, // e.g. they need not be consecutive integers starting from 0 or 1, so this cuts down on // unused allocated memory. struct info { map ped; // Map between individual IDs and individuals' parents int Nsize; // Number of people in this family map sex; // Map between individual IDs and sex (1 for male, 2 for female) }; // Declare functions struct info Global[MAXFAM+1]; // Holds most of the information in this program void check(long *x, long *y,int boolfix); void create_generation(int fam,struct info *Global); double phi2(int i, int j,int fam); int make_generation(int ind,int per,int mer,struct info G); int checkParentsSex(); // Declare variables: int keep[MAXFAM+1]; // list contains ID numbers. list[fam][i] is the ID of individual i of family fam. int list[MAXFAM+1][MAXIND+1]; // i is the index of array ped. i=0 never gets used. // F is the number of families, fam is a family ID used by Global as an index (fam = 0 never gets used). int i,j,F,fam=0; FILE *infile; // Pedigree file FILE *listfile; // List file, says who coefficients should be calculated for FILE *outfile; // Output file // Error file listing an individual's ID and his or her parent's ID, // where the parent does not have the correct sex value (i.e. female father or male mother) FILE *sexErrorFile; // argc is the number of arguments that program takes + 1 (so including the name of the program). // argv is an array of inputs to the program, where the name of the program is argv[0]. int main(int argc, char *argv[]) { int NbInd=0,Nblist=0; int famold=1; // May 22 add on sexErrorFile if (argc!= 5) { printf("wrong usage. Should be ./idcoef pedfile listfile outfile sexErrorFile\n"); exit(1); } if ((infile=fopen(argv[1],"r"))==NULL) { printf("Can't open pedigree file\n"); exit(1); } if ((listfile=fopen(argv[2],"r"))==NULL) { printf("Can't open file with the list of individuals\n"); exit(1); } if ((outfile=fopen(argv[3],"w"))==NULL) { printf("Can't open outfile\n"); exit(1); } // May 22: Add on sexErrorFile if ((sexErrorFile=fopen(argv[4],"w"))==NULL) { printf("Can't open sexErrorFile\n"); exit(1); } // Intialize Nsize for each family for (fam=1;fam<=MAXFAM;fam++) { Global[fam].Nsize=0; } // Read in family number, individual ID, father, mother, and sex for the 1st person // in the pedigree file. Intialize values in Global for this person. NbInd++; fscanf(infile,"%d ",&fam); fscanf(infile,"%d ",&i); Global[fam].Nsize++; Global[fam].ped[i].mother=0; Global[fam].ped[i].father=0; Global[fam].ped[i].generation=0; Global[fam].sex[i] = 0; fscanf(infile,"%d %d %d\n", &Global[fam].ped[i].father, &Global[fam].ped[i].mother, &Global[fam].sex[i]); // Continue reading in family #, individual ID, father, mother, and sex data for every individual in the pedigree file. while (!feof(infile)) { fscanf(infile,"%d ",&fam); if (fam > MAXFAM) { printf("number of families exceeds the maximum.\n Change the value of MAXFAM in the source file and recompile\n"); exit(1); } // if statement is true if we have come across the first person (as listed in the pedigree file) of a new family if (fam!=famold) { if (fam!=(famold+1)) { printf("families should have following Id's from 1 to F\n\n"); exit(1); } else { famold = fam; } } Global[fam].Nsize++; // Update the size of the family as we read in info for each individual in the family if (Global[fam].Nsize > MAXIND) { printf("number of individuals in family %d exceeds the maximum.\n Change the value of MAXIND in the source file and recompile\n",fam); exit(1); } // Update count of total number of individuals in the pedigree file NbInd++; fscanf(infile, "%d", &i); Global[fam].ped[i].mother=0; Global[fam].ped[i].father=0; Global[fam].ped[i].generation=0; Global[fam].sex[i] = 0; fscanf(infile,"%d %d %d\n", &Global[fam].ped[i].father, &Global[fam].ped[i].mother, &Global[fam].sex[i]); }// end while loop // After the while has finished, famold should be the number of families in the pedigree. // F gets assigned this number. F = famold; // May 22 Qian: Global should now hold all mother, father, sex info. // Check that all fathers have sex male and all mothers have sex female. // If there is an error, quit the program. if (checkParentsSex() == 1) { exit(1); } for (fam=1;fam<=F;fam++) { create_generation(fam,Global); // Determine the generation numbers of parents in family fam // Initialize keep and list keep[fam]=0; for (j=0; j<=Global[fam].Nsize; j++) { list[fam][j]=0; } } /* keep is an array whose indices are family IDs and * whose entries are the # of individuals of the family in the * listfile. Array list keeps individual ID numbers */ while (!feof(listfile)) { fscanf(listfile,"%d\n",&fam); fscanf(listfile,"%d\n",&i); // find returns the iterator corresponding to individual i in this family's map of individuals to parents. // end returns the iterator corresponding to one past the last element of map. if (Global[fam].ped.find(i)==Global[fam].ped.end()) { printf("Individual %d not in pedigree file...\n Skipped\n",i); } else { // Update count of the number of individuals in the list file Nblist++; if (Nblist<=NbInd) { keep[fam]++; list[fam][keep[fam]-1]=i; } else { printf("too many individuals in the list file\n"); exit(1); } }// end else }// end while // Calculate kinship and inbreeding coefficients for individuals in each family for (fam=1;fam<=F;fam++){ // Continue so long as family fam has individuals for whom coefficients need to be calculated // (i.e. this family has individuals listed in the list file) if (keep[fam]>0){ // Iterate through individuals in the family for (i=0;i<=keep[fam]-1;i++){ // Print inbreeding coefficient to output file: // 1 for male i, 1/2 + 1/2(phi2 between parents of i) for female i if(Global[fam].sex[list[fam][i]] == 1){// i is male fprintf(outfile,"%d %d %d %f\n",fam,list[fam][i],list[fam][i],1.0); } else{ // i is female fprintf(outfile,"%d %d %d %f\n",fam,list[fam][i],list[fam][i], phi2(Global[fam].ped[list[fam][i]].father,Global[fam].ped[list[fam][i]].mother,fam)); } // Print kinship coefficients to output file: // For i with every other individual j in the family, so long as the coefficient hasn't already been calculated. // Note coefficient(i,j) = coefficient(j,i). for (j=i+1;j<=keep[fam]-1;j++){ fprintf(outfile,"%d %d %d %f\n",fam,list[fam][i],list[fam][j],phi2(list[fam][i],list[fam][j],fam)); } } }// end if } // end for // close all files fclose(infile); fclose(listfile); fclose(outfile); fclose(sexErrorFile); } // end main function /*----------------------------------*/ // Switch ID numbers *x and *y if individual *x is of an older generation than y. // Dereference memory addresses x and y to get their contents *x and *y, // which will be integers, ID numbers. void check(int *x, int *y,int fam) { int z; if (Global[fam].ped[*x].generation >= Global[fam].ped[*y].generation) { return; } else { // If individual *x is of generation < *y's generation (i.e. of an older generation than y) // swap the ID values *x and *y z = *x; *x = *y; *y = z; } } /*------------------------------------- */ // Calculates coefficients (inbreeding if i = j, kinship if i != j) // between individual of ID i, individual of ID j, of family fam. double phi2(int i, int j,int fam){ if (i * j == 0) { return 0.0; } else if (i == j) { // If i male, return inbreeding coeff 1 if(Global[fam].sex[i] == 1) { return 1; } else { // i is female, return inbreeding coeff 1/2 + 1/2phi(i's dad, i's mom) return ((1 + phi2(Global[fam].ped[i].father, Global[fam].ped[i].mother, fam)) / 2); } } else { // i != j // After the check function, i should denote the individual of the same or newer // generation than the other individual in the pair of individuals {i,j} check(&i, &j,fam); if (Global[fam].sex[i] == 1) { // i is male, calculate phi(i's mom, j, fam) return(phi2(Global[fam].ped[i].mother, j, fam)); } else { // i is female return ((phi2(Global[fam].ped[i].father, j, fam) + phi2(Global[fam].ped[i].mother, j, fam)) / 2); } } } /*--------------------------------------*/ void create_generation(int fam,struct info *Global) { int i=0, per=0, mer=0, generation=0, pas=0, ind=0; // :: is scope operator // Declare iter and iterend, of type iterator, which is an attribute of the map class map::iterator iter,iterend; iter = Global[fam].ped.begin(); // iterator corresponding to 1st individual in the domain of the map iterend = Global[fam].ped.end(); // iterator corresponding to element that is one after the last element of the map while (iter!=iterend) { // Iterator is like a pointer to a pair (first, second). // first is an individual ID, an element of the domain of the map between individuals and parents. // second is the thing of type parents that first maps to. ind = iter->first; // Dereference iterator iter, assign first to ind per = iter->second.father; mer = iter->second.mother; // if parents second of individual first have unassigned generation, assign a generation if (iter->second.generation==0) { iter->second.generation=make_generation(ind, per, mer, Global[fam]); } ++iter; // Make iter be the next iterator, the one corresponding to the next element in the domain of the map } } /* ------------------------------------- */ // ind is individual ID, per is father ID, mer is mother ID. // generations are consecutive integers ordered from 1 to whatever, where 1 is the oldest generation, // and the next integer is the next generation. int make_generation(int ind, int per, int mer, struct info G) { if (per!=0 && G.ped[per].generation==0) { G.ped[per].generation=make_generation(per, G.ped[per].father, G.ped[per].mother, G); } if (mer!=0 && G.ped[mer].generation==0) { G.ped[mer].generation=make_generation(mer, G.ped[mer].father, G.ped[mer].mother, G); } // If mom and dad are from the 1st generation, assign them generation 1 if (per==0 && mer==0) { return(1); } else { // Return the newest generation # if (G.ped[per].generation >= G.ped[mer].generation) { return(G.ped[per].generation+1); } else { return(G.ped[mer].generation+1); } } } // May 22: Checks that each individual ind of family fam has a father whose sex is male and a mother whose sex is female. // If there is an error (either father listed as female or mother listed as male), // then print the individual's ID and ID of parent with incorrectly listed sex to the screen, as well as to sexErrorFile. // Each row of sexErrorFile gives data ordered: family ID, individual ID, ID of parent with mismatched sex, // parent's status (mother or father), and parent's sex (male or female). int checkParentsSex() { // Integer error is 1 if there is an error, 0 if there is no error int father=0,mother=0,ind=0, error = 0; map::iterator iter,iterend; for(fam = 1; fam <= F; fam++) { iter = Global[fam].ped.begin(); // iterator corresponding to 1st individual in the domain of the map iterend = Global[fam].ped.end(); // iterator corresponding to element that is one after the last element of the map while (iter != iterend) { ind = iter->first; // ID # for this individual father = iter->second.father; // ID # of this individuals' father mother = iter->second.mother; // ID # of this individuals' mother // If individual ind is not a founder (father ID not 0) // and ind's father is not male (encoded 1), then error if (father != 0 && Global[fam].sex[father] != 1) { printf("Error: Individual %d in family %d has female father %d \n", ind, fam, father); fprintf(sexErrorFile,"%d %d %d %d %d\n", fam, ind, father, 1, 2); error = 1; } // If individual ind is not a founder (mother ID not 0) // and ind's mother is not female, then error if (mother != 0 && Global[fam].sex[mother] == 1) { printf("Error: Individual %d in family %d has male mother %d \n ", ind, fam, mother); fprintf(sexErrorFile,"%d %d %d %d %d\n", fam, ind, mother, 2, 1); error = 1; } ++iter; // Make iter be the next iterator, the one corresponding to the next element in the domain of the map } } return(error); }