using namespace std; double smallest(double x, double y, double z) { return x < y ? (x < z ? x : z) : (y < z ? y : z); } double calculate_cost(char char1, char char2) { if (char1 == char2) return 0; else return 1; } bool calculate_identity(char char1, char char2) { if (char1 == char2) return 1; else return 0; } const string ss2_align_two_strings(const string & string1, const string & string2, double* score) { /* * Set up the penalties */ double v = 2.5; //opening a gap penalty double u = 0.5; //continuing a gap penalty int M = string1.length(); //length of string 1, row index int N = string2.length(); //length of string 2, column index bool skip_ten_eleven; //Vertical penalty matrix P, Horizontal Penalty matrix Q, Score matrix R double P[M+1][N+1]; double Q[M+1][N+1]; double R[M+1][N+1]; //Edge index matrices a, b, c, d, e, f, g bool a[M+2][N+2]; bool b[M+2][N+2]; bool c[M+2][N+2]; bool d[M+2][N+2]; bool e[M+2][N+2]; bool f[M+2][N+2]; bool g[M+2][N+2]; int idx; int jdx; double cost_value; string aligned_string; // Variables for setting direction values bool prev_vert = 0; bool prev_horz = 0; bool next_vert = 0; bool next_horz = 0; /* * Matrices initialization (Step 1) */ int max_MN; if (M>N) max_MN = M; else max_MN = N; for(jdx=0; jdx<(N+1); jdx++) { P[0][jdx] = 2*v + max_MN*u + 1; R[0][jdx] = v + jdx*u; } for(idx=0; idx<(M+1); idx++) { Q[idx][0] = 2*v + max_MN*u + 1; R[idx][0] = v + idx*u; } R[0][0] = 0; for(idx=0; idx<(M+2); idx++) { for (jdx = 0; jdx<(N+2); jdx++) { a[idx][jdx] = 0; b[idx][jdx] = 0; c[idx][jdx] = 0; d[idx][jdx] = 0; e[idx][jdx] = 0; f[idx][jdx] = 0; g[idx][jdx] = 0; } } c[M+1][N+1] = 1; /* for (idx = 0; idx < (M+1); idx++) { for (jdx = 0; jdx< (N+1); jdx++) { cout << idx << " " << jdx << " " << P[idx][jdx] << " " << Q[idx][jdx] << " " << R[idx][jdx] << " " ; cout << a[idx][jdx] << " " << b[idx][jdx] << " " << c[idx][jdx] << " " << d[idx][jdx] << " "; cout << e[idx][jdx] << " " << f[idx][jdx] << " " << g[idx][jdx] << endl; } } cout << endl; */ /* * Cost Assignment */ for(idx=0; idx<(M+1); idx++) { for (jdx = 0; jdx<(N+1); jdx++) { //Step 2 if (idx==0) P[idx][jdx] = 2*v + max_MN*u + 1; else if ((P[idx-1][jdx]) < (R[idx-1][jdx]+v)) P[idx][jdx] = u + P[idx-1][jdx] ; else P[idx][jdx] = u + R[idx-1][jdx]+v; //Step 3 if (idx==0){ } else if ( (P[idx][jdx]) == (P[idx-1][jdx] + u)) d[idx-1][jdx] = 1; if (idx==0){ } else if ( (P[idx][jdx]) == (R[idx-1][jdx] + v + u)) e[idx-1][jdx] = 1; //Step 4 if (jdx==0) Q[idx][jdx] = 2*v + max_MN*u + 1; else if ( (Q[idx][jdx-1]) < (R[idx][jdx-1] + v)) Q[idx][jdx] = u + Q[idx][jdx-1]; else Q[idx][jdx] = u + R[idx][jdx-1] + v; //Step 5 if (jdx==0){ } else if ( (Q[idx][jdx]) == (Q[idx][jdx-1] + u)) f[idx][jdx-1] = 1; if (jdx==0){ } else if ( (Q[idx][jdx]) == (R[idx][jdx-1] + v + u)) g[idx][jdx-1] = 1; //Step 6 cost_value = calculate_cost(string1[idx-1], string2[jdx-1]); if ((idx==0) && (jdx==0)) R[idx][jdx] = 0; else if ((idx==0) || (jdx==0)) R[idx][jdx] = smallest(P[idx][jdx], Q[idx][jdx], 2*v + max_MN*u + 1 + cost_value); else R[idx][jdx] = smallest(P[idx][jdx], Q[idx][jdx], R[idx-1][jdx-1] + cost_value); //Step 7 if (R[idx][jdx] == P[idx][jdx]) a[idx][jdx] = 1; if (R[idx][jdx] == Q[idx][jdx]) b[idx][jdx] = 1; if( (idx==0) || (jdx==0)) { if(R[idx][jdx] == cost_value) c[idx][jdx] = 1;} else if (R[idx][jdx] == (R[idx-1][jdx-1] + cost_value)) c[idx][jdx] = 1; } } /* * * Edge assignment * */ for (idx=M; idx>-1; idx--) { for (jdx=N; jdx>-1; jdx--) { //Step 8 if ( ((a[idx+1][jdx]==0)||(e[idx][jdx] == 0)) && ((b[idx][jdx+1]==0)||(g[idx][jdx]==0)) && ((c[idx+1][jdx+1]==0)) ) { a[idx][jdx] = 0; b[idx][jdx] = 0; c[idx][jdx] = 0; } //Step 9 if ((a[idx+1][jdx] == 0) && (b[idx][jdx+1] == 0) && (c[idx+1][jdx+1] == 0)) skip_ten_eleven = 1; else skip_ten_eleven = 0; if (!(skip_ten_eleven)) { //Step 10 if ((a[idx+1][jdx]==1) && (d[idx][jdx]==1)) { d[idx+1][jdx] = 1-e[idx][jdx]; e[idx][jdx] = 1-a[idx][jdx]; a[idx][jdx] = 1; } else { d[idx+1][jdx] = 0; e[idx][jdx] = 0; } //Step 11 if ((b[idx][jdx+1]==1) && (f[idx][jdx]==1)) { f[idx][jdx+1] = 1-g[idx][jdx]; g[idx][jdx] = 1-b[idx][jdx]; b[idx][jdx] = 1; } else { f[idx][jdx+1] = 0; g[idx][jdx] = 0; } } } } //Printing Variables /* for (idx = 0; idx < (M+1); idx++) { for (jdx = 0; jdx< (N+1); jdx++) { cout << idx << " " << jdx << " " << P[idx][jdx] << " " << Q[idx][jdx] << " " << R[idx][jdx] << " " ; cout << a[idx][jdx] << " " << b[idx][jdx] << " " << c[idx][jdx] << " " << d[idx][jdx] << " "; cout << e[idx][jdx] << " " << f[idx][jdx] << " " << g[idx][jdx] << endl; } } cout << endl; */ /* * Printing the alignment * using 'D' for deletion, 'I' for insertion, 'M' for a match and 'X' for a mismatch * */ *score = R[M][N]; idx = M; jdx = N; while( idx || jdx) { if (next_horz == 1 ) { aligned_string.insert(0, "D"); prev_horz = 1; prev_vert = 0; next_horz = 0; next_vert = 0; jdx = jdx-1; } else if (next_vert == 1) { aligned_string.insert(0, "I"); prev_horz = 0; prev_vert = 1; next_horz = 0; next_vert = 0; idx = idx-1; } else if ( b[idx][jdx] == 1 ) { if( g[idx][jdx]==1 ) { b[idx][jdx] = 0; if (prev_horz==1){ aligned_string.insert(0, "D"); prev_horz = 1; prev_vert = 0; jdx = jdx-1; if (f[idx][jdx] == 1){ next_horz = 1; } } } else { b[idx][jdx] = 0; aligned_string.insert(0, "D"); prev_horz = 1; prev_vert = 0; jdx = jdx-1; if (f[idx][jdx] == 1){ next_horz = 1; } } } else if ( a[idx][jdx] == 1 ) { if( e[idx][jdx] == 1 ) { a[idx][jdx] = 0; if (prev_vert == 1){ aligned_string.insert(0, "I"); prev_horz = 0; prev_vert = 1; idx = idx-1; if (d[idx][jdx] == 1){ next_vert = 1; } } } else { a[idx][jdx] = 0; aligned_string.insert(0, "I"); prev_horz = 0; prev_vert = 1; idx = idx-1; if (d[idx][jdx] == 1){ next_vert = 1; } } } else if (c[idx][jdx] == 1){ c[idx][jdx] = 0; if(calculate_identity(string1[idx-1], string2[jdx-1])) aligned_string.insert(0, "M"); else aligned_string.insert(0, "X"); idx = idx-1; jdx = jdx-1; prev_horz = 0; prev_vert = 0; next_horz = 0; next_vert = 0; } } return aligned_string; }