/* Problem 3--Approximate Matches This problem is a variation on the Longest Common Subsequence problem. If you subtract out the LCS from both strings, what is left is the minimum number of deletions. */ #include #include #include typedef struct LLS { char *s; struct LLS *next; } LLS; typedef struct CS { char c; struct CS *next; } CS; int main (int argc, char **argv); char * Validate (char *a, int i, int j, char *b, LLS **l); int LCS (char *a, char *b); int max (int a, int b); char *ReadString (FILE *in); int main (int argc, char **argv) { int r, mm, i, j, mct; FILE *in, *out; LLS *l, *t; char *a, *b, *c; in = fopen ("prob3.in","r"); out = fopen ("prob3.out","w"); for (;;) { fscanf (in,"%d",&mm); a = ReadString (in); /* Read past the end-of-line */ free (a); if (mm < 0) break; l = NULL; a = ReadString (in); /* Get the strings */ b = ReadString (in); i = 0; for (;;) { if (i == strlen (a)) break; j = i; for (;;) { if (j == strlen (a)) break; /* Process all substrings */ c = Validate (a,i,j,b,&l); /* If I haven't done this one yet */ if (c != NULL) { mct = LCS (c,b); /* Figure out the deletions */ if (mct <= mm) { if (mct==0) { fprintf (out,"%s matches %s\n",c,b); } else { if (mct==1) { fprintf (out,"%s matches %s with 1 mismatch\n",c,b); } else { fprintf (out,"%s matches %s with %d mismatches\n",c,b,mct); } } } else { } } else { } j++; } i++; } for (;;) { /* Release memory */ if (l==NULL) break; t = l; l = t->next; free (t->s); free (t); } free (a); free (b); fprintf (out,"\n"); } fclose (in); fclose (out); r = EXIT_SUCCESS; return r; } /* Validate accepts a string and adds it to a linked list of strings. If this one is already there, it is not readded. */ char * Validate (char *a, int i, int j, char *b, LLS **l) { char *c; int ii, found; LLS *t; c = malloc ((j-i+2)*sizeof (char)); ii = i; for (;;) { /* Build appropriate substring */ if (ii > j) break; c[ii-i] = a[ii]; ii++; } c[ii-i] = 0; found = 0; t = *l; for (;;) { /* Search through linked list for string */ if (found || t==NULL) break; found = strcmp (c,t->s)==0; t = t->next; } if (!found) { /* If it's not there, add it */ t = malloc (sizeof (LLS)); t->s = c; t->next = *l; *l = t; } else { /* If it is there, deallocate */ free (c); c = NULL; } return c; } /* LCS uses the standard dynamic programming technique to find the length of the longest common subsequence. */ int LCS (char *a, char *b) { int **M, i, j, t; M = malloc ((1+strlen(a))*sizeof (int *)); /* Allocate table */ i = 0; for (;;) { if (i==1+strlen(a)) break; M[i] = malloc ((1+strlen(b))*sizeof (int)); i++; } i = 0; /* Build table so that M[i][j] represents the number of */ for (;;) { /* characters in the LCS in the first i characters of a */ if (i==1+strlen(a)) break; /* and the first j characters of b */ j = 0; for (;;) { if (j==1+strlen(b)) break; if (i==0 || j==0) { M[i][j] = 0; } else { if (a[i-1] == b[j-1]) { M[i][j] = M[i-1][j-1]+1; } else { M[i][j] = max (M[i-1][j],M[i][j-1]); } } j++; } i++; } /* Compute number of deletions */ t = strlen (a) + strlen (b) - 2*M[strlen (a)][strlen (b)]; i = 0; for (;;) { /* release the matrix */ if (i==1+strlen(a)) break; free (M[i]); i++; } free (M); return t; } /* max returns the maximum of two integers */ int max (int a, int b) { int m; if (a > b) { m = a; } else { m = b; } return m; } /* ReadString reads and returns an end-of-line terminated string from in */ char *ReadString (FILE *in) { char c; int ct; CS *cl, *cct; char *s; cl = NULL; ct = 0; for (;;) { /* Read in characters one at a time and add to linked list*/ fscanf (in,"%c",&c); if (c=='\n') break; /* stopping when end-of-line is encountered */ cct = malloc (sizeof (CS)); cct->c = c; cct->next = cl; cl = cct; ct++; } s = malloc ((ct+1)*sizeof (char)); /*Allocate enough memory for str */ s[ct] = 0; /* null-terminate it */ for (;;) { if (ct==0) break; /*Remove characters from linked list and add to str*/ ct--; s[ct] = cl->c; cct = cl; cl = cct->next; free (cct); } return s; }