-
Notifications
You must be signed in to change notification settings - Fork 369
/
Copy pathburrow_wheeler_transform.c
138 lines (114 loc) · 4.65 KB
/
burrow_wheeler_transform.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
/* =============== PROBLEM STATEMENT =============
Implement the Burrows-Wheeler Transform (BWT) and its inverse.
Given an input string s, the program should compute its BWT and then reconstruct the original string using the inverse BWT.
============= SOLUTION ==============
The bwt function takes a string and returns its Burrows-Wheeler transform bwt.
1. The function generates the suffix array by using the iota function to fill a vector sa with values from 0 to n - 1, where n is the length of the input string.
2. The modified_quicksort function is then used to sort the suffixes lexicographically.
3. Finally, the transform bwt is constructed by concatenating the last characters of each sorted suffix, with a special sentinel character $ appended to the end.
The inverse_bwt function takes the transformed string bwt and returns the original string.
1. The function first constructs the first column of the Burrows-Wheeler matrix by sorting the transform bwt.
2. It then iteratively constructs t by adding the last character of the previous iteration to the beginning of the string.
3. The index of the next character is determined by counting the number of occurrences of the current character up to and including the current index in the transformed string bwt, and then finding the index of the nth occurrence of the character in the first column of the Burrows-Wheeler matrix.
Sample Input:
banana
Sample Output:
BWT of banana$ is annb$aa
Inverse BWT of annb$aa is banana
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Function to compare two suffixes for sorting
int compare_suffixes(const void* a, const void* b) {
const char* suffixA = *(const char**)a;
const char* suffixB = *(const char**)b;
return strcmp(suffixA, suffixB);
}
// Function to perform the Burrows-Wheeler Transform
char* bwt(char* s) {
strcat(s, "$");
// Length of the input string
int len = strlen(s);
// Create an array of rotations
char** rotations = (char**)malloc(len * sizeof(char*));
for (int i = 0; i < len; i++) {
rotations[i] = (char*)malloc((len + 1) * sizeof(char));
strcpy(rotations[i], s + i);
strncat(rotations[i], s, i);
}
// Sort the rotations lexicographically
qsort(rotations, len, sizeof(char*), compare_suffixes);
// Construct the transformed string
char* bwt = (char*)malloc((len + 1) * sizeof(char));
for (int i = 0; i < len; i++) {
bwt[i] = rotations[i][len - 1];
}
bwt[len] = '\0';
// Free memory allocated for rotations
for (int i = 0; i < len; i++) {
free(rotations[i]);
}
free(rotations);
return bwt;
}
char* inverse_bwt(char* bwt_string) {
// Count the occurrences of each character
int counts[256] = {0};
int len = strlen(bwt_string);
for (int i = 0; i < len; i++) {
counts[bwt_string[i]]++;
}
// Generate the sorted list of characters
char sorted_chars[256];
int sorted_count = 0;
for (int i = 0; i < 256; i++) {
if (counts[i] > 0) {
sorted_chars[sorted_count++] = i;
}
}
// Construct the first column of the transformation matrix
char first_column[len];
int first_column_index = 0;
for (int i = 0; i < sorted_count; i++) {
char current_char = sorted_chars[i];
int count = counts[current_char];
for (int j = 0; j < count; j++) {
first_column[first_column_index++] = current_char;
}
}
// Perform the reverse transformation
int index = 0;
char* original_string = (char*)malloc((len + 1) * sizeof(char));
original_string[len] = '\0';
for (int i = 0; i < len; i++) {
char current_char = bwt_string[index];
original_string[len - i - 1] = current_char;
index = -1;
for (int j = 0; j < len; j++) {
if (first_column[j] == current_char) {
index = j;
first_column[j] = '\0';
break;
}
}
}
char* new_string = original_string + 1; // new string that starts from the second character (since pointer)
return new_string;
}
int main() {
char input[100]; // Declare an array to store user input
// Get input from the user
printf("Enter a string: ");
fgets(input, sizeof(input), stdin);
input[strcspn(input, "\n")] = '\0'; // Remove the newline character
// Burrows-Wheeler Transform
char* bwt_transform = bwt(input);
printf("Transformed string: %s\n", bwt_transform);
// Inverse Burrows-Wheeler Transform
char* original = inverse_bwt(bwt_transform);
printf("Original string: %s\n", original);
free(bwt_transform);
free(original);
return 0;
}