DP-HLS
 All Classes Namespaces Files Functions Variables Typedefs Macros Pages
solution_viterbi.h
Go to the documentation of this file.
1 #pragma once
2 
3 #include <map>
4 #include <iostream>
5 #include <array>
6 #include <string>
7 #include <limits>
8 #include <fstream>
9 #include <iostream>
10 #include <iomanip> // For std::setw and std::setfill
11 #include <complex>
12 #include <set>
13 #include "host_utils.h"
14 #include <cassert>
15 
30 template <typename PENALTY_T, int SOL_MAX_QUERY_LENGTH, int SOL_MAX_REFERENCE_LENGTH, int SOL_N_LAYERS>
31 void viterbi_solution(std::string query, std::string reference, PENALTY_T &penalties,
32  array<array<array<double, SOL_MAX_REFERENCE_LENGTH>, SOL_MAX_QUERY_LENGTH>, SOL_N_LAYERS> &score_mat,
33  array<array<string, SOL_MAX_REFERENCE_LENGTH>, SOL_MAX_QUERY_LENGTH> &tb_mat,
34  map<string, string> &alignments)
35 {
36  // Layer 0: Insertion Matrix
37  // Layer 1: Match Mismatch Matrix
38  // Layer 2: Deletion Matrix
39 
40  // Declare initial column and row scores
41  array<array<double, SOL_N_LAYERS>, SOL_MAX_QUERY_LENGTH> initial_col;
42  array<array<double, SOL_N_LAYERS>, SOL_MAX_REFERENCE_LENGTH> initial_row;
43 
44  double init_penal = penalties.log_lambda;
45  // Initialize intial column and row values
46  for (int i = 0; i < SOL_MAX_QUERY_LENGTH; i++)
47  {
48  init_penal += penalties.log_1_m_mu;
49  initial_col[i][0] = -1000000;
50  initial_col[i][1] = init_penal;
51  initial_col[i][2] = -1000000; // This can be whatever, since won't be accessed
52  }
53  init_penal = penalties.log_lambda;
54  for (int j = 0; j < SOL_MAX_REFERENCE_LENGTH; j++)
55  {
56  init_penal += penalties.log_1_m_mu;
57  initial_row[j][0] = -1000000; // This can be whatever, since won't be accessed
58  initial_row[j][1] = init_penal;
59  initial_row[j][2] = -1000000;
60  }
61 
62  // Initialize the score matrix
63  for (int i = 0; i < SOL_MAX_QUERY_LENGTH; i++)
64  {
65  for (int j = 0; j < SOL_MAX_REFERENCE_LENGTH; j++)
66  {
67  for (int k = 0; k < SOL_N_LAYERS; k++)
68  {
69  score_mat[k][i][j] = 0;
70  }
71  }
72  }
73 
74  // Initialize the traceback matrix
75  for (int i = 0; i < SOL_MAX_QUERY_LENGTH; i++)
76  {
77  for (int j = 0; j < SOL_MAX_REFERENCE_LENGTH; j++)
78  {
79  tb_mat[i][j] = '*';
80  }
81  }
82 
83  // Fill in the DP matrix and traceback matrix
84  for (int i = 0; i < query.length(); i++)
85  {
86 
87  for (int j = 0; j < reference.length(); j++)
88  {
89  double del_up;
90  double ins_left;
91  double main_diag, main_up, main_left;
92  double ins_diag, del_diag;
93 
94  if (i == 0 && j == 0)
95  {
96  main_diag = 0.0;
97  main_up = initial_row[0][1];
98  main_left = initial_col[0][1];
99 
100  ins_left = initial_col[0][0];
101  ins_diag = 0;
102 
103  del_up = initial_row[0][2];
104  del_diag = 0;
105  }
106  else if (i == 0 && j > 0) // In first row, not column
107  {
108  main_diag = initial_row[j - 1][1];
109  main_up = initial_row[j][1];
110  main_left = score_mat[1][i][j - 1];
111 
112  ins_left = score_mat[0][0][j - 1];
113  ins_diag = initial_row[j - 1][0];
114 
115  del_up = initial_row[j][2];
116  del_diag = initial_row[j - 1][2];
117  }
118  else if (i > 0 && j == 0)
119  {
120  main_diag = initial_col[i - 1][1];
121  main_up = score_mat[1][i - 1][j];
122  main_left = initial_col[i][1];
123 
124  ins_left = initial_col[i][0];
125  ins_diag = initial_col[i - 1][0];
126 
127  del_up = score_mat[2][i - 1][j];
128  del_diag = initial_col[i - 1][2];
129  }
130  else
131  {
132  main_diag = score_mat[1][i - 1][j - 1];
133  main_up = score_mat[1][i - 1][j];
134  main_left = score_mat[1][i][j - 1];
135 
136  ins_left = score_mat[0][i][j - 1];
137  ins_diag = score_mat[0][i-1][j-1];
138 
139  del_up = score_mat[2][i - 1][j];
140  del_diag = score_mat[2][i-1][j-1];
141  }
142 
143  double del_write, ins_write, main_write; // values write to the score matrix
144  double ins_open, ins_extend, del_open, del_extend; // values for the insertion and deletion matrix
145  ins_open = main_left + penalties.log_lambda;
146  ins_extend = ins_left + penalties.log_1_m_mu;
147  del_open = main_up + penalties.log_lambda;
148  del_extend = del_up + penalties.log_1_m_mu;
149 
150  bool ins_open_b = ins_extend < ins_open;
151  bool del_open_b = del_extend < del_open;
152  ins_write = penalties.transition[HostUtils::Sequence::base_to_num(query[i])][4] + (ins_open_b ? ins_open : ins_extend);
153  del_write = penalties.transition[4][HostUtils::Sequence::base_to_num(reference[j])] + (del_open_b ? del_open : del_extend);
154 
155  double main_match = penalties.log_1_m_2_lambda + main_diag;
156  double main_ins = penalties.log_mu + ins_diag;
157  double main_del = penalties.log_mu + del_diag;
158 
159  double main_max = main_match;
160  main_max = main_max > main_ins ? main_max : main_ins;
161  main_max = main_max > main_del ? main_max : main_del;
162  main_write = penalties.transition[HostUtils::Sequence::base_to_num(query[i])][HostUtils::Sequence::base_to_num(reference[j])] + main_max;
163 
164  score_mat[0][i][j] = ins_write;
165  score_mat[1][i][j] = main_write;
166  score_mat[2][i][j] = del_write;
167 
168  // Choose the maximum score and update the traceback matrix
169  if (main_max == main_ins)
170  {
171  tb_mat[i][j] = ins_open_b ? "L " : "LE"; // 'L' indicates a left direction (insertion)
172  }
173  else if (main_max == main_del)
174  {
175  tb_mat[i][j] = del_open_b ? "U " : "UE"; // 'U' indicates an up direction (deletion)
176  }
177  else if (main_max == main_match)
178  {
179  tb_mat[i][j] = "D "; // 'D' indicates a diagonal direction (match or mismatch)
180  }
181  else
182  {
183  cout << "ERROR: Invalid traceback matrix value" << endl;
184  }
185  }
186  }
187 
188  // Traceback to find the aligned sequences
189  int i = query.length() - 1;
190  int j = reference.length() - 1;
191  string aligned_query = "";
192  string aligned_reference = "";
193  while (i >= 0 && j >= 0)
194  {
195  if (tb_mat[i][j] == "D ")
196  {
197  aligned_query = query[i] + aligned_query;
198  aligned_reference = reference[j] + aligned_reference;
199  i--;
200  j--;
201  }
202  else if (tb_mat[i][j] == "U ")
203  {
204  aligned_query = query[i] + aligned_query;
205  aligned_reference = "_" + aligned_reference;
206  i--;
207  }
208  else if (tb_mat[i][j] == "UE")
209  {
210  aligned_query = query[i] + aligned_query;
211  aligned_reference = "_" + aligned_reference;
212  i--;
213  }
214  else if (tb_mat[i][j] == "L ")
215  {
216  aligned_query = "_" + aligned_query;
217  aligned_reference = reference[j] + aligned_reference;
218  j--;
219  }
220  else if (tb_mat[i][j] == "LE")
221  {
222  aligned_query = "_" + aligned_query;
223  aligned_reference = reference[j] + aligned_reference;
224  j--;
225  }
226  else
227  {
228  cout << "ERROR: Invalid traceback matrix value" << endl;
229  break;
230  }
231  }
232 
233  // Finish up the rest of the characters in the query and reference
234  while (i >= 0)
235  {
236  aligned_query = query[i] + aligned_query;
237  aligned_reference = "_" + aligned_reference;
238  i--;
239  }
240  while (j >= 0)
241  {
242  aligned_query = "_" + aligned_query;
243  aligned_reference = reference[j] + aligned_reference;
244  j--;
245  }
246 
247  alignments["query"] = aligned_query;
248  alignments["reference"] = aligned_reference;
249 }
void viterbi_solution(std::string query, std::string reference, PENALTY_T &penalties, array< array< array< double, SOL_MAX_REFERENCE_LENGTH >, SOL_MAX_QUERY_LENGTH >, SOL_N_LAYERS > &score_mat, array< array< string, SOL_MAX_REFERENCE_LENGTH >, SOL_MAX_QUERY_LENGTH > &tb_mat, map< string, string > &alignments)
Viterbi itself is a global alignment, don&#39;t need to consider the case of making it local...
Definition: solution_viterbi.h:31
int base_to_num(char base)
Map a single base to a number. A: 0, C: 1, G: 2, T: 3, _: 4.
Contains helper functions to write the host side code of the kernel.