26 #define AL_END_H 0b000 // 0 stopping condition
27 #define AL_INS_H 0b001 // 1 Align Insertion
28 #define AL_MMI_H 0b010 // 2 Align Match/Mismatch
29 #define AL_DEL_H 0b011 // 3 Align Deletion
30 #define AL_NULL_H 0b100 // 4 Do not change coordinate
39 std::random_device rd;
42 std::mt19937 rng(rd());
45 std::uniform_int_distribution<int> dist(1, 100);
48 for (
int i = 0; i < length; i++)
50 seq += alphabet[dist(rng) % N];
58 std::random_device rd;
61 std::mt19937 rng(rd());
64 std::uniform_int_distribution<int> dist(1, 100);
67 for (
int i = 0; i < length; i++)
69 seq.push_back(T(dist(rng) % 10, dist(rng) % 10));
85 std::vector<std::array<int, 5>> profile;
86 for (
int i = 0; i < len; i++)
89 std::array<int, 5> col;
90 for (
int j = 0; j < 4; j++)
92 col[j] = rand() % remaining;
95 col[4] = WID - remaining;
96 profile.push_back(col);
116 std::vector<std::string>
readFasta(
const std::string &filePath);
118 template <
typename T,
size_t M,
size_t N>
121 for (
int i = 0; i < M; i++)
123 for (
int j = 0; j < N; j++)
125 dst[j][i] = src[i][j];
156 template <
typename T>
195 template <
typename T,
size_t MAX_QRY_LENGTH,
size_t MAX_REF_LENGTH>
197 int query_start_idx,
int reference_start_idx,
198 T (&tb_streams)[MAX_REF_LENGTH + MAX_QRY_LENGTH])
200 std::vector<string> alignments_query;
201 std::vector<string> alignments_reference;
203 int num_queries = querys.size();
204 int num_references = references.size();
206 for (
int i = 0; i < num_queries; i++)
208 alignments_query.push_back(
"");
210 for (
int i = 0; i < num_references; i++)
212 alignments_reference.push_back(
"");
214 std::cout <<
"Number of queries: " << num_queries << endl;
215 std::cout <<
"Number of references: " << num_references << endl;
220 std::vector<stack<char>> query_stack;
221 std::vector<stack<char>> reference_stack;
224 for (
int i = 0; i < querys.size(); i++)
227 for (
int j = 0; j < query_start_idx; j++)
229 q.push(querys[i][j]);
231 query_stack.push_back(q);
234 for (
int i = 0; i < references.size(); i++)
237 for (
int j = 0; j < reference_start_idx; j++)
239 r.push(references[i][j]);
241 reference_stack.push_back(r);
250 T *curr_ptr = &tb_streams[0];
259 for (
int i = 0; i < querys.size(); i++)
261 alignments_query[i].insert(0, 1, query_stack[i].top());
262 query_stack[i].pop();
264 for (
int i = 0; i < reference_stack.size(); i++)
266 alignments_reference[i].insert(0, 1, reference_stack[i].top());
267 reference_stack[i].pop();
272 for (
int i = 0; i < query_stack.size(); i++)
274 alignments_query[i].insert(0, 1,
'_');
276 for (
int i = 0; i < reference_stack.size(); i++)
278 alignments_reference[i].insert(0, 1, reference_stack[i].top());
279 reference_stack[i].pop();
284 for (
int i = 0; i < query_stack.size(); i++)
286 alignments_query[i].insert(0, 1, query_stack[i].top());
287 query_stack[i].pop();
289 for (
int i = 0; i < reference_stack.size(); i++)
291 alignments_reference[i].insert(0, 1,
'_');
300 printf(
"Alignment Output Iteartion End\n");
308 for (
int i = 0; i < query_stack.size(); i++)
310 while (!query_stack[i].empty())
312 alignments_query[i].insert(0, 1, query_stack[i].top());
313 query_stack[i].pop();
317 for (
int i = 0; i < reference_stack.size(); i++)
319 while (!reference_stack[i].empty())
321 alignments_reference[i].insert(0, 1, reference_stack[i].top());
322 reference_stack[i].pop();
327 for (
int i = 0; i < query_stack.size(); i++)
329 while (alignments_query[i].length() < alignments_reference[0].length())
332 alignments_query[i].insert(0, 1,
'_');
336 for (
int i = 0; i < reference_stack.size(); i++)
338 while (alignments_reference[i].length() < alignments_query[0].length())
341 alignments_reference[i].insert(0, 1,
'_');
346 std::vector<string> alignments;
347 for (
int i = 0; i < querys.size(); i++)
349 alignments.push_back(alignments_query[i]);
351 for (
int i = 0; i < references.size(); i++)
353 alignments.push_back(alignments_reference[i]);
375 template <
typename T,
size_t MAX_QRY_LENGTH,
size_t MAX_REF_LENGTH>
377 int query_start_idx,
int reference_start_idx,
378 T (&tb_streams)[MAX_REF_LENGTH + MAX_QRY_LENGTH])
383 string alignment_query =
"";
384 string alignment_reference =
"";
386 stack<char> query_stack;
387 stack<char> reference_stack;
390 for (
int i = 0; i < query_start_idx + 1; i++)
392 query_stack.push(query[i]);
394 for (
int i = 0; i < reference_start_idx + 1; i++)
396 reference_stack.push(reference[i]);
405 T *curr_ptr = &tb_streams[0];
413 alignment_query = alignment_query.insert(0, 1, query_stack.top());
414 alignment_reference = alignment_reference.insert(0, 1, reference_stack.top());
416 reference_stack.pop();
420 alignment_query = alignment_query.insert(0, 1,
'_');
421 alignment_reference = alignment_reference.insert(0, 1, reference_stack.top());
422 reference_stack.pop();
426 alignment_query = alignment_query.insert(0, 1, query_stack.top());
427 alignment_reference = alignment_reference.insert(0, 1,
'_');
436 printf(
"Alignment Output Iteartion End\n");
442 while (!query_stack.empty())
444 alignment_query = alignment_query.insert(0, 1, query_stack.top());
447 while (!reference_stack.empty())
449 alignment_reference = alignment_reference.insert(0, 1, reference_stack.top());
450 reference_stack.pop();
454 while (alignment_query.length() < alignment_reference.length())
456 alignment_query = alignment_query.insert(0, 1,
'_');
459 while (alignment_reference.length() < alignment_query.length())
461 alignment_reference = alignment_reference.insert(0, 1,
'_');
465 map<string, string> alignments;
466 alignments[
"query"] = alignment_query;
467 alignments[
"reference"] = alignment_reference;
471 template <
typename T,
size_t MAX_QRY_LENGTH,
size_t MAX_REF_LENGTH>
473 int query_start_idx,
int reference_start_idx,
474 T (&tb_streams)[MAX_REF_LENGTH + MAX_QRY_LENGTH])
476 string alignment_reference =
"";
477 string alignment_query =
"";
479 T *curr_ptr = &tb_streams[0];
480 if (query_start_idx == query.length() - 1)
482 char *qry_ptr = &query[query.length() - 1];
483 char *ref_ptr = &reference[reference.length() - 1];
485 for (
int i = 0; i < reference.size() - reference_start_idx; i++)
487 alignment_query = alignment_query.insert(0, 1,
'_');
488 alignment_reference = alignment_reference.insert(0, 1, *ref_ptr--);
498 if (*qry_ptr ==
'\0' ||
'\0' == *ref_ptr)
506 alignment_query = alignment_query.insert(0, 1, *qry_ptr--);
507 alignment_reference = alignment_reference.insert(0, 1, *ref_ptr--);
511 alignment_query = alignment_query.insert(0, 1,
'_');
512 alignment_reference = alignment_reference.insert(0, 1, *ref_ptr--);
516 alignment_query = alignment_query.insert(0, 1, *qry_ptr--);
517 alignment_reference = alignment_reference.insert(0, 1,
'_');
525 printf(
"Alignment Output Iteartion End\n");
537 while (*qry_ptr !=
'\0')
539 alignment_query = alignment_query.insert(0, 1, *qry_ptr--);
541 while (*qry_ptr !=
'\0'){
542 alignment_reference = alignment_reference.insert(0, 1, *ref_ptr--);
545 while (*ref_ptr !=
'\0'){
546 alignment_query = alignment_query.insert(0, 1,
'_');
547 alignment_reference = alignment_reference.insert(0, 1, *ref_ptr--);
551 else if (reference_start_idx == reference.length() - 1)
556 std::runtime_error(
"Alignment is not actually semiglobal, please check traceback start index");
558 cout <<
"Reconstructed" << endl;
560 map<string, string> alignments;
561 alignments[
"query"] = alignment_query;
562 alignments[
"reference"] = alignment_reference;
566 template <
typename T,
size_t MAX_QRY_LENGTH,
size_t MAX_REF_LENGTH>
568 int query_start_idx,
int reference_start_idx,
569 T (&tb_streams)[MAX_REF_LENGTH + MAX_QRY_LENGTH])
571 string alignment_reference =
"";
572 string alignment_query =
"";
574 T *curr_ptr = &tb_streams[0];
576 char *qry_ptr = &query[query.length() - 1];
577 char *ref_ptr = &reference[reference.length() - 1];
579 for (
int i = 0; i < query.size() - query_start_idx; i++)
581 alignment_query = alignment_query.insert(0, 1, *qry_ptr--);
582 alignment_reference = alignment_reference.insert(0, 1,
'_');
596 if (*qry_ptr ==
'\0' ||
'\0' == *ref_ptr)
604 alignment_query = alignment_query.insert(0, 1, *qry_ptr--);
605 alignment_reference = alignment_reference.insert(0, 1, *ref_ptr--);
609 alignment_query = alignment_query.insert(0, 1,
'_');
610 alignment_reference = alignment_reference.insert(0, 1, *ref_ptr--);
614 alignment_query = alignment_query.insert(0, 1, *qry_ptr--);
615 alignment_reference = alignment_reference.insert(0, 1,
'_');
623 printf(
"Alignment Output Iteartion End\n");
634 while (*ref_ptr !=
'\0'){
635 alignment_query = alignment_query.insert(0, 1,
'_');
636 alignment_reference = alignment_reference.insert(0, 1, *ref_ptr--);
639 cout <<
"Reconstructed" << endl;
641 map<string, string> alignments;
642 alignments[
"query"] = alignment_query;
643 alignments[
"reference"] = alignment_reference;
647 template <
typename T,
int LEN>
651 for (
int i = 0; i < LEN; i++)
665 template <
typename T,
int NB,
int MAX_QRY_LENGTH,
int MAX_REF_LENGTH>
667 int query_start_idx[NB],
int reference_start_idx[NB],
668 T (&tb_streams)[NB][MAX_REF_LENGTH + MAX_QRY_LENGTH])
671 array<map<string, string>, NB> alignments;
673 for (
int i = 0; i < NB; i++)
675 alignments[i] = HostUtils::Sequence::ReconstructTraceback<T, MAX_QRY_LENGTH, MAX_REF_LENGTH>(query[i], reference[i],
676 query_start_idx[i], reference_start_idx[i], tb_streams[i]);
682 template <
typename T,
int NB,
int MAX_QRY_LENGTH,
int MAX_REF_LENGTH>
684 int query_start_idx[NB],
int reference_start_idx[NB],
685 T (&tb_streams)[NB][MAX_REF_LENGTH + MAX_QRY_LENGTH])
689 array<map<string, string>, NB> alignments;
691 for (
int i = 0; i < NB; i++)
693 alignments[i] = HostUtils::Sequence::ReconstructTracebackSemiglobal<T, MAX_QRY_LENGTH, MAX_REF_LENGTH>(query[i], reference[i],
694 query_start_idx[i], reference_start_idx[i], tb_streams[i]);
701 template <
typename T,
int NB,
int MAX_QRY_LENGTH,
int MAX_REF_LENGTH>
703 int query_start_idx[NB],
int reference_start_idx[NB],
704 T (&tb_streams)[NB][MAX_REF_LENGTH + MAX_QRY_LENGTH])
708 array<map<string, string>, NB> alignments;
710 for (
int i = 0; i < NB; i++)
712 alignments[i] = HostUtils::Sequence::ReconstructTracebackOverlapSuffixPrefix<T, MAX_QRY_LENGTH, MAX_REF_LENGTH>(query[i], reference[i],
713 query_start_idx[i], reference_start_idx[i], tb_streams[i]);
#define AL_INS_H
Definition: host_utils.h:27
void SwitchDimension(T(&src)[M][N], T(&dst)[N][M])
Definition: host_utils.h:119
#define AL_MMI_H
Definition: host_utils.h:28
array< map< string, string >, NB > ReconstructTracebackOverlapSuffixPrefixBlocks(string query[NB], string reference[NB], int query_start_idx[NB], int reference_start_idx[NB], T(&tb_streams)[NB][MAX_REF_LENGTH+MAX_QRY_LENGTH])
Definition: host_utils.h:702
std::vector< std::array< int, 5 > > MultipleSequencesToProfileAlign(std::vector< string > seq, int len)
Given an array of sequences, convert them to the format of the input of profile alignment.
string NavigationToString(T nav[LEN])
Definition: host_utils.h:648
std::map< string, string > ReconstructTracebackOverlapSuffixPrefix(string query, string reference, int query_start_idx, int reference_start_idx, T(&tb_streams)[MAX_REF_LENGTH+MAX_QRY_LENGTH])
Definition: host_utils.h:567
std::map< string, string > ReconstructTraceback(string query, string reference, int query_start_idx, int reference_start_idx, T(&tb_streams)[MAX_REF_LENGTH+MAX_QRY_LENGTH])
Function to reconstruct the alignment result from an array of traceback navigations.
Definition: host_utils.h:376
std::vector< std::string > readFasta(const std::string &filePath)
#define AL_END_H
Definition: host_utils.h:26
array< map< string, string >, NB > ReconstructTracebackBlocks(string query[NB], string reference[NB], int query_start_idx[NB], int reference_start_idx[NB], T(&tb_streams)[NB][MAX_REF_LENGTH+MAX_QRY_LENGTH])
Function to reconstruct the alignment result from the array of the traceback navigations for all bloc...
Definition: host_utils.h:666
string Sequence(char alphabet[N], int length)
Definition: host_utils.h:37
std::vector< T > SequenceComplex(int length)
Definition: host_utils.h:56
int base_to_num(char base)
Map a single base to a number. A: 0, C: 1, G: 2, T: 3, _: 4.
#define AL_DEL_H
Definition: host_utils.h:29
#define AL_NULL_H
Definition: host_utils.h:30
std::map< string, string > ReconstructTracebackSemiglobal(string query, string reference, int query_start_idx, int reference_start_idx, T(&tb_streams)[MAX_REF_LENGTH+MAX_QRY_LENGTH])
Definition: host_utils.h:472
array< map< string, string >, NB > ReconstructTracebackSemiglobalBlocks(string query[NB], string reference[NB], int query_start_idx[NB], int reference_start_idx[NB], T(&tb_streams)[NB][MAX_REF_LENGTH+MAX_QRY_LENGTH])
Definition: host_utils.h:683
string nav_to_string(T nav)
Definition: host_utils.h:157
std::vector< string > ReconstructTracebackProfile(std::vector< string > querys, std::vector< string > references, int query_start_idx, int reference_start_idx, T(&tb_streams)[MAX_REF_LENGTH+MAX_QRY_LENGTH])
Reconstruct the traceback for profile alignment from the array of traceback navigations.
Definition: host_utils.h:196
std::vector< std::array< int, 5 > > SequenceProfileAlignment(int len)
A function that generates a random sequence in the format of the input of profile alignment...
Definition: host_utils.h:83
char num_to_base(int num)
map< string, std::vector< string > > read_sequences_from_json(string file_path)
Read from a JSON file storing the sequences in the format of "specie_name": "dna string".