#include #include #include #include #include using namespace std; // We use the distance function to calculate the most likely transition between a parent and // a daughter sequence. The stgRNA sequence from amongst the parent sequences that has the least // distance to the daughter will be given a score of 1. #include "linked_stgRNA.h" //The function below is used to check if the stgRNA can participate in any self-targeted action bool check_self(string stgRNA, int len) { if (!stgRNA.compare(len-7, 7, "MMMMMMM")) return 1; else return 0; } //The function below returns the distance between two stgRNA sequences depending on their identity // using the following score : - D->M = 1, I->M = 1, I->D = 2, X->M or I or D = 1 int pair_distance(string stgRNA1, string stgRNA2) { int distance = 0; for (int idx = 0; idxget_child_pointer(); while(iter_linked_stgRNA){ num_reads = num_reads + iter_linked_stgRNA->get_count(); iter_linked_stgRNA = iter_linked_stgRNA->get_child_pointer(); } return num_reads; } //The function below returns the index of the specific stgRNA kmer by looking up the index_linked_stgRNA int search_index_linked_stgRNA(linked_stgRNA* index_linked_stgRNA, const string query_kmer){ linked_stgRNA* iter_linked_stgRNA; iter_linked_stgRNA = index_linked_stgRNA->get_child_pointer(); while(!(query_kmer.compare(iter_linked_stgRNA->get_kmer())==0)) { iter_linked_stgRNA = iter_linked_stgRNA->get_child_pointer(); } return iter_linked_stgRNA->get_count(); } int main(int argc, char* argv[]) { // Check the number of parameters if (argc !=5) { // Tell the user how to run the program std::cerr << "Usage: " << argv[0] << " INPUT_BARCODE_THREADS_Kmers.txt " << "stgRNA Index file " << "OUTPUT_TRANSITION_PROB_MATRIX " << "OUTPUT_TRANSITION_PROB_COUNTS "<< std::endl; return 1; } ifstream barcode_threads; ifstream stgRNA_indices; ofstream transition_prob; ofstream transition_prob_values; ofstream transition_prob_counts; string iptline; int tpmat_size = 0; double row_count = 0; barcode_threads.open(argv[1]); stgRNA_indices.open(argv[2]); transition_prob.open(argv[3]); transition_prob_counts.open(argv[4]); linked_stgRNA *curr_linked_stgRNA; linked_stgRNA *prev_linked_stgRNA; linked_stgRNA *iter_linked_stgRNA1; linked_stgRNA *iter_linked_stgRNA2; string kmer; string stgRNA1; string stgRNA2; int kmer_count; int curr_timepoint = 0; int prev_timepoint = 0; int read_count_threshold = 100; int prev_tp_read_counts = 0; int space_found = 0; int leftbrace_found = 0; int rightbrace_found = 0; int outer_sum = 0; int inner_sum = 0; int outer_idx; int inner_idx; int min_distance; int calc_distance; int min_inner_idx=0; int max_size = 50; // Creating a linked list containing all the kmers with indices and calculating the size of the index file linked_stgRNA* index_linked_stgRNA = new linked_stgRNA; iter_linked_stgRNA1 = index_linked_stgRNA; while(getline(stgRNA_indices, iptline)){ space_found = iptline.find(" "); kmer = iptline.substr(0, space_found); kmer_count = stoi(iptline.substr(space_found+1, string::npos)); iter_linked_stgRNA1 = iter_linked_stgRNA1->set_child_pointer(kmer, kmer_count); tpmat_size = tpmat_size+1; } stgRNA_indices.close(); // Initializing Transition Probability Matrix and its count Matrix here vector > tpmat; vector > tpmat_count; vector > tpmat_values; tpmat.resize(tpmat_size); tpmat_count.resize(tpmat_size); tpmat_values.resize(tpmat_size); for (int i = 0; i < tpmat_size; ++i){ tpmat[i].resize(tpmat_size); tpmat_count[i].resize(tpmat_size); tpmat_values[i].resize(tpmat_size); } for (int i = 0; iget_child_pointer(); if(iter_linked_stgRNA1){ outer_idx = search_index_linked_stgRNA(index_linked_stgRNA, iter_linked_stgRNA1->get_kmer()); min_distance = max_size; iter_linked_stgRNA2 = prev_linked_stgRNA; if(prev_tp_read_counts>read_count_threshold){ while(iter_linked_stgRNA2){ iter_linked_stgRNA2 = iter_linked_stgRNA2->get_child_pointer(); if(iter_linked_stgRNA2){ inner_idx = search_index_linked_stgRNA(index_linked_stgRNA, iter_linked_stgRNA2->get_kmer()); if (iter_linked_stgRNA2->is_self()){ calc_distance = pair_distance(iter_linked_stgRNA1->get_kmer(), iter_linked_stgRNA2->get_kmer()); if(calc_distanceset_child_pointer(kmer, kmer_count); iter_linked_stgRNA1 = iter_linked_stgRNA1->get_child_pointer(); } else if (iptline.compare("")==0){ //For the final Timepoint from previous barcode; // We are computing the presence of identities for the combinations of stgRNAs if ((curr_linked_stgRNA!=NULL) && (prev_linked_stgRNA!=NULL) && (curr_timepoint == (prev_timepoint+1))){ iter_linked_stgRNA1 = curr_linked_stgRNA; iter_linked_stgRNA2 = prev_linked_stgRNA; prev_tp_read_counts = num_read_counts(prev_linked_stgRNA); while(iter_linked_stgRNA1) { iter_linked_stgRNA1 = iter_linked_stgRNA1->get_child_pointer(); if(iter_linked_stgRNA1){ outer_idx = search_index_linked_stgRNA(index_linked_stgRNA, iter_linked_stgRNA1->get_kmer()); min_distance = max_size; iter_linked_stgRNA2 = prev_linked_stgRNA; if(prev_tp_read_counts>read_count_threshold){ while(iter_linked_stgRNA2){ iter_linked_stgRNA2 = iter_linked_stgRNA2->get_child_pointer(); if(iter_linked_stgRNA2){ inner_idx = search_index_linked_stgRNA(index_linked_stgRNA, iter_linked_stgRNA2->get_kmer()); if (iter_linked_stgRNA2->is_self()){ calc_distance = pair_distance(iter_linked_stgRNA1->get_kmer(), iter_linked_stgRNA2->get_kmer()); if(calc_distance