Comments (2)
Hi @decenwang ,
I guess you mean post the C++ code for the -run_demo
command line? This command is not running any other command line, it is 'hardcoded' through the C++ API of IGoR as an example for people wishing to use the more powerful C++ API rather than command lines.
You can find the run_demo C++ code pretty clearly isolated in the igor_src/main.cpp file :
if(run_demo){
/*Run this sample demo code
*
* Outline:
*
* Read TCRb genomic templates
*
* Align the sequences contained in the /demo/murugan_naive1_noncoding_demo_seqs.txt file to those templates
*
* Create a TCRb model, a simple error rate and the corresponding marginals
*
* Show reads and write functions for the model and marginals
*
* Infer a model from the sequences (perform 10 iterations of EM)
*
* Generate sequences from the obtained model
*
*/
//Path to the working folder
/* string path (argv[1]);
if (path[path.size()-1] != '/'){
path+="/";
}*/
clog<<"Reading genomic templates"<<endl;
vector<pair<string,string>> v_genomic = read_genomic_fasta( string(IGOR_DATA_DIR) + "/demo/genomicVs_with_primers.fasta");
vector<pair<string,string>> d_genomic = read_genomic_fasta( string(IGOR_DATA_DIR) + "/demo/genomicDs.fasta");
vector<pair<string,string>> j_genomic = read_genomic_fasta( string(IGOR_DATA_DIR) + "/demo/genomicJs_all_curated.fasta");
//Declare substitution matrix used for alignments(nuc44 here)
double nuc44_vect [] = { // A,C,G,T,R,Y,K,M,S,W,B,D,H,V,N
5,-14,-14,-14,-14,2,-14,2,2,-14,-14,1,1,1,0,
-14,5,-14,-14,-14,2,2,-14,-14,2,1,-14,1,1,0,
-14,-14,5,-14,2,-14,2,-14,2,-14,1,1,-14,1,0,
-14,-14,-14,5,2,-14,-14,2,-14,2,1,1,1,-14,0,
-14,-14,2,2,1.5,-14,-12,-12,-12,-12,1,1,-13,-13,0,
2,2,-14,-14,-14,1.5,-12,-12,-12,-12,-13,-13,1,1,0,
-14,2,2,-14,-12,-12,1.5,-14,-12,-12,1,-13,-13,1,0,
2,-14,-14,2,-12,-12,-14,1.5,-12,-12,-13,1,1,-13,0,
2,-14,2,-14,-12,-12,-12,-12,1.5,-14,-13,1,-13,1,0,
-14,2,-14,2,-12,-12,-12,-12,-14,1.5,1,-13,1,-13,0,
-14,1,1,1,1,-13,1,-13,-13,1,0.5,-12,-12,-12,0,
1,-14,1,1,1,-13,-13,1,1,-13,-12,0.5,-12,-12,0,
1,1,-14,1,-13,1,-13,1,-13,1,-12,-12,0.5,-12,0,
1,1,1,-14,-13,1,1,-13,1,-13,-12,-12,-12,0.5,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
Matrix<double> nuc44_sub_matrix(15,15,nuc44_vect);
//Instantiate aligner with substitution matrix declared above and gap penalty of 50
Aligner v_aligner = Aligner(nuc44_sub_matrix , 50 , V_gene);
v_aligner.set_genomic_sequences(v_genomic);
Aligner d_aligner = Aligner(nuc44_sub_matrix , 50 , D_gene);
d_aligner.set_genomic_sequences(d_genomic);
Aligner j_aligner (nuc44_sub_matrix , 50 , J_gene);
j_aligner.set_genomic_sequences(j_genomic);
clog<<"Reading sequences and aligning"<<endl;
typedef std::chrono::system_clock myclock;
myclock::time_point begin_time, end_time;
begin_time = myclock::now();
vector<pair<const int, const string>> indexed_seqlist = read_txt( string(IGOR_DATA_DIR) + "/demo/murugan_naive1_noncoding_demo_seqs.txt" ); //Could also read a FASTA file <code>read_fasta()<\code> or indexed sequences <code>read_indexed_seq_csv()<\code>
cl_path+="igor_demo/";
system(&("mkdir " + cl_path )[0]);
v_aligner.align_seqs( string(cl_path + "/murugan_naive1_noncoding_demo_seqs") + string("_alignments_V.csv"),indexed_seqlist,50,true,INT16_MIN,-155);
//v_aligner.write_alignments_seq_csv(path + string("alignments_V.csv") , v_alignments);
d_aligner.align_seqs(string(cl_path + "/murugan_naive1_noncoding_demo_seqs") + string("_alignments_D.csv"),indexed_seqlist,0,false);
//d_aligner.write_alignments_seq_csv(path + string("alignments_D.csv") , d_alignments);
j_aligner.align_seqs(string(cl_path + "/murugan_naive1_noncoding_demo_seqs") + string("_alignments_J.csv"),indexed_seqlist,10,true,42,48);
end_time= myclock::now();
chrono::duration<double> elapsed = end_time - begin_time;
clog<<"Alignments procedure lasted: "<<elapsed.count()<<" seconds"<<endl;
clog<<"for "<<indexed_seqlist.size()<<" TCRb sequences of 60bp(from murugan and al), against ";
clog<<v_genomic.size()<<" Vs,"<<d_genomic.size()<<" Ds, and "<<j_genomic.size()<<" Js full sequences"<<endl;
//unordered_map<int,forward_list<Alignment_data>> j_alignments = j_aligner.align_seqs(indexed_seqlist,10,true,42,48);
//j_aligner.write_alignments_seq_csv(path + string("alignments_J.csv") , j_alignments);
write_indexed_seq_csv(string(cl_path + "/murugan_naive1_noncoding_demo_seqs") + string("_indexed_seq.csv") , indexed_seqlist);
clog<<"Construct the model"<<endl;
//Construct a TCRb model
Gene_choice v_choice(V_gene,v_genomic);
v_choice.set_nickname("v_choice");
v_choice.set_priority(7);
Gene_choice d_choice(D_gene,d_genomic);
d_choice.set_nickname("d_gene");
d_choice.set_priority(6);
Gene_choice j_choice(J_gene,j_genomic);
j_choice.set_nickname("j_choice");
j_choice.set_priority(7);
Deletion v_3_del(V_gene,Three_prime,make_pair(-4,16));//16
v_3_del.set_nickname("v_3_del");
v_3_del.set_priority(5);
Deletion d_5_del(D_gene,Five_prime,make_pair(-4,16));
d_5_del.set_nickname("d_5_del");
d_5_del.set_priority(5);
Deletion d_3_del(D_gene,Three_prime,make_pair(-4,16));
d_3_del.set_nickname("d_3_del");
d_3_del.set_priority(5);
Deletion j_5_del(J_gene,Five_prime,make_pair(-4,18));
j_5_del.set_nickname("j_5_del");
j_5_del.set_priority(5);
Insertion vd_ins(VD_genes,make_pair(0,30));
vd_ins.set_nickname("vd_ins");
vd_ins.set_priority(4);
Insertion dj_ins(DJ_genes,make_pair(0,30));
dj_ins.set_nickname("dj_ins");
dj_ins.set_priority(2);
Dinucl_markov markov_model_vd(VD_genes);
markov_model_vd.set_nickname("vd_dinucl");
markov_model_vd.set_priority(3);
Dinucl_markov markov_model_dj(DJ_genes);
markov_model_dj.set_nickname("dj_dinucl");
markov_model_dj.set_priority(1);
Model_Parms parms;
//Add nodes to the graph
parms.add_event(&v_choice);
parms.add_event(&d_choice);
parms.add_event(&j_choice);
parms.add_event(&v_3_del);
parms.add_event(&d_3_del);
parms.add_event(&d_5_del);
parms.add_event(&j_5_del);
parms.add_event(&vd_ins);
parms.add_event(&dj_ins);
parms.add_event(&markov_model_vd);
parms.add_event(&markov_model_dj);
//Add correlations
parms.add_edge(&v_choice,&v_3_del);
parms.add_edge(&j_choice,&j_5_del);
parms.add_edge(&d_choice,&d_3_del);
parms.add_edge(&d_choice,&d_5_del);
parms.add_edge(&d_5_del,&d_3_del);
parms.add_edge(&j_choice,&d_choice);
//Create the corresponding marginals
Model_marginals model_marginals(parms);
model_marginals.uniform_initialize(parms); //Can also start with a random prior using random_initialize()
//Instantiate an error rate
Single_error_rate error_rate(0.001);
parms.set_error_ratep(&error_rate);
clog<<"Write and read back the model"<<endl;
//Write the model_parms into a file
parms.write_model_parms(string(cl_path + "/demo_write_model_parms.txt"));
//Write the marginals into a file
model_marginals.write2txt(string(cl_path + "/demo_write_model_marginals.txt"),parms);
//Read a model and marginal pair
Model_Parms read_model_parms;
read_model_parms.read_model_parms(string(cl_path + "/demo_write_model_parms.txt"));
Model_marginals read_model_marginals(read_model_parms);
read_model_marginals.txt2marginals(string(cl_path + "/demo_write_model_marginals.txt"),read_model_parms);
//Instantiate a Counter
map<size_t,shared_ptr<Counter>> counters_list;
//Collect gene coverage and errors
shared_ptr<Counter> coverage_counter_ptr(new Coverage_err_counter(cl_path + "/run_demo/",VJ_genes,1,false,false));
counters_list.emplace(0,coverage_counter_ptr);
//Collect 10 best scenarios per sequence during the last iteration
shared_ptr<Counter>best_sc_ptr(new Best_scenarios_counter(10 , cl_path + "/run_demo/" ,true ));
counters_list.emplace(1,best_sc_ptr);
//Collect sequence generation probability during last iteration
shared_ptr<Counter> pgen_counter_ptr(new Pgen_counter (cl_path + "/run_demo/"));
counters_list.emplace(2,pgen_counter_ptr);
shared_ptr<Counter> errors_counter(new Errors_counter (10,string(cl_path + "/run_demo/")));
counters_list.emplace(3,errors_counter);
//Instantiate the high level GenModel class
//This class allows to make most useful high level operations(model inference/Pgen computation , sequence generation)
GenModel gen_model(read_model_parms,read_model_marginals,counters_list);
//Inferring a model
//Read alignments
//vector<pair<const int, const string>> indexed_seqlist = read_indexed_csv(path+ string(argv[2]) + string("indexed_seq.csv"));
unordered_map<int,pair<string,unordered_map<Gene_class,vector<Alignment_data>>>> sorted_alignments = read_alignments_seq_csv_score_range(string(cl_path + "/murugan_naive1_noncoding_demo_seqs") + string("_alignments_V.csv"), V_gene , 55 , false , indexed_seqlist );//40//35
sorted_alignments = read_alignments_seq_csv_score_range(string(cl_path + "/murugan_naive1_noncoding_demo_seqs") + string("_alignments_D.csv"), D_gene , 35 , false , indexed_seqlist , sorted_alignments);//30//15
sorted_alignments = read_alignments_seq_csv_score_range(string(cl_path + "/murugan_naive1_noncoding_demo_seqs") + string("_alignments_J.csv"), J_gene , 10 , false , indexed_seqlist , sorted_alignments);//30//20
vector<tuple<int,string,unordered_map<Gene_class,vector<Alignment_data>>>> sorted_alignments_vec = map2vect(sorted_alignments);
//Infer the model
clog<<"Infer model"<<endl;
begin_time = myclock::now();
system(&("mkdir " + cl_path + "run_demo")[0]);
gen_model.infer_model(sorted_alignments_vec , 4 , string(cl_path + "/run_demo/") , true ,1e-35,0.0001);
end_time= myclock::now();
elapsed = end_time - begin_time;
clog<<"Model inference procedure lasted: "<<elapsed.count()<<" seconds"<<endl;
//Generate sequences
clog<<"Generate sequences"<<endl;
auto generated_seq = gen_model.generate_sequences(100,false);//Without errors
gen_model.write_seq_real2txt(string(cl_path + "/generated_seqs_indexed_demo.csv"), string(cl_path + "/generated_seqs_realizations_demo.csv") , generated_seq);//Member function will be changed
}
from igor.
Thanks a lot, Quentin, I mean the igor command line actually, not the C++ command. Now I think it is ok. I am trying to learn from your and others' comments.
from igor.
Related Issues (20)
- File not found when IGoR is installed locally HOT 1
- Errors: python to parse the output results? HOT 2
- Model edge gene choice relations differ HOT 1
- Chain IGoR commands HOT 1
- Missing unknown subargument error for -output HOT 1
- Using the --coverage output subarg HOT 1
- using the --coverage error
- make new database HOT 2
- make check failures but make install worked HOT 1
- Segmentation Fault in -run_demo HOT 5
- Limiting IGoR's CPU usage HOT 1
- Install error HOT 2
- How to get everyone sequence clonotype ?
- the result file foo_indexed_CDR3s.csv HOT 1
- segfault with gcc > 7 HOT 2
- IGoR over-estimating TCR V deletions?
- installation from source
- All 0 output in infer and question about L_thresh
- Remove superfluous IMGT information in TCR beta model parms file
- Undefined reference error during compling
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from igor.