/*
 * =====================================================================================
 *
 *       Filename:  GenResult.h
 *
 *    Description:  This is the header file for class GenResult
 *
 *        Version:  1.0
 *        Created:  04/17/2009 11:03:59 AM
 *       Revision:  none
 *       Compiler:  gcc
 *
 *         Author:  Jianxing Feng (feeldead), feeldead@gmail.com
 *        Company:  THU
 *
 * =====================================================================================
 */

#ifndef GenResult_H
#define GenResult_H 

#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>

#include "ToolBase.h"
#include "InstanceWriter.h"
#include "PvalueCalculator.h"
#include "InstanceReader.h"
#include "RandomExpReadAssignerIM.h"
#include "IsoInferPE.h"
#include "ExpressionCalculator.h"
#include "ResultSummary.h"


using namespace std;


/*
 * =====================================================================================
 *        Class:  GenResult
 *  Description:  This class generates all the results needed by the paper
 * =====================================================================================
 */
class GenResult : public ToolBase
{
	public:
		/* ====================  LIFECYCLE     ======================================= */
		GenResult(){};                             /* constructor */
		virtual ~GenResult(){};                             /* destructor */

		virtual
		void 
		Help()
		{
			cout << endl;
			cout << "      ============================================================================ " << endl;
			cout << "          GenResult  " << endl;
			cout << "      ---------------------------------------------------------------------------- " << endl;
			cout << endl;
			cout << "      This sub-program generates all the results needed by the paper" << endl;
			cout <<                                                   endl;
			cout <<                                                   endl;
			cout << "      USAGE:             " << endl;
			cout <<                                endl;
			cout << " 		Jobs :                                      " << endl;
			cout <<                                                          endl;
			cout << "          -existence : On each gene, when (at most) a given number (-hide) isoforms are hided randomly," << endl;
			cout << "              calculate the sensitivity and specificity of detecting whether there are hided isoforms." << endl;
			cout <<                                                      endl;
			cout << "          -predict   : Based on short reads, predict missed / new isoforms." << endl;
			cout <<                                                      endl;
			cout << "          -cal_exp   : Based on short reads, calculate the expression levels of isoforms. -max_new," << endl;
			cout << "              -rand, -hide, -reads would be invalidated on this job." << endl;
			cout <<                                                      endl;
			cout << "          -uniq_exp   : Calculate the expression levels of isoforms according to the unique exons or " << endl;
			cout << "              junctions of each isoform. -ins is needed." << endl;
			cout <<                                                      endl;
			cout << "          -gen_rand  <a number>: Based on given instances, generate random expression levels and reads." << endl;
			cout << "              -reads should be specified. The parameter is the type of random expression levels :" << endl;
			cout << "              1. pow(10, r), where r is a random variable following standard normal distribution. " << endl;
		 	cout << "              2. pow(2, r), where r .... " << endl;
		 	cout << "              3. r in [0, 1], uniformly distributed" << endl;
			cout <<                                                      endl;
			cout << "          -pe : Test paired-end utilities with all the partial combination generated from the" << endl;
			cout << "                input instances. -bpe and -bse will be effective." << endl;
			cout <<                                                      endl;
			cout <<                                                      endl;
			cout << " 		Important Parameters :                      " << endl;
			cout <<                                                          endl;
			cout << "          -h : This help" << endl; 
			cout <<                                                          endl;
			cout << "          -ins <file> : A file contains problem instances" << endl; 
			cout <<                                                          endl;
			cout << "          -o <file> : A file for output" << endl;
			cout <<                                                          endl;
			cout << "          -reads : How many reads is going to be sampled. Default 10000000" << endl;
			cout <<                                                          endl;
			cout << "          -conf_level <a number in [0,1]>: Set the confidence level. Default 0.05" << endl;
			cout <<                                                          endl;
			cout << "          -noise_level <a number in [0,1]>: Set the noise level. Default 0.03" << endl;
			cout <<                                                          endl;
			cout << "          -read_len  <a number> : The length of every read. Default 33" << endl;
			cout <<                                                          endl;
			cout << "          -cross_strength <a number> : When ext_junc_ref, this parameters speficifies how many " << endl;
			cout << "              characters on each side should be aligned. Default 5." << endl;
			cout <<                                                          endl;
			cout << "          -max_new <a number> : This parameters determine at most how many new isoforms to be searched" << endl;
			cout << "              for a gene. Default 5."                                << endl;
			cout <<                                                          endl;
			cout << "          -hide <a number> : Hide a given number of isoforms from IsoInfer. A large enough value for" << endl;
			cout << "              this parameter means to hide all the isoforms. This parameter influences -predict. Default 0." << endl;
			cout <<                                                          endl;
			cout << "          -rand <T/F> : Whether to use randomly generated expression level and reads. If this " << endl;
			cout << "              is F then the expression levels and reads in the input instance will be used and " << endl;
			cout << "              -reads will be invalidate. This parameter will only influence -predict and -existence Default T. " << endl;
			cout <<                                                          endl;
			cout << "          -bpe <T/F> : Enable paired-end or not. Default T." << endl;
			cout <<                                                          endl;
			cout << "          -bse <T/F> : Enable start-end or not. Default T." << endl;
			cout <<                                                          endl;
			cout << "          -bs2 <T/F> : Enable the step II of the algorithm. Default T." << endl;
			cout <<                                                          endl;
			cout << "          -minexp <a number> : The minimum expression level. Default 10." << endl;
			cout <<                                                          endl;
			cout << "          -mindup <a number> : The minimum effective duplication of part comb. Default 1." << endl;
			cout <<                                                          endl;
			cout << "          -miniso <a number> : Genes with at least <a number> isoforms will be considered. Default 8." << endl;
			cout <<                                                          endl;
			cout << "          -isoscale <a number> : Genes with at least <a number>-1 isoforms will be considered as having " << endl;
			cout << "              only <a number> isoforms in the result statistics. Default 9." << endl;
			cout <<                                                          endl;
			cout << "          -ps <a number> : Partition size. Default 7." << endl;
			cout <<                                                          endl;
			cout << "          -oformat <a number> : Output format" << endl;
			cout << "              0 : Full statistics. Default." << endl;
			cout << "              1 : Predicted transcripts." << endl;
			cout << "              2 : Both." << endl;
			cout <<                                                          endl;
			cout << endl;

			exit(0);
		}

		virtual 
		int 
		Main(int argc, char* argv[], int startArg)
		{
			string output_file;
			string ins_file;

			int reads = 0;
			double conf_level = 0.05;
			double noise_level = 0.03;
			int read_len = 33;
			int cross_strength = 5;
			int max_new = 5;
			int hide = 0;
			bool b_rand = true;
			double min_exp = 10;
			int min_dup = 1;
			int min_iso_cnt = 8;
			int iso_cnt_scale = 9;
			int part_size = 7;
			int rand_exp_type = 1;
			int output_format = 0;

			bool b_existence = false;
			bool b_pred = false;
			bool b_help = true;
			bool b_cal_exp = false;
			bool b_uniq_exp = false;
			bool b_gen_rand = false;
			bool b_pe = false;
			bool b_stepII = true;
			bool b_pe_enable = true;
			bool b_se_enable = true;

			for (int i = startArg; i < argc; i++)
			{
				b_help = false;
				if (strcmp(argv[i], "-h") == 0)
				{
					Help();
					return 0;
				}

				else if (strcmp(argv[i], "-ins") == 0)
					ins_file = argv[++i];
				else if (strcmp(argv[i], "-o") == 0)
					output_file = argv[++i];
				else if (strcmp(argv[i], "-reads") == 0)
					reads = atoi(argv[++i]);
				else if (strcmp(argv[i], "-conf_level") == 0)
					conf_level = atof(argv[++i]);
				else if (strcmp(argv[i], "-noise_level") == 0)
					noise_level = atof(argv[++i]);
				else if (strcmp(argv[i], "-read_len") == 0)
					read_len = atoi(argv[++i]);
				else if (strcmp(argv[i], "-cross_strength") == 0)
					cross_strength = atoi(argv[++i]);
				else if (strcmp(argv[i], "-max_new") == 0)
					max_new = atoi(argv[++i]);
				else if (strcmp(argv[i], "-hide") == 0)
					hide = atoi(argv[++i]);
				else if (strcmp(argv[i], "-rand") == 0)
					b_rand = (argv[++i][0] == 'T');
				else if (strcmp(argv[i], "-bpe") == 0)
					b_pe_enable = (argv[++i][0] == 'T');
				else if (strcmp(argv[i], "-bse") == 0)
					b_se_enable = (argv[++i][0] == 'T');
				else if (strcmp(argv[i], "-bs2") == 0)
					b_stepII = (argv[++i][0] == 'T');
				else if (strcmp(argv[i], "-minexp") == 0)
					min_exp = atof(argv[++i]);
				else if (strcmp(argv[i], "-mindup") == 0)
					min_dup = atoi(argv[++i]);
				else if (strcmp(argv[i], "-miniso") == 0)
					min_iso_cnt = atoi(argv[++i]);
				else if (strcmp(argv[i], "-isoscale") == 0)
					iso_cnt_scale = atoi(argv[++i]);
				else if (strcmp(argv[i], "-ps") == 0)
					part_size = atoi(argv[++i]);
				else if (strcmp(argv[i], "-oformat") == 0)
					output_format = atoi(argv[++i]);
				else if (strcmp(argv[i], "-predict") == 0)
					b_pred= true;
				else if (strcmp(argv[i], "-existence") == 0)
					b_existence = true;
				else if (strcmp(argv[i], "-cal_exp") == 0)
					b_cal_exp = true;
				else if (strcmp(argv[i], "-uniq_exp") == 0)
					b_uniq_exp = true;
				else if (strcmp(argv[i], "-gen_rand") == 0)
				{
					b_gen_rand= true;
					rand_exp_type = atoi(argv[++i]);
				}
				else if (strcmp(argv[i], "-pe") == 0)
					b_pe = true;
				else
				{
					cerr << "Wrong parameter " << argv[i] << endl;
					Help();
				}
			}

			if (ins_file == "")
			{
				ArgMissing("-ins_file");
				Help();
			}
			if (!b_pred && !b_existence && !b_cal_exp && !b_uniq_exp && !b_gen_rand && !b_pe)
			{
				ArgMissing("-predict or -existence or -cal_exp or -uniq_exp or -gen_rand or -pe");
				Help();
			}
			if (rand_exp_type != 1 && rand_exp_type !=2 && rand_exp_type != 3)
			{
				ArgMissing("-The parameter for -gen_rand is out of range");
				Help();
			}

			// Prepare output
			ostream* p_output;
			if (output_file != "")
			{
				p_output = new ofstream;

				((ofstream*)p_output)->open(output_file.data(), ios::out);
				if (!((ofstream*)p_output)->is_open())
				{
					cerr << "File " << output_file.data() << " can not be opened" << endl;
					return false;
				}
			}
			else
				p_output = &cout;

			if (b_existence)
			{
				LPsolver lp_solver;

				PvalueCalculator pvalue_cal(&lp_solver, p_output);
				pvalue_cal.SetConfidencelevel(conf_level);
				pvalue_cal.SetHideCnt(hide);

				RandomExpReadAssignerIM semi_rand_gen(p_output);
				semi_rand_gen.SetSEReadCnt(reads);
				semi_rand_gen.AddHandler(&pvalue_cal);

				InstanceReader reader(ins_file);
				reader.SetHandler(&pvalue_cal);

				if (b_rand)
				{
					if (0 == reads) 
					{
						cerr << "Please set -reads parameter" << endl;
						Help();
					}
					reader.SetHandler(&semi_rand_gen);
				}

				reader.Initialize();
				reader.Generate();
				reader.CleanUp();
			}
			else if (b_pred)
			{
				InstanceWriter writer;
				LPsolver lp_solver;

				/*
				IsoInfer iso_infer(&lp_solver, p_output);
				iso_infer.SetMaxNewCnt(max_new);
				iso_infer.SetConfidencelevel(conf_level);
				iso_infer.SetPartitionSize(part_size);
				iso_infer.AddHandler(&writer);

				int scale_cnt = 10;
				PredictionStat pred_stat(&iso_infer, p_output);
				pred_stat.SetParameters(hide, scale_cnt);

				RandomExpReadAssignerIM semi_rand_gen(p_output);
				semi_rand_gen.SetSEReadCnt(reads);
				semi_rand_gen.AddHandler(&pred_stat);

				InstanceReader reader(ins_file);
				reader.SetHandler(&pred_stat);

				if (b_rand)
				{
					if (0 == reads) 
					{
						cerr << "Please set -reads parameter" << endl;
						Help();
					}
					reader.SetHandler(&semi_rand_gen);
				}

				reader.Initialize();
				reader.Generate();
				reader.CleanUp();
				*/
			}
			else if (b_cal_exp)
			{
				InstanceWriter writer(p_output);
				writer.SetConfidencelevel(conf_level);
				writer.SetWriteFormat(InstanceWriter::WRITE_EXPRPKM);

				LPsolver lp_solver;
				ExpressionCalculator exp_calculator(&lp_solver, p_output);
				exp_calculator.SetMethod(ExpressionCalculator::EXP_METHOD_QUAD);
				exp_calculator.AddHandler(&writer);

				InstanceReader reader(ins_file);
				reader.SetHandler(&exp_calculator);
				reader.Initialize();
				reader.Generate();
				reader.CleanUp();
			}
			else if (b_uniq_exp)
			{
				InstanceWriter writer(p_output);
				writer.SetConfidencelevel(conf_level);
				writer.SetWriteFormat(InstanceWriter::WRITE_EXPRPKM);

				LPsolver lp_solver;
				ExpressionCalculator exp_calculator(&lp_solver, p_output);
				exp_calculator.SetMethod(ExpressionCalculator::EXP_METHOD_UNIQ);
				exp_calculator.AddHandler(&writer);

				InstanceReader reader(ins_file);
				reader.SetHandler(&exp_calculator);
				reader.Initialize();
				reader.Generate();
				reader.CleanUp();
			}
			else if (b_gen_rand)
			{
				InstanceWriter writer(p_output);

				RandomExpReadAssignerIM semi_rand_gen(p_output);
				semi_rand_gen.SetRandExpType(rand_exp_type);
				semi_rand_gen.SetSEReadCnt(reads);
				semi_rand_gen.AddHandler(&writer);

				InstanceReader reader(ins_file);
				reader.SetHandler(&semi_rand_gen);
				reader.Initialize();
				reader.Generate();
				reader.CleanUp();
			}
			else if (b_pe)
			{
				LPsolver lp_solver;

				IsoInferPE iso_infer(&lp_solver, p_output);
				iso_infer.SetConfidencelevel(conf_level);
				iso_infer.EnableSE(b_se_enable);
				iso_infer.EnablePE(b_pe_enable);
				iso_infer.EnableStepII(b_stepII);
				iso_infer.SetMinExpLevel(min_exp);
				iso_infer.SetMinEffectivePartCombDup(min_dup);
				iso_infer.SetPartitionSize(part_size);

				ResultSummary summary(&iso_infer, p_output);
				summary.SetMinIsoCnt(min_iso_cnt);
				summary.SetIsoCntScale(iso_cnt_scale);
				summary.SetOutputFormat(output_format);

				InstanceReader reader(ins_file);
				reader.SetHandler(&summary);

				reader.Initialize();
				reader.Generate();
				reader.CleanUp();
			}

			if (output_file != "")
			{
				((ofstream*)p_output)->close();
				delete p_output;
			}
			return 0;
		}

	protected:

	private:

}; /* -----  end of class GenResult  ----- */


#endif
