/*
 * =====================================================================================
 *
 *       Filename:  MishMash.h
 *
 *    Description:  This is the header file for class MishMash
 *
 *        Version:  1.0
 *        Created:  04/20/2009 04:46:21 PM
 *       Revision:  none
 *       Compiler:  gcc
 *
 *         Author:  Jianxing Feng (feeldead), feeldead@gmail.com
 *        Company:  THU
 *
 * =====================================================================================
 */

#ifndef MishMash_H
#define MishMash_H 

#include <string>
#include <iostream>
#include <cstdlib>
#include <algorithm>
#include <string.h>

#include "ToolBase.h"
#include "InstanceWriter.h"
#include "PvalueCalculator.h"
#include "InstanceReader.h"
#include "RandomExpReadAssignerIM.h"
#include "InstanceStatistics.h"
#include "Exon.h"


using namespace std;


/*
 * =====================================================================================
 *        Class:  MishMash
 *  Description:  This class performs various miscellaneous jobs
 * =====================================================================================
 */
class MishMash : public ToolBase
{
	public:
		/* ====================  LIFECYCLE     ======================================= */
		MishMash(){};                             /* constructor */
		virtual ~MishMash(){};                             /* destructor */

		/*
		 * Print the usage text of this sub-program to cout and terminate the
		 * process with exit status 0. This method never returns to its caller.
		 */
		virtual
		void 
		Help()
		{
			// One entry per printed line; an empty entry produces an empty line.
			static const char* const usage_lines[] = {
				"",
				"      ============================================================================ ",
				"          MishMash  ",
				"      ---------------------------------------------------------------------------- ",
				"",
				"      This sub-program does all the remained work",
				"",
				"",
				"      USAGE:             ",
				"",
				"--------Jobs : -------------------------------------------------",
				"",
				"          -ins_stat : Do statistics on the instances",
				"",
				"          -comp_exp <hui> <my> <total read cnt> : Compare my expression values and hui's. Followed by",
				"              two file, hui's output and my exp and then the total number of reads.",
				"",
				"          -rel_diff <truth file> <esti file> <true scale> : Compare the estimated value to the true ",
				"              value. Calculate the relative difference. The first column of each file will be used.",
				"              the value in the first file will be scaled by 'true scale'.",
				"",
				"          -w_exp   : Write the expression levels of instances to a file. -ins is needed.",
				"",
				"          -gen_tss_pas : Extract boundaries, gene range, TSSs and PASs from instances",
				"",
				"          -comp_iso <T/F, pred_iso_file, ben_iso_file> : Compare the prediced isoforms with ",
				"               a benchmark, strand specifically (T) or not (F).",
				"               The format for upper two files is : ",
				"               name chromosome  strand  range_start range_end exon_start  exon_end",
				"               exon_start or exon_end are seperated by commas.",
				"",
				"",
				"          -gen_exon : Output all the exons and the observed reads",
				"",
				"          -gen_iso : Output all known isoforms",
				"",
				"          -comp_interval <file1, file2>: Given two files of intervals, calculate the best match interval ",
				"              in the second file for each one in the first file. The format is:",
				"              chr  start-pos  end-pos ",
				"",
				"          -spliced_iso <T/F, bound_file, iso_file>: Given a set of boundaries and a set of isoforms. ",
				"              Find out isoforms, whose boundaries all appear in the given set of boundaries. The format",
				"              of isoform file is the same as that for -comp_iso. strand specifically (T) or not (F)",
				"",
				"--------Important Parameters :-------------------------------------",
				"",
				"          -h : This help",
				"",
				"          -ins <file> : A file contains problem instances",
				"",
				"          -o <file> : A file for output",
				"",
				""
			};
			const unsigned usage_line_cnt = sizeof(usage_lines) / sizeof(usage_lines[0]);

			for (unsigned k = 0; k < usage_line_cnt; k++)
				cout << usage_lines[k] << endl;

			exit(0);
		}

		virtual 
		int 
		Main(int argc, char* argv[], int startArg)
		{
			string output_file;
			string ins_file;
			string first_file;
			string second_file;

			bool b_help = true;
			bool b_ins_stat = false;
			bool b_comp_exp = false;
			bool b_gen_tss_pas = false;
			bool b_gen_exon = false;
			bool b_gen_iso = false;
			bool b_w_exp = false;
			bool b_rel_diff = false;
			bool b_comp_iso = false;
			bool b_spliced_iso = false;
			bool b_strand_specific = false;
			bool b_comp_interval = false;

			int total_reads = 10000000;

			double true_scale = 1.0;

			for (int i = startArg; i < argc; i++)
			{
				b_help = false;
				if (strcmp(argv[i], "-h") == 0)
				{
					Help();
					return 0;
				}

				else if (strcmp(argv[i], "-ins") == 0)
					ins_file = argv[++i];
				else if (strcmp(argv[i], "-o") == 0)
					output_file = argv[++i];
				else if (strcmp(argv[i], "-ins_stat") == 0)
					b_ins_stat = true;
				else if (strcmp(argv[i], "-gen_tss_pas") == 0)
					b_gen_tss_pas = true;
				else if (strcmp(argv[i], "-gen_exon") == 0)
					b_gen_exon = true;
				else if (strcmp(argv[i], "-gen_iso") == 0)
					b_gen_iso = true;
				else if (strcmp(argv[i], "-w_exp") == 0)
					b_w_exp = true;
				else if (strcmp(argv[i], "-comp_exp") == 0)
				{
					first_file = argv[++i];
					second_file = argv[++i];
					total_reads = atoi(argv[++i]);
					b_comp_exp = true;
				}
				else if (strcmp(argv[i], "-comp_interval") == 0)
				{
					b_comp_interval = true;
					first_file = argv[++i];
					second_file = argv[++i];
				}
				else if (strcmp(argv[i], "-rel_diff") == 0)
				{
					first_file = argv[++i];
					second_file = argv[++i];
					true_scale = atof(argv[++i]);
					b_rel_diff = true;
				}
				else if (strcmp(argv[i], "-comp_iso") == 0)
				{
					b_comp_iso = true;
					b_strand_specific = (argv[++i][0] == 'T');
					first_file = argv[++i];
					second_file = argv[++i];
				}
				else if (strcmp(argv[i], "-spliced_iso") == 0)
				{
					b_spliced_iso = true;
					b_strand_specific = (argv[++i][0] == 'T');
					first_file = argv[++i];
					second_file = argv[++i];
				}
				else
				{
					cerr << "Wrong parameter " << argv[i] << endl;
					Help();
				}
			}

			if (!b_ins_stat && !b_comp_exp && !b_w_exp && !b_rel_diff && 
				!b_gen_tss_pas && !b_gen_exon && !b_gen_iso && !b_comp_iso && !b_spliced_iso && !b_comp_interval)
			{
				ArgMissing("Job is missing");
				Help();
			}

			// Prepare output
			if (output_file != "")
			{
				mpOutput = new ofstream;

				((ofstream*)mpOutput)->open(output_file.data(), ios::out);
				if (!((ofstream*)mpOutput)->is_open())
				{
					cerr << "File " << output_file.data() << " can not be opened" << endl;
					return false;
				}
			}
			else
				mpOutput = &cout;

			if (b_ins_stat)
			{
				if (ins_file == "")
				{
					ArgMissing("-ins_file");
					Help();
				}

				InstanceStatistics ins_stat(mpOutput);

				InstanceReader reader(ins_file);
				reader.SetHandler(&ins_stat);

				reader.Initialize();
				reader.Generate();
				reader.CleanUp();
			}
			else if (b_comp_exp)
			{
				ExpCompare(first_file, second_file, (double)total_reads);
			}
			else if (b_comp_interval)
			{
				CompareInterval(first_file, second_file);
			}
			else if (b_gen_tss_pas)
			{
				InstanceWriter writer(mpOutput);
				writer.SetWriteFormat(InstanceWriter::WRITE_TSSPAS);

				InstanceReader reader(ins_file);
				reader.SetHandler(&writer);
				reader.Initialize();
				reader.Generate();
				reader.CleanUp();
			}
			else if (b_gen_exon)
			{
				InstanceWriter writer(mpOutput);
				writer.SetWriteFormat(InstanceWriter::WRITE_EXON);

				InstanceReader reader(ins_file);
				reader.SetHandler(&writer);
				reader.Initialize();
				reader.Generate();
				reader.CleanUp();
			}
			else if (b_gen_iso)
			{
				InstanceWriter writer(mpOutput);
				writer.SetWriteFormat(InstanceWriter::WRITE_ISO);

				InstanceReader reader(ins_file);
				reader.SetHandler(&writer);
				reader.Initialize();
				reader.Generate();
				reader.CleanUp();
			}
			else if (b_w_exp)
			{
				InstanceWriter writer(mpOutput);
				writer.SetWriteFormat(InstanceWriter::WRITE_EXP);

				InstanceReader reader(ins_file);
				reader.SetHandler(&writer);
				reader.Initialize();
				reader.Generate();
				reader.CleanUp();
			}
			else if (b_rel_diff)
			{
				RelDiff(first_file, second_file, true_scale);
			}
			else if (b_comp_iso)
			{
				CompareIsoforms(first_file, second_file, b_strand_specific);
			}
			else if (b_comp_iso)
			{
				SplicedIsoforms(first_file, second_file, b_strand_specific);
			}

			if (output_file != "")
			{
				((ofstream*)mpOutput)->close();
				delete mpOutput;
			}
			return 0;
		}

		/*
		 *--------------------------------------------------------------------------------------
		 *       Class:  MishMash
		 *      Method:  RelDiff
		 * Description:  Compare estimated values with true values. Both files are
		 *               tab-separated; column 0 is used as the name and column 1 as the
		 *               value. For every name of the estimate file that also occurs in the
		 *               truth file, |estimate - truth| / |truth| is calculated. The
		 *               fraction of those names whose relative difference is not larger
		 *               than 0, 0.05, ..., 1 is written to mpOutput (a cumulative table).
		 *               Lines with fewer than two columns are skipped.
		 *       Param:  true_file  :  The file holding the true values.
		 *               esti_file  :  The file holding the estimated values.
		 *               true_scale :  Every true value is multiplied by this factor first.
		 *      Return:  false if one of the files can not be opened, true otherwise.
		 *--------------------------------------------------------------------------------------
		 */
		bool
		RelDiff(string true_file, string esti_file, double true_scale)
		{
			fstream infile;
			infile.open(true_file.c_str(), ios::in);
			if (!infile.is_open())
			{
				cerr << "File " << true_file << " can not be opened" << endl;
				return false;
			}

			vector<string> splitted;

			map<string, double> name_value;

			string line;
			while (getline(infile, line))
			{
				splitted = Utility::Split('\t', line);
				if (splitted.size() < 2) continue;
				name_value[splitted[0]] = atof(splitted[1].c_str()) * true_scale;
			}

			infile.close();
			// The stream has hit end-of-file; reset its state before it is reused.
			infile.clear();

			infile.open(esti_file.c_str(), ios::in);
			if (!infile.is_open())
			{
				cerr << "File " << esti_file << " can not be opened" << endl;
				return false;
			}

			int no_match_cnt = 0;
			int valid_cnt = 0;

			const int range_cnt = 21;
			const double range_step = 0.05;
			vector<double> range_ratio(range_cnt, 0.0);
			while (getline(infile, line))
			{
				splitted = Utility::Split('\t', line);
				if (splitted.size() < 2) continue;

				map<string, double>::const_iterator truth = name_value.find(splitted[0]);
				if (truth == name_value.end())
				{
					no_match_cnt++;
					continue;
				}

				valid_cnt++;

				// Absolute values are taken by hand: with only <cstdlib> in scope,
				// abs() may select the integer overload and truncate.
				double diff = atof(splitted[1].c_str()) - truth->second;
				if (diff < 0) diff = -diff;
				double magnitude = truth->second;
				if (magnitude < 0) magnitude = -magnitude;

				// An exact match counts as 0 even when the true value is 0 (0/0
				// would be NaN); a mismatch against 0 gives infinity, which falls
				// into no range.
				double rel_diff = (0 == diff) ? 0 : diff / magnitude;
				for (int i = 0; i < range_cnt; i++)
				{
					if (rel_diff <= i * range_step) range_ratio[i] += 1;
				}
			}

			infile.close();

			(*mpOutput) << "Valid : No Match = " << valid_cnt << " : " << no_match_cnt << endl;
			for (int i = 0; i < range_cnt; i++)
				(*mpOutput) << setw(10) << i * range_step;
			(*mpOutput) << endl;
			for (int i = 0; i < range_cnt; i++)
				(*mpOutput) << setw(10) << (valid_cnt > 0 ? range_ratio[i] / valid_cnt : 0.0);
			(*mpOutput) << endl;

			return true;
		}		/* -----  end of method RelDiff----- */

		/*
		 *--------------------------------------------------------------------------------------
		 *       Class:  MishMash
		 *      Method:  CompareInterval 
		 * Description:  For each interval of the first file, find the interval of the second
		 *               file on the same chromosome with the highest score
		 *                   2 * overlap / (length of first + length of second)
		 *               and write the match to mpOutput. A histogram of the best scores
		 *               (in steps of 0.01) is written to cout as a summary.
		 *               NOTE: intervals on a chromosome missing from the second file are
		 *               counted in the total but not entered into the histogram.
		 *       Param:  first_file  :  Intervals to be matched (see ReadInterval).
		 *               second_file :  Intervals to match against.
		 *      Return:  false if one of the files can not be read, true otherwise.
		 *--------------------------------------------------------------------------------------
		 */
		bool
		CompareInterval(string first_file, string second_file)
		{
			map_str2vec_seg first_intervals;
			map_str2vec_seg second_intervals;

			if (!ReadInterval(first_file, first_intervals)) return false;
			if (!ReadInterval(second_file, second_intervals)) return false;

			// score_dist[k] counts the intervals whose best score s has (int)(s * 100) == k
			vector<int> score_dist(101, 0);

			int first_interval_cnt = 0;
			for_each_ele_in_group(iter, map_str2vec_seg, first_intervals)
			{
				string chr = iter->first;
				vector<Segment>& curr_first_segs = iter->second;
				first_interval_cnt += curr_first_segs.size();

				if (second_intervals.find(chr) == second_intervals.end())
				{
					cerr << "On chromosome " << chr << " no intervals exist in second file" << endl;
					continue;
				}

				vector<Segment>& curr_second_segs = second_intervals[chr];

				for (unsigned i = 0; i < curr_first_segs.size(); i++)
				{
					Segment& seg = curr_first_segs[i];

					double best_score = 0;
					int best_idx = -1;
					for (unsigned j = 0; j < curr_second_segs.size(); j++)
					{
						Segment& cand = curr_second_segs[j];

						// The overlap of two intervals is the smallest of these four
						// differences; it is <= 0 when the intervals are disjoint.
						int overlap = seg.mEnd - cand.mStart;
						if (overlap > cand.mEnd - seg.mStart)
							overlap = cand.mEnd - seg.mStart;
						if (overlap > cand.mEnd - cand.mStart)
							overlap = cand.mEnd - cand.mStart;
						if (overlap > seg.mEnd - seg.mStart)
							overlap = seg.mEnd - seg.mStart;
						if (overlap <= 0) continue;

						double score = 2.0 * overlap / (seg.mEnd - seg.mStart + cand.mEnd - cand.mStart);
						if (score > best_score)
						{
							best_score = score;
							best_idx = j;
						}
					}

					int idx = (int)(best_score * 100);
					score_dist[idx]++;

					if (-1 == best_idx)
						(*mpOutput) << "Segment [" << seg.mStart << "," << seg.mEnd << ") has no match" << endl;
					else
						(*mpOutput) << "Segment [" << seg.mStart << "," << seg.mEnd << ") matches "
									<< "Segment [" << curr_second_segs[best_idx].mStart << "," 
									<< curr_second_segs[best_idx].mEnd << ") with score " << best_score << endl;
				}
			}

			cout << "Summary :" << endl;
			for (unsigned i = 0; i < score_dist.size(); i++)
			{
				// Avoid dividing by zero when the first file holds no interval.
				double fraction = 0;
				if (first_interval_cnt > 0)
					fraction = ((double)score_dist[i]) / first_interval_cnt;
				cout << i << "\t" << score_dist[i] << "\t" << fraction << endl;
			}

			return true;
		}

		/*
		 * Load intervals from a tab-separated file with the columns
		 *     chr  start-pos  end-pos
		 * and append them to 'intervals', grouped by chromosome. Afterwards the
		 * intervals of each chromosome are sorted with Segment's operator<.
		 * Lines with fewer than three columns are skipped; non-empty ones are
		 * reported on cerr. The number of loaded intervals is printed to cout.
		 * Returns false if the file can not be opened, true otherwise.
		 */
		bool
		ReadInterval(string interval_file, map_str2vec_seg& intervals)
		{
			fstream infile;
			infile.open(interval_file.c_str(), ios::in);
			if (!infile.is_open())
			{
				cerr << "File " << interval_file << " can not be opened" << endl;
				return false;
			}

			vector<string> splitted;
			int line_cnt = 0;
			int interval_cnt = 0;
			string line;
			while (getline(infile, line))
			{
				line_cnt++;
				splitted = Utility::Split('\t', line);
				if (splitted.size() < 3)
				{
					if (!line.empty())
						cerr << "DATA ERROR, line " << line_cnt << " : less than 3 columns" << endl;
					continue;
				}

				Segment seg;
				seg.mStart = atoi(splitted[1].c_str());
				seg.mEnd = atoi(splitted[2].c_str());

				// operator[] creates the chromosome's vector on first use
				intervals[splitted[0]].push_back(seg);
				interval_cnt++;
			}

			infile.close();

			cout << interval_cnt << " intervals are loaded." << endl;

			// The whole range must be given; sort(begin, begin) would sort nothing.
			for_each_ele_in_group(iter, map_str2vec_seg, intervals)
				sort(iter->second.begin(), iter->second.end());

			return true;
		}



		/*
		 *--------------------------------------------------------------------------------------
		 *       Class:  MishMash
		 *      Method:  ExpCompare
		 * Description:  Compare the expression values of the second file with the values
		 *               and ranges of the first file. Both files are tab-separated.
		 *                   first file  : column 0 = name, columns 2, 3, 4 = mean, low and
		 *                                 high expression
		 *                   second file : column 0 = isoform name, column 1 = expression,
		 *                                 column 2 = "New" for new isoforms, column 3 = gene
		 *                                 name
		 *               Each expression of the second file is multiplied by
		 *               1000 / (total_reads / 1000000) and it is checked whether the result
		 *               lies within [low, high]. One line per compared isoform is written
		 *               to mpOutput; the counts are printed to cout.
		 *               Lines with too few columns are skipped.
		 *       Param:  hui_exp_file :  The first file.
		 *               my_exp_file  :  The second file.
		 *               total_reads  :  The total number of reads, must be positive.
		 *      Return:  false if a file can not be opened or total_reads is not positive,
		 *               true otherwise.
		 *--------------------------------------------------------------------------------------
		 */
		bool
		ExpCompare (string hui_exp_file, string my_exp_file, double total_reads)
		{
			if (total_reads <= 0)
			{
				// The scaling factor below divides by total_reads
				cerr << "The total number of reads must be positive" << endl;
				return false;
			}

			fstream infile;
			infile.open(hui_exp_file.c_str(), ios::in);
			if (!infile.is_open())
			{
				cerr << "File " << hui_exp_file << " can not be opened" << endl;
				return false;
			}

			vector<string> splitted;

			map<string, int> name_idx;
			vector<double> mean_exp;
			vector<double> low_exp;
			vector<double> high_exp;

			string line;
			while (getline(infile, line))
			{
				splitted = Utility::Split('\t', line);
				if (splitted.size() < 5) continue;

				// The index is taken from the vectors, so skipped lines leave no gap
				name_idx[splitted[0]] = (int)mean_exp.size();
				mean_exp.push_back(atof(splitted[2].c_str()));
				low_exp.push_back(atof(splitted[3].c_str()));
				high_exp.push_back(atof(splitted[4].c_str()));
			}

			infile.close();
			// The stream has hit end-of-file; reset its state before it is reused.
			infile.clear();

			infile.open(my_exp_file.c_str(), ios::in);
			if (!infile.is_open())
			{
				cerr << "File " << my_exp_file << " can not be opened" << endl;
				return false;
			}

			double ratio = 1000.0  / (total_reads / 1000000);
			int new_cnt = 0;
			int in_range_cnt = 0;
			int no_match_cnt = 0;
			int valid_cnt = 0;

			while (getline(infile, line))
			{
				splitted = Utility::Split('\t', line);
				if (splitted.size() < 4) continue;

				const string& iso_name = splitted[0];
				const string& gene_name = splitted[3];

				map<string, int>::const_iterator known = name_idx.find(iso_name);
				if (known == name_idx.end())
				{
					no_match_cnt++;
					continue;
				}

				if (splitted[2] == "New")
				{
					new_cnt++;
					continue;
				}

				valid_cnt++;
				int idx = known->second;
				double exp = atof(splitted[1].c_str()) * ratio;
				if (exp >= low_exp[idx] && exp <= high_exp[idx])
					in_range_cnt++;

				(*mpOutput) << gene_name << "\t" << iso_name << setw(15) << exp <<  setw(15) << mean_exp[idx] << setw(15)
					  << low_exp[idx] << setw(15) << high_exp[idx] << endl;

			}

			infile.close();

			cout << "Valid : In Range : New Cnt : No Match = " << 
				valid_cnt << " : " << in_range_cnt << " : " << new_cnt << " : " << no_match_cnt << endl;

			return true;
		}		/* -----  end of method ExpCompare  ----- */

		/*
		 *--------------------------------------------------------------------------------------
		 *       Class:  MishMash
		 *      Method:  LoadTranscripts 
		 * Description:  Load transcripts from a file and append them to 'transcripts',
		 *               grouped by chromosome (or by chromosome + strand). Afterwards the
		 *               transcripts of each group are sorted with Gene's operator<.
		 *               Lines with fewer than seven columns, or with a different number of
		 *               exon starts and exon ends, are reported on cerr and skipped.
		 *       Param:  tran_file  :  A tab-separated file. The columns are :
		 *                   name  chromosome  strand  range_start  range_end  exon_starts  exon_ends
		 *                   exon_starts and exon_ends are separated by commas. The columns
		 *                   range_start and range_end are not read.
		 *               transcripts       :  Receives the loaded transcripts.
		 *               b_strand_specific :  If true, the group key is the chromosome
		 *                   followed by the strand, otherwise the chromosome only.
		 *      Return:  false if the file can not be opened, true otherwise.
		 *--------------------------------------------------------------------------------------
		 */
		static
		bool
		LoadTranscripts(string tran_file, map_str2vec_gene& transcripts, bool b_strand_specific)
		{
			fstream infile;
			infile.open(tran_file.c_str(), ios::in);
			if (!infile.is_open())
			{
				cerr << "File " << tran_file << " can not be opened" << endl;
				return false;
			}

			// read the data and fill the vector
			vector<string> splitted;
			int line_cnt = 0;
			string line;
			while (getline(infile, line))
			{
				line_cnt++;
				splitted = Utility::Split('\t', line);

				// Columns 5 and 6 are read below
				if (splitted.size() < 7)
				{
					if (!line.empty())
						cerr << "DATA ERROR, line " << line_cnt << " : less than 7 columns" << endl;
					continue;
				}

				string chr = splitted[1];
				if (b_strand_specific)
					chr += splitted[2];

				vector<string> starts = Utility::Split(',', splitted[5]);
				vector<string> ends = Utility::Split(',', splitted[6]);

				if (starts.size() != ends.size())
				{
					cerr << "DATA ERROR, line " << line_cnt << " : Starts cnt != Ends cnt" << endl;
					continue;
				}

				Gene a_gene;
				a_gene.mName = splitted[0];
				a_gene.mExons.resize(starts.size());
				for (unsigned i = 0; i < starts.size(); i++)
				{
					Exon& exon = a_gene.mExons[i];
					exon.mStart = atoi(starts[i].c_str());
					exon.mEnd = atoi(ends[i].c_str());
				}
				a_gene.CalculateRange();

				// operator[] creates the group's vector on first use
				transcripts[chr].push_back(a_gene);
			}

			infile.close();

			for_each_ele_in_group(iter, map_str2vec_gene, transcripts)
				sort(iter->second.begin(), iter->second.end());

			return true;
		}		/* -----  end of method LoadTranscripts  ----- */

		/*
		 *--------------------------------------------------------------------------------------
		 *       Class:  MishMash
		 *      Method:  CompareTwoIsoforms
		 * Description:  Calculate the similarity of two isoforms from their exons :
		 *                   2 * overlap / (first length + second length)
		 *               where overlap is the sum of the overlaps of all pairs of exons (one
		 *               from each isoform) and a length is the sum of (mEnd - mStart) over
		 *               the exons of an isoform.
		 *       Param:  first_transcript, second_transcript :  The isoforms to be compared.
		 *      Return:  The similarity; 0 if the two lengths do not add up to a positive
		 *               value (the division would otherwise yield NaN or a negative score).
		 *--------------------------------------------------------------------------------------
		 */
		static
		double
		CompareTwoIsoforms(const Gene& first_transcript, const Gene& second_transcript)
		{
			const unsigned first_exon_cnt = first_transcript.mExons.size();
			const unsigned second_exon_cnt = second_transcript.mExons.size();

			int first_len = 0;
			for (unsigned i = 0; i < first_exon_cnt; i++)
				first_len += first_transcript.mExons[i].mEnd - first_transcript.mExons[i].mStart;

			int second_len = 0;
			for (unsigned j = 0; j < second_exon_cnt; j++)
				second_len += second_transcript.mExons[j].mEnd - second_transcript.mExons[j].mStart;

			const int total_len = first_len + second_len;
			if (total_len <= 0) return 0;

			double overlap_cnt = 0;
			for (unsigned i = 0; i < first_exon_cnt; i++)
			{
				int first_start = first_transcript.mExons[i].mStart;
				int first_end = first_transcript.mExons[i].mEnd;

				for (unsigned j = 0; j < second_exon_cnt; j++)
				{
					int second_start = second_transcript.mExons[j].mStart;
					int second_end = second_transcript.mExons[j].mEnd;

					// The common part of two exons is [max of starts, min of ends)
					int max_start = (first_start < second_start) ? second_start : first_start;
					int min_end = (first_end > second_end) ? second_end : first_end;

					if (min_end - max_start > 0)
						overlap_cnt += min_end - max_start;
				}
			}

			return 2 * overlap_cnt / total_len;
		}		/* -----  end of method CompareTwoIsoforms  ----- */


		/*
		 *--------------------------------------------------------------------------------------
		 *       Class:  MishMash
		 *      Method:  SplicedIsoforms 
		 * Description:  NOT IMPLEMENTED. Intended (see the help text of -spliced_iso) to
		 *               find the isoforms whose boundaries all appear in a given set of
		 *               boundaries. Only an unfinished draft exists, kept below as a
		 *               comment. The method reports this on cerr and produces no output.
		 *       Param:  boundary_file     :  The file with the boundaries (not read).
		 *               isoform_file      :  The file with the isoforms (not read).
		 *               b_strand_specific :  Strand specifically or not (not used).
		 *      Return:  Always false, because no work is done.
		 *--------------------------------------------------------------------------------------
		 */
		bool
		SplicedIsoforms(string boundary_file, string isoform_file, bool b_strand_specific)
		{
			// Unfinished draft : the last loop never uses the result of the lookup and
			// nothing is written to the output.
			/*
			map_str2vec_boundary boundaries;
			DataProcessor dp;
			if (!dp.LoadBoundary(boundary_file, boundaries)) return false;

			map_str2vec_gene isoforms;
			if (!LoadTranscripts(isoform_file, isoforms, b_strand_specific)) return false;

			for_each_ele_in_group(iter, map_str2vec_gene, isoforms)
			{
				vector<Gene>& curr_isos = iter->second;
				vector<Boundary>& curr_boundaries = boundaries[iter->first];

				set<int64> uniq_bounds;
				int64 max_bound = 0;
				for (unsigned i = 0; i < curr_boundaries.size(); i++)
					if (max_bound < curr_boundaries[i].mPos) 
						max_bound = curr_boundaries[i].mPos;
				for (unsigned i = 0; i < curr_isos.size(); i++)
					for (unsigned j = 0; j < curr_isos[i].mExons.size(); j++)
						if (max_bound < curr_isos[i].mExons[j].mEnd)
							max_bound = curr_isos[i].mExons[j].mEnd;

				for (unsigned i = 0; i < curr_boundaries.size(); i++)
				{
					int64 idx = curr_boundaries[i].mPos + max_bound * curr_boundaries[i].mType;
					uniq_bounds.insert(idx);
				}
				
				for (unsigned i = 0; i < curr_isos.size(); i++)
				{
					bool b_succ = true;
					for (unsigned j = 0; j < curr_isos[i].mExons.size(); j++)
					{
						Exon& exon = curr_isos[i].mExons[j];
						int64 idx = exon.mStart;
						if (uniq_bounds.find(idx) == uniq_bounds.end())
						int64 idx = exon.mEnd + max_bound;
					}
				}

			}  
			*/

			// Tell the user instead of silently doing nothing
			cerr << "-spliced_iso is not implemented" << endl;
			return false;
		}		/* -----  end of method SplicedIsoforms----- */


		/*
		 *--------------------------------------------------------------------------------------
		 *       Class:  MishMash
		 *      Method:  CompareIsoforms
		 * Description:  For each predicted isoform, find the benchmark isoform of the same
		 *               group (see LoadTranscripts) with the highest similarity (see
		 *               CompareTwoIsoforms) and write
		 *                   predicted name  benchmark name  similarity
		 *               to mpOutput, followed by a histogram of the similarities in steps
		 *               of 0.01. Predicted isoforms of a group without benchmark isoforms
		 *               are neither written nor counted.
		 *               NOTE: if no benchmark isoform overlaps a predicted one, the first
		 *               benchmark isoform of the group is written with similarity 0.
		 *       Param:  predict_iso_file   :  The predicted isoforms.
		 *               benchmark_iso_file :  The benchmark isoforms.
		 *               b_strand_specific  :  Compare strand specifically or not.
		 *      Return:  false if one of the files can not be loaded, true otherwise.
		 *--------------------------------------------------------------------------------------
		 */
		bool
		CompareIsoforms(string predict_iso_file, string benchmark_iso_file, bool b_strand_specific)
		{
			map_str2vec_gene benchmark_transcripts;
			map_str2vec_gene predict_transcripts;

			if (!LoadTranscripts(benchmark_iso_file, benchmark_transcripts, b_strand_specific)) return false;
			if (!LoadTranscripts(predict_iso_file, predict_transcripts, b_strand_specific)) return false;

			// dist[k] counts the predictions whose best similarity s has (int)(s * 100) == k
			vector<int> dist(101, 0);
			const int max_idx = (int)dist.size() - 1;

			double benchmark_cnt = 0;
			for_each_ele_in_group(iter, map_str2vec_gene, benchmark_transcripts)
				benchmark_cnt += iter->second.size();

			double predict_cnt = 0;
			for_each_ele_in_group(iter, map_str2vec_gene, predict_transcripts)
			{
				vector<Gene>& curr_pred_isos = iter->second;

				if (benchmark_transcripts.find(iter->first) == benchmark_transcripts.end())
					continue;

				vector<Gene>& curr_ben_isos = benchmark_transcripts[iter->first];
				if (curr_ben_isos.size() == 0) continue;

				predict_cnt += curr_pred_isos.size();

				for (unsigned i = 0; i < curr_pred_isos.size(); i++)
				{
					const Gene& pred = curr_pred_isos[i];

					double best_value = 0;
					int best_match = 0;
					for (unsigned j = 0; j < curr_ben_isos.size(); j++)
					{
						const Gene& ben = curr_ben_isos[j];

						// Isoforms with disjoint ranges can not share any exon part
						if (pred.mStart >= ben.mEnd || pred.mEnd <= ben.mStart) continue;

						double curr_value = MishMash::CompareTwoIsoforms(pred, ben);
						if (best_value < curr_value)
						{
							best_value = curr_value;
							best_match = j;
						}

						// A perfect match can not be improved
						if (1 == best_value) break;
					}

					// The similarity exceeds 1 if exons overlap within one isoform;
					// keep the index inside the histogram in that case.
					int idx = (int)(best_value * 100);
					if (idx < 0) idx = 0;
					if (idx > max_idx) idx = max_idx;
					dist[idx]++;

					(*mpOutput) << pred.mName << "\t" << curr_ben_isos[best_match].mName << "\t" << best_value << endl;
				}
			}

			(*mpOutput) << "Summary : " << endl;
			(*mpOutput) << "Total predict cnt = " << predict_cnt << endl;
			(*mpOutput) << "Total benchmark cnt = " << benchmark_cnt << endl;
			for (unsigned i = 0; i < dist.size(); i++)
			{
				// Avoid dividing by zero when no prediction has been compared
				double fraction = 0;
				if (predict_cnt > 0)
					fraction = dist[i] / predict_cnt;
				(*mpOutput) << i << "\t" << dist[i] << "\t" << fraction << endl;
			}

			return true;
		}		/* -----  end of method CompareIsoforms----- */


	private:
		// Stream that receives the results of a job. Main() points it at the file
		// given with -o, or at cout when no output file is given.
		ostream* mpOutput;

}; /* -----  end of class MishMash  ----- */


#endif
