// =====================================================================================
// 
//       Filename:  IsoInfer.cc
// 
//    Description:  This is the implementation of class IsoInfer
// 
//        Version:  1.0
//        Created:  06/08/2009 02:59:51 PM
//       Revision:  none
//       Compiler:  g++
// 
//         Author:  Jianxing Feng (feeldead), feeldead@gmail.com
//        Company:  THU
// 
// =====================================================================================

#include <map>
#include <set>
#include <list>
#include <vector>
#include <queue>
#include <iostream>
#include <string>
#include <numeric>
#include <limits>
#include <algorithm>

// For sqrt()
#include <cmath>

// For rand()
#include <time.h>

// For setw()
#include <iomanip>

// For split
#include "Utility.hpp"

#include "IsoInfer.h"
#include "InstanceWriter.h"
#include "GraphAlgorithm_Basic.hpp"
#include "glpk.h"
#include "GraphWriter.h"
#include "ReadInfoSE.hpp"

//--------------------------------------------------------------------------------------
//       Class:  IsoInfer
//      Method:  IsoInfer
// Description:  Constructor. Stores the solver pointer and sets the default options.
//  Parameters:  p_solver  :  Stored in mpSolver. It is not deleted by this class.
//               p_output  :  Forwarded to the InstanceHandler base class.
//--------------------------------------------------------------------------------------
IsoInfer::IsoInfer (ExpEstimator* p_solver, ostream* p_output) : InstanceHandler(p_output)
{
	mpSolver = p_solver;

	// Both members are (re)assigned at the top of every OnInstance() call. Give
	// them a defined value here as well so that nothing reads an indeterminate
	// value before the first instance arrives.
	mpInstance = 0;
	mMaxValidIsoformCnt = 1000;

	mbEnableStartEnd = true;
	mbEnableStepII = true;
	mMinEffectivePartCombDup = 1;
	mOmittedJunctionCnt = 0;
	mConfidenceLevel = 0.05;

	// InferNew() switches from BestCombination() to PartitionAndSearch() once
	// the number of segments reaches this value.
	mPartitionSize = 10;
} 							   /* constructor      */

// Destructor. Nothing is released here; in particular mpSolver is not deleted.
IsoInfer::~IsoInfer ()
{
}                             /* destructor       */

// Initialization hook (declared virtual). This class has nothing to set up,
// so the body is intentionally empty.
/*virtual*/
void IsoInfer::Initialize()
{
	// Intentionally left blank.
}

//--------------------------------------------------------------------------------------
//       Class:  IsoInfer
//      Method:  OnInstance
// Description:  Handle one instance. The splice graph of the instance (without source
//               and sink) is decomposed into connected components, every component
//               is solved independently by OnSubInstance(), and the results are
//               collected in mValidIsoforms / mSolution using the segment indexes of
//               the whole instance. Finally InstanceHandler::OnInstance() is called.
//  Parameters:  an_instance  :  The instance to handle. A pointer to it is kept in
//                   mpInstance.
//        Note:  An instance without any segment is forwarded to the base class
//               directly, with empty mValidIsoforms and mSolution.
//--------------------------------------------------------------------------------------
/*virtual*/
void
IsoInfer::OnInstance(Instance& an_instance)
{
	mpInstance = &an_instance;
	mMaxValidIsoformCnt = 1000;

	mValidIsoforms.clear();
	mSolution.clear();

	const vector<int>& set_sizes = an_instance.mSegLen;

	// Nothing can be inferred without segments. Bail out before any of the
	// code below indexes into empty containers.
	if (set_sizes.empty())
	{
		InstanceHandler::OnInstance(an_instance);
		return;
	}

	// These two are copied because they are rebuilt below when mbEnableStartEnd is off
	vector<vector<int> > start_exons = an_instance.mStartExons;
	vector<vector<int> > end_exons = an_instance.mEndExons;
	vector<Exon>& exons = an_instance.mExons;
	const ShortReadGroup& short_reads = an_instance.mShortReadGroup;
	vector<vector<double> > junc_cnt;
	short_reads.JuncCnt(set_sizes.size(), junc_cnt);

	// Always connect two consecutive segments of an exon if the
	// boundary has the same type. However, this step may introduce false
	// positives if a segment should be an intron but is falsely kept as an
	// expressed segment
	vector<vector<bool> > b_adjacent;
	b_adjacent.resize(set_sizes.size());
	for (unsigned i = 0; i < b_adjacent.size(); ++i)
		b_adjacent[i].assign(set_sizes.size(), false);

	// "i + 1 < size" instead of "i < size - 1": the latter wraps around for size == 0
	for (unsigned i = 0; i + 1 < b_adjacent.size(); ++i)
	{
		// In this case exons[i].mEndType == exons[i+1].mStartType)
		// NOTE(review): the meaning of end type 2 is not visible in this file
		if (exons[i].mEnd == exons[i+1].mStart && exons[i].mEndType != 2)
			b_adjacent[i][i+1] = true;
	}
	AddAdjacentJuncRead(b_adjacent, junc_cnt);
	
	if (!mbEnableStartEnd)
	{
		// Ignore the given start/end segments and derive a single pair of
		// start/end lists from the junction counts instead
		start_exons.clear();
		end_exons.clear();

		// Only segments with 0 in degree can be the start segments
		// Only segments with 0 out degree can be the end segments
		vector<int> a_vec;
		for (unsigned i = 0; i < junc_cnt.size(); i++)
		{
			bool b_succ = true;
			for (unsigned j = 0; j < i; j++)
				if (junc_cnt[j][i] > 0)
				{
					b_succ = false;
					break;
				}
			if (b_succ)
				a_vec.push_back(i);
		}
		start_exons.push_back(a_vec);
		a_vec.clear();

		for (unsigned i = 0; i < junc_cnt.size(); i++)
		{
			bool b_succ = true;
			for (unsigned j = i+1; j < junc_cnt.size(); j++)
				if (junc_cnt[i][j] > 0)
				{
					b_succ = false;
					break;
				}
			if (b_succ)
				a_vec.push_back(i);
		}
		end_exons.push_back(a_vec);
	}

	// start_exons[i] and end_exons[i] form a pair; never index past the shorter list
	const size_t pair_cnt = std::min(start_exons.size(), end_exons.size());

	vector<vector<bool> >& known_isoforms = mKnownIsoforms;

	GraphEx<int> valid_graph;
	int source;
	int sink;
	BuildSpliceGraph(valid_graph, junc_cnt, start_exons, end_exons, source, sink);

	list<set<int> > components;
	vector<int> source_sink;
	source_sink.push_back(source);
	source_sink.push_back(sink);

	// Source and sink are masked before the undirected components are computed
	valid_graph.MaskNode(source_sink, true);
	valid_graph.SetDirected(false);
	GraphAlgorithm_Basic::Components(&valid_graph, components);

	cout << "It has been decomposed into " << components.size() << " subinstances" << endl;

	// Each connected component corresponds to a subinstance
	for_each_ele_in_group(iter, list<set<int> >, components)
	{
		// External (segment) IDs of the nodes of this component, ascending
		vector<int> sub_set;
		for_each_ele_in_group(iter2, set<int>, *iter)
		{
			int in_id = *iter2;
			if (in_id != source && in_id != sink)
			{
				int ex_id = valid_graph.GetNodeExID(in_id);
				sub_set.push_back(ex_id);
			}
		}
		sort(sub_set.begin(), sub_set.end());

		cout << "Sub instance : ";
		for (unsigned i = 0; i < sub_set.size(); i++)
			cout << sub_set[i] << "\t";
		cout << endl;

		// Segment index in the whole instance -> index in the subinstance (-1 : not included)
		vector<int> map_to_flat_idx;
		map_to_flat_idx.assign(set_sizes.size(), -1);
		for (unsigned i = 0; i < sub_set.size(); i++)
			map_to_flat_idx[sub_set[i]] = i;

		// Project the start/end pairs onto the subinstance. Pairs that lose all
		// their start or end segments and pairs that repeat an earlier pair are dropped.
		vector<vector<int> > start_exons_pro;
		vector<vector<int> > end_exons_pro;
		for (size_t i = 0; i < pair_cnt; i++)
		{
			vector<int> starts;
			for (unsigned j = 0; j < start_exons[i].size(); j++)
				if (-1 != map_to_flat_idx[start_exons[i][j]])
					starts.push_back(map_to_flat_idx[start_exons[i][j]]);

			vector<int> ends;
			for (unsigned j = 0; j < end_exons[i].size(); j++)
				if (-1 != map_to_flat_idx[end_exons[i][j]])
					ends.push_back(map_to_flat_idx[end_exons[i][j]]);

			if (starts.size() == 0 || ends.size() == 0) continue;

			bool b_exist = false;
			for (unsigned j = 0; j < start_exons_pro.size(); j++)
			{
				if (starts == start_exons_pro[j] && ends == end_exons_pro[j])
				{
					b_exist = true;
					break;
				}
			}

			if (!b_exist)
			{
				start_exons_pro.push_back(starts);
				end_exons_pro.push_back(ends);
			}
		}

		// A component without any start/end pair is skipped
		if (start_exons_pro.size() == 0) continue;

		vector<int> set_sizes_pro;
		ShortReadGroup short_reads_pro;

		Project(set_sizes, set_sizes_pro, short_reads, short_reads_pro, sub_set);

		vector<vector<bool> > known_isoforms_pro;
		vector<vector<int> > known_isoforms_group;
		Project(known_isoforms, known_isoforms_pro, sub_set, known_isoforms_group);

		vector<vector<bool> > valid_isoforms;
		vector<int> solution;
		vector<vector<bool> > b_adjacent_pro;
		b_adjacent_pro.resize(set_sizes_pro.size());
		for (unsigned i = 0; i < set_sizes_pro.size(); ++i)
			b_adjacent_pro[i].assign(set_sizes_pro.size(), false);

		for (unsigned i = 0; i < sub_set.size(); ++i)
			for (unsigned j = 0; j < sub_set.size(); ++j)
			{
				if (b_adjacent[sub_set[i]][sub_set[j]])
					b_adjacent_pro[i][j] = true;
			}
		OnSubInstance (set_sizes_pro, start_exons_pro, end_exons_pro, b_adjacent_pro, short_reads_pro, known_isoforms_pro, valid_isoforms, solution);

		// Indexes in solution refer to valid_isoforms; shift them by the number
		// of isoforms collected from the previous components
		int old_size = mValidIsoforms.size();

		// Map the isoforms of the subinstance back to the segments of the whole instance
		for (unsigned i = 0; i < valid_isoforms.size(); i++)
		{
			vector<bool> an_iso;
			an_iso.assign(set_sizes.size(), false);
			for (unsigned j = 0; j < valid_isoforms[i].size(); j++)
				an_iso[sub_set[j]] = valid_isoforms[i][j];
			mValidIsoforms.push_back(an_iso);
		}

		for (unsigned i = 0; i < solution.size(); i++)
			mSolution.push_back(solution[i] + old_size);
	}
	
	InstanceHandler::OnInstance(an_instance);
}

// Add one junction read to adjacent junctions
// b_adjacent[i] = true iff segment i and i+1 are adjacent
void
IsoInfer::AddAdjacentJuncRead(const vector<vector<bool> >& b_adjacent, vector<vector<double> >& junc_cnt)
{
	for (unsigned i = 0; i < b_adjacent.size(); ++i)
		for (unsigned j = 0; j < b_adjacent.size(); ++j)
			if (b_adjacent[i][j] && junc_cnt[i][j] == 0) junc_cnt[i][j] = 1;
}

void
IsoInfer::OnSubInstance ( const vector<int>& set_sizes,
							const vector<vector<int> >& start_exons, 
							const vector<vector<int> >& end_exons,
							const vector<vector<bool> >& b_adjacent,
							const ShortReadGroup& short_reads, 
							const vector<vector<bool> >& known_isoforms,
							vector<vector<bool> >& valid_isoforms_expanded,
							vector<int>& solution)
{
	vector<int> shrink_map1;
	vector<int> set_sizes_shrinked1;
	vector<vector<bool> > b_adjacent_shrinked1;
	ShortReadGroup short_reads_shrinked1;
	vector<vector<int> > start_exons_shrinked1;
	vector<vector<int> > end_exons_shrinked1;

	vector<vector<bool> > valid_isoforms;
	vector<int> valid_isoform_order;

	vector<vector<double> > junc_cnt;
	short_reads.JuncCnt(set_sizes.size(), junc_cnt);
	AddAdjacentJuncRead(b_adjacent, junc_cnt);

	CalculateShrink(junc_cnt, start_exons, end_exons, shrink_map1);
	
	ShrinkInstance(shrink_map1, 
					set_sizes, set_sizes_shrinked1, 
					b_adjacent, b_adjacent_shrinked1,
					short_reads, short_reads_shrinked1,
					start_exons, start_exons_shrinked1, 
					end_exons, end_exons_shrinked1);

	vector<vector<bool> > known_isoforms_shrinked1;
	ShrinkIsoform(shrink_map1, known_isoforms, known_isoforms_shrinked1);

	EnumerateValidByExpLevel(start_exons_shrinked1, end_exons_shrinked1, set_sizes_shrinked1, 
			short_reads_shrinked1, b_adjacent_shrinked1, valid_isoforms, valid_isoform_order);

//	for (unsigned i = 0; i < valid_isoforms.size(); i++)
//	{
//		for (unsigned j = 0; j < valid_isoforms[i].size(); j++)
//			cout << valid_isoforms[i][j] << " ";
//		cout << endl;
//	}

	//Remove valid_isoforms that are known
	int cnt = 0;
	for (unsigned i = 0; i < valid_isoforms.size(); i++)
	{
		bool b_known = false;
		for (unsigned j = 0; j < known_isoforms_shrinked1.size(); j++)
			if (valid_isoforms[i] == known_isoforms_shrinked1[j])
			{
				b_known = true;
				break;
			}

		if (!b_known)
			valid_isoforms[cnt++] = valid_isoforms[i];
	}
	valid_isoforms.resize(cnt);

	mWeight.assign(valid_isoforms.size(), 1);

	// Append known_isoforms to the valid_isoforms
	vector<bool> b_known_isoforms;
	b_known_isoforms.assign(valid_isoforms.size(), false);
	for (unsigned i = 0; i < known_isoforms_shrinked1.size(); i++)
	{
		valid_isoforms.push_back(known_isoforms_shrinked1[i]);
		b_known_isoforms.push_back(true);
		mWeight.push_back(0);
	}

	vector<int> shrink_map2;
	vector<int> set_sizes_shrinked2;
	vector<vector<bool> > b_adjacent_shrinked2;
	vector<vector<bool> > valid_isoforms_shrinked2;
	ShortReadGroup short_reads_shrinked2;
	vector<vector<int> > start_exons_shrinked2;
	vector<vector<int> > end_exons_shrinked2;
	vector<double> exp_shrinked2;

	if (valid_isoforms.size() != 0)
	{

//		shrink_map2.resize(valid_isoforms[0].size());
//		for (unsigned i = 0; i < shrink_map2.size(); i++)
//			shrink_map2[i] = i;

		// Shrink the instance according to valid isoforms.
		CalculateShrink(valid_isoforms, shrink_map2);

		ShrinkInstance(shrink_map2,
					set_sizes_shrinked1, set_sizes_shrinked2, 
					b_adjacent_shrinked1, b_adjacent_shrinked2, 
					short_reads_shrinked1, short_reads_shrinked2,
					start_exons_shrinked1, start_exons_shrinked2, 
					end_exons_shrinked1, end_exons_shrinked2);

		ShrinkIsoform(shrink_map2, valid_isoforms, valid_isoforms_shrinked2);

		// It is possible that some start-end exon pair does not appear in valid_isoforms.
		// Therefore, reextract start-end exon pair from shrinked valid isoforms.

		cout << "ShrinkInstance : from " << set_sizes.size() << " to " << shrink_map1.size()
			<< " to " << shrink_map2.size() << " with " << valid_isoforms_shrinked2.size() 
			<< " valid isoforms." << endl;

		// Merge shrink_map1 and shrink_map2
		for (unsigned i = 0; i < shrink_map1.size(); i++)
			shrink_map1[i] = shrink_map2[shrink_map1[i]];

		/*
		// Debug
		cout << "Map1 : ";
		for (unsigned i = 0; i < shrink_map1.size(); i++)
			cout << shrink_map1[i] << "\t";
		cout << endl;
		cout << "Map2 : ";
		for (unsigned i = 0; i < shrink_map2.size(); i++)
			cout << shrink_map2[i] << "\t";
		cout << endl;
		for (unsigned i = 0; i < valid_isoforms.size(); i++)
		{
			for (unsigned j = 0; j < valid_isoforms[i].size(); j++)
				cout << valid_isoforms[i][j] << " ";
			cout << endl;
		}
		for (unsigned i = 0; i < pe_info_shrinked2[0].mPartComb.size(); i++)
		{
			for (unsigned j = 0; j < pe_info_shrinked2[0].mPartComb[i].size(); j++)
				cout << pe_info_shrinked2[0].mPartComb[i][j] << " ";
			cout << pe_info_shrinked2[0].mPartCombDup[i] << endl;
		}
		  */

		if (mbEnableStepII)
			InferNew(set_sizes_shrinked2, valid_isoforms_shrinked2, b_known_isoforms, 
					start_exons_shrinked2, end_exons_shrinked2, short_reads_shrinked2, solution);

		ExpandIsoforms(shrink_map1, valid_isoforms_shrinked2, valid_isoforms_expanded);
	}
}

//--------------------------------------------------------------------------------------
//       Class:  IsoInfer
//      Method:  CalculateShrink
// Description:  Given junction information, calculate the shrinking
//  Parameters:  shrink_map[i] stores the exon index in the new instance 
//               corresponding to i'th exon in the original instance. 
//        Note:  The shrinking will be consistent with known_isoforms
//      Return:  The size after shrinking
//--------------------------------------------------------------------------------------
int 
IsoInfer::CalculateShrink ( const vector<vector<double> >& junc_cnt, const vector<vector<int> >& start_exons, 
							  const vector<vector<int> >& end_exons, vector<int>& shrink_map)
{
	GraphEx<int> valid_graph;
	int source;
	int sink;
	BuildSpliceGraph(valid_graph, junc_cnt, start_exons, end_exons, source, sink);

	vector<bool> b_start_exons;
	b_start_exons.assign(junc_cnt.size(), false);
	for (unsigned i = 0; i < start_exons.size(); ++i)
		for (unsigned j = 0; j < start_exons[i].size(); ++j)
			b_start_exons[start_exons[i][j]] = true;

	vector<bool> b_end_exons;
	b_end_exons.assign(junc_cnt.size(), false);
	for (unsigned i = 0; i < end_exons.size(); ++i)
		for (unsigned j = 0; j < end_exons[i].size(); ++j)
			b_end_exons[end_exons[i][j]] = true;

	int largest_idx = 0;
	shrink_map.resize(junc_cnt.size());
	for (unsigned i = 0; i < junc_cnt.size(); i++)
	{
		shrink_map[i] = largest_idx;
		// All the start exons should have not be merged with their previous nodes
		if (!b_start_exons[i] && i > 0)
		{
			int curr_node = valid_graph.GetNodeInID(i);
			const int* in_edges = valid_graph.InEdges(curr_node);
			if (valid_graph.InDegree(curr_node) == 1)
			{
				int prev_node = valid_graph.FromNode(in_edges[0]);
				if (valid_graph.OutDegree(prev_node) == 1)
				{
					int exon_id = valid_graph.GetNodeExID(prev_node);
					// Note that the order don't have to be kept.
					// If the previous node of current exon is an end exon, do not
					// merge current exon with its previous node.
					if (exon_id >= 0 && exon_id < junc_cnt.size() && !b_end_exons[exon_id])
						shrink_map[i] = shrink_map[exon_id];
				}
			}
		}
		if (shrink_map[i] == largest_idx) largest_idx++;
	}
	return largest_idx;
}

//--------------------------------------------------------------------------------------
//       Class:  IsoInfer
//      Method:  CalculateShrink
// Description:  Derive a shrinking from a set of isoforms. Two consecutive exons
//               fall into the same group iff every isoform either contains both
//               of them or contains none of them.
//  Parameters:  isoforms  :  isoforms[k][i] is true iff isoform k contains exon i.
//                   Must not be empty; the exon count is taken from isoforms[0].
//               shrink_map[o]  :  shrink_map[i] is the exon index in the new
//                   instance corresponding to i'th exon in the original instance.
//      Return:  The size after shrinking
//--------------------------------------------------------------------------------------
int
IsoInfer::CalculateShrink ( const vector<vector<bool> >& isoforms, vector<int>& shrink_map)
{
	ASSERT((isoforms.size() > 0), "There are 0 isoforms used to calculate shrinking");

	const int set_cnt = isoforms[0].size();
	shrink_map.resize(set_cnt);

	int group_idx = 0;
	// Only shrink consecutive exons.
	for (int col = 0; col < set_cnt; col++)
	{
		if (col > 0)
		{
			// Do exon col-1 and exon col have the same membership in all isoforms?
			bool b_same = true;
			for (unsigned row = 0; row < isoforms.size() && b_same; row++)
				b_same = (isoforms[row][col-1] == isoforms[row][col]);

			if (!b_same)
				group_idx++;
		}
		shrink_map[col] = group_idx;
	}
	return group_idx + 1;
}


//--------------------------------------------------------------------------------------
//       Class:  IsoInfer
//      Method:  ShrinkInstance
// Description:  If a set of consecutive exons always appear or disappear together
//				 in the given isoforms, they can be treated as a single "exon". This should 
//				 improve the result and help to handle instances with a large number 
//				 of exons. 
//  Parameters:  ...   :  The original instance
//               ..._shrinked :  The shrinked instance
//               isoforms  : Normally, pass all valid_isoforms to this parameter. It will
//                   be used to determine which exons should be grouped together.
//        Note:  
//--------------------------------------------------------------------------------------
	void
IsoInfer::ShrinkInstance (  const vector<int>& shrink_map,
							const vector<int>& seg_lens, vector<int>& seg_lens_shrinked, 
							const vector<vector<bool> >& b_adjacent, vector<vector<bool> >& b_adjacent_shrinked,
							const ShortReadGroup& short_reads, ShortReadGroup& short_reads_shrinked,
							const vector<vector<int> >& start_exons, vector<vector<int> >& start_exons_shrinked, 
							const vector<vector<int> >& end_exons, vector<vector<int> >& end_exons_shrinked)
{
	// Note that shrink_map could be like :
	// 0 0 0 1 1 2 2 1 1 2 2 3 3
	
	vector<vector<int> > grouped_exons;
	for (unsigned i = 0; i < shrink_map.size(); i++)
	{
		if (grouped_exons.size() <= shrink_map[i]) 
			grouped_exons.resize(shrink_map[i]+1);
		grouped_exons[shrink_map[i]].push_back(i);
	}

	int new_size = grouped_exons.size();

	seg_lens_shrinked.resize(new_size);
	for (unsigned i = 0; i < grouped_exons.size(); i++)
	{
		//DEBUG
		assert(grouped_exons[i].size() > 0);

		int exon_len = 0;
		for (int j = 0; j < grouped_exons[i].size(); j++)
		{
			int idx = grouped_exons[i][j];
			if (seg_lens[idx] > 0)
				exon_len += seg_lens[idx];
		}

		seg_lens_shrinked[i] = exon_len;
	}

	b_adjacent_shrinked.resize(new_size);
	for (unsigned i = 0; i < new_size; ++i)
	{
		b_adjacent_shrinked[i].resize(new_size);
		for (unsigned j = i+1; j < new_size; ++j)
		{
			int from = grouped_exons[i][grouped_exons[i].size()-1];
			int to = grouped_exons[j][0];
			if (b_adjacent[from][to])
				b_adjacent_shrinked[i][j] = true;
			// Note that it is impossible b_adjacent_shrinked[i][j] = true if i > j.
		}
	}

	short_reads_shrinked = short_reads;
	short_reads_shrinked.Shrink(shrink_map);

	// For start_exons_shrinked and end_exons_shrinked
	start_exons_shrinked.resize(start_exons.size());
	end_exons_shrinked.resize(end_exons.size());

	for (unsigned i = 0; i < start_exons.size(); i++)
	{
		set<int> indexes;
		for (unsigned j = 0; j < start_exons[i].size(); j++)
			indexes.insert(shrink_map[start_exons[i][j]]);
		start_exons_shrinked[i].assign(indexes.begin(), indexes.end());

		indexes.clear();
		for (unsigned j = 0; j < end_exons[i].size(); j++)
			indexes.insert(shrink_map[end_exons[i][j]]);
		end_exons_shrinked[i].assign(indexes.begin(), indexes.end());

		// Do current start and end exons appear before?
		for (unsigned j = 0; j < i; j++)
		{
			if (start_exons_shrinked[j] == start_exons_shrinked[i] &&
			    end_exons_shrinked[j] == end_exons_shrinked[i])
			{
				set<int> uniq;
				for (unsigned k = 0; k < start_exons_shrinked[j].size(); k++)
					uniq.insert(start_exons_shrinked[j][k]);
				for (unsigned k = 0; k < start_exons_shrinked[i].size(); k++)
					uniq.insert(start_exons_shrinked[i][k]);
				start_exons_shrinked[j].assign(uniq.begin(), uniq.end());

				uniq.clear();
				for (unsigned k = 0; k < end_exons_shrinked[j].size(); k++)
					uniq.insert(end_exons_shrinked[j][k]);
				for (unsigned k = 0; k < end_exons_shrinked[i].size(); k++)
					uniq.insert(end_exons_shrinked[i][k]);
				end_exons_shrinked[j].assign(uniq.begin(), uniq.end());

				start_exons_shrinked[i].clear();
				end_exons_shrinked[i].clear();
			}
		}
	}

	int cnt = 0;
	for (unsigned i = 0; i < start_exons_shrinked.size(); i++)
	{
		if (start_exons_shrinked[i].size() != 0 && end_exons_shrinked[i].size() != 0)
		{
			start_exons_shrinked[cnt] = start_exons_shrinked[i];
			end_exons_shrinked[cnt] = end_exons_shrinked[i];
			cnt++;
		}
	}

	return ;
}		// -----  end of method IsoInfer::ShrinkInstance  -----

//--------------------------------------------------------------------------------------
//       Class:  IsoInfer
//      Method:  ExtractStartEndExons
// Description:  Extract the start-end exon pair of each isoform
//  Parameters:  isoforms  :  isoforms[k][i] is true iff isoform k contains exon i.
//                   The exon count is taken from isoforms[0].
//               start_exons[o]  :  start_exons[k] is the first exon of isoform k.
//               end_exons[o]    :  end_exons[k] is the last exon of isoform k.
//        Note:  For an isoform without any exon, the pair (0, exon_cnt - 1) is
//               reported. Both outputs are empty if isoforms is empty.
//--------------------------------------------------------------------------------------
	void
IsoInfer::ExtractStartEndExons(const vector<vector<bool> >& isoforms, vector<int>& start_exons,
								 vector<int>& end_exons)
{
	start_exons.resize(isoforms.size());
	end_exons.resize(isoforms.size());

	// Without isoforms there is no isoforms[0] to take the exon count from
	if (isoforms.empty()) return;

	const int exon_cnt = isoforms[0].size();
	for (unsigned iso_cnt = 0; iso_cnt < isoforms.size(); iso_cnt++)
	{
		int start = 0;
		int end = exon_cnt - 1;
		for (int start_exon = 0; start_exon < exon_cnt; start_exon++)
			if (isoforms[iso_cnt][start_exon])
			{
				start = start_exon;
				break;
			}

		for (int end_exon = exon_cnt - 1; end_exon >= 0; end_exon--)
			if (isoforms[iso_cnt][end_exon])
			{
				end = end_exon;
				break;
			}
		// NOTE(review): because of the defaults above, this only fails when
		// exon_cnt is 0; an isoform without exons passes with (0, exon_cnt - 1).
		// Tightening it is not done here because isoforms emptied on purpose
		// exist elsewhere in this file (see PartitionAndSearch) - confirm with
		// the callers before changing.
		ASSERT((start < exon_cnt && end >= 0), "An empty isoform is encountered");
		start_exons[iso_cnt] = start;
		end_exons[iso_cnt] = end;
	}
}

//--------------------------------------------------------------------------------------
//       Class:  IsoInfer
//      Method:  ShrinkIsoform
// Description:  Apply a shrinking to a set of isoforms. An isoform is consistent
//               with the shrinking iff all the exons mapped to the same new index
//               are either all contained or all not contained in the isoform.
//  Parameters:  shrink_map  :  shrink_map[i] is the new index of i'th exon.
//               isoforms    :  isoforms[k][i] is true iff isoform k contains exon i.
//               isoforms_shrinked[o]  :  The shrinked consistent isoforms, in
//                   their original order.
//        Note:  An inconsistent isoform is skipped and a warning is written to
//               cerr. Therefore isoforms_shrinked may be shorter than isoforms.
//--------------------------------------------------------------------------------------
	void
IsoInfer::ShrinkIsoform( const vector<int>& shrink_map, const vector<vector<bool> >& isoforms, 
							vector<vector<bool> >& isoforms_shrinked)
{
	// The exon count after shrinking is the largest new index plus one
	int new_exon_cnt = 0;
	for (unsigned i = 0; i < shrink_map.size(); i++)
		if (new_exon_cnt < shrink_map[i]) new_exon_cnt = shrink_map[i];
	new_exon_cnt++;

	isoforms_shrinked.clear();
	for (unsigned i = 0; i < isoforms.size(); i++)
	{
		// Check whether the shrinking is consistent with the existing isoforms 
		bool succ = true;
		vector<bool> b_visited;
		vector<bool> an_iso;
		b_visited.assign(new_exon_cnt, false);
		an_iso.assign(new_exon_cnt, false);

		for (unsigned j = 0; j < shrink_map.size(); j++)
		{
			int idx = shrink_map[j];
			if (b_visited[idx] && an_iso[idx] != isoforms[i][j])
			{
				succ = false; break;
			}

			an_iso[idx] = isoforms[i][j];
			// Remember that this group has a value now. Without this flag the
			// check above can never fire.
			b_visited[idx] = true;
		}

		if (!succ)
		{
			cerr << "WARNING : isoform " << i << " is inconsistent with the shrinking. This isoform is skipped." << endl;
			continue;
		}

		isoforms_shrinked.push_back(an_iso);
	}
	return ;
}		// -----  end of method IsoInfer::ShrinkIsoform  -----

//--------------------------------------------------------------------------------------
//       Class:  IsoInfer
//      Method:  ExpandIsoforms
// Description:  Given a set of isoforms based on the shrinked instance, this method 
//               expands it to the isoforms for the original problem.
//  Parameters:
//--------------------------------------------------------------------------------------
	void
IsoInfer::ExpandIsoforms ( const vector<int>& shrink_map, const vector<vector<bool> >& isoforms_shrinked, 
							vector<vector<bool> >& isoforms_expanded )
{
	isoforms_expanded.resize(isoforms_shrinked.size());
	for (unsigned i = 0; i < isoforms_shrinked.size(); i++)
	{
		isoforms_expanded[i].resize(shrink_map.size());
		for (int j = 0; j < shrink_map.size(); j++)
			isoforms_expanded[i][j] = isoforms_shrinked[i][shrink_map[j]];
	}

	return ;
}		// -----  end of method IsoInfer::ExpandIsoforms  -----



//--------------------------------------------------------------------------------------
//       Class:  IsoInfer
//      Method:  BuildSpliceGraph
// Description:  Build the splice graph based on splice reads information and the start
//               and end exon information. The graph contains one node per exon
//               (external ID = exon index), a source (external ID -1) and a sink
//               (external ID -2).
//  Parameters:  junc_cnt     :  An edge i -> j (i < j) is added iff junc_cnt[i][j]
//                   is at least mMinEffectivePartCombDup. The exon count is the
//                   length of junc_cnt[0]; an empty junc_cnt means no exon.
//               start_exons  :  All possible start exons. An edge from the source
//                   is added to each of them.
//               end_exons    :  All possible end exons. An edge to the sink is
//                   added from each of them.
//               source[o]    :  The internal ID of the source in the graph
//               sink[o]      :  The internal ID of the sink in the graph
//--------------------------------------------------------------------------------------
	void
IsoInfer::BuildSpliceGraph ( GraphEx<int>& splice_graph, const vector<vector<double> >& junc_cnt, 
		const vector<vector<int> >& start_exons, const vector<vector<int> >& end_exons, int& source, int& sink)
{
	// Do not touch junc_cnt[0] when there is no row at all
	const int set_cnt = junc_cnt.empty() ? 0 : static_cast<int>(junc_cnt[0].size());

	splice_graph.BeginAddNodeOrEdge();
	for (int i = 0; i < set_cnt; i++)
		splice_graph.AddNodeEx(i);

	// External IDs of the two artificial nodes
	source = -1;
	sink = -2;

	splice_graph.AddNodeEx(source); // source;
	splice_graph.AddNodeEx(sink); // sink;

	// Add edges according to splice information.
	// The external ID of edge i -> j is i * set_cnt + j.
	for (int i = 0; i < set_cnt; i++)
		for (int j = i+1; j < set_cnt; j++)
			if (junc_cnt[i][j] >= mMinEffectivePartCombDup) splice_graph.AddEdgeEx(i, j, i * set_cnt + j);

	// Each list is flattened with its own bound, so lists of different
	// lengths cannot cause an out-of-range access
	set<int> flat_start_exons;
	for (unsigned i = 0; i < start_exons.size(); i++)
		for (unsigned j = 0; j < start_exons[i].size(); j++)
			flat_start_exons.insert(start_exons[i][j]);

	set<int> flat_end_exons;
	for (unsigned i = 0; i < end_exons.size(); i++)
		for (unsigned j = 0; j < end_exons[i].size(); j++)
			flat_end_exons.insert(end_exons[i][j]);

	// The IDs below set_cnt*set_cnt are reserved for the junction edges
	int ex_edge_id = set_cnt*set_cnt;

	// Add edges from the source to each exon that can be the first exon
	for_each_ele_in_group(iter, set<int>, flat_start_exons)
		splice_graph.AddEdgeEx(source, *iter, ex_edge_id++);
	// Add edges from each exon that can be the end exon to the sink 
	for_each_ele_in_group(iter, set<int>, flat_end_exons)
		splice_graph.AddEdgeEx(*iter, sink, ex_edge_id++);

	splice_graph.EndAddNodeOrEdge();

	// Hand back the internal IDs instead of the external ones
	source = splice_graph.GetNodeInID(source);
	sink = splice_graph.GetNodeInID(sink);

	return ;
}		// -----  end of method IsoInfer::BuildSpliceGraph  -----

//--------------------------------------------------------------------------------------
//       Class:  IsoInfer
//      Method:  InferNew
// Description:  Infer isoforms from the given valid isoforms. An instance with
//               fewer than mPartitionSize sets is handed to BestCombination()
//               directly; any other instance goes through PartitionAndSearch().
//  Parameters:  valid_isoforms  :  All possible valid isoforms, the inferred isoforms 
//                   will only be generated from this set.
//               solution[o]     :  Filled by the method the instance is handed to.
//--------------------------------------------------------------------------------------
void
IsoInfer::InferNew (const vector<int>& set_sizes, 
					  const vector<vector<bool> >& valid_isoforms,
					  const vector<bool>& b_known_isoforms,
					  const vector<vector<int> >& start_exons, 
					  const vector<vector<int> >& end_exons, 
					  const ShortReadGroup& short_reads,
					  vector<int>& solution)
{
	const bool b_need_partition = !(set_sizes.size() < mPartitionSize);

	if (b_need_partition)
	{
		PartitionAndSearch(set_sizes, valid_isoforms, b_known_isoforms, start_exons, end_exons, short_reads, solution);
		return;
	}

	BestCombination(set_sizes, valid_isoforms, b_known_isoforms, start_exons, end_exons, short_reads, true, solution);
}


//--------------------------------------------------------------------------------------
//       Class:  IsoInfer
//      Method:  PartitionAndSearch
// Description:  Partition the whole instance into sub instances and combine the result.
//               For every window size from max(mPartitionSize-5, 5) to
//               mPartitionSize, a window is moved over the sets. The valid
//               isoforms are projected to each window and BestCombination() is
//               called on the projection. An isoform gains priority every time
//               its projection is selected. After each window size, isoforms with
//               a priority below 1 are removed, lowest priority first, as long as
//               the initial set cover stays feasible. Finally a weighted set
//               cover is solved on the remaining isoforms.
//  Parameters:  set_sizes, valid_isoforms, b_known_isoforms, start_exons,
//               end_exons, short_reads  :  The instance.
//               solution[o]  :  Indexes into valid_isoforms.
//        Note:  The leading entries of mWeight are overwritten with the weights of
//               the remaining isoforms.
//--------------------------------------------------------------------------------------
void
IsoInfer::PartitionAndSearch (  const vector<int>& set_sizes, 
								const vector<vector<bool> >& valid_isoforms,	
								const vector<bool>& b_known_isoforms,
								const vector<vector<int> >& start_exons, 
								const vector<vector<int> >& end_exons, 
								const ShortReadGroup& short_reads,
								vector<int>& solution)
{
	vector<vector<bool> > valid_isoforms_filtered = valid_isoforms;

	// sc_sets_init[i] : the elements covered by i'th valid isoform
	vector<vector<int> > sc_sets_init;
	int largest_ele = ConstructSetCover(set_sizes, valid_isoforms, start_exons, end_exons, short_reads, true, sc_sets_init);

	// How many of the remaining isoforms cover each element
	vector<int> ele_covered_times;
	ele_covered_times.assign(largest_ele+1, 0);
	for (unsigned i = 0; i < sc_sets_init.size(); i++)
		for (unsigned j = 0; j < sc_sets_init[i].size(); j++)
			ele_covered_times[sc_sets_init[i][j]]++;

	vector<bool> removed_isoforms;
	removed_isoforms.assign(valid_isoforms.size(), false);

	vector<double> valid_isoforms_priority;
	valid_isoforms_priority.assign(valid_isoforms_filtered.size(), 0);

	int start_part_size = mPartitionSize-5;
	if (start_part_size < 5) start_part_size = 5;
	for (int part_size = start_part_size; part_size <= mPartitionSize; part_size++)
	{
		// The priorities are recomputed from scratch for every window size
		valid_isoforms_priority.assign(valid_isoforms_filtered.size(), 0);

		// "pro_start + part_size <= size" instead of "pro_start <= size - part_size":
		// the latter wraps around if the window is larger than the instance
		for (unsigned pro_start = 0; pro_start + part_size <= set_sizes.size(); pro_start++)
		{
			vector<int> set_sizes_pro;
			vector<vector<bool> > valid_isoforms_pro;
			ShortReadGroup short_reads_pro;
			vector<vector<int> > valid_isoform_group;

			// The window covers the sets [pro_start, pro_start + part_size)
			vector<int> sub_set;
			sub_set.resize(part_size);
			for (int i = 0; i < part_size; i++)
				sub_set[i] = pro_start + i;

			Project(set_sizes, set_sizes_pro, short_reads, short_reads_pro, sub_set);

			// For valid_isoforms_pro and valid_isoform_group
			Project(valid_isoforms_filtered, valid_isoforms_pro, sub_set, valid_isoform_group);

			// A projected isoform is known iff one of the isoforms of its group is known
			vector<bool> b_known_isoforms_pro;
			b_known_isoforms_pro.assign(valid_isoform_group.size(), false);
			for (unsigned i = 0; i < valid_isoform_group.size(); i++)
				for (unsigned j = 0; j < valid_isoform_group[i].size(); j++)
					b_known_isoforms_pro[i] = b_known_isoforms_pro[i] | b_known_isoforms[valid_isoform_group[i][j]];
					
			vector<vector<int> > empty_vec;

			vector<int> curr_solution;
			BestCombination(set_sizes_pro, valid_isoforms_pro, b_known_isoforms_pro, 
					empty_vec, empty_vec, short_reads_pro, false, curr_solution);

			// A selected group shares one unit of priority among its isoforms
			for (unsigned i = 0; i < curr_solution.size(); i++)
			{
				int curr_group = curr_solution[i];
				int curr_group_size = valid_isoform_group[curr_group].size();
				for (unsigned j = 0; j < valid_isoform_group[curr_group].size(); j++)
				{
					int curr_iso = valid_isoform_group[curr_group][j];
					valid_isoforms_priority[curr_iso] += 1.0 / curr_group_size;
					
					// Make sure that known isoforms will not be removed
					if (b_known_isoforms[curr_iso]) valid_isoforms_priority[curr_iso] += 2;
				}
			}
		}

		double min_prio = 1;

		// Remove low priority valid isoforms
		vector<int> remove_candidate;
		vector<double> remove_candidate_prio;
		for (unsigned i = 0; i < valid_isoforms_filtered.size(); i++)
			if (!removed_isoforms[i] && valid_isoforms_priority[i] < min_prio)
			{
				remove_candidate.push_back(i);
				remove_candidate_prio.push_back(valid_isoforms_priority[i]);
			}
		// Order the candidates by priority
		vector<int> sortedIdx;
		UtilityTempComp<double, less<double> >::Sort(remove_candidate_prio, sortedIdx);
		UtilityTemp<int>::SortByIndex(remove_candidate, sortedIdx);

		int cnt = 0;
		for (unsigned i = 0; i < remove_candidate.size(); i++)
		{
			int curr_set = remove_candidate[i];
			// If current set does not make the original set cover infeasible, remove it.
			// otherwise, keep it.
			bool b_remove = true;
			for (unsigned j = 0; j < sc_sets_init[curr_set].size(); j++)
				if (ele_covered_times[sc_sets_init[curr_set][j]] <= 1)
				{
					b_remove = false;
					break;
				}

			if (b_remove)
			{
				cnt++;
				for (unsigned j = 0; j < sc_sets_init[curr_set].size(); j++)
					ele_covered_times[sc_sets_init[curr_set][j]]--;
				// A removed isoform stays in the list as an all-false isoform,
				// so the indexes of the other isoforms do not change
				valid_isoforms_filtered[curr_set].assign(valid_isoforms_filtered[curr_set].size(), false);
				removed_isoforms[curr_set] = true;
			}
		}

		cout << cnt << " valid_isoforms are filtered out" << endl;
	}

	vector<vector<int> > sc_sets_final;
	vector<double> sc_set_weight_final;

	// idx_map[k] : index in valid_isoforms of k'th remaining isoform
	vector<int> idx_map;
	idx_map.resize(valid_isoforms_filtered.size());
	int cnt = 0;
	// Remove isoforms that have been filtered out.
	for (unsigned i = 0; i < valid_isoforms_filtered.size(); i++)
	{
		if (!removed_isoforms[i])
		{
			idx_map[cnt] = i;
			valid_isoforms_filtered[cnt] = valid_isoforms_filtered[i];
			// NOTE(review): this compacts the member mWeight in place without
			// resizing it - confirm that nothing reads mWeight by original
			// index afterwards
			mWeight[cnt] = mWeight[i];
			valid_isoforms_priority[cnt] = valid_isoforms_priority[i];
			cnt++;
		}
	}
	valid_isoforms_filtered.resize(cnt);

	largest_ele = ConstructSetCover(set_sizes, valid_isoforms_filtered, start_exons, end_exons, short_reads, true, sc_sets_final);

	// One weight per remaining isoform. Only the first cnt entries of mWeight
	// and valid_isoforms_priority are meaningful after the compaction above.
	sc_set_weight_final.resize(cnt);
	for (int i = 0; i < cnt; i++)
		if (valid_isoforms_priority[i] != 0)
			sc_set_weight_final[i] = mWeight[i] / valid_isoforms_priority[i];
		else
			sc_set_weight_final[i] = 1000;  // An isoform that has never been selected gets a large weight.
	SolveSetCover(largest_ele, sc_set_weight_final, sc_sets_final, solution);

	// Translate the indexes of the remaining isoforms back to indexes into valid_isoforms
	for (unsigned i = 0; i < solution.size(); i++)
		solution[i] = idx_map[solution[i]];
}

//--------------------------------------------------------------------------------------
//       Class:  IsoInfer
//      Method:  Project
// Description:  Project the valid isoforms to a sub set of exons.
//  Parameters:  The exons are [pro_start, pro_end)
//               valid_isoform_group   :   The isoforms are grouped by the projection
//  	return:
//--------------------------------------------------------------------------------------
void
IsoInfer::Project(const vector<vector<bool> >& isoforms, vector<vector<bool> >& isoforms_pro,
					const vector<int>& sub_set, vector<vector<int> >& isoform_group)
{
	isoforms_pro.clear();
	isoform_group.clear();
	int pro_size = sub_set.size();
	for (unsigned i = 0; i < isoforms.size(); i++)
	{
		vector<bool> iso_pro;
		iso_pro.resize(pro_size);
		for (int j = 0; j < sub_set.size(); j++)
			iso_pro[j] = isoforms[i][sub_set[j]];

		bool b_exist = false;
		unsigned idx = 0;
		for (idx = 0; idx < isoforms_pro.size(); idx++)
			if (iso_pro == isoforms_pro[idx])
			{
				b_exist = true;
				break;
			}

		if (b_exist)
			isoform_group[idx].push_back(i);
		else
		{
			vector<int> a_group;
			a_group.push_back(i);
			isoform_group.push_back(a_group);
			isoforms_pro.push_back(iso_pro);
		}
	}
}

//--------------------------------------------------------------------------------------
//       Class:  IsoInfer
//      Method:  Project
// Description:  Project the set sizes and the short reads to a sub set of exons.
//  Parameters:  set_sizes       :   The size of every exon (segment).
//               set_sizes_pro   :   Output. set_sizes_pro[i] = set_sizes[sub_set[i]].
//               short_reads     :   The short reads on the original exons.
//               short_reads_pro :   Output. A copy of short_reads on which Shrink() 
//                   has been called with the mapping built here.
//               sub_set         :   The indexes of the kept exons. Every index must be
//                   in [0, set_sizes.size()); this is not checked.
//  	return:
//--------------------------------------------------------------------------------------
void
IsoInfer::Project(const vector<int>& set_sizes, vector<int>& set_sizes_pro,
					const ShortReadGroup& short_reads, ShortReadGroup& short_reads_pro,
					const vector<int>& sub_set)
{
	// mapping[e] is the position of exon e in sub_set, or -1 if exon e is not kept.
	// If an exon appears more than once in sub_set, the last position wins.
	vector<int> mapping(set_sizes.size(), -1);

	set_sizes_pro.resize(sub_set.size());
	for (unsigned i = 0; i < sub_set.size(); i++)
	{
		const int idx = sub_set[i];
		set_sizes_pro[i] = set_sizes[idx];
		mapping[idx] = i;
	}

	// NOTE(review): Shrink() is defined elsewhere; presumably it re-indexes the reads 
	// with "mapping" and drops what maps to -1 -- confirm against ShortReadGroup.
	short_reads_pro = short_reads;
	short_reads_pro.Shrink(mapping);
}

//--------------------------------------------------------------------------------------
//       Class:  IsoInfer
//      Method:  SolveSetCover
// Description:  This method solve a set cover problem. 
//  Parameters:  sc_set_weight  : The weight of each set. 
//               sc_sets  :  Each element of the vector would be a set of the set cover
//                   instance. There are "valid_isoforms.size()" sets of the instance.
//                   The elements are [0, largest_ele]
//               solution :  The solution of the problem. This array contains all the 
//                   selected sets.
//      return:  The optimized objective value
//--------------------------------------------------------------------------------------
/*virtual*/
double
IsoInfer::SolveSetCover (int largest_ele,
                           const vector<double>& sc_set_weight, 
						   const vector<vector<int> >& sc_sets,
						   vector<int>& solution)
{
//	// debug : output the problem
//	for (unsigned i = 0; i < sc_sets.size(); i++)
//	{
//		cout << i << "  :  " << sc_set_weight[i] << "  :  ";
//		for (unsigned j = 0; j < sc_sets[i].size(); j++)
//			cout << sc_sets[i][j] << ",";
//		cout << endl;
//	}
	vector<bool> is_measure_covered;
	is_measure_covered.assign(largest_ele+1, false);
	for (unsigned i = 0; i < sc_sets.size(); i++)
		for (unsigned j = 0; j < sc_sets[i].size(); j++)
			is_measure_covered[sc_sets[i][j]] = true;
	for (unsigned i = 0; i < is_measure_covered.size(); i++)
		if (!is_measure_covered[i])
		{
			solution.resize(sc_sets.size());
			for (unsigned i = 0; i < solution.size(); i++)
				solution[i] = i;
			cerr << "WARNING : This set cover problem is not feasible. largest_ele = " << largest_ele << endl;
			return 0;
		}

	glp_prob *blp;
	int row_cnt = largest_ele + 1;
	int col_cnt = sc_sets.size();
	int ele_cnt = row_cnt * col_cnt;
	int* im = new int[ele_cnt + 1];
	int* jm = new int[ele_cnt + 1];
	double* ar = new double[ele_cnt + 1];

	blp = glp_create_prob();
	//glp_term_out(false);
	glp_set_obj_dir(blp, GLP_MIN);
	glp_add_rows(blp, row_cnt);
	glp_add_cols(blp, col_cnt);

	// set row bound such that each element will be covered at least once.
	for (int i = 0; i < row_cnt; i++)
		glp_set_row_bnds(blp, i+1, GLP_LO, 1, 0.0);

	// set binary kind of all the variables
	for (int i = 0; i < col_cnt; i++)
		glp_set_col_kind(blp, i+1, GLP_BV);

	// set index and coefficients
	for (int i = 0; i < ele_cnt; i++)
	{
		im[i+1] = i / col_cnt + 1;
		jm[i+1] = i % col_cnt + 1;
		ar[i+1] = 0;
	}
	for (unsigned i = 0; i < sc_sets.size(); i++)
		for (unsigned j = 0; j < sc_sets[i].size(); j++)
		{
			int idx = sc_sets[i][j] * col_cnt + i + 1;
			im[idx] = sc_sets[i][j] + 1;
			jm[idx] = i + 1;
			ar[idx] = 1;
		}
	// set coefficients for the objective function
	for (int i = 0; i < col_cnt; i++)
		glp_set_obj_coef(blp, i + 1, sc_set_weight[i]);

	glp_load_matrix(blp, ele_cnt, im, jm, ar);

	glp_iocp parm;
	glp_init_iocp(&parm);
	parm.presolve = GLP_ON;
	glp_intopt(blp, &parm);

	double obj = glp_mip_obj_val(blp);

	// solution:
	for (int i = 0; i < col_cnt; i++)
		if (glp_mip_col_val(blp, i+1) > 0.5) solution.push_back(i);

	glp_delete_prob(blp);
	delete[] im;
	delete[] jm;
	delete[] ar;
	
	return obj;
}

//--------------------------------------------------------------------------------------
//       Class:  IsoInfer
//      Method:  StartEndInIsoform
// Description:  Given isoforms, and start-end pairs, find which start-end pair is covered
//               by each isoform.
//  Parameters:  start_end_pair_in_isoform.size() = 0 if the corresponding isoform is empty
//      return:  How many start-end exon pairs in all the isoforms.
//--------------------------------------------------------------------------------------
int
IsoInfer::StartEndInIsoform (const vector<vector<bool> >& isoforms,
							   const vector<vector<int> >& start_exons, 
							   const vector<vector<int> >& end_exons, 
							   vector<vector<int> >& start_end_pair_in_isoform)
{
	if (isoforms.size() == 0) return 0;

	vector<int> start_exons_in_iso;
	vector<int> end_exons_in_iso;

	ExtractStartEndExons(isoforms, start_exons_in_iso, end_exons_in_iso);

	set<int> all_start_exons;
	set<int> all_end_exons;

	for (unsigned i = 0; i < start_exons.size(); i++)
	{
		for (unsigned j = 0; j < start_exons[i].size(); j++)
			all_start_exons.insert(start_exons[i][j]);
		for (unsigned j = 0; j < end_exons[i].size(); j++)
			all_end_exons.insert(end_exons[i][j]);
	}

	int exon_cnt = isoforms[0].size();

	start_end_pair_in_isoform.clear();
	start_end_pair_in_isoform.resize(isoforms.size());

	for (unsigned i = 0; i < isoforms.size(); i++)
	{
		vector<int>& av = start_end_pair_in_isoform[i];

		av.push_back(start_exons_in_iso[i]);
		av.push_back(end_exons_in_iso[i]);

		for (unsigned j = 0; j < start_exons.size(); j++)
		{
			bool b_succ = false;
			for (unsigned k = 0; k < start_exons[j].size(); k++)
				if (start_exons[j][k] == start_exons_in_iso[i])
				{
					b_succ = true;
					break;
				}
			if (!b_succ) continue;

			for (unsigned k = 0; k < end_exons[j].size(); k++)
				if (end_exons[j][k] == end_exons_in_iso[i])
				{
					b_succ = true;
					break;
				}
			if (!b_succ) continue;
			av.push_back(exon_cnt + j);
		}
	}

	vector<int> b_visited;
	b_visited.assign(exon_cnt + start_exons.size(), false);
	for (unsigned i = 0; i < start_end_pair_in_isoform.size(); i++)
		for (unsigned j = 0; j < start_end_pair_in_isoform[i].size(); j++)
			b_visited[start_end_pair_in_isoform[i][j]] = true;

	vector<int> map_to_flax_idx;
	map_to_flax_idx.resize(b_visited.size());
	int cnt = 0;
	for (unsigned i = 0; i < b_visited.size(); i++)
		if (b_visited[i])
			map_to_flax_idx[i] = cnt++;
	for (unsigned i = 0; i < start_end_pair_in_isoform.size(); i++)
		for (unsigned j = 0; j < start_end_pair_in_isoform[i].size(); j++)
			start_end_pair_in_isoform[i][j] = map_to_flax_idx[start_end_pair_in_isoform[i][j]];

	return cnt;
}

//--------------------------------------------------------------------------------------
//       Class:  IsoInfer
//      Method:  ConstructSetCover
// Description:  Given the valid isoforms, the start, end exons and the part_comb, 
//               construct a set cover problem instance.
//               
//  Parameters:  b_enable_pe  :  Whether enable start-end exons or not.
//      return:  The largest element in the base set. The base set is [0, largest_ele].
//--------------------------------------------------------------------------------------
int
IsoInfer::ConstructSetCover ( const vector<int>& set_sizes, 
								const vector<vector<bool> >& isoforms,
							    const vector<vector<int> >& start_exons, 
							    const vector<vector<int> >& end_exons, 
								const ShortReadGroup& short_reads,
								bool b_enable_se,
								vector<vector<int> >& sc_sets)
{
	vector<double> measure_read;
	vector<vector<double> > virtual_len_matrix;
	short_reads.ContructMeasures(isoforms, set_sizes, measure_read, virtual_len_matrix);

	vector<vector<int> >& measure_in_isoform = sc_sets;
	vector<vector<double> > measure_virtual_length;

	measure_in_isoform.resize(isoforms.size());
	measure_virtual_length.resize(isoforms.size());
	for (unsigned i = 0; i < measure_read.size(); ++i)
		for (unsigned j = 0; j < isoforms.size(); ++j)
		{
			if (virtual_len_matrix[i][j] > 0)
			{
				measure_in_isoform[j].push_back(i);
				measure_virtual_length[j].push_back(virtual_len_matrix[i][j]);
			}
		}
	
	if (b_enable_se)
	{
		vector<vector<int> > start_end_in_isoform;
		vector<vector<double> > start_end_virtual_length;
		vector<double> start_end_read;

		int se_cnt = StartEndInIsoform (isoforms, start_exons, end_exons, start_end_in_isoform);
		start_end_virtual_length.resize(isoforms.size());
		for (unsigned i = 0; i < start_end_virtual_length.size(); i++)
			start_end_virtual_length[i].assign(start_end_in_isoform[i].size(), 1);
		start_end_read.assign(se_cnt, 1);

		CombineMeasure(measure_in_isoform, measure_virtual_length, measure_read,
				       start_end_in_isoform, start_end_virtual_length, start_end_read);
	}

	return measure_read.size()-1;
}

//--------------------------------------------------------------------------------------
//       Class:  IsoInfer
//      Method:  CombineMeasure
// Description:  Combine the second set of measures into the first set of measures.
//               The measures of the second set are numbered after the measures of the
//               first set, i.e., measure m of the second set becomes measure 
//               first_measure_read.size() + m (size taken before the call).
//               
//  Parameters:  first_measure_in_isoform     : In/out. For each isoform, the measures
//                   it is involved in.
//               first_measure_virtual_length : In/out. For each isoform, the virtual 
//                   length on each of the measures listed in first_measure_in_isoform.
//               first_measure_read           : In/out. The read count of each measure.
//               second_*                     : The same three items for the second 
//                   set. The first set must cover at least as many isoforms as the 
//                   second set; this is not checked.
//--------------------------------------------------------------------------------------
void
IsoInfer::CombineMeasure(vector<vector<int> >& first_measure_in_isoform,
							vector<vector<double> >& first_measure_virtual_length,
							vector<double>& first_measure_read,
							const vector<vector<int> >& second_measure_in_isoform,
							const vector<vector<double> >& second_measure_virtual_length,
							const vector<double>& second_measure_read)
{
	const unsigned base = first_measure_read.size();
	for (unsigned i = 0; i < second_measure_in_isoform.size(); i++)
	{
		for (unsigned j = 0; j < second_measure_in_isoform[i].size(); j++)
		{
			// Only the measure index is shifted. The virtual length is a value, 
			// not an index, and is copied as it is.
			first_measure_in_isoform[i].push_back(base + second_measure_in_isoform[i][j]);
			first_measure_virtual_length[i].push_back(second_measure_virtual_length[i][j]);
		}
	}

	first_measure_read.insert(first_measure_read.end(), 
							  second_measure_read.begin(), second_measure_read.end());
}


/*virtual*/
void
IsoInfer::CleanUp()
{
	// No-op : the body is empty, nothing is released or reset here.
}

/*
 *--------------------------------------------------------------------------------------
 *        Class:  IsoInfer
 *       Method:  EnumerateValidByExpLevel
 *  Description:  Enumerate all the valid isoforms according to the junc reads and put the 
 *                result in valid_isoforms. The junctions (edges of the splice graph) are 
 *                sorted by their number of junction reads and grouped by equal read 
 *                count. The edges are handled one by one in the sorted order : all the 
 *                edges after the current one are masked and the isoforms going through 
 *                the current edge are enumerated. Isoforms containing a single exon 
 *                are enumerated at the end.
 *                NOTE(review): the direction of the sort depends on the default 
 *                comparator of UtilityTempComp, which is defined elsewhere -- confirm.
 *        Param:  valid_isoforms      :  In/out. The enumerated isoforms are appended.
 *                valid_isoform_order :  In/out. Resized to valid_isoforms.size(). All 
 *                    the isoforms found for the same group of junctions get the same 
 *                    order value; the value increases from group to group.
 *       Return:  valid_isoforms.size()
 *--------------------------------------------------------------------------------------
 */
int
IsoInfer::EnumerateValidByExpLevel(const vector<vector<int> >& start_exons, 
								   const vector<vector<int> >& end_exons, 
								   const vector<int>& set_sizes,
								   const ShortReadGroup& short_reads,
								   const vector<vector<bool> >& b_adjacent,
								   vector<vector<bool> >& valid_isoforms,
								   vector<int>& valid_isoform_order)
{
	vector<vector<double> > junc_cnt;
	short_reads.JuncCnt(set_sizes.size(), junc_cnt);
	AddAdjacentJuncRead(b_adjacent, junc_cnt);

#ifdef DEBUG1
	cout << __func__  << "Junc cnts" << endl;
	for (unsigned i = 0; i < junc_cnt.size(); ++i)
	{
		for (unsigned j = 0; j < junc_cnt.size(); ++j)
			cout << junc_cnt[i][j] << "\t";
		cout << endl;
	}
#endif

	GraphEx<int> valid_graph;
	int source;
	int sink;
	BuildSpliceGraph(valid_graph, junc_cnt, start_exons, end_exons, source, sink);

	// Enumerate in the order of the minimum number of junction reads in isoforms.
	// Collect every exon-exon edge of the graph with its junction read count.
	vector<double> sorted_junc_cnt;
	vector<int> sorted_edge_id;
	for (unsigned i = 0; i < junc_cnt.size(); i++)
		for (unsigned j = i+1; j < junc_cnt.size(); j++)
		{
			int edge_id = valid_graph.EdgeID(valid_graph.GetNodeInID(i), valid_graph.GetNodeInID(j));
			if (-1 == edge_id) continue;
			sorted_junc_cnt.push_back(junc_cnt[i][j]);
			sorted_edge_id.push_back(edge_id);
		}

	vector<int> sortedIdx;
	UtilityTempComp<double>::Sort(sorted_junc_cnt, sortedIdx);
	UtilityTemp<int>::SortByIndex(sorted_edge_id, sortedIdx);

	// grouped_edge_id[g] is the position of the first edge of group g. A group is a 
	// run of edges with the same read count. The last entry is a sentinel.
	vector<int> grouped_edge_id;
	for (unsigned i = 0; i < sorted_edge_id.size(); i++)
		if (0 == i || sorted_junc_cnt[i] != sorted_junc_cnt[i-1])
			grouped_edge_id.push_back(i);
	grouped_edge_id.push_back(sorted_edge_id.size());

	// Work space of the graph searches.
	int tot_node_cnt = valid_graph.NodeCnt(false);
	int tot_edge_cnt = valid_graph.EdgeCnt(false);
	bool* b_node_visited = new bool[tot_node_cnt];
	int* visited_nodes = new int[tot_node_cnt];
	int* visited_edges = new int[tot_edge_cnt];

	int order = 1;
	for (unsigned g = 0; g < grouped_edge_id.size()-1; g++)
	{
		// In case of isoforms containing at least two exons
		for (int i = grouped_edge_id[g]; i < grouped_edge_id[g+1]; i++)
		{
			// Make sure that the isoforms enumerated in this round contains edge sorted_edge_id[i]
			int curr_edge_id = sorted_edge_id[i];

			// Mask all the edges with orders after the current edge
			vector<int> masked_edge(sorted_edge_id.begin() + i + 1, sorted_edge_id.end());

			valid_graph.MaskAll(false);
			valid_graph.MaskEdge(masked_edge, true);

			// Find out all the edges that can be reached from the end point
			// of the current edge and Find out all the edges that can be reached reversely
			// from the start point of the current edge.
			for (int n = 0; n < tot_node_cnt; n++)
				b_node_visited[n] = false;
			int node_visited_cnt = 0;
			int edge_visited_cnt = 0;
			GraphAlgorithm_Basic::DFS(&valid_graph, valid_graph.ToNode(curr_edge_id), b_node_visited, 
									  visited_nodes, &node_visited_cnt, NULL, visited_edges, &edge_visited_cnt);

			// The reverse search continues to fill the same arrays.
			GraphAlgorithm_Basic::DFSReverse(&valid_graph, valid_graph.FromNode(curr_edge_id), b_node_visited, 
									  visited_nodes, &node_visited_cnt, NULL, visited_edges, &edge_visited_cnt);
			visited_edges[edge_visited_cnt++] = curr_edge_id;

			// Keep all the visited edges and current edge
			valid_graph.MaskAll(true);
			valid_graph.MaskNode(visited_nodes, node_visited_cnt, false, false);
			valid_graph.MaskEdge(visited_edges, edge_visited_cnt, false);

			EnumerateValid(valid_graph, source, sink, start_exons, end_exons, set_sizes, short_reads, valid_isoforms);
		}

		// All the isoforms found for this group share the same order.
		unsigned old_size = valid_isoform_order.size();
		order++;
		valid_isoform_order.resize(valid_isoforms.size());
		for (unsigned j = old_size; j < valid_isoforms.size(); j++)
			valid_isoform_order[j] = order;
		cout << "valid_isoforms.size() = " << valid_isoforms.size() 
					 << " min junction read = " << sorted_junc_cnt[grouped_edge_id[g]] << endl;
		if (valid_isoforms.size() > mMaxValidIsoformCnt) 
		{
			// Too many isoforms : truncate and give up the remaining junctions.
			valid_isoforms.resize(mMaxValidIsoformCnt);
			mOmittedJunctionCnt = sorted_junc_cnt[grouped_edge_id[g]];
			cout << "Part of junctions with reads less than " << mOmittedJunctionCnt << " are omitted" << endl;
			break;
		}
	}

	// Release all three work arrays (visited_nodes used to be leaked).
	delete[] b_node_visited;
	delete[] visited_nodes;
	delete[] visited_edges;

	unsigned old_size = valid_isoform_order.size();
	// In case of isoforms containing a single exon
	valid_graph.MaskAllEdges(false);
	valid_graph.MaskEdge(sorted_edge_id, true);
	EnumerateValid(valid_graph, source, sink, start_exons, end_exons, set_sizes, short_reads, valid_isoforms);

	valid_isoform_order.resize(valid_isoforms.size());
	for (unsigned i = old_size; i < valid_isoforms.size(); i++)
		valid_isoform_order[i] = order;

	return valid_isoforms.size();
}		/* -----  end of method IsoInfer::EnumerateValidByExpLevel  ----- */

/*
 *--------------------------------------------------------------------------------------
 *        Class:  IsoInfer
 *       Method:  EnumerateValid
 *  Description:  Enumerate the valid isoforms on the unmasked part of the splice graph. 
 *                For each start-end exon pair that is still connected to the source 
 *                and the sink, the graph is restricted to that pair and to the nodes 
 *                lying on a path from the source to the sink, and EnumerateValidDFS 
 *                is called. The mask state of the graph on entry is restored after 
 *                each pair, also when the enumeration stops early.
 *        Param:  valid_graph :  The splice graph. Its mask state is used and restored.
 *                source      :  The internal ID of the source
 *                sink        :  The internal ID of the sink
 *                start_exons, end_exons : start_exons[i] and end_exons[i] define the 
 *                    i-th start-end exon pair, as external IDs of the exons.
 *                valid_isoforms : In/out. The enumerated isoforms are appended. The 
 *                    enumeration stops once the size exceeds mMaxValidIsoformCnt.
 *       Return:  
 *--------------------------------------------------------------------------------------
 */
void
IsoInfer::EnumerateValid( GraphEx<int>& valid_graph, 
							int source, int sink, 
							const vector<vector<int> >& start_exons, 
							const vector<vector<int> >& end_exons, 
							const vector<int>& set_sizes,
							const ShortReadGroup& short_reads,
							vector<vector<bool> > & valid_isoforms)
{
	// Maybe some edges related to the start or end exons are masked.
	// Keep only the starts with an incoming edge and the ends with an outgoing edge,
	// and only the pairs which have at least one of each.
	vector<vector<int> > effective_start_exons_in_id;
	vector<vector<int> > effective_end_exons_in_id;
	for (unsigned i = 0; i < start_exons.size(); i++)
	{
		vector<int> eff_start;
		for (unsigned j = 0; j < start_exons[i].size(); j++)
		{
			int in_id = valid_graph.GetNodeInID(start_exons[i][j]);
			if (valid_graph.InDegree(in_id) > 0)
				eff_start.push_back(in_id);
		}
		vector<int> eff_end;
		for (unsigned j = 0; j < end_exons[i].size(); j++)
		{
			int in_id = valid_graph.GetNodeInID(end_exons[i][j]);
			if (valid_graph.OutDegree(in_id) > 0)
				eff_end.push_back(in_id);
		}
		if (eff_start.size() != 0 && eff_end.size() != 0)
		{
			effective_start_exons_in_id.push_back(eff_start);
			effective_end_exons_in_id.push_back(eff_end);
		}
	}

	int tot_node_cnt = valid_graph.NodeCnt(false);
	bool* b_node_visited = new bool[tot_node_cnt];
	int* visited_nodes = new int[tot_node_cnt];

	// Remember the mask state on entry such that it can be restored.
	vector<int> init_unmasked_edges;
	vector<int> init_unmasked_nodes;
	const int* edges = valid_graph.Edges();
	for (int i = 0; i < valid_graph.EdgeCnt(); i++)
		init_unmasked_edges.push_back(edges[i]);
	const int* nodes = valid_graph.Nodes();
	for (int i = 0; i < valid_graph.NodeCnt(); i++)
		init_unmasked_nodes.push_back(nodes[i]);

	// For each start-end exon pair, mask the edges from the source to the exons which 
	// are not starts of the pair and the edges to the sink from the exons which are 
	// not ends of the pair.
	for (unsigned i = 0; i < effective_start_exons_in_id.size(); i++)
	{
		set<int> starts;
		for (unsigned j = 0; j < effective_start_exons_in_id[i].size(); j++)
			starts.insert(effective_start_exons_in_id[i][j]);
		set<int> ends;
		for (unsigned j = 0; j < effective_end_exons_in_id[i].size(); j++)
			ends.insert(effective_end_exons_in_id[i][j]);

		set<int> masked_edges;
		// Disconnect all other starts from the source and ends from the sink
		const int* out_edges = valid_graph.OutEdges(source);
		for (int j = 0; j < valid_graph.OutDegree(source); j++)
			if (starts.find(valid_graph.ToNode(out_edges[j])) == starts.end())
				masked_edges.insert(out_edges[j]);

		const int* in_edges = valid_graph.InEdges(sink);
		for (int j = 0; j < valid_graph.InDegree(sink); j++)
			if (ends.find(valid_graph.FromNode(in_edges[j])) == ends.end())
				masked_edges.insert(in_edges[j]);

		vector<int> kept_edges;
		for (unsigned j = 0; j < init_unmasked_edges.size(); j++)
			if (masked_edges.find(init_unmasked_edges[j]) == masked_edges.end())
				kept_edges.push_back(init_unmasked_edges[j]);

		vector<int> removed_edges;
		removed_edges.assign(masked_edges.begin(), masked_edges.end());
		valid_graph.MaskEdge(removed_edges, true);

		// Find out reachable nodes from the source
		for (int j = 0; j < tot_node_cnt; j++)
			b_node_visited[j] = false;

		int node_visited_cnt = 0;
		int edge_visited_cnt = 0;
		GraphAlgorithm_Basic::DFS(&valid_graph, source, b_node_visited, visited_nodes, 
								  &node_visited_cnt, NULL, NULL, &edge_visited_cnt);

		set<int> reachable_nodes_from_source;
		for (int j = 0; j < node_visited_cnt; j++)
			reachable_nodes_from_source.insert(visited_nodes[j]);

		// Find out nodes which can get to the sink
		for (int j = 0; j < tot_node_cnt; j++)
			b_node_visited[j] = false;
		node_visited_cnt = 0;
		edge_visited_cnt = 0;
		GraphAlgorithm_Basic::DFSReverse(&valid_graph, sink, b_node_visited, visited_nodes, 
										 &node_visited_cnt, NULL, NULL, &edge_visited_cnt);

		// A good node is reachable from the source and can get to the sink.
		vector<int> good_nodes;
		for (int j = 0; j < node_visited_cnt; j++)
		{
			if (reachable_nodes_from_source.find(visited_nodes[j]) != reachable_nodes_from_source.end())
				good_nodes.push_back(visited_nodes[j]);
		}

		valid_graph.MaskAll(true);
		valid_graph.MaskNode(good_nodes, false, false);
		valid_graph.MaskEdge(kept_edges, false);

		vector<bool> b_visited;
		b_visited.assign(set_sizes.size() + 2, false);

		vector<bool> an_iso;
		an_iso.assign(set_sizes.size(), false);

		EnumerateValidDFS(valid_graph, source, sink, set_sizes, short_reads, b_visited, an_iso, valid_isoforms);

		// Restore the mask state on entry BEFORE testing the limit. Otherwise an 
		// early exit hands back a graph restricted to the current pair.
		valid_graph.MaskAll(true);
		valid_graph.MaskNode(init_unmasked_nodes, false);
		valid_graph.MaskEdge(init_unmasked_edges, false);

		if (valid_isoforms.size() > mMaxValidIsoformCnt)
			break;
	}

	delete[] b_node_visited;
	delete[] visited_nodes;
}

/*
 *--------------------------------------------------------------------------------------
 *        Class:  IsoInfer
 *       Method:  EnumerateValidDFS
 *  Description:  Recursive depth first enumeration of the paths from "node" to the 
 *                sink. Each path that reaches the sink is stored as one isoform. A 
 *                branch is given up as soon as the short reads reject the partial 
 *                isoform.
 *        Param:  node      :   The internal ID of current node
 *                sink      :   The internal ID of the sink
 *                b_visited :   Flags of the nodes on the current path. They are set 
 *                    and reset here, but not read.
 *                an_iso    :   The exons on the current path. It is the same on 
 *                    return as on entry.
 *                valid_isoforms : In/out. The enumerated isoforms are appended. The 
 *                    enumeration stops once the size exceeds mMaxValidIsoformCnt.
 *       Return:  
 *--------------------------------------------------------------------------------------
 */
void
IsoInfer::EnumerateValidDFS(GraphEx<int>& valid_graph, 
							int node, int sink, 
							const vector<int>& set_sizes,
							const ShortReadGroup& short_reads,
							vector<bool>& b_visited, 
							vector<bool>& an_iso,
							vector<vector<bool> > & valid_isoforms)
{
	// A path that arrives at the sink is a complete isoform.
	if (sink == node)
	{
		valid_isoforms.push_back(an_iso);
		return;
	}

	b_visited[node] = true;

	const int exon_id = valid_graph.GetNodeExID(node);
	// Only the nodes that correspond to an exon are recorded in the isoform.
	const bool b_is_exon = (exon_id >= 0 && exon_id < an_iso.size());
	if (b_is_exon)
		an_iso[exon_id] = true;

	// Check the partial iso.
	// NOTE(review): this test only requires exon_id >= 0, while the recording above 
	// also requires exon_id < an_iso.size() -- confirm that this is intended.
	const bool b_extend = (exon_id >= 0) 
						  ? short_reads.IsCurrentSegValid(an_iso, set_sizes, exon_id)
						  : true;

	if (b_extend)
	{
		const int* next_edges = valid_graph.OutEdges(node);
		int k = 0;
		while (k < valid_graph.OutDegree(node))
		{
			const int next_node = valid_graph.ToNode(next_edges[k]);
			EnumerateValidDFS(valid_graph, next_node, sink, set_sizes, short_reads, b_visited, an_iso, valid_isoforms);
			// Enough isoforms : do not try the remaining edges.
			if (valid_isoforms.size() > mMaxValidIsoformCnt) 
				break;
			k++;
		}
	}

	// Leave the path state as it was found.
	b_visited[node] = false;
	if (b_is_exon)
		an_iso[exon_id] = false;
}		/* -----  end of method IsoInfer::EnumerateValidDFS  ----- */

//--------------------------------------------------------------------------------------
//       Class:  IsoInfer
//      Method:  BestCombination
// Description:  If the number of valid isoforms is small (less than 20), this method
//               enumerates all possible combinations and find the best one. This method
//               also needs the constructed set cover problem.
//  Parameters:  largest_ele : The largest element of the set cover instance.
//--------------------------------------------------------------------------------------
	void
IsoInfer::BestCombination ( const vector<int>& set_sizes, 
							const vector<vector<bool> >& valid_isoforms,
							const vector<bool>& b_known_isoforms,
							const vector<vector<int> >& start_exons, 
							const vector<vector<int> >& end_exons, 
							const ShortReadGroup& short_reads,
							bool b_enable_se,
							vector<int>& solution)
{
	vector<vector<int> > sc_sets;
	int largest_ele = ConstructSetCover(set_sizes, valid_isoforms, start_exons, end_exons, short_reads, b_enable_se, sc_sets);

	if (valid_isoforms.size() > 20)
	{
		vector<double> sc_set_weight;
		sc_set_weight.assign(valid_isoforms.size(), 1);
		for (unsigned i = 0; i < valid_isoforms.size(); i++)
			if (b_known_isoforms[i]) sc_set_weight[i] = 0;

		SolveSetCover(largest_ele, sc_set_weight, sc_sets, solution);
		return;
	}

	//cout << "Find the best combination from " << valid_isoforms.size() << " isoforms." << endl;
	double best_obj = numeric_limits<double>::max();
	vector<int> best_solution;
	best_solution.resize(sc_sets.size());
	for (unsigned i = 0; i < best_solution.size(); i++)
		best_solution[i] = i;

	time_t start_time, end_time;
	time(&start_time);

	vector<vector<double> > measure_virtual_length;
	vector<double> measure_read;
	short_reads.ContructMeasures(valid_isoforms, set_sizes, measure_read, measure_virtual_length);

	vector<int> new_valid_isoforms_idx;
	vector<int> known_isoforms_idx;
	for (unsigned i = 0; i < valid_isoforms.size(); i++)
		if (b_known_isoforms[i]) known_isoforms_idx.push_back(i);
		else new_valid_isoforms_idx.push_back(i);

	int measure_cnt = measure_virtual_length.size();
	vector<int> selected_new_idx;
	vector<bool> is_measure_covered;
	bool b_break = false;
	// Enumerate all possible combinations
	for (unsigned iso_cnt = 0; iso_cnt <= new_valid_isoforms_idx.size(); iso_cnt++)
	{
		selected_new_idx.resize(iso_cnt+1);
		for (int i = 0; i <= iso_cnt; i++)
			selected_new_idx[i] = i;
		do
		{
			solution.clear();
			for (unsigned i = 0; i < known_isoforms_idx.size(); i++)
				solution.push_back(known_isoforms_idx[i]);
			for (int i = 0; i < iso_cnt; i++)
				solution.push_back(new_valid_isoforms_idx[selected_new_idx[i]]);

			is_measure_covered.assign(largest_ele+1, false);
			for (unsigned i = 0; i < solution.size(); i++)
			{
				int curr_iso = solution[i];
				for (unsigned j = 0; j < sc_sets[curr_iso].size(); j++)
					is_measure_covered[sc_sets[curr_iso][j]] = true;
			}
			bool b_feasible = true;
			for (unsigned i = 0; i < is_measure_covered.size(); i++)
				if (!is_measure_covered[i]) b_feasible = false;

			// If current combination is feasible.
			if (b_feasible)
			{
				vector<vector<double> > selected_measure_virtual_length;
				selected_measure_virtual_length.resize(measure_cnt);
				for (unsigned i = 0; i < measure_cnt; i++)
				{
					selected_measure_virtual_length[i].resize(iso_cnt);
					for (unsigned j = 0; j < iso_cnt; j++)
						selected_measure_virtual_length[i][j] = measure_virtual_length[i][solution[j]];
				}

				double obj = mpSolver->Solve(selected_measure_virtual_length, measure_read);
				double pvalue = mpSolver->ResultPvalue();

				if (best_obj > obj)
				{
					best_obj = obj;
					best_solution = solution;
				}

				time(&end_time);
				double time_diff = difftime(end_time, start_time);
				if (time_diff > 200) 
				{
					b_break = true;
					break;
				}

				if (pvalue > 0.05)
					b_break = true;
			}

		} while (Utility::NextCombination(selected_new_idx, valid_isoforms.size()));

		if (b_break) break;
	}

	solution = best_solution;
}


