// =====================================================================================
// 
//       Filename:  IsoInfer.h
// 
//    Description:  This is the header file for class IsoInfer
// 
//        Version:  1.0
//        Created:  06/08/2009 02:56:10 PM
//       Revision:  none
//       Compiler:  g++
// 
//         Author:  Jianxing Feng (feeldead), feeldead@gmail.com
//        Company:  THU
// 
// =====================================================================================

#ifndef IsoInfer_H
#define IsoInfer_H

#include "InstanceHandler.h"
#include "ExpEstimator.h"
#include "GraphEx.hpp"
#include "Exon.h"
#include "ShortReadGroup.hpp"

/*
 * =====================================================================================
 *        Class:  IsoInfer
 *  Description:  Provided with paired-end and single-end reads, this class infers
 *                the isoforms.
 * =====================================================================================
 */
class IsoInfer : public InstanceHandler
{
	// NOTE(review): only the inline accessors below are defined in this header.
	// Every other member function is defined out of line, so the comments on
	// those declarations describe their signatures and, where marked, what
	// their names suggest -- confirm against the implementation file.

	public:

		/* ====================  LIFECYCLE     ======================================= */

		// p_output is optional and defaults to NULL.  Presumably p_solver is kept
		// in mpSolver -- confirm in the constructor definition.
		IsoInfer (ExpEstimator* p_solver, ostream* p_output = NULL);		/* constructor      */
		virtual ~IsoInfer ();                          						    /* destructor       */

		/* ====================  CONFIGURATION ======================================= */

		// Enable or disable the use of start and end pairs (sets mbEnableStartEnd).
		void
		EnableSE(bool b_enable_start_end){mbEnableStartEnd = b_enable_start_end;}

		// Enable or disable step II of the algorithm (sets mbEnableStepII).
		void
		EnableStepII(bool b_enable){mbEnableStepII = b_enable;}

		// Set the minimum duplication a partial combination needs in order to
		// take effect in the algorithm (sets mMinEffectivePartCombDup).
		void
		SetMinEffectivePartCombDup(int min_dup){mMinEffectivePartCombDup = min_dup;}

		// Set the partition size used by PartitionAndSearch (sets mPartitionSize).
		void
		SetPartitionSize(int part_size){mPartitionSize = part_size;}

		// Set the confidence level (sets mConfidenceLevel).
		void
		SetConfidencelevel(double confi_level){mConfidenceLevel = confi_level;}

		// Store a copy of the known isoforms (sets mKnownIsoforms).
		// Presumably each inner vector<bool> holds one flag per exon -- confirm
		// against the callers.
		void
		SetKnownIsoforms(const vector<vector<bool> >& known_isoforms){mKnownIsoforms = known_isoforms;}

		/* ====================  ACCESSORS     ======================================= */

		// Valid isoforms of the latest instance.  The returned reference refers
		// to a member of this object.  Const-qualified so that it can also be
		// called on a const IsoInfer.
		const vector<vector<bool> >&
		GetValidIsoforms() const {return mValidIsoforms;}

		// Solution of the latest instance, as indexes into the vector returned by
		// GetValidIsoforms().  The returned reference refers to a member of this
		// object.  Const-qualified so that it can also be called on a const IsoInfer.
		const vector<int>&
		GetSolution() const {return mSolution;}

		/* ====================  OPERATIONS    ======================================= */

		// Virtual handler hooks.  Presumably these override the corresponding
		// functions of InstanceHandler -- confirm against the base class.
		virtual
		void
		Initialize();

		virtual
		void
		OnInstance(Instance& an_instance);

		// Store the pointer in mpInstance.  mpInstance is not declared in this
		// class, so it has to be provided by InstanceHandler.
		void
		SetInstance(Instance* p_instance){mpInstance = p_instance;}

		virtual
		void
		CleanUp();

		// Fills exon_type (output) from the start/end exon lists and exon_cnt.
		void
		CalculateExonType( const vector<vector<int> >& start_exons, const vector<vector<int> >& end_exons, 
						   int exon_cnt, vector<int>& exon_type );

		// Fills the three measure_* output vectors from set_sizes, isoforms and
		// short_reads.
		void
		ConstructOptModel ( const vector<int>& set_sizes, 
							const vector<vector<bool> >& isoforms,
							const ShortReadGroup& short_reads,
							vector<vector<int> >& measure_in_isoform,
							vector<vector<double> >& measure_virtual_length,
							vector<double>& measure_read);

	protected:

		// junc_cnt is the only non-const argument, i.e. it is updated in place.
		void
		AddAdjacentJuncRead(const vector<vector<bool> >& b_adjacent, vector<vector<double> >& junc_cnt);

		// Outputs: valid_isoforms_expanded and solution.
		void
		OnSubInstance ( const vector<int>& set_sizes,
						const vector<vector<int> >& start_exons, 
						const vector<vector<int> >& end_exons,
						const vector<vector<bool> >& b_adjacent,
						const ShortReadGroup& short_reads,
						const vector<vector<bool> >& known_isoforms,
						vector<vector<bool> >& valid_isoforms_expanded,
						vector<int>& solution);

		// Output: solution.
		void
		InferNew (const vector<int>& set_sizes, 
				  const vector<vector<bool> >& valid_isoforms,
				  const vector<bool>& b_known_isoforms,
				  const vector<vector<int> >& start_exons, 
				  const vector<vector<int> >& end_exons, 
				  const ShortReadGroup& short_reads,
				  vector<int>& solution);


		// Output: sc_sets.  The meaning of the int return value is defined by
		// the implementation.
		int
		ConstructSetCover ( const vector<int>& set_sizes, 
							const vector<vector<bool> >& isoforms,
							const vector<vector<int> >& start_exons, 
							const vector<vector<int> >& end_exons, 
							const ShortReadGroup& short_reads,
							bool b_enable_se,
							vector<vector<int> >& sc_sets);

		// Output: solution.  The meaning of the double return value is defined
		// by the implementation.
		double
		SolveSetCover (int largest_ele,
				       const vector<double>& sc_set_weight, 
					   const vector<vector<int> >& sc_sets,
					   vector<int>& solution);


		// Outputs: valid_isoforms and valid_isoform_order.  The meaning of the
		// int return value is defined by the implementation.
		int
		EnumerateValidByExpLevel(  const vector<vector<int> >& start_exons, 
								   const vector<vector<int> >& end_exons, 
								   const vector<int>& set_sizes,
								   const ShortReadGroup& short_reads,
								   const vector<vector<bool> >& b_adjacent,
								   vector<vector<bool> >& valid_isoforms,
								   vector<int>& valid_isoform_order);
		
		// Output: valid_isoforms.  valid_graph is taken by non-const reference.
		void
		EnumerateValid( GraphEx<int>& valid_graph, 
						int source, int sink, 
						const vector<vector<int> >& start_exons, 
						const vector<vector<int> >& end_exons, 
						const vector<int>& set_sizes,
					    const ShortReadGroup& short_reads,
						vector<vector<bool> > & valid_isoforms);

		// Presumably the depth-first worker behind EnumerateValid -- confirm in
		// the implementation.  b_visited, an_iso and valid_isoforms are all
		// taken by non-const reference.
		void
		EnumerateValidDFS(GraphEx<int>& valid_graph, 
						int node, int sink, 
						const vector<int>& set_sizes,
					    const ShortReadGroup& short_reads,
						vector<bool>& b_visited, 
						vector<bool>& an_iso,
						vector<vector<bool> > & valid_isoforms);

		// Outputs: splice_graph, source and sink.
		void
		BuildSpliceGraph (  GraphEx<int>& splice_graph, 
							const vector<vector<double> >& junc_cnt, 
							const vector<vector<int> >& start_exons, 
							const vector<vector<int> >& end_exons, 
							int& source, int& sink);

		// Two overloads that both fill shrink_map (output): one works from
		// junction counts plus start/end exons, the other from a set of isoforms.
		// The meaning of the int return value is defined by the implementation.
		int
		CalculateShrink ( const vector<vector<double> >& junc_cnt, const vector<vector<int> >& start_exons, 
					  	  const vector<vector<int> >& end_exons, vector<int>& shrink_map);

		int
		CalculateShrink ( const vector<vector<bool> >& isoforms, vector<int>& shrink_map);

		// Each const input is paired with a non-const "*_shrinked" output of the
		// same type.
		void
		ShrinkInstance (const vector<int>& shrink_map,
						const vector<int>& set_sizes, vector<int>& set_sizes_shrinked, 
						const vector<vector<bool> >& b_adjacent, vector<vector<bool> >& b_adjacent_shrinked,
						const ShortReadGroup& short_reads, ShortReadGroup& short_reads_shrinked,
						const vector<vector<int> >& start_exons, vector<vector<int> >& start_exons_shrinked, 
						const vector<vector<int> >& end_exons, vector<vector<int> >& end_exons_shrinked);

		// Output: isoforms_shrinked.
		void
		ShrinkIsoform( const vector<int>& shrink_map, const vector<vector<bool> >& isoforms, vector<vector<bool> >& isoforms_shrinked);

		// Output: isoforms_expanded.
		void
		ExpandIsoforms ( const vector<int>& shrink_map, const vector<vector<bool> >& isoforms_shrinked, vector<vector<bool> >& isoforms_expanded );

		// Output: solution.
		void
		BestCombination(const vector<int>& set_sizes, 
						const vector<vector<bool> >& valid_isoforms,
						const vector<bool>& b_known_isoforms,
						const vector<vector<int> >& start_exons, 
						const vector<vector<int> >& end_exons, 
						const ShortReadGroup& short_reads,
						bool b_enable_se,
						vector<int>& solution);
		
		// All arguments are read-only; where the prediction is written is
		// decided by the implementation.
		void
		OutputPrediction(const vector<Exon>& exons, 
						 const vector<vector<bool> >& valid_isoforms, 
						 const vector<int>& solution);

		// Output: solution.  mPartitionSize is documented as being used by this
		// function.
		void
		PartitionAndSearch (const vector<int>& set_sizes, 
							const vector<vector<bool> >& valid_isoforms,
							const vector<bool>& b_known_isoforms,
							const vector<vector<int> >& start_exons, 
							const vector<vector<int> >& end_exons, 
							const ShortReadGroup& short_reads,
							vector<int>& solution);

		// Outputs: start_exons and end_exons.
		void
		ExtractStartEndExons(const vector<vector<bool> >& isoforms, vector<int>& start_exons, vector<int>& end_exons);

		// Two overloads taking a sub_set of indexes.  The first has outputs
		// isoforms_pro and isoform_group; the second has outputs set_sizes_pro
		// and short_reads_pro.
		void
		Project(const vector<vector<bool> >& isoforms, vector<vector<bool> >& isoforms_pro,
				const vector<int>& sub_set, vector<vector<int> >& isoform_group);

		void
		Project(const vector<int>& set_sizes, vector<int>& set_sizes_pro, 
				const ShortReadGroup& short_reads, ShortReadGroup& short_reads_pro,
				const vector<int>& sub_set);
		
		// The three first_* arguments are non-const (updated in place); the
		// three second_* arguments are read-only.
		void
		CombineMeasure(vector<vector<int> >& first_measure_in_isoform,
						vector<vector<double> >& first_measure_virtual_length,
						vector<double>& first_measure_read,
						const vector<vector<int> >& second_measure_in_isoform,
						const vector<vector<double> >& second_measure_virtual_length,
						const vector<double>& second_measure_read);

		// Output: start_end_pair_in_isoform.  The meaning of the int return
		// value is defined by the implementation.
		int
		StartEndInIsoform (const vector<vector<bool> >& isoforms, const vector<vector<int> >& start_exons, 
						   const vector<vector<int> >& end_exons, vector<vector<int> >& start_end_pair_in_isoform);

	private:
		vector<int> mShrinkExonMap;
		ExpEstimator* mpSolver;          // Ownership is not visible from this header.
		double mConfidenceLevel;         // Set through SetConfidencelevel.
		double mOmittedJunctionCnt;

		bool mbEnableStartEnd;   // Whether the start and end pairs are enabled (see EnableSE).
		bool mbEnableStepII;     // Whether step II of the algorithm is executed (see EnableStepII).

		// Only partial combinations whose duplication is not less than
		// mMinEffectivePartCombDup take effect in the algorithm.
		// This parameter influences ConstructSetCover, IsPairSupported
		// and IsTripleSupported.
		int mMinEffectivePartCombDup; 

		int mPartitionSize;      // Used by PartitionAndSearch
		int mMaxValidIsoformCnt; // Maximum number of allowed valid isoforms (no setter is declared in this class).

		vector<double> mWeight;

		vector<vector<bool> > mKnownIsoforms;   // Set through SetKnownIsoforms.
		vector<vector<bool> > mValidIsoforms;   // Always stores the valid isoforms of the latest instance.
		vector<int>           mSolution;        // Stores the indexes of mValidIsoforms.
}; /* -----  end of class IsoInfer  ----- */

#endif
