// =====================================================================================
// 
//       Filename:  IsoInferPE.h
// 
//    Description:  This is the header file for class IsoInferPE
// 
//        Version:  1.0
//        Created:  06/08/2009 02:56:10 PM
//       Revision:  none
//       Compiler:  g++
// 
//         Author:  Jianxing Feng (feeldead), feeldead@gmail.com
//        Company:  THU
// 
// =====================================================================================

#ifndef IsoInferPE_H
#define IsoInferPE_H

#include "InstanceHandler.h"
#include "LPsolver.h"
#include "GraphEx.hpp"
#include "Exon.h"

/*
 * =====================================================================================
 *        Class:  IsoInferPE
 *  Description:  Provided with paired-end reads and single-end reads, this class
 *                infers the isoforms.
 * =====================================================================================
 */
class IsoInferPE : public InstanceHandler
{
	public:

		/* ====================  LIFECYCLE     ======================================= */
		IsoInferPE (LPsolver* p_solver, ostream* p_output = NULL);		/* constructor      */
		virtual ~IsoInferPE ();                          						    /* destructor       */

		void
		EnableSE(bool b_enable_start_end){mbEnableStartEnd = b_enable_start_end;};

		void
		EnablePE(bool b_enable_pe){mbEnablePE= b_enable_pe;};

		void
		EnableStepII(bool b_enable){mbEnableStepII = b_enable;};

		void
		SetMinEffectivePartCombDup(int min_dup){mMinEffectivePartCombDup = min_dup;};

		void
		SetMinExpLevel(double min_exp){mMinExpLevel = min_exp;}

		void
		SetPartitionSize(int part_size){mPartitionSize = part_size;}

		void
		SetConfidencelevel(double confi_level){mConfidenceLevel = confi_level;}

		void
		SetKnownIsoforms(const vector<vector<bool> >& known_isoforms){mKnownIsoforms = known_isoforms;}

		const vector<vector<bool> >&
		GetValidIsoforms(){return mValidIsoforms;}

		const vector<int>&
		GetSolution(){return mSolution;}

		virtual
		void
		Initialize();

		virtual
		void
		OnInstance(Instance& an_instance);

		void
		SetInstance(Instance* p_instance){mpInstance = p_instance;}

		virtual
		void
		CleanUp();

		void
		CalculateExonType( const vector<vector<int> >& start_exons, const vector<vector<int> >& end_exons, 
						   int exon_cnt, vector<int>& exon_type );
		void
		ConstructLPInstance(const vector<int>& set_sizes, 
							const vector<int>& exon_type,
							const vector<double>& start_from_cnt, 
							const vector<double>& end_at_cnt, 
							const vector<vector<double> >& junc_cnt, 
							const vector<vector<bool> >& isoforms,
							const vector<PEInfo>& pe_info,
							vector<vector<int> >& measure_in_isoform,
							vector<vector<double> >& measure_virtual_length,
							vector<double>& measure_read);


	protected:

		void
		OnSubInstance ( const vector<int>& set_sizes,
						const vector<double>& start_from_cnt,
						const vector<double>& end_at_cnt,
						const vector<vector<double> >& junc_cnt, 
						const vector<vector<int> >& start_exons, 
						const vector<vector<int> >& end_exons,
						const vector<PEInfo>& pe_info, 
						const vector<vector<bool> >& known_isoforms,
						vector<vector<bool> >& valid_isoforms_expanded,
						vector<int>& solution);

		void
		InferNew (const vector<int>& set_sizes, 
				  const vector<double>& start_from_cnt, 
				  const vector<double>& end_at_cnt, 
				  const vector<vector<double> >& junc_cnt, 
				  const vector<vector<bool> >& valid_isoforms,
				  const vector<bool>& b_known_isoforms,
				  const vector<vector<int> >& start_exons, 
				  const vector<vector<int> >& end_exons, 
				  const vector<PEInfo>& pe_info,
				  vector<int>& solution);

		int
		ConstructSetCover ( const vector<int>& set_sizes, 
							const vector<int>& exon_type,
							const vector<double>& start_from_cnt, 
							const vector<double>& end_at_cnt, 
							const vector<vector<double> >& junc_cnt, 
							const vector<vector<bool> >& isoforms,
							const vector<vector<int> >& start_exons, 
							const vector<vector<int> >& end_exons, 
							const vector<PEInfo>& pe_info,
							bool b_enable_se,
							vector<vector<int> >& sc_sets);

		double
		SolveSetCover (int largest_ele,
				       const vector<double>& sc_set_weight, 
					   const vector<vector<int> >& sc_sets,
					   vector<int>& solution);


		int
		EnumerateValidByExpLevel(const vector<vector<double> >& junc_cnt, 
					   const vector<vector<int> >& start_exons, 
					   const vector<vector<int> >& end_exons, 
					   const vector<int>& set_sizes,
					   const vector<PEInfo>& pe_info,
					   vector<vector<bool> >& valid_isoforms,
					   vector<int>& valid_isoform_order);
		
		void
		EnumerateValid( GraphEx<int>& valid_graph, 
						int source, int sink, 
						const vector<vector<int> >& start_exons, 
						const vector<vector<int> >& end_exons, 
						const vector<int>& set_sizes,
						const vector<PEInfo>& pe_info,
						vector<vector<bool> > & valid_isoforms);
		void
		EnumerateValidDFS(GraphEx<int>& valid_graph, 
						int node, int sink, 
						const vector<int>& set_sizes,
						const vector<PEInfo>& pe_info,
						vector<bool>& b_visited, 
						vector<bool>& an_iso,
						vector<vector<bool> > & valid_isoforms);

		bool
		CheckPEConsistency(const vector<bool>& a_valid_isoform, const vector<int>& set_sizes, const vector<PEInfo>& pe_info);

		bool
		CheckStartEnd(const vector<bool>& a_valid_isoform, const vector<vector<int> >& start_exons, const vector<vector<int> >& end_exons);

		double
		MaxConsistExp(const vector<bool>& a_valid_isoform, const vector<int>& set_sizes, const vector<PEInfo>& pe_info);

		bool
		IsInformative(int len_exon1, int len_exon2, int gap, const vector<PEInfo>& pe_info);

		bool
		IsPartCombSupported(const vector<bool>& part_comb, const vector<PEInfo>& pe_info);

		bool
		IsPairSupported(const vector<int>& set_sizes, int exon1, int exon2, int span, const vector<PEInfo>& pe_info);

		bool
		IsTripleSupported(const vector<int>& set_sizes, int exon1, int exon2, int exon, int span, const vector<PEInfo>& pe_info);

		void
		BuildSpliceGraph ( GraphEx<int>& splice_graph, const vector<vector<double> >& junc_cnt, 
			const vector<vector<int> >& start_exons, const vector<vector<int> >& end_exons, int& source, int& sink);

		int
		CalculateShrink ( const vector<vector<double> >& junc_cnt, const vector<vector<int> >& start_exons, 
					  	  const vector<vector<int> >& end_exons, vector<int>& shrink_map);

		int
		CalculateShrink ( const vector<vector<bool> >& isoforms, vector<int>& shrink_map);

		void
		ShrinkInstance ( const vector<int>& shrink_map,
						const vector<int>& set_sizes, vector<int>& set_sizes_shrinked, 
						const vector<double>& start_from_cnt, vector<double>& start_from_cnt_shrinked, 
						const vector<double>& end_at_cnt, vector<double>& end_at_cnt_shrinked, 
						const vector<vector<double> >& junc_cnt, vector<vector<double> >& junc_cnt_shrinked,
						const vector<PEInfo>& pe_info, vector<PEInfo>& pe_info_shrinked,
						const vector<vector<int> >& start_exons, vector<vector<int> >& start_exons_shrinked, 
						const vector<vector<int> >& end_exons, vector<vector<int> >& end_exons_shrinked);

		void
		ShrinkIsoform( const vector<int>& shrink_map, 
				       const vector<vector<bool> >& isoforms, vector<vector<bool> >& isoforms_shrinked);

		void
		ExpandIsoforms ( const vector<int>& shrink_map, const vector<vector<bool> >& isoforms_shrinked, vector<vector<bool> >& isoforms_expanded );

		void
		BestCombination(const vector<int>& set_sizes, 
						const vector<int>& exon_type, 
						const vector<double>& start_from_cnt, 
						const vector<double>& end_at_cnt, 
						const vector<vector<double> >& junc_cnt, 
						const vector<vector<bool> >& valid_isoforms,
						const vector<bool>& b_known_isoforms,
						const vector<vector<int> >& start_exons, 
						const vector<vector<int> >& end_exons, 
						const vector<PEInfo>& pe_info,
						bool b_enable_se,
						vector<int>& solution);
		
		void
		ResultStatistics(const vector<string>& iso_names,
						 const vector<double>& known_isoforms_exp, 
						 const vector<vector<bool> >& known_isoforms, 
						 const vector<vector<bool> >& valid_isoforms, 
						 const vector<vector<double> >& junc_cnt,
						 const vector<int>& solution);

		void
		OutputPrediction(const vector<Exon>& exons, 
						 const vector<vector<bool> >& valid_isoforms, 
						 const vector<int>& solution);

		void
		PartitionAndSearch (const vector<int>& set_sizes, 
							const vector<int>& exon_type,
							const vector<double>& start_from_cnt, 
							const vector<double>& end_at_cnt, 
							const vector<vector<double> >& junc_cnt, 
							const vector<vector<bool> >& valid_isoforms,
							const vector<bool>& b_known_isoforms,
							const vector<vector<int> >& start_exons, 
							const vector<vector<int> >& end_exons, 
							const vector<PEInfo>& pe_info,
							vector<int>& solution);

		void
		ExtractStartEndExons(const vector<vector<bool> >& isoforms, vector<int>& start_exons,
						     vector<int>& end_exons);

		void
		Project(const vector<vector<bool> >& isoforms, vector<vector<bool> >& isoforms_pro,
				const vector<int>& sub_set, vector<vector<int> >& isoform_group);

		void
		Project(const vector<int>& set_sizes, vector<int>& set_sizes_pro,
				const vector<int>& exon_type, vector<int>& exon_type_pro, 
				const vector<double>& start_from_cnt, vector<double>& start_from_cnt_pro,
				const vector<double>& end_at_cnt, vector<double>& end_at_cnt_pro,
				const vector<vector<double> >& junc_cnt, vector<vector<double> >& junc_cnt_pro,
				const vector<PEInfo>& pe_info, vector<PEInfo>& pe_info_pro,
				const vector<int>& sub_set);
		
		void
		CombineMeasure(vector<vector<int> >& first_measure_in_isoform,
						vector<vector<double> >& first_measure_virtual_length,
						vector<double>& first_measure_read,
						const vector<vector<int> >& second_measure_in_isoform,
						const vector<vector<double> >& second_measure_virtual_length,
						const vector<double>& second_measure_read);
		void
		RemoveMeasures(vector<vector<int> >& measure_in_isoform,
						vector<vector<double> >& measure_virtual_length,
						vector<double>& measure_read,
						const vector<bool>& b_measure_removed);

		void
		ExonJuncVirtualLength(const vector<int>& set_sizes, 
							  const vector<int>& exon_type,
							  const vector<double>& start_from_cnt, 
							  const vector<double>& end_at_cnt, 
							  const vector<vector<bool> >& isoforms, 
							  const vector<vector<double> >& junc_cnt, 
							  vector<vector<int> >& exon_junc_in_isoform, 
							  vector<double>& exon_junc_read, 
							  vector<vector<double> >& exon_junc_virtual_length);


		void
		PairTripleVirtualLength(const vector<int>& set_sizes, const vector<vector<bool> >& isoforms, 
								const vector<PEInfo>& pe_info, vector<vector<int> >& pair_triple_in_isoform, 
								vector<double>& pair_triple_read, vector<vector<double> >& pair_triple_virtual_length,
								bool b_allpt);

		double
		VirtualLength(const vector<bool>& an_isoform, const vector<int>& set_sizes, const PEInfo& pe_info, map<int64, double>& virtual_length);

		double
		VirtualLength(const PEInfo& pe_info, int len_first_interval, int len_second_interval, int len_third_interval);

		int
		StartEndInIsoform (const vector<vector<bool> >& isoforms, const vector<vector<int> >& start_exons, 
						   const vector<vector<int> >& end_exons, vector<vector<int> >& start_end_pair_in_isoform);

	private:
		vector<int> mShrinkExonMap;
		LPsolver* mpSolver;
		double mConfidenceLevel;
		double mOmittedJunctionCnt;

		bool mbEnableStartEnd;   // Whether enable the start and end pairs or not?
		bool mbEnablePE;         // Whether enable the paired-end info or not?
		bool mbEnableStepII;    // Whether execute the step II of the algorithm

		// Only partial combination with duplications not less than 
		// mMinEffectivePartCombDup will take effects in the algorithm.
		// This parameter will influence ConstructSetCover, IsPairSupported 
		// and IsTripleSupported.
		int mMinEffectivePartCombDup; 
		double mMinExpLevel; 

		int mPartitionSize;      // Used by PartitionAndSearch
		int mMaxValidIsoformCnt; // Maximum number of allowed valid isoforms.

		vector<double> mWeight;

		vector<vector<bool> > mKnownIsoforms;
		vector<vector<bool> > mValidIsoforms;   // Always stores the valid isoforms of the latest instance.
		vector<int>           mSolution;        // Stores the indexes of mValidIsoforms.
}; /* -----  end of class IsoInferPE  ----- */

#endif
