/*
 * =====================================================================================
 *
 *       Filename:  DataProcessor.h
 *
 *    Description:  The header file for class DataProcessor
 *
 *        Version:  1.0
 *        Created:  04/17/2009 04:33:29 PM
 *       Revision:  none
 *       Compiler:  gcc
 *
 *         Author:  Jianxing Feng (feeldead), feeldead@gmail.com
 *        Company:  THU
 *
 * =====================================================================================
 */

#ifndef DataProcessor_H 
#define DataProcessor_H

#include <string>
#include <iostream>
#include <vector>
#include <list>
#include <map>
#include "Utility.hpp"
#include "Utility2.h"
#include "Exon.h"
#include "Instance.h"
#include "RandomExpReadAssignerIM.h"

#include "ToolBase.h"

using namespace std;

/*
 * =====================================================================================
 *        Class:  DataProcessor
 *  Description:  This class provides methods to operate on and process the data.
 * =====================================================================================
 */
class DataProcessor : public ToolBase
{
	public:
		/* ====================  LIFECYCLE     ======================================= */
		DataProcessor ();                             /* constructor */

		virtual 
		void 
		Help();
		
		virtual 
		int 
		Main(int argc, char* argv[], int startArg);


		bool
		ExtractExon(string from_file, map_str2vec_exon& exons);


		bool
		ExtractRef(string& refseq_file, map<string, map<int, string> >& pos2refseq, int cross_len, int start_pos_of_first_nt);

		bool
		LoadNonJunctionReads(string from_file, map_str2vec_exon& exons, 
							map<string, map_64_double>& junc_counts, int& mapped_cnt);

		bool
		LoadJunctionReads(string from_file, map<string, vector<Exon> >& exons, map<string, map_64_double>& junc_counts);

		bool
		LoadPEInfo(string pe_info_file, vector<PEInfo>& pe_infos, vector<string>& mapping_files);

		void
		LoadPEReads(vector<PEInfo>& pe_infos, vector<string> mapping_files, vector<Instance>& all_instances);

		bool
		LoadPEReads(string from_file, map_str2vec_exon& exons, PEInfo& pe_info_seed, vector<Instance>& all_instances);

		bool
		AppendStrictPEInfo(string pe_info_file, RandomExpReadAssignerIM& semi_rand_gen);

		void
		Decompose(vector<Exon>& exons, int start, int end, vector<int>& exons_idx);

		int
		FindExon(vector<Exon>& exons, int64 pos);


		bool
		ExtractInstances(string transcript_file, 
						 map_str2vec_exon& exons, 
						 map<string, map_64_double>& junc_counts,
						 vector<Instance>& all_instances);

		bool
		LoadGenes(string gene_file_name, map_str2vec_gene& genes);

		bool
		LoadBoundary(string boundary_file_name, map_str2vec_boundary& boundaries);

		bool
		LoadTSSPAS(string tss_pas_file_name, map_str2vec_gene& genes);

		bool
		LoadGeneAndExon(string boundary_file_name, string gene_file_name, 
					   string tss_pas_file_name, map_str2vec_gene& genes, map_str2vec_exon& exons);

		bool
		LoadGeneAndExon(string gene_file_name, map_str2vec_gene& genes, map_str2vec_exon& exons);

		void
		GroupGeneAndExon(map_str2vec_gene& genes, map_str2vec_int& gene_color,
						 map_str2vec_exon& exons, map_str2vec_int& exon_color);

		bool
		ExtractJunctionRef(map_str2vec_exon& exons, map_str2vec_int& exon_color, 
						   map_str2vec_gene& genes, map_str2vec_int& gene_color, 
						   string& refseq_file, int start_pos_of_first_nt);

		bool
		ExtractJunctionRef(string tran_file, string& refseq_file, int start_pos_of_first_nt);

		bool
		HighIsoform(string tran_file, string junc_map_file);

		bool
		ExtractInstances(map_str2vec_gene& genes, map_str2vec_int& gene_color,
						map_str2vec_exon& exons, map_str2vec_int& exon_color,
						map<string, map_64_double>& junc_counts, vector<Instance>& all_instances,
						bool b_use_provided_exons, double noise_level);

		bool
		HighIsoforms(map_str2vec_gene& genes, map_str2vec_int& gene_color,
					 map_str2vec_exon& exons, map_str2vec_int& exon_color,
					 map<string, map_64_double>& junc_counts, bool b_check_start_end);

	public:
		int mReadLen;
		int mCrossStrength;
		bool mbStrandSpecific;
		ostream* mpOutput;
		string mOutputFile;

}; /* -----  end of class DataProcessor  ----- */

#endif
