/*
 * =====================================================================================
 *
 *       Filename:  DataProcessor.h
 *
 *    Description:  The header file for class DataProcessor
 *
 *        Version:  1.0
 *        Created:  04/17/2009 04:33:29 PM
 *       Revision:  none
 *       Compiler:  gcc
 *
 *         Author:  Jianxing Feng (feeldead), feeldead@gmail.com
 *        Company:  THU
 *
 * =====================================================================================
 */

#ifndef DataProcessor_H 
#define DataProcessor_H

#include <string>
#include <iostream>
#include <vector>
#include <list>
#include <map>
#include "Utility.hpp"
#include "Exon.h"
#include "Instance.h"
#include "RandomExpReadAssignerIM.h"

using namespace std;

/*
 * =====================================================================================
 *        Class:  DataProcessor
 *  Description:  This class provides methods to operate on and process the data.
 * =====================================================================================
 */
class DataProcessor
{
	public:
		/* ====================  LIFECYCLE     ======================================= */
		DataProcessor ();                             /* constructor */

		bool
		ExtractExon(string from_file, map_str2vec_exon& exons, int read_len);


		bool
		ExtractRef(string& refseq_file, map<string, map<int, string> >& pos2refseq, int cross_len, int start_pos_of_first_nt);

		bool
		CreateReadInfo(string read_info_file, vector<ReadInfoBase*>& read_infos, vector<string>& mapping_files);

		bool
		WriteReadInfo(string read_info_file, vector<ReadInfoBase*>& read_infos, vector<string>& mapping_files);

		int
		GetInvolvedExons(string& a_line, int line_cnt, map<string, vector<Exon> >& exons, map_str2vec_int& exon_2_instance, 
						map_str2vec_int& exon_2_local_idx, vector<int>& involved_exons);

		bool
		LoadShortReads(string from_file, map<string, vector<Exon> >& exons, ReadInfoBase* p_read_info, vector<Instance>& all_instances);

		void
		Decompose(vector<Exon>& exons, int start, int end, vector<int>& exons_idx);

		int
		FindExon(vector<Exon>& exons, int64 pos);

		int
		RemoveIntrons(Instance& an_instance, bool b_use_provided_exons, double noise_level);

		bool
		LoadGenes(string gene_file_name, map_str2vec_gene& genes);

		bool
		LoadBoundary(string boundary_file_name, map_str2vec_boundary& boundaries);

		bool
		LoadTSSPAS(string tss_pas_file_name, map_str2vec_gene& genes);

		bool
		LoadGeneAndExon(string boundary_file_name, string gene_file_name, 
					   string tss_pas_file_name, map_str2vec_gene& genes, map_str2vec_exon& exons);

		bool
		LoadGeneAndExon(string gene_file_name, map_str2vec_gene& genes, map_str2vec_exon& exons);

		void
		GroupGeneAndExon(map_str2vec_gene& genes, map_str2vec_int& gene_color,
						 map_str2vec_exon& exons, map_str2vec_int& exon_color);

		bool
		ExtractJunctionRef(map_str2vec_exon& exons, map_str2vec_int& exon_color, 
						  map_str2vec_gene& genes, map_str2vec_int& gene_color, 
						  string& refseq_file, int start_pos_of_first_nt,
						  int read_len, int cross_strength);

		bool
		ExtractJunctionRef(string tran_file, string& refseq_file, 
				int start_pos_of_first_nt, int read_len, int cross_strength);

		bool
		HighIsoform(string tran_file, string junc_map_file);

		bool
		ExtractInstances(map_str2vec_gene& genes, map_str2vec_int& gene_color,
						map_str2vec_exon& exons, map_str2vec_int& exon_color,
						vector<Instance>& all_instances);

		bool
		HighIsoforms(Instance& an_instance, bool b_check_start_end);

	public:
		bool mbStrandSpecific;
		ostream* mpOutput;
		string mOutputFile;

}; /* -----  end of class DataProcessor  ----- */

#endif
