/*
 * =====================================================================================
 *
 *       Filename:  InstanceReader.h
 *
 *    Description:  The header file for class InstanceReader
 *
 *        Version:  1.0
 *        Created:  04/09/2009 01:21:42 PM
 *       Revision:  none
 *       Compiler:  gcc
 *
 *         Author:  Jianxing Feng (feeldead), feeldead@gmail.com
 *        Company:  THU
 *
 * =====================================================================================
 */

#ifndef InstanceReader_H 
#define InstanceReader_H

#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include "InstanceHandler.h"
#include "InstanceGenerator.h"
#include "ReadInfoBase.hpp"

using namespace std;

/*
 * =====================================================================================
 *        Class:  InstanceReader
 *  Description:  Read instances from file
 * =====================================================================================
 */
class InstanceReader : public InstanceGenerator
{
	public:
		/* ====================  LIFECYCLE     ======================================= */
		InstanceReader (string input_file, vector<ReadInfoBase*> read_infos)
		{
			mInputFile = input_file;
			mReadInfos = read_infos;
		};                             /* constructor */

		virtual ~InstanceReader (){};                             /* constructor */

		virtual
		bool
		Generate()
		{
			fstream infile;
			infile.open(mInputFile.data(), ios::in);
			if (!infile.is_open())
			{
				cerr << "File " << mInputFile.data() << " can not be opened" << endl;
				return false;
			}

			int line_cnt = 0;
			int loaded_cnt = 0;
			while (!infile.eof())
			{
				Instance an_instance;
				if (!ReadAnInstance(infile, line_cnt, an_instance)) break;
				if (mpHandler) mpHandler->OnInstance(an_instance);
				loaded_cnt++;
			}

			cout << "Totally, " << loaded_cnt << " instances are loaded" << endl;
			infile.close();
		}


		//--------------------------------------------------------------------------------------
		//       Class:  InstanceReader
		//      Method:  ReadAllInstances
		// Description:  This function read all the instances into an array from a file.
		//               It is very similar to Generate.
		//--------------------------------------------------------------------------------------
		bool
		ReadAllInstances(vector<Instance>& all_instances)
		{
			fstream infile;
			infile.open(mInputFile.data(), ios::in);
			if (!infile.is_open())
			{
				cerr << "File " << mInputFile.data() << " can not be opened" << endl;
				return false;
			}

			int line_cnt = 0;
			int loaded_cnt = 0;
			while (!infile.eof())
			{
				all_instances.resize(loaded_cnt+1);
				Instance& an_instance = all_instances[loaded_cnt];
				if (!ReadAnInstance(infile, line_cnt, an_instance)) break;
				loaded_cnt++;
			}
			all_instances.resize(loaded_cnt);

			cout << "Totally, " << loaded_cnt << " instances are loaded" << endl;
			infile.close();
		}
	private:
		//--------------------------------------------------------------------------------------
		//       Class:  InstanceReader
		//      Method:  ReadAnInstance
		// Description:  This function read an instance from a input stream.
		//--------------------------------------------------------------------------------------
		bool
		ReadAnInstance(istream& infile, int& line_cnt, Instance& an_instance)
		{
			vector<string> splitted;
			string line;

			if (!getline(infile, line)) return false;

			line_cnt++;

			int& instance_cnt                      = an_instance.mInstanceCnt;
			vector<int>& seg_len 				   = an_instance.mSegLen;
			vector<vector<bool> >& isoforms        = an_instance.mIsoforms;
			vector<double>& iso_exp                = an_instance.mIsoExp;
			vector<Exon>& exons                    = an_instance.mExons;
			vector<Gene>& genes                    = an_instance.mGenes;
			int& known_cnt                         = an_instance.mKnownCnt;
			vector<ShortRead>& short_reads         = an_instance.mShortReadGroup.mShortReads;

			an_instance.mPvalue = 1;

			splitted = Utility::Split('\t', line);
			if ("Instance" != splitted[0])
			{
				cerr << "ERROR : line " << line_cnt << " 'Instance' is expected" << endl;
				cerr << line << endl;
			}
			instance_cnt = atoi(splitted[1].data());

			getline(infile, line); line_cnt++;
			splitted = Utility::Split('\t', line);
			if ("genes" != splitted[0])
				cerr << "ERROR : 'genes' is expected at line " << line_cnt << endl;
			genes.resize(atoi(splitted[1].data()));
			for (unsigned i = 0; i < genes.size(); i++)
			{
				getline(infile, line); line_cnt++;
				genes[i].Read(line);
			}

			getline(infile, line); line_cnt++;
			splitted = Utility::Split('\t', line);
			if ("segs" != splitted[0])
				cerr << "ERROR : 'segs' is expected at line " << line_cnt << endl;
			int set_cnt = atoi(splitted[1].data());
			exons.resize(set_cnt);
			seg_len.resize(set_cnt);
			for (int i = 0; i < set_cnt; i++)
			{
				getline(infile, line); line_cnt++;
				exons[i].Read(line);
				seg_len[i] = exons[i].mEnd - exons[i].mStart;
			}

			// Skip the seg lens
			getline(infile, line); line_cnt++;

			getline(infile, line); line_cnt++;
			splitted = Utility::Split('\t', line);
			if ("isoforms" != splitted[0])
			{
				cerr << "ERROR : 'isoforms' is expected at line " << line_cnt << endl;
				exit(0);
			}
			int iso_cnt = atoi(splitted[1].data());
			known_cnt = atoi(splitted[2].data());
			isoforms.resize(iso_cnt);
			iso_exp.resize(iso_cnt);

			for (int i = 0; i < iso_cnt; i++)
			{
				getline(infile, line); line_cnt++;
				splitted = Utility::Split('\t', line);
				isoforms[i].resize(set_cnt);
				for (int j = 0; j < set_cnt; j++)
				{
					if (atoi(splitted[j+1].data()) == 0)
						isoforms[i][j] = false;
					else
						isoforms[i][j] = true;
				}
				iso_exp[i] = atof(splitted[set_cnt+1].data());
			}

			// Load short read information
			getline(infile, line); line_cnt++;
			splitted = Utility::Split('\t', line);
			int read_info_cnt = 0;
			while ("Reads" == splitted[0])
			{
				short_reads.resize(short_reads.size()+1);
				if (read_info_cnt < mReadInfos.size())
					short_reads[short_reads.size()-1].mpReadInfo = mReadInfos[read_info_cnt++];
				int patt_cnt = atoi(splitted[1].data());
				vector<Pattern>& patterns = short_reads[short_reads.size()-1].mPatterns;
				vector<double>& pattern_dup = short_reads[short_reads.size()-1].mPatternDup;
				patterns.resize(patt_cnt);
				pattern_dup.resize(patt_cnt);

				for (unsigned j = 0; j < patterns.size(); ++j)
				{
					getline(infile, line); line_cnt++;
					splitted = Utility::Split(';', line);
					if (splitted.size() < 2)
					{
						cerr << "ERROR : not enough fields at line " << line_cnt << endl;
						exit(0);
					}

					for (unsigned k = 0; k < splitted.size()-1; ++k)
					{
						vector<int> segs;
						vector<string> sub_splitted = Utility::Split('\t', splitted[k]);
						for (unsigned l = 0; l < sub_splitted.size(); ++l)
							segs.push_back(atoi(sub_splitted[l].data()));
						patterns[j].mMappedSegs.push_back(segs);
					}
					pattern_dup[j] = atof(splitted[splitted.size()-1].data());
				}

				getline(infile, line); line_cnt++;
				splitted = Utility::Split('\t', line);
			}

			// Load start end exons, optional
			if ("SESegs" == splitted[0])
			{
				int se_pair_cnt = atoi(splitted[1].data());
				an_instance.mStartExons.resize(se_pair_cnt);
				an_instance.mEndExons.resize(se_pair_cnt);
				for (int i = 0; i < se_pair_cnt; i++)
				{
					getline(infile, line); line_cnt++;
					splitted = Utility::Split('\t', line);

					vector<string> indexes = Utility::Split(',', splitted[0]);
					for (unsigned j = 0; j < indexes.size(); j++)
						an_instance.mStartExons[i].push_back(atoi(indexes[j].data()));

					indexes = Utility::Split(',', splitted[1]);
					for (unsigned j = 0; j < indexes.size(); j++)
						an_instance.mEndExons[i].push_back(atoi(indexes[j].data()));
				}
			}
			return true;
		}

	private:
		vector<ReadInfoBase*> mReadInfos;
		string mInputFile;

}; /* -----  end of class InstanceReader  ----- */

#endif
