/*
 * Score.cpp
 *
 */

#include "Score.h"

/// Default constructor.
/// The body is empty: no member is assigned here. Members of class type are
/// default-constructed by the language; any member of built-in type keeps an
/// indeterminate value until it is assigned (for example by copying from an
/// object built with the three-argument constructor).
Score::Score() {
	// Nothing is initialised explicitly.
}

/// Destructor.
/// The body is empty: this function releases nothing itself; member
/// destructors run automatically afterwards.
Score::~Score() {
	// Nothing to release explicitly.
}

/// Constructor that is actually used to build a working scorer.
///
/// Stores copies of the alignment, the TF name and the parameter set, reads
/// precision, lambda, background and mode from the parameters, calls
/// alignment.set_logprobs(LAMBDA) and computes the pairwise logR matrix.
/// When the mode is not PWM it additionally derives the rescaling exponent
/// (find_alpha_exponent) and the reference determinant with rho = 1.0
/// (rescale_laplacian_determinant).
///
/// @param alg      alignment; copied into the member `alignment`
/// @param tf_name  name later returned by get_TF()
/// @param params   parameter set; copied into the member `parameters`
Score::Score(Alignment& alg, string tf_name, ParameterFile params){
	alignment = alg;
	tf = tf_name;
	parameters = params;
	PRECISION = parameters.give_precision();
	LAMBDA = parameters.give_lambda();//Default is 1/8, making default PWM pseudocount 4/8 = 1/2
	alignment.set_logprobs(LAMBDA);

	background = BackgroundModel(parameters.give_background());

	mode = parameters.give_mode();

	//This needs to be calculated even when in mode PWM, because size is used later on
	logR = calculate_logR();

	// Give both derived quantities a defined value first. In PWM mode they
	// are not computed below, and determinant() would otherwise return an
	// indeterminate value. 1.0 means "no rescaling"; 0.0 is a neutral term
	// for the additive use in score_window().
	alpha_exponent = 1.0;
	laplacian_determinant = 0.0;

	if(mode != PWM){
	  alpha_exponent = find_alpha_exponent(logR);

	  //For the purpose of motif finding and site prediction rho is set to 1.0
	  laplacian_determinant = rescale_laplacian_determinant(logR, alpha_exponent,1.0);
	}
}


double Score::determinant(){
  return laplacian_determinant;
}

/// Builds the symmetric matrix of pairwise log-ratios between alignment columns.
///
/// For every column pair (i, j) with j < i the entry is
/// logP_dep - logP_indep, where both terms are sums of lgamma() values over
/// the single-column counts (get_n_i) and the pair counts (get_n_ij) of the
/// alignment, using pseudocount LAMBDA per pair cell and 4*LAMBDA per
/// single-column cell. The diagonal stays 0.
///
/// NOTE(review): LGAMMA_4LAMBDA and LGAMMA_LAMBDA come from constants.h and are
/// not visible here; presumably they are pre-computed for the default lambda.
/// Verify they remain consistent when LAMBDA is overridden via the parameters.
///
/// @return square matrix with alignment.ncols() rows and columns
std::vector <std::vector<double> > Score::calculate_logR(){
	std::vector <std::vector<double> > ratios;
	ratios.resize(alignment.ncols());
	for(unsigned int row = 0; row < alignment.ncols(); row++){
		ratios[row].resize(alignment.ncols(), .0);
	}

	for (unsigned short int i=0; i < alignment.ncols() ; i++){
	  for (unsigned short int j=0; j < i ; j++){
	    // Independent model: accumulated per letter for both columns.
	    double indep = 0.0;
	    // Dependent model: starts from the normalisation term, then adds
	    // one contribution per letter pair.
	    double dep = lgamma(alignment.nrows()+16*LAMBDA) - lgamma(16*LAMBDA);

	    for(unsigned short int a = 0; a < ALPH_NUM; a++){
	      indep += lgamma(alignment.get_n_i(a, i) + 4*LAMBDA);
	      indep += lgamma(alignment.get_n_i(a, j) + 4*LAMBDA);
	      indep -= LGAMMA_4LAMBDA;	// constant from constants.h

	      for(unsigned short int b = 0; b < ALPH_NUM; b++){
		dep += lgamma(alignment.get_n_ij(a,b,i,j) + LAMBDA);
		dep -= LGAMMA_LAMBDA;	// constant from constants.h
	      }
	    }

	    const double log_ratio = dep - indep;
	    ratios[j][i] = log_ratio;
	    ratios[i][j] = log_ratio;
	  }
	}
	return ratios;
}

/// Chooses the exponent used to rescale logR so that the spread of the
/// relevant entries does not exceed PRECISION.
///
/// In DWT mode all entries below the diagonal are inspected; in ADJ mode only
/// the entries between neighbouring columns (i, i-1). Any other mode prints an
/// error and terminates the program with exit(1).
///
/// @param logR  square matrix of pairwise log-ratios
/// @return PRECISION / (max - min) when the range exceeds PRECISION, otherwise
///         1.0 (also returned when the matrix has fewer than two rows, i.e.
///         when there is no pair to inspect)
double Score::find_alpha_exponent(std::vector <std::vector<double> > logR) {
  const bool all_pairs = (mode == DWT);
  const bool adjacent_only = (mode == ADJ);

  if(!all_pairs && !adjacent_only){
    cerr << "Got into find_alpha_exponent in mode " << mode << " should not be possible. Exiting" << endl;
    exit(1);
  }

  // With fewer than two columns there is no entry logR[1][0] to start from;
  // reading it would be out of bounds. No pair means nothing to rescale.
  if(logR.size() < 2){
    return 1.0;
  }

  double highest = logR[1][0];
  double lowest = logR[1][0];

  //Find maximal and minimal score of the logR entries
  if(all_pairs){
    for (unsigned int i = 0; i < logR.size(); i++){
      for(unsigned int j = 0; j < i; j++){
	if (logR[i][j] > highest)
	  highest = logR[i][j];
	if (logR[i][j] < lowest)
	  lowest = logR[i][j];
      }
    }
  }
  else{
    for (unsigned int i = 1; i < logR.size(); i++){
      const unsigned int j = i-1;
      if (logR[i][j] > highest)
	highest = logR[i][j];
      if (logR[i][j] < lowest)
	lowest = logR[i][j];
    }
  }

  const double range = highest-lowest;
  if (range > PRECISION){
    return PRECISION / range;
  }
  return 1.0;
}

// streamlined calculations: rescale/exponentiate/Laplacian/Determinant.
double Score::rescale_laplacian_determinant(std::vector <std::vector<double> > N, double alpha, double rho) {
  //copy the matrix
  std::vector <std::vector<double> > M;
  M.resize(N.size());
  for(unsigned int i = 0; i < N.size(); i++){
    M[i].resize(N.size(), .0);
  }

  if(mode == DWT) {
    for (unsigned int i = 0; i < N.size(); i++){
      for(unsigned int j = 0; j < i; j++){
	//add the forest part, i.e. possibility of no links
	M[j][i] = M[i][j] = (1.0-rho)+rho*exp(alpha*N[i][j]);
      }
    }
  }
  else if(mode == ADJ) {
    for (unsigned int i = 1; i < N.size(); i++){
      unsigned int j = i - 1;
      M[j][i] = M[i][j] = (1.0-rho)+rho*exp(alpha*N[i][j]);
    }
  }
  else{
    cerr << "Got into rescale_laplacian in mode " << mode << " should not be possible. Exiting." << endl;
    exit(1);
  }

  for(unsigned short int i = 0; i < M.size(); i++){
    double sum = 0.0;
    for(unsigned short int j = 0; j< M.size(); j++){
      sum += M[i][j];
    }
    M[i][i] = -sum ;
  }

  Decomposition D;
  double determinant = 1.0;

  determinant = D.determinant(M, N.size()-1);

  return determinant;
}

/// Maps an index of the full logR matrix to its index after the rows/columns
/// node1 and node2 have been removed. Must only be called for an index that
/// is different from both nodes.
static unsigned int contracted_index(unsigned int index, unsigned int node1, unsigned int node2){
  if(index>=node1){index--;}
  if(index>=node2){index--;}
  return index;
}

/// Combines two log-ratios a and b into the value c that satisfies
/// alpha*c = log(exp(alpha*a) + exp(alpha*b)), factoring out the larger
/// of the two before exponentiating.
static double merged_logR(double a, double b, double alpha){
  const double larger = (a > b) ? a : b;
  const double smaller = (a > b) ? b : a;
  return larger + log(1.0+exp(alpha*(smaller-larger)))/alpha;
}

/// Builds the logR matrix of the graph in which node1 and node2 are merged
/// into a single node.
///
/// The result has logR.size()-1 rows and columns: the first logR.size()-2
/// hold the entries of all remaining nodes (original order kept), the last
/// row/column holds the merged node, whose entry towards node i is the
/// combination of logR[node1][i] and logR[node2][i] described at merged_logR,
/// using the member alpha_exponent. The diagonal stays 0.
///
/// Preconditions (not checked): node1 != node2, both smaller than logR.size(),
/// and logR.size() >= 2.
///
/// @param node1  first end of the contracted edge
/// @param node2  second end of the contracted edge
/// @return the contracted, symmetric matrix
std::vector<std::vector<double> > Score::contract_edge(unsigned int node1, unsigned int node2){
  std::vector <std::vector<double> > M;
  M.resize(logR.size()-1);
  for(unsigned int i = 0; i < logR.size()-1; i++){
    M[i].resize(logR.size()-1, .0);
  }

  // Copy every entry that touches neither node1 nor node2.
  for(unsigned int i=0;i<logR.size();i++){
    if((i==node1) || (i==node2)){
      continue;
    }
    const unsigned int index1 = contracted_index(i, node1, node2);
    for(unsigned int j=0;j<i;j++){
      if((j==node1) || (j==node2)){
	continue;
      }
      const unsigned int index2 = contracted_index(j, node1, node2);
      M[index1][index2]=logR[i][j];
      M[index2][index1]=logR[i][j];
    }
  }

  //add contracted edge in the last column
  const unsigned int last_index=logR.size()-2;
  for(unsigned int i=0;i<logR.size();i++){
    if((i==node1) || (i==node2)){
      continue;
    }
    const unsigned int current_index = contracted_index(i, node1, node2);
    // Both orderings use the same formula; the previous code divided by
    // alpha_exponent inside the log() in one of the two branches.
    M[last_index][current_index] = merged_logR(logR[node1][i], logR[node2][i], alpha_exponent);
    M[current_index][last_index]=M[last_index][current_index];
  }
  return M;
}


/// Computes a posterior-style weight for the edge between columns i and j.
///
/// Steps:
///  1. scan rho over 0, 0.05, ..., 1.0 and keep the rho for which
///     rescale_laplacian_determinant(logR, alpha_exponent, rho) is largest;
///  2. contract the edge (i, j) and evaluate the same quantity on the
///     contracted matrix with that rho;
///  3. return exp(alpha_exponent*logR[i][j] + log(rho) + contracted - full).
///
/// NOTE(review): the values from rescale_laplacian_determinant are combined
/// additively here, so they are presumably log-determinants — confirm.
///
/// @param i  first column index, must be < logR.size()
/// @param j  second column index, must be < logR.size()
/// @return the value described above, or -1 when an index is out of range
double Score::posterior(unsigned int i, unsigned int j){
    // Valid indices run from 0 to logR.size()-1; an index equal to the size
    // is already out of range, hence >= rather than >.
    if(i>=logR.size() || j>=logR.size()){
            return -1.;
    }

    double bestrho = 1.0;
    double bestscore = -1000000000.0;
    for(int irho=0;irho<=20;++irho){
	const double rho = 0.05*static_cast<double>(irho);
	const double candidate = rescale_laplacian_determinant(logR, alpha_exponent,rho);
	if(candidate > bestscore){
	  bestscore = candidate;
	  bestrho = rho;
	}
    }

    std::vector <std::vector<double> > M = contract_edge(i, j);
    const double numerator_determinant = rescale_laplacian_determinant(M, alpha_exponent,bestrho);

    double posterior = alpha_exponent*logR[i][j] + log(bestrho)+numerator_determinant;
    // Re-evaluated rather than reusing bestscore: if no candidate beat the
    // initial bestscore, bestrho is still 1.0 and bestscore is not its value.
    const double denominator_determinant = rescale_laplacian_determinant(logR, alpha_exponent,bestrho);
    posterior -= denominator_determinant;

    return exp(posterior);
}

/// Reads one entry of the stored logR matrix. No bounds checking is done.
///
/// @param i  row index
/// @param j  column index
/// @return logR[i][j]
double Score::give_logR(unsigned int i, unsigned int j){
  const std::vector<double>& row = logR[i];
  return row[j];
}


/// Scores every motif-length window of a sequence.
///
/// The window starting at position k is scored with score_window() when
/// is_valid() accepts it; otherwise its slot keeps DEF_SCORE. If the sequence
/// is shorter than the motif (alignment.ncols()) an error is printed and the
/// program terminates with exit(1).
///
/// @param seq  sequence of arbitrary length
/// @return one score per window, seq.length() - motif length + 1 entries
std::vector<double> Score::score_sequence(string seq){
  const int seqlen = seq.length();
  const int motiflen = alignment.ncols();

  // A sequence shorter than the motif cannot hold a single window.
  if (motiflen > seqlen) {
    cerr << "Sequence length shorter than motif length. Quitting..." << endl;
    exit(1);
  }

  const int windows = seqlen-motiflen+1;
  std::vector<double> scores;
  scores.resize(windows, DEF_SCORE);

  for(int start = 0; start < windows; ++start) {
    const string window = seq.substr(start, motiflen);
    if(!is_valid(window)) {
      continue;	// slot keeps DEF_SCORE
    }
    scores[start] = score_window(window);
  }
  return scores;
}

/// Scores a single window against the motif.
///
/// The score starts as the sum, over all positions whose character is not
/// 'N', of alignment.get_lp_i(char, position) minus background.get_lp(char).
/// When the mode is not PWM, the value of rescale_laplacian_determinant for
/// the logR matrix updated with this window (rho = 1.0) is added and the
/// stored laplacian_determinant is subtracted.
///
/// @param seq  window to score
/// @return the accumulated score of the window
double Score::score_window(string seq){
  double log_odds = 0.0;

  // Position-specific part; positions holding 'N' contribute nothing.
  for(unsigned short int pos = 0; pos < seq.size(); pos++){
    if(seq[pos] == 'N'){
      continue;
    }
    log_odds += alignment.get_lp_i(seq[pos],pos);
    log_odds -= background.get_lp(seq[pos]);
  }

  // Pure PWM scoring stops here.
  if(mode == PWM){
    return log_odds;
  }

  // Dependency part: determinant term with the window included, minus the
  // reference term computed at construction.
  std::vector <std::vector<double> > updated_logR = logR_plus_seq_alpha(seq,alpha_exponent);
  const double updated_determinant = rescale_laplacian_determinant(updated_logR, alpha_exponent,1.0);
  log_odds += updated_determinant;
  log_odds -= laplacian_determinant;

  return log_odds;
}

/// Returns a copy of logR updated with the contribution of one window.
///
/// For every position pair (i, j), j < i, covered by the window the entry is
/// logR[i][j] plus, when neither character is 'N',
/// (get_lp_ij(seq[i],seq[j],i,j) - get_lp_i(seq[i],i) - get_lp_i(seq[j],j)) / alpha.
/// Entries on the diagonal, and entries for positions not covered by the
/// window, are 0.
///
/// @param seq    window; expected to have logR.size() characters. Characters
///               beyond the matrix dimension are ignored.
/// @param alpha  divisor applied to the window's contribution
/// @return symmetric matrix with the same dimension as logR
std::vector <std::vector<double> > Score::logR_plus_seq_alpha(string seq,double alpha){
  std::vector <std::vector<double> > logR_new;
  logR_new.resize(logR.size());   // initialization of logR_new matrix
  for(unsigned short int i = 0; i < logR.size(); i++)
    logR_new[i].resize(logR.size(), .0);

  // Never index past the matrix: a window longer than the motif would
  // otherwise write outside logR_new and read outside logR.
  const string::size_type limit = (seq.size() < logR.size()) ? seq.size() : logR.size();

  for(unsigned short int i = 0; i < limit; i++){
    for(unsigned short int j = 0; j < i; j++){
      logR_new[i][j] = logR[i][j];
      if(seq[i] != 'N' && seq[j] != 'N'){
	logR_new[i][j] += alignment.get_lp_ij(seq[i],seq[j],i,j)/alpha;
	logR_new[i][j] -= alignment.get_lp_i(seq[i],i)/alpha;
	logR_new[i][j] -= alignment.get_lp_i(seq[j],j)/alpha;
      }
      logR_new[j][i] = logR_new[i][j];
    }
  }

  return logR_new;
}


/// Accessor for the name passed as tf_name to the three-argument constructor.
///
/// @return a copy of the member tf
string Score::get_TF(){
	return this->tf;
}


/// Tells whether a character is one of the accepted sequence symbols:
/// A, C, G, T or N, in upper or lower case.
///
/// @param c  character to test
/// @return true for an accepted symbol, false for anything else
bool Score::char_is_valid(char c){
  bool accepted = false;
  switch (c){
    case 'A': case 'a':
    case 'T': case 't':
    case 'C': case 'c':
    case 'G': case 'g':
    case 'N': case 'n':
      accepted = true;
      break;
    default:
      break;
  }
  return accepted;
}

/// Tells whether every character of a string is accepted by char_is_valid().
///
/// @param str  string to check (any length; the empty string is valid)
/// @return false as soon as one rejected character is found, true otherwise
bool Score::is_valid(string str){
  // Index with the string's own size type: a 16-bit counter wraps around
  // and never reaches length() for strings longer than 65535 characters.
  for (string::size_type i = 0; i < str.length(); i++){
    if (!char_is_valid(str[i]))
      return false;
  }

  return true;
}
