#include <cmath>
#include <vector>
#include <limits>
#include <algorithm>
#include "log_linear_sgd_parser_learner.hpp"

namespace maeda {

namespace {

void NormalizeWeights(const double &scale, WeightMap *weight) {
  for (WeightMap::iterator w_it = weight->begin(); w_it != weight->end();
       ++w_it) {
    *w_it *= scale;
  }
}

} // namespace

// Default constructor.
//
// The body is empty and there is no initializer list, so this definition
// assigns nothing itself: the data pointers, hyper-parameters and iteration
// counter are all set by Learn(), and obj_func_ is set by Iterate().
//
// NOTE(review): unless the class declaration supplies default member
// initializers, those members are indeterminate until Learn() has run --
// confirm that ShowStatistics() is never called on a fresh object.
LogLinearSGDParserLearner::LogLinearSGDParserLearner() {}

// Trains the model with stochastic gradient descent.
//
// Parameters:
//   feature_vector_list_list  one list of feature vectors per training
//                             example.
//   gold_standard_index_list  for each training example, the index (within
//                             that example's list) of the feature vector
//                             whose score the training pushes up.
//   weight                    weights, updated in place.
//
// The three pointers are stored in members without copying, so the pointed-to
// objects must stay alive for the duration of the call.
//
// Hyper-parameters are fixed here: no L1 penalty (c1_ = 0), an L2 coefficient
// of 1 / (number of training examples), an initial learning rate of 1, and a
// fixed number of passes over the data. After each pass the objective value
// is reported through ShowStatistics().
//
// If the training set is empty the function returns without running any pass
// (and without printing statistics); *weight is left untouched.
void LogLinearSGDParserLearner::Learn(
    const std::vector<FeatureVectorList > *feature_vector_list_list,
    const std::vector<int> *gold_standard_index_list,
    WeightMap *weight) {
  // Number of full passes over the training data.
  const int kNumEpochs = 30;

  feature_vector_list_list_ = feature_vector_list_list;
  gold_standard_index_list_ = gold_standard_index_list;
  weight_                   = weight;

  c1_                       = 0.0;
  c2_                       = 0.0;
  learning_rate_            = 1.0;
  scale_                    = 1.0;
  num_iterations_           =   0;

  const size_t num_examples = feature_vector_list_list_->size();
  if (num_examples == 0) {
    // Nothing to learn from. Dividing by the example count below would make
    // c2_ infinite and every subsequent quantity NaN.
    return;
  }
  c2_ = 1.0 / num_examples;

  for (int epoch = 0; epoch < kNumEpochs; ++epoch) {
    Iterate();
    ShowStatistics();
  }
}

void LogLinearSGDParserLearner::Iterate() {
  size_t training_data_size = feature_vector_list_list_->size();

  std::vector<size_t> rand_vec(training_data_size);
  for (size_t i = 0; i < training_data_size; ++i) { rand_vec[i] = i; }
  random_shuffle(rand_vec.begin(), rand_vec.end(), rand_generator);

  obj_func_ = 0.0;
  for (std::vector<size_t>::const_iterator r_it = rand_vec.begin();
       r_it != rand_vec.end(); ++r_it) {
    const std::vector<FeatureVector> &feature_vector_list =
        feature_vector_list_list_->at(*r_it);
    const int gold_index = gold_standard_index_list_->at(*r_it);

    double score = feature_vector_list[gold_index].Product(*weight_);
    double log_z = 0.0;

    const double eta =
          learning_rate_
            / (1.0 + (c2_ + 0.00001 / training_data_size) * num_iterations_
                   + (c1_ > 0.0 ? sqrt(num_iterations_) : 0.0));

    // c1_ <= 0.0
    // without l1-regularization
    {
      const size_t num_feature_vectors = feature_vector_list.size();
      double scores[num_feature_vectors];
      double sum = 0;

      for (size_t t = 0; t < num_feature_vectors; ++t) {
        scores[t] = exp(feature_vector_list[t].Product(*weight_));
        sum += scores[t];
      }
      for (size_t t = 0; t < num_feature_vectors; ++t) {
        for (FeatureVector::const_iterator it
               = feature_vector_list[t].begin();
             it != feature_vector_list[t].end(); ++it) {
          weight_->at(it->id) -= eta * it->value * (scores[t] / sum);
        }
      }
      log_z = log(sum);
    }

    for (FeatureVector::const_iterator it
           = feature_vector_list[gold_index].begin();
         it != feature_vector_list[gold_index].end(); ++it) {
      weight_->at(it->id) += eta * it->value;
    }

    obj_func_ -= (score - log_z) * scale_;
    scale_    *= 1.0 - eta * c2_;  // l2 regularization
    if (scale_ < 1.0e-8 || scale_ > 1.0e+8) {
      // re-scaling
      NormalizeWeights(scale_, weight_);
      scale_ = 1.0;
    }
    ++num_iterations_;
  }
  obj_func_ /= rand_vec.size();
  if (scale_ != 1.0) {
    // re-scaling
    NormalizeWeights(scale_, weight_);
    scale_ = 1.0;
  }
  // apply regularization terms of the objective
  for (WeightMap::iterator w_it = weight_->begin();
       w_it != weight_->end(); ++w_it) {
      obj_func_ += *w_it * *w_it * 0.5 * c2_;  // l2
      obj_func_ += fabs(*w_it) * c1_;   // l1
  }
}

// Reports the most recently computed objective value on standard error, as a
// single line of the form "obj=<value>".
void LogLinearSGDParserLearner::ShowStatistics() const {
  std::cerr << "obj=";
  std::cerr << obj_func_;
  std::cerr << std::endl;
}


} // namespace maeda
