/* Copyright (c) 2009, NICTA
 * All rights reserved. 
 * 
 * The contents of this file are subject to the Mozilla Public License 
 * Version 1.1 (the "License"); you may not use this file except in 
 * compliance with the License. You may obtain a copy of the License at 
 * http://www.mozilla.org/MPL/ 
 * 
 * Software distributed under the License is distributed on an "AS IS" 
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 
 * License for the specific language governing rights and limitations 
 * under the License. 
 * 
 * Authors: Choon Hui Teo (ChoonHui.Teo@anu.edu.au)
 *
 * Created: (02/11/2007) 
 *
 * Last Updated: (12/01/2009)   
 */

#ifndef _HINGELOSS_CPP_
#define _HINGELOSS_CPP_

#include "hingeloss.hpp"

using namespace std;



/**
 *  Evaluate the total hinge loss  sum_i max(0, 1 - y_i * f_i).
 *
 *  CAUTION: f is modified in place. It is multiplied element-wise by the
 *  labels so that no temporary copy of the prediction vector is needed;
 *  on return f holds the margins y_i * f_i instead of the predictions.
 *
 *  @param loss [write] accumulated hinge loss over all entries of f.
 *  @param f [read/write] prediction vector = X*w on entry, margins on exit.
 */
void CHingeLoss::Loss(double& loss, TheMatrix& f)
{
   loss = 0;

   // Overwrite the predictions with the margins y_i * f_i.
   f.ElementWiseMult(_data->labels());

   // Work on the raw buffer for cheap element access.
   const double* margins = f.Data();
   const int n_entries = f.Length();

   for (int idx = 0; idx < n_entries; ++idx) {
      const double margin = margins[idx];

      // Entries that are not strictly inside the margin contribute nothing.
      if (!(margin < 1.0))
         continue;

      loss += (1 - margin);
   }
}

/**  
 *  Compute the hinge loss and its partial (sub)derivative w.r.t. f.
 *
 *  CAUTION: f is modified in place; it is multiplied element-wise by the
 *  labels, so on return it holds the margins y_i * f_i.
 *
 *  For every entry whose margin is below 1 the loss grows by
 *  (1 - margin) and the corresponding entry of l becomes -y_i; all other
 *  entries of l are 0.
 *   
 *  @param loss [write] loss value computed. It is reset to 0 on entry, so
 *              any previous content is discarded (same contract as Loss()).
 *  @param f [r/w] = X*w on entry, margins y_i * f_i on exit.
 *  @param l [write] partial derivative of loss w.r.t. f
 */
void CHingeLoss::LossAndGrad(double& loss, TheMatrix& f, TheMatrix& l)
{
   // loss is an output parameter: start from zero instead of accumulating
   // on top of whatever value the caller happened to pass in.
   loss = 0;

   f.ElementWiseMult(_data->labels());
   const double* f_array = f.Data();  // pointer to memory location of f (faster element access)
   const int len = f.Length();
   
   l.Zero();  // entries outside the margin violation set stay 0
   
   for(int i=0; i < len; i++) {
      if(f_array[i] < 1.0) {
         loss += 1 - f_array[i];
         
         // derivative w.r.t. the margin; turned into -y_i below
         l.Set(i, -1);
      }
   }

   // Chain rule: d(y_i * f_i)/d f_i = y_i.
   l.ElementWiseMult(_data->labels());
}


/**
*  Compute Y X^T X Y f
*   
*  @param dualObj   [write] = x^k
*  @param grad      [write] partial derivative of loss w.r.t. f
*  @param curVar    [read]  partial derivative of loss w.r.t. f
*/
void CHingeLoss::ComputeDualLossAndGradient(double& dualObj, TheMatrix& grad, 
                                             TheMatrix& curVar)
{
    TheMatrix temp_dim(_data->dim(), 1, SML::DENSE);
    TheMatrix temp_ex(curVar, SML::DENSE);

//     printf("showing curVar:\n");       curVar.Print();      printf("-------------------");
    temp_ex.ElementWiseMult(_data->labels());
    _data->XTMultW(temp_ex, temp_dim);
    _data->XMultW(temp_dim, temp_ex);
    temp_ex.ElementWiseMult(_data->labels());

//     printf("showing X^TX\alpha:\n");       temp_ex.Print();      printf("-------------------");

    temp_ex.Dot(curVar, dualObj);

    if (verbosity > 0)
        std::cout << "Value Ratio quad : line = " << 0.5 * dualObj * varScaleFactor / _data->size()
                  << " : " << curVar.Norm1() << std::endl;

    dualObj = (0.5 * dualObj * varScaleFactor / _data->size() - curVar.Norm1()) / _data->size() * varScaleFactor;
    
    grad.Assign(temp_ex);

//     printf("temp_ex later:\n");     temp_ex.Print();      printf("-------------------");
//     printf("grad assign:\n");     grad.Print();      printf("-------------------");

    grad.Scale(varScaleFactor * varScaleFactor / _data->size() / _data->size());

    if (verbosity > 0)
        std::cout << "||quad grad||_1 : ||lin grad||_1  = " 
                    << grad.Norm1() << " : " << gradConst.Norm1() << std::endl;

    grad.Add(gradConst);
}


/**
 *  Copy row 0 of the label matrix into a uBLAS vector.
 *
 *  The data is written straight into the vector's underlying storage, so
 *  value must already be large enough to hold the row; it is not resized
 *  here.
 *
 *  @param value [write] destination for the labels.
 */
void CHingeLoss::getLabels(boost::numeric::ublas::vector<double> &value) 
{
    // GetRow reports a length through this argument; it is not needed here.
    unsigned int row_length;

    double* destination = &(value.data()[0]);
    _data->labels().GetRow(0, row_length, destination);
}


/**
 *  Hand the label matrix of the underlying data set to the caller.
 *
 *  The labels are transferred with TheMatrix's assignment operator
 *  (NOTE(review): presumably a deep copy -- confirm in TheMatrix).
 *
 *  @param value [write] receives the labels.
 */
void CHingeLoss::getLabels(TheMatrix & value) 
{
    value = _data->labels();
}

/**
 *  Map a dual variable to a weight vector and evaluate the primal
 *  objective  lambda/2 * ||w||_2^2 + hingeLoss / _data->size().
 *
 *  The weight vector is obtained by multiplying dualVar element-wise by
 *  the labels and passing the result through _data->XTMultW; the
 *  predictions are then recomputed with _data->XMultW. No scaling is
 *  applied to the weight vector here.
 *
 *  If useBias is set, a bias is chosen by find_bias() and added to the
 *  predictions as bias_value * biasConst before the loss is evaluated.
 *
 *  @param dualVar    [read]  dual variable (copied, not modified).
 *  @param lambda     [read]  regularization constant.
 *  @param weight     [write] weight vector derived from dualVar.
 *  @param num_pos    [read]  order-statistic index forwarded to find_bias().
 *  @param useBias    [read]  whether to fit and apply a bias term.
 *  @param bias_value [write] bias found; only written when useBias is true.
 *  @return primal objective value.
 */
double CHingeLoss::ComputePrimalObj(TheMatrix &dualVar, double lambda, TheMatrix &weight, 
                                    int num_pos, bool useBias, double & bias_value)
{
    TheMatrix temp_ex(dualVar, SML::DENSE);    

    // weight := X^T * (Y * dualVar);  temp_ex := X * weight
    temp_ex.ElementWiseMult(_data->labels());
    _data->XTMultW(temp_ex, weight);
    _data->XMultW(weight, temp_ex);

    // The remaining operations are element-wise, so the length is fixed
    // from here on; query it once.
    const int num_example = temp_ex.Length();
    
    if (useBias)
    {        
        // NOTE(review): the scratch buffer is sized on first use only; this
        // assumes every later call sees the same number of examples.
        if (!aux_buffer)
            aux_buffer = new double [num_example];  
        bias_value = find_bias(temp_ex, _data->labels(), num_example, num_pos); 
        temp_ex.ScaleAdd(bias_value, biasConst);
    }

    // Turn predictions into margins y_i * f_i and sum the hinge loss.
    temp_ex.ElementWiseMult(_data->labels());
    const double* f_array = temp_ex.Data();  // pointer to memory location of f (faster element access)
    double loss = 0;
    for(int i=0; i < num_example; i++)
        if(f_array[i] < 1.0) loss += (1 - f_array[i]);

    return lambda * pow(weight.Norm2(), 2) / 2.0 + loss / _data->size();    
}


/**
 *  Pick a bias as an order statistic of the residuals label_i - disc_i.
 *
 *  The residuals are written into aux_buffer, sorted in ascending order,
 *  and the entry at position num_pos is returned. aux_buffer must already
 *  be allocated with room for at least num_example doubles; on return it
 *  holds the sorted residuals.
 *
 *  @param disc        [read] discriminant values, one per example.
 *  @param label       [read] labels, one per example.
 *  @param num_example [read] number of examples to consider.
 *  @param num_pos     [read] index of the order statistic to return. Values
 *                     outside [0, num_example - 1] are clamped to the
 *                     nearest valid index instead of reading out of bounds.
 *  @return the selected residual, or 0 when num_example <= 0.
 */
double CHingeLoss::find_bias(TheMatrix &disc, const TheMatrix &label, int num_example, int num_pos)
{
    // Nothing to rank: avoid sorting an empty/negative range and reading
    // an element that was never written.
    if (num_example <= 0)
        return 0.0;

    const double* disc_array = disc.Data();
    const double* label_array = label.Data();

    for (int i = 0; i < num_example; i ++)
        aux_buffer[i] = label_array[i] - disc_array[i];

    std::sort(aux_buffer, aux_buffer + num_example);

    // num_pos == num_example (or a negative value) would index past the
    // buffer; fall back to the closest valid order statistic.
    int index = num_pos;
    if (index < 0)
        index = 0;
    else if (index >= num_example)
        index = num_example - 1;

    return aux_buffer[index];
}








void CHingeLoss::ComputeDualLossAndGradient03(double& dualObj, TheMatrix& grad, 
                                            TheMatrix& curVar, double lambda)
{
    TheMatrix temp_dim(_data->dim(), 1, SML::DENSE);
    TheMatrix temp_ex(curVar, SML::DENSE);

    //     printf("Now show X:\n");     _data->ShowFeature();

    //     printf("showing curVar:\n");       curVar.Print();      printf("-------------------");
    temp_ex.ElementWiseMult(_data->labels());
    _data->XTMultW(temp_ex, temp_dim);
    _data->XMultW(temp_dim, temp_ex);
    temp_ex.ElementWiseMult(_data->labels());

    //     printf("showing X^TX\alpha:\n");       temp_ex.Print();      printf("-------------------");

    temp_ex.Dot(curVar, dualObj);

    //     printf("First part = %lg\n", dualObj);
    
    std::cout<<"Value Ratio quad : line = " << dualObj << " : " << curVar.Norm1() << std::endl;

    dualObj = -0.5 * dualObj / lambda / pow(varScaleFactor03 * _data->size(), 2) 
                + curVar.Norm1() / varScaleFactor03 / _data->size();

    //     std::cout<<"Look:\n";     curVar.Print();

    grad.Assign(temp_ex);

    //     printf("temp_ex later:\n");     temp_ex.Print();      printf("-------------------");
    //     printf("grad assign:\n");     grad.Print();      printf("-------------------");

    grad.Scale(-1.0 / lambda / pow(varScaleFactor03 * _data->size(), 2));

    //     printf("Quadratic part gradient:\n");     grad.Print();      printf("-------------------");

    grad.Add(gradConst);
    //     printf("Final gradient:\n");     grad.Print();      printf("-------------------");
}


/**
 *  Convert a dual variable into a weight vector:
 *  w = scale * XTMultW(labels .* alpha).
 *
 *  @param alpha [read]  dual variable (copied, not modified).
 *  @param w     [write] resulting weight vector.
 *  @param scale [read]  factor applied to w after the product.
 */
void CHingeLoss::map_alpha_to_w(TheMatrix &alpha, TheMatrix &w, double scale)
{
    // Work on a dense copy so the caller's alpha stays untouched.
    TheMatrix signed_alpha(alpha, SML::DENSE);
    signed_alpha.ElementWiseMult(_data->labels());

    _data->XTMultW(signed_alpha, w);
    w.Scale(scale);
}


/**
 *  Compute output = labels .* XMultW(w): pass w through _data->XMultW
 *  and multiply the result element-wise by the labels.
 *
 *  @param w      [read]  weight vector.
 *  @param output [write] label-signed result of the product.
 */
void CHingeLoss::aux_map_w_to_alpha(TheMatrix &w, TheMatrix &output)
{
    // output := X * w (per the method name -- semantics live in the data class)
    _data->XMultW(w, output);
    // sign each entry by its label
    output.ElementWiseMult(_data->labels());
}
#endif
