/**
 *
 * @file processing_element.cc
 * @author Lasse Lehtonen
 *
 *
 */

/*
 * Copyright 2010 Tampere University of Technology
 * 
 *  This file is part of Transaction Generator.
 *
 *  Transaction Generator is free software: you can redistribute it and/or modify
 *  it under the terms of the Lesser GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  Transaction Generator is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  Lesser GNU General Public License for more details.
 *
 *  You should have received a copy of the Lesser GNU General Public License
 *  along with Transaction Generator.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * $Id: processing_element.cc 1416 2010-09-06 12:06:17Z lehton87 $
 *
 */

#include "processing_element.hh"
#include <iostream>
#include <iomanip>
#include <exception>

namespace sctg
{
   ProcessingElement::ProcessingElement(sc_core::sc_module_name name, 
					const boost::property_tree::ptree& pt,
					const boost::property_tree::ptree& peLib,
					sctg::Configuration& config)
      : sctg::Resource(name, pt),
	_config(config),
	_measureStart(),
	_measureEnd()
   {
      _currentTask = 0;
      _currentState = IDLE;
      _dmaEnabled = true;
    
      _intraGroupRxCost = 0;
      _intraGroupTxCost = 0;
      _interGroupRxCost = 0;
      _interGroupTxCost = 0;
      _interPeRxCost = 0;
      _interPeTxCost = 0;
      _cycleLength =  sc_core::sc_time(1.0 / getFrequency(), sc_core::SC_US);
      using boost::property_tree::ptree;
      ptree node = peLib.get_child("processing_element_lib");
      // Search for correct pe type
      ptree::const_iterator iter;
      for(iter = node.begin(); iter != node.end(); ++iter)
      {	
	 if((*iter).first == "processing_element" &&
	    (*iter).second.get<std::string>("<xmlattr>.type") == getType() )
	 {
	    break; // Found, stop search
	 }
      }
      if(iter == node.end())
      {
	 // Did not find
	 std::string err = "Did not find processing element \"" + getType() +
	    "\" from pe_lib";
	 throw std::runtime_error(err.c_str());
      }
      _intOps = (*iter).second.get<double>("<xmlattr>.int_ops");
      _floatOps = (*iter).second.get<double>("<xmlattr>.float_ops");
      _memOps = (*iter).second.get<double>("<xmlattr>.mem_ops");

      // Add communication costs if they exists
      boost::optional<const ptree&> costs = (*iter).second.get_child_optional
	 ("communication_costs");
    
      boost::optional<const ptree&> temp;
      if(costs)
      {
	 for(ptree::const_iterator i = (*costs).begin(); i != (*costs).end(); ++i)
	 {
	    if((*i).first == "intragroup")
	    {
	       temp = (*i).second.get_child_optional("receive");
	       if(temp) {_intraGroupRxCost = 
		     new Amount<unsigned long int>((*temp), _config);}
	       temp = (*i).second.get_child_optional("send");
	       if(temp) {_intraGroupTxCost = 
		     new Amount<unsigned long int>((*temp), _config);}
	    }
	    else if((*i).first == "intergroup")
	    {
	       temp = (*i).second.get_child_optional("receive");
	       if(temp) {_interGroupRxCost = 
		     new Amount<unsigned long int>((*temp), _config);}
	       temp = (*i).second.get_child_optional("send");
	       if(temp) {_interGroupTxCost = 
		     new Amount<unsigned long int>((*temp), _config);}
	    }
	    else if((*i).first == "interpe")
	    {
	       temp = (*i).second.get_child_optional("receive");
	       if(temp) {_interPeRxCost = 
		     new Amount<unsigned long int>((*temp), _config);}
	       temp = (*i).second.get_child_optional("send");
	       if(temp) {_interPeTxCost = 
		     new Amount<unsigned long int>((*temp), _config);}
	    }
	 }
      }
    
      std::cout << getName() << ": int (" << _intOps << ") float (" << _floatOps
		<< ") mem (" << _memOps << ")" << std::endl;
      if(costs)
	 std::cout << " Found costs" << std::endl;

      // Create buffer for PE
      unsigned long int txBuffSize = 
	 pt.get<unsigned long int>("<xmlattr>.tx_buffer_size", 0);
      unsigned long int rxBuffSize = 
	 pt.get<unsigned long int>("<xmlattr>.rx_buffer_size", 0);
      _packetSize = 
	 pt.get<unsigned long int>("<xmlattr>.packet_size", 0);

      Buffer* buff = new Buffer(rxBuffSize, txBuffSize, _config);
      _buffer = std::auto_ptr<Buffer>(buff);

      _config.addBufferToPe(buff, getId());
    
      SC_THREAD(thread);

      if(_dmaEnabled)
      {
	 SC_THREAD(txDma);
	 SC_THREAD(rxDma);
      }
   }

   /**
    * Releases the communication cost objects, brings the time accounting
    * up to date and, if the configuration provides a summary stream, writes
    * this PE's utilization and traffic figures to it.
    */
   ProcessingElement::~ProcessingElement()
   {
      // delete on a null pointer is a no-op, so no guards are required
      delete _intraGroupRxCost;  _intraGroupRxCost = 0;
      delete _intraGroupTxCost;  _intraGroupTxCost = 0;
      delete _interGroupRxCost;  _interGroupRxCost = 0;
      delete _interGroupTxCost;  _interGroupTxCost = 0;
      delete _interPeRxCost;     _interPeRxCost = 0;
      delete _interPeTxCost;     _interPeTxCost = 0;

      updateMeasurements();

      // Utilization stays at zero when nothing was executed
      const double util = (_measurements.execCycles != 0.0)
         ? double(_measurements.execCycles) /
           double(_measurements.idleCycles + _measurements.execCycles)
         : 0.0;

      if(_config.getSummaryStream())
      {
         std::ostream& summary = **_config.getSummaryStream();

         summary << "- PE " << getName()
                 << " average utilization during simulation was "
                 << util << std::endl;

         summary << "  idle: " << std::setw(10) << _measurements.idleCycles
                 << "  exec: " << std::setw(10) << _measurements.execCycles
                 << "  total: " << std::setw(10)
                 << (_measurements.idleCycles + _measurements.execCycles)
                 << std::endl;

         summary << "  sent     "
                 << std::setw(10) << _buffer->getMeasurements().txBytes
                 << " bytes" << std::endl;

         // Received bytes are reported without the PE-internal traffic
         summary << "  received "
                 << std::setw(10)
                 << (_buffer->getMeasurements().rxBytes -
                     _buffer->getMeasurements().internalBytes)
                 << " bytes" << std::endl;

         summary << "  intra tx "
                 << std::setw(10) << _buffer->getMeasurements().internalBytes
                 << " bytes (traffic between tasks)"
                 << std::endl;
      }
   }

   /**
    * Main SystemC process of the PE (registered with SC_THREAD in the
    * constructor). Runs forever; each loop iteration does one of:
    *
    *  1. If the buffer holds a received token, deliver it to the task that
    *     owns the destination in-port and queue that task for scheduling.
    *  2. Otherwise, if there is no task to run (or the last send stalled on
    *     a full TX buffer), idle until an event, an incoming token or a TX
    *     buffer read occurs.
    *  3. Otherwise perform one step of the current task: either execute its
    *     whole pending amount, or send one packet of its pending send.
    */
   void ProcessingElement::thread()
   {
      // Transfer time in microseconds; only used on the non-DMA send paths
      double waitTime = 1.0;

      // Schedule first task
      bool sched = true;
      // Set when a remote send found the TX buffer full; makes the PE idle
      // instead of retrying the same task immediately
      bool sendStalled = false;
    
      while(true)
      {
	 // Pick a (new) task when there is none, when rescheduling was
	 // requested, or when the current task is no longer READY
	 if(_currentTask == 0 || sched || _currentTask->getState() != READY)
	 {
	    Task* old = _currentTask;
	    _currentTask = schedule();    
	    sched = false;
	    // A stall only applies to the task that stalled
	    if(old != _currentTask)
	    {
	       sendStalled = false;
	    }
	 }
	
	 /*
	  * 1. If something is coming in handle it
	  * 2. Otherwise execute task
	  */
	 if(_buffer->rxTokenAvailable())
	 {
	    tgToken tokenIn = _buffer->rxGetToken();
	    
	    _config.addReceiveTime(tokenIn.destination);

	    // NOTE(review): the result is dereferenced without a null check;
	    // confirm getTaskByInPort() cannot return 0 for a received token
	    Task* receiver = 
	       _config.getTaskByInPort(tokenIn.destination);//
	    
	    receiver->receive(tokenIn);

	    // Queue the receiver so that schedule() considers it first
	    _fifoScheduler.push(receiver);

#ifdef SCTG_USE_EXECMON	    
	    // Keep non-event tokens available through getTokenQueue()
	    if(!tokenIn.isEvent)
	    {
	       _tokenQueue.push(tokenIn);
	    }
#endif

	 }
	 else if(_currentTask == 0 || sendStalled) // No tasks to execute
	 {
	    sendStalled = false;
	    _currentState = IDLE;
	    //_measureStart = sc_core::sc_time_stamp();
	    // Sleep until an event arrives (receiveEvent), a token becomes
	    // available in the RX buffer, or the TX buffer is read
	    wait(_eventEvent 
		 | *(_buffer->rxGetTokenAvailableEvent())
		 | *(_buffer->txGetReadEvent()));
	    // Accounts the time slept to the IDLE state
	    updateMeasurements();	    
	 }
	 else
	 {
	    // Check what is current operation
	    /*
	     * 1. If sending send packets one by one
	     * 2. Otherwise execute until all executed or incoming packet
	     */
	    tgToken token;
	    tgPacket* packet;
	    unsigned long int port;
	    unsigned long int task;
	    unsigned long int size;
	    ProcessingElement* pe;
	    switch(_currentTask->getOperation())
	    {
	       case EXECUTE:
		  _currentState = EXECUTING;
		  _currentTask->changeState(RUN);
		  /*std::cout << "At " << sc_core::sc_time_stamp() 
		    << " Task " << _currentTask->getName()
		    << " has " << _currentTask->getAmount()
		    << " clock cycles left to execute" << std::endl;
		  */
		  //waitTime = _cycleLength * _currentTask->getAmount();
		  //std::cout << " Time to wait " << waitTime << std::endl;
		  // Execute the whole pending amount in one wait: amount
		  // multiplied by the length of one clock cycle
		  wait(_cycleLength * _currentTask->getAmount());
		  //_measurements.execCycles += _currentTask->getAmount();
		  updateMeasurements();
		  _currentTask->consume(_currentTask->getAmount());	    
		  _currentTask->changeState(WAIT);
		  break;

	       case SEND:
		  _currentTask->changeState(RUN);
		  // NOTE(review): updateMeasurements() has no case for
		  // SENDING, so time spent in this state is added to neither
		  // idle nor exec time - confirm that this is intended
		  _currentState = SENDING;
		  /*std::cout << "At " << sc_core::sc_time_stamp() 
		    << " Task " << _currentTask->getName()
		    << " has " << _currentTask->getAmount()
		    << " bytes left to send" << std::endl;
		  */
		  // Construct token if this is first time for this send
		  if(_currentTask->isNewOperation())
		  {
		     token.size = _currentTask->getAmount();
		     token.destination = 
			_config.getDestination(_currentTask->getOutPort());
		     token.source = _currentTask->getOutPort();
		     token.timeSent = sc_core::sc_time_stamp();
		     token.id = _config.getTokenId();
		     task = 
			_config.getTaskByInPort(token.destination)->getId();
		     // Announce the token to the buffer of the PE that hosts
		     // the receiving task
		     _config.getBufferByPe(_config.getPeByTask(task)->getId())
			->expectToken(token);
		     // Remember the token per sending task; the following
		     // packets of the same send read it from here
		     _sendTokens[_currentTask->getId()] = token;

		     _config.addSendTime(token.source);
		  }
		    
		  // Create packet
		  // NOTE(review): the packet size is derived from the stored
		  // token's size, not from the task's remaining amount.
		  // Whether the stored size is reduced elsewhere is not
		  // visible in this file - confirm for the last packet.
		  packet = new tgPacket;
		  if(_packetSize > 0) // Zero packet_size means "don't cut tokens"
		  {
		     size = 
			_sendTokens[_currentTask->getId()].size < _packetSize ?
			_sendTokens[_currentTask->getId()].size : _packetSize;
		  }
		  else
		  {
		     size = _sendTokens[_currentTask->getId()].size;
		  }
		  size = size < 4 ? 4 : size; // Four bytes is minimum packet size
		  packet->size = size;
		  //std::cout << "packet size " << packet->size << std::endl;
		  // Resolve destination port -> task -> PE -> terminal address
		  port = _sendTokens[_currentTask->getId()].destination;
		  //std::cout << "packet port " << port << std::endl;
		  task = _config.getTaskByInPort(port)->getId();
		  //std::cout << "packet task " << task << std::endl;
		  pe = _config.getPeByTask(task);
		  packet->address = pe->getTerminalAddress(pe->getTerminal());
		  //std::cout << "packet address " << std::hex 
		  //	      << packet->address << std::dec << std::endl;
		  packet->id   = _sendTokens[_currentTask->getId()].id;
		  //packet->data = new unsigned char[packet->size];
		  //*(reinterpret_cast<unsigned int*>(packet->data)) =
		  //   _sendTokens[_currentTask->getId()].id;

		  // Check where packet is going
		  if(pe->getId() == getId())
		  {		       
		     // Packet is for this PE
		     if(_buffer->rxSpaceLeft() < packet->size)
		     {
			/*std::cout << "No space for intra_tx (S:" 
				  << size << "B,P:" << port
				  << ",T:"<< task <<  ")" << std::endl;*/
			// No room in our own RX buffer: drop this packet,
			// requeue the task and let the scheduler pick again
			sched = true;
			_currentTask->changeState(WAIT);
			_fifoScheduler.push(_currentTask);
			//delete packet->data; packet->data = 0;
			// packet->data has not been allocated at this point
			delete packet; packet = 0;
			break;
		     }
		     
		     // NOTE(review): same condition as the if above, which
		     // leaves the switch when it holds, and no wait happens
		     // in between. This loop looks unreachable unless
		     // rxSpaceLeft() can change between the two calls.
		     while(_buffer->rxSpaceLeft() < packet->size)
		     {
			_currentState = INTRA_TX_WAIT;
			wait(*(_buffer->rxGetReadEvent()));
			updateMeasurements();
		     }
		     		     
		     _currentTask->addLocalBytesSent(packet->size);

		     // Payload: only the token id is written, at the start of
		     // the buffer. packet->size is at least 4 here; assumes
		     // sizeof(unsigned int) <= 4 - TODO confirm
		     packet->data = new unsigned char[packet->size];
		     *(reinterpret_cast<unsigned int*>(packet->data)) =
		        _sendTokens[_currentTask->getId()].id;

		     if(!_dmaEnabled)
		     {
			// Without DMA this process models the copy itself:
			// reserve space, wait for the transfer time, release
			// the reservation and hand the packet over
			_buffer->rxReserveSpace(packet->size);
			_currentState = SENDING;
			// (1 / frequency) * (size / (terminal width / 8));
			// the inner divisions are done on the operands'
			// own types - presumably integers, TODO confirm
			waitTime =
			   (1.0 / getFrequency()) * 
			   (size / (getTerminalWidth(getTerminal())/8));
			
			wait(waitTime, sc_core::SC_US);
			updateMeasurements();
			_buffer->rxFreeSpace(packet->size);

			_buffer->addInternalBytes(packet->size);
			_buffer->rxPutPacket(packet);
		     }
		     else
		     {			
			// Hand the packet to the rxDma process
			_rxDmaQueue.push(packet);
			_rxDmaEvent.notify(sc_core::SC_ZERO_TIME);
		     }		     
		  }
		  else
		  {
		     // Packet is heading for another PE
		     if(_buffer->txSpaceLeft() < packet->size)
		     {
			// TX buffer full: drop this packet, requeue the task
			// and mark the send as stalled so that the PE idles
			// if the same task gets scheduled again. Unlike the
			// local case, the task state is not set to WAIT here.
			sched = true;
			sendStalled = true;
			_fifoScheduler.push(_currentTask);
			//delete packet->data; packet->data = 0;
			delete packet; packet = 0;
			break;
		     }

		     // while(_buffer->txSpaceLeft() < packet->size)
		     // {			
		     // 	_currentState = TX_WAIT;
		     // 	wait(*(_buffer->txGetReadEvent()));			
		     // 	updateMeasurements();
		     // }

		     _currentTask->addRemoteBytesSent(packet->size);
		
		     // Note: packet->data is not assigned on this path (the
		     // allocation above is commented out)
		     if(!_dmaEnabled)
		     {
			// Same non-DMA transfer model as in the local case,
			// but through the TX side of the buffer
			_buffer->txReserveSpace(packet->size);
			_currentState = SENDING;
			waitTime =
			   (1.0 / getFrequency()) * 
			   (size / (getTerminalWidth(getTerminal())/8));
			
			wait(waitTime, sc_core::SC_US);
			updateMeasurements();
			_buffer->txFreeSpace(packet->size);
			_buffer->txPutPacket(packet);
		     }
		     else
		     {
			// Hand the packet to the txDma process
			_txDmaQueue.push(packet);
			_txDmaEvent.notify(sc_core::SC_ZERO_TIME);
		     }
       		     
		  }		  
		    
		  // NOTE(review): size may have been raised to the 4 byte
		  // minimum above; how consume() treats an amount larger than
		  // what is left is not visible here
		  _currentTask->consume(size);
		  // unsigned long int bytesLeft = _currentTask->consume(size);
		  // if(bytesLeft == 0)
		  // {
		  //    _sendTokens[_currentTask->getId()]
		  // }
		  _currentTask->changeState(WAIT);		    
		  break;

	       default:
		  // Unknown operation: report it and request the end of the
		  // simulation; the wait yields so the loop does not spin
		  std::cout << "At " << sc_core::sc_time_stamp() 
			    << " Task " << _currentTask->getName()
			    << " FAILURE" << std::endl;
		  sc_core::sc_stop();
		  wait(10, sc_core::SC_NS);
		  break;
	    }		
	 }	    	    
      }
    
   }

   /**
    * @return The int_ops value that the constructor read from this PE's
    *         library entry.
    */
   double ProcessingElement::getIntOps()
   {
      return _intOps;
   }
  
   /**
    * @return The float_ops value that the constructor read from this PE's
    *         library entry.
    */
   double ProcessingElement::getFloatOps()
   {
      return _floatOps;
   }

   /**
    * @return The mem_ops value that the constructor read from this PE's
    *         library entry.
    */
   double ProcessingElement::getMemOps()
   {
      return _memOps;
   }

   /**
    * @return Value of the intragroup receive cost, or 0 when no such cost
    *         was configured for this PE.
    */
   unsigned long int ProcessingElement::getIntraGroupRxCost()
   {
      if(_intraGroupRxCost == 0)
      {
         return 0;
      }
      return _intraGroupRxCost->value();
   }

   /**
    * @return Value of the intragroup send cost, or 0 when no such cost was
    *         configured for this PE.
    */
   unsigned long int ProcessingElement::getIntraGroupTxCost()
   {
      if(_intraGroupTxCost == 0)
      {
         return 0;
      }
      return _intraGroupTxCost->value();
   }

   /**
    * @return Value of the intergroup receive cost, or 0 when no such cost
    *         was configured for this PE.
    */
   unsigned long int ProcessingElement::getInterGroupRxCost()
   {
      if(_interGroupRxCost == 0)
      {
         return 0;
      }
      return _interGroupRxCost->value();
   }

   /**
    * @return Value of the intergroup send cost, or 0 when no such cost was
    *         configured for this PE.
    */
   unsigned long int ProcessingElement::getInterGroupTxCost()
   {
      if(_interGroupTxCost == 0)
      {
         return 0;
      }
      return _interGroupTxCost->value();
   }

   /**
    * @return Value of the inter-PE receive cost, or 0 when no such cost was
    *         configured for this PE.
    */
   unsigned long int ProcessingElement::getInterPeRxCost()
   {
      if(_interPeRxCost == 0)
      {
         return 0;
      }
      return _interPeRxCost->value();
   }

   /**
    * @return Value of the inter-PE send cost, or 0 when no such cost was
    *         configured for this PE.
    */
   unsigned long int ProcessingElement::getInterPeTxCost()
   {
      if(_interPeTxCost == 0)
      {
         return 0;
      }
      return _interPeTxCost->value();
   }

   /**
    * Selects the next task to run.
    *
    * Entries are popped from the FIFO scheduler queue until one is found
    * whose state is READY or RUN; entries in any other state are discarded.
    * If the queue runs empty, the mapped tasks are scanned in mapping order
    * and the first READY or RUN task is chosen.
    *
    * @return The selected task, or 0 if no task is READY or RUN.
    */
   sctg::Task* ProcessingElement::schedule()
   {
      // First preference: tasks that were explicitly queued
      while(!_fifoScheduler.empty())
      {
         Task* const queued = _fifoScheduler.front();
         _fifoScheduler.pop();

         if(queued->getState() != READY && queued->getState() != RUN)
         {
            continue; // Not runnable any more, drop it
         }
         return queued;
      }

      // Fallback: first runnable task among all mapped tasks
      for(unsigned int idx = 0; idx < _tasks.size(); ++idx)
      {
         Task* const candidate = _tasks.at(idx);
         if(candidate->getState() != READY && candidate->getState() != RUN)
         {
            continue;
         }
         return candidate;
      }

      return 0;
   }

   /**
    * Maps a task onto this PE: logs the mapping to standard output and
    * appends the task to the PE's task list.
    *
    * @param task Task to map; it is dereferenced, so it must not be null
    */
   void ProcessingElement::mapTask(Task* task)
   {
      std::cout << "Task " << task->getName()
                << " mapped to PE " << getName()
                << std::endl;

      _tasks.push_back(task);
   }

   /**
    * Tells whether any task mapped to this PE has the given in-port.
    *
    * @param port In-port id to look for
    * @return true if at least one mapped task reports having @p port
    */
   bool ProcessingElement::hasInPort(unsigned long int port)
   {
      bool found = false;
      for(unsigned int idx = 0; !found && idx < _tasks.size(); ++idx)
      {
         found = _tasks.at(idx)->hasInPort(port);
      }
      return found;
   }
  
   /**
    * Delivers an event token to this PE.
    *
    * The send time is recorded for the token's source, the token is handed
    * to every mapped task that has the destination in-port (each of those
    * tasks is also queued for scheduling), and finally the PE's event is
    * notified so that an idling thread() wakes up.
    *
    * @param token Event token to deliver
    */
   void ProcessingElement::receiveEvent(tgToken token)
   {
      _config.addSendTime(token.source);

      // Forward token to all tasks with correct in_port
      for(unsigned int idx = 0; idx < _tasks.size(); ++idx)
      {
         if(!_tasks.at(idx)->hasInPort(token.destination))
         {
            continue;
         }
         _tasks.at(idx)->receive(token);
         _fifoScheduler.push(_tasks.at(idx));
      }

      _eventEvent.notify(sc_core::SC_ZERO_TIME);
   }

   /**
    * Returns the PE's measurements after bringing them up to date with the
    * current simulation time.
    *
    * @return Reference to the PE's internal measurement record; later
    *         updates are visible through it.
    */
   const sctg::PeMeasurements& ProcessingElement::getMeasurements()
   {
      // Account for the time elapsed since the previous update first
      updateMeasurements();
      return _measurements;
   }

   void ProcessingElement::updateMeasurements()
   {
      _measureEnd   = sc_core::sc_time_stamp();

      switch(_currentState)
      {
	 case IDLE:	
	    _measurements.idleTime += 
	       _measureEnd - _measureStart;	
	    break;
	 case EXECUTING:	
	    _measurements.execTime += 
	       _measureEnd - _measureStart;
	    break;
	 case INTRA_TX_WAIT:
	    _measurements.execTime += 
	       _measureEnd - _measureStart;
	    break;
	 case TX_WAIT:
	    _measurements.execTime += 
	       _measureEnd - _measureStart;
	    break;
	 case RX_WAIT:
	    _measurements.execTime += 
	       _measureEnd - _measureStart;
	    break;
	 default:
	    break;
      }

      _measurements.idleCycles = 
	 static_cast<unsigned long int>(_measurements.idleTime/_cycleLength);
      _measurements.execCycles = 
	 static_cast<unsigned long int>(_measurements.execTime/_cycleLength);
    
      _measureStart = _measureEnd;

   }

   /**
    * Computes the share of measured time that counted as execution.
    *
    * Uses the times accumulated so far; the measurements are not refreshed
    * by this call.
    *
    * @return execTime / (execTime + idleTime), or 0.0 when no time has been
    *         accumulated yet.
    */
   double ProcessingElement::getAvgUtilization()
   {
      // Guard against dividing by a zero total
      if(_measurements.execTime+_measurements.idleTime == sc_core::SC_ZERO_TIME)
      {
         return 0.0;
      }

      return (_measurements.execTime /
              (_measurements.execTime + _measurements.idleTime));
   }

   void ProcessingElement::txDma()
   {
      tgPacket* packet = 0;
      double waitTime  = 0.0;

      while(true)
      {
	 if(_txDmaQueue.empty())
	 {
	    wait(_txDmaEvent);
	 }

	 while(!_txDmaQueue.empty())
	 {
	    packet = _txDmaQueue.front();
	    _txDmaQueue.pop();

	    while(_buffer->txSpaceLeft() < packet->size)
	    {				       
	       wait(*(_buffer->txGetReadEvent()));			
	    }	    	    

	    _buffer->txReserveSpace(packet->size);

	    waitTime =
	       (1.0 / getFrequency()) * 
	       (packet->size / (getTerminalWidth(getTerminal())/8));
	    
	    wait(waitTime, sc_core::SC_US);

	    _buffer->txFreeSpace(packet->size);
	    _buffer->txPutPacket(packet);
	    
	 }
      }
   }

   void ProcessingElement::rxDma()
   {
      tgPacket* packet = 0;
      double waitTime  = 0.0;

      while(true)
      {
	 if(_rxDmaQueue.empty())
	 {
	    wait(_rxDmaEvent);
	 }

	 while(!_rxDmaQueue.empty())
	 {
	    packet = _rxDmaQueue.front();
	    _rxDmaQueue.pop();
	    
	    while(_buffer->rxSpaceLeft() < packet->size)
	    {
	       wait(*(_buffer->rxGetReadEvent()));
	    }

	    //std::cout << "rxDma" << std::endl;
	    
	    _buffer->rxReserveSpace(packet->size);
	    
	    waitTime =
	       (1.0 / getFrequency()) * 
	       (packet->size / (getTerminalWidth(getTerminal())/8));
	    
	    wait(waitTime, sc_core::SC_US);
	    
	    _buffer->rxFreeSpace(packet->size);
	       
	    _buffer->addInternalBytes(packet->size);
	    _buffer->rxPutPacket(packet);
	    
	 }
      }
   }

   /**
    * Gives access to the queue of received tokens. thread() only fills
    * this queue (with non-event tokens) when the code is built with
    * SCTG_USE_EXECMON.
    *
    * @return Mutable reference to the PE's token queue
    */
   std::queue<tgToken>& ProcessingElement::getTokenQueue()
   {
      std::queue<tgToken>& tokens = _tokenQueue;
      return tokens;
   }

   void ProcessingElement::updateUnfinishedTokensLatency()
   {
      _buffer->updateUnfinishedTokensLatency();
   }

}



// Local Variables:
// mode: c++
// c-file-style: "ellemtel"
// c-basic-offset: 3
// End:
