301 lines
8.4 KiB
C++
301 lines
8.4 KiB
C++
/*
 * File:   FailureDetector.cc
 * Author: jgaebler
 *
 * Created on July 31, 2012, 9:42 AM
 */
|
|
|
|
#include "fd/FailureDetector.h"
|
|
|
|
#include "Moversight.h"
|
|
#include "Dispatcher.h"
|
|
|
|
#include "fd/NetworkFailureDetector.h"
|
|
#include "fd/NetworkFailureDetectorType.h"
|
|
#include "fd/nfd/bertier/BertierFailureDetector.h"
|
|
#include "fd/nfd/staticFD/StaticIntervalDetector.h"
|
|
#include "fd/partition/NeighborhoodDetector.h"
|
|
#include "fd/partition/PartitionDetector.h"
|
|
#include "fd/partition/timer/PartitionTimer.h"
|
|
#include "fd/partition/msg/NDMessage.h"
|
|
#include "fd/partition/msg/NDMessageConfirm.h"
|
|
|
|
#include "mob/events/UnableToReconnectToGroupEvent.h"
|
|
#include "ms/events/PeerJoinedEvent.h"
|
|
#include "ms/events/PeerLeftEvent.h"
|
|
#include "ms/events/LocalPeerUpdatedEvent.h"
|
|
#include "ms/events/JoinGroupDoneEvent.h"
|
|
#include "mt/events/PendingPeersEvent.h"
|
|
#include "GroupClosedEvent.h"
|
|
|
|
namespace ubeeme {
|
|
namespace moversight {
|
|
|
|
// File-local debug output: streams "NFD@<local id> <msg>" to MOV_DEBUG when
// module.isPrintDebugNFD() is true. msg may itself be a chain of stream
// insertions (e.g. DEBUG("peer " << id)), so it is deliberately not
// parenthesised.
// The body is wrapped in do { } while (0) so that DEBUG(...); behaves like
// a single statement: it can be used as the branch of an unbraced if/else
// without capturing the following else.
#undef DEBUG
#define DEBUG(msg) \
    do { \
        if (module.isPrintDebugNFD()) { \
            MOV_DEBUG << "NFD@" << getLocalID() << " " << msg << endl; \
        } \
    } while (0)
|
|
|
|
/**
|
|
* @brief Constructor
|
|
* @param d A reference of the dispatcher
|
|
*/
|
|
FailureDetector::FailureDetector(Dispatcher & d) : MoversightService(d, "FailureDetector"), nfd(NULL), pd(NULL) {
|
|
}
|
|
|
|
/**
|
|
* @brief Destructor
|
|
*/
|
|
FailureDetector::~FailureDetector() {
|
|
}
|
|
|
|
/**
|
|
* @brief Initialize the service
|
|
*/
|
|
void
|
|
FailureDetector::initialise() {
|
|
|
|
dispatcher.subscribe<PeerFailedEvent>(this);
|
|
dispatcher.subscribe<GroupClosedEvent>(this);
|
|
|
|
//
|
|
// if (pd != NULL) {
|
|
//
|
|
// pd->finalise();
|
|
// delete pd;
|
|
//
|
|
// }//End if
|
|
// pd = new PartitionDetector(dispatcher);
|
|
// pd->initialise();
|
|
//
|
|
if (nfd != NULL) {
|
|
nfd->finalise();
|
|
delete nfd;
|
|
}//End if
|
|
|
|
switch (module.getFailureDetectorType()) {
|
|
|
|
case BERTIER_DETECTOR:
|
|
{
|
|
nfd = new BertierFailureDetector(dispatcher);
|
|
break;
|
|
}//End case
|
|
case STATIC_INTERVAL_DETECTOR:
|
|
case UNDEFINED_DETECTOR:
|
|
default:
|
|
{
|
|
nfd = new StaticIntervalDetector(dispatcher);
|
|
break;
|
|
}//End default
|
|
}//end switch
|
|
nfd->initialise();
|
|
}
|
|
|
|
/**
|
|
* @brief Finalize the service
|
|
*/
|
|
void
|
|
FailureDetector::finalise() {
|
|
|
|
if (nfd != NULL) {
|
|
nfd->stop();
|
|
nfd->finalise();
|
|
delete nfd;
|
|
nfd = NULL;
|
|
}//End if
|
|
|
|
// if (pd != NULL) {
|
|
// pd->finalise();
|
|
// delete pd;
|
|
// pd = NULL;
|
|
// }
|
|
|
|
dispatcher.unsubscribeAll(this);
|
|
}
|
|
|
|
/**
|
|
* @brief Handle each incoming group message
|
|
* @param pdu A received group message
|
|
*/
|
|
void
|
|
FailureDetector::handleMessage( const MoversightMessage* const pdu) {
|
|
|
|
// if( pdu->getType() == ND) {
|
|
// pd->handleNDMessage( dynamic_cast< const NDMessage*>( pdu));
|
|
// }
|
|
// else if( pdu->getType() == NDC) {
|
|
// pd->handleNDCMessage( dynamic_cast< const NDMessageConfirm*>( pdu));
|
|
// }
|
|
// else {
|
|
nfd->handleMessage( pdu);
|
|
// }
|
|
}
|
|
|
|
// /**
|
|
// * @brief A slot for receiving the peer reconnected event.
|
|
// * @param reconnectedPeerID The ID of the reconnected peer.
|
|
// */
|
|
// void
|
|
// FailureDetector::peerReconnected(PeerID reconnectedPeerID) {
|
|
// nfd->peerReconnected(reconnectedPeerID);
|
|
// }
|
|
|
|
/**
|
|
* @brief Callback method for the FD to tell the nfd that a monitored
|
|
* peer have failed.
|
|
* @param pId The id of the failed peer.
|
|
*/
|
|
void
|
|
FailureDetector::handleEvent(const PeerFailedEvent & e) {
|
|
std::cerr<<getLocalID()<<": peer failed event not handled for failed peer "<<e.getFailedPeer()<<std::endl;
|
|
// //Peer & p = dispatcher.getMembershipService().getPeer(pId);
|
|
// //p.setState(PENDING);
|
|
//
|
|
// //TODO missing PendingAnnounce --> checkForPartition() only when all agree on the pending peer!!!!
|
|
// //TODO add peer failed handling !!
|
|
//// dispatcher.getMembershipService().peerFailed(pId);
|
|
////
|
|
//// connectionLost();
|
|
}
|
|
|
|
|
|
// /**
|
|
// * @brief Starts the nfd service
|
|
// */
|
|
// void
|
|
// FailureDetector::start() {
|
|
//
|
|
//// if (nfd == NULL) {
|
|
//// throw NullPointerException("start - no network failure detector found");
|
|
//// }//End if
|
|
////
|
|
//// nfd->start();
|
|
//
|
|
// }
|
|
|
|
// /**
|
|
// * @brief Stops the nfd service
|
|
// */
|
|
// void
|
|
// FailureDetector::stop() {
|
|
//
|
|
//
|
|
//// pd->finalise();
|
|
// }
|
|
|
|
/**
|
|
* @brief Assignment operator.
|
|
* @param other The instance to assign
|
|
* @return A reference to the local instance.
|
|
*/
|
|
MoversightService &
|
|
FailureDetector::operator=(const FailureDetector & other) {
|
|
|
|
if (this == &other) {
|
|
return *this;
|
|
}
|
|
|
|
// nfd = other.nfd;
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
// /**
|
|
// * @brief Receives the signal connection lost and starts partition detection of the PartitionDetector.
|
|
// */
|
|
// void
|
|
// FailureDetector::connectionLost() {
|
|
//// pd->detectPartition();
|
|
// }
|
|
|
|
/**
|
|
* @brief If a peer of the secondary group is leaving. Updating the reachable
|
|
* and disconnected lists of the partitionTimer need to be updated.
|
|
* @param pID The leaving Peer
|
|
*/
|
|
void
|
|
FailureDetector::updatePartitionDetector(PeerID pID) {
|
|
|
|
// if (pd != NULL && pd->partitionTimer != NULL) {
|
|
//
|
|
// pd->partitionTimer->updateListsDueToDisconnectedPeer(pID);
|
|
// }
|
|
}
|
|
|
|
/**
|
|
* @brief Starts the neighborhoodDetection service trying to reach the nonreachable peers again.
|
|
* @param unreachable List of peerIDs that do not belong to our part
|
|
*/
|
|
void
|
|
FailureDetector::startND(const PeerIDList & unreachable) {
|
|
|
|
// if (pd != NULL && pd->partitionTimer != NULL) {
|
|
// pd->partitionTimer->setReReachablePeerID(UNDEFINED_PEER_ID);
|
|
// }
|
|
// if (pd != NULL && pd->nd != NULL) {
|
|
// pd->nd->start(unreachable);
|
|
// }
|
|
}
|
|
|
|
/**
|
|
* @brief Stops the neighborhoodDetection service, either because the others could be reached again
|
|
* or the rejoinTimer timed out.
|
|
*/
|
|
void
|
|
FailureDetector::stopND() {
|
|
// pd->nd->stop();
|
|
}
|
|
|
|
/**
|
|
* @brief Not only stopping but also deleting the things of the neighborhood detector.
|
|
*/
|
|
void
|
|
FailureDetector::finaliseND() {
|
|
|
|
// if (pd != NULL && pd->nd != NULL) {
|
|
// pd->nd->finalise();
|
|
// }
|
|
}
|
|
|
|
/**
|
|
* @brief Handle GroupClosedEvent
|
|
*
|
|
* The group was closed. Stop all sub-services.
|
|
*
|
|
* @param e The event.
|
|
*/
|
|
void
|
|
FailureDetector::handleEvent( const GroupClosedEvent & e) {
|
|
nfd->stop();
|
|
}
|
|
|
|
|
|
/**
|
|
* @brief Permits access to the moversight network failure detector.
|
|
* @return The current network failure detector instance.
|
|
*/
|
|
NetworkFailureDetector &
|
|
FailureDetector::getNetworkFailureDetector() {
|
|
if(nfd != NULL){
|
|
return *nfd;
|
|
}
|
|
|
|
throw NullPointerException("nfd not set within the failure detector.");
|
|
}
|
|
|
|
/**
|
|
* @brief Permits access to the moversight network failure detector.
|
|
* @return The current network failure detector instance.
|
|
*/
|
|
const NetworkFailureDetector &
|
|
FailureDetector::getNetworkFailureDetector() const {
|
|
|
|
if(nfd != NULL){
|
|
return *nfd;
|
|
}
|
|
|
|
throw NullPointerException("nfd not set within the failure detector.");
|
|
}
|
|
|
|
}
|
|
}
|