Files
scandocs/uni/masterarbeit/source/moversight/fd/partition/PartitionDetector.cc
2014-06-30 13:58:10 +02:00

340 lines
11 KiB
C++

/*
* File: PartitionDetector.cc
* Author: Grit Schneider
*
* Created on August 2, 2012, 3:31 PM
*/
#include "PartitionDetector.h"
#include "Dispatcher.h"
#include "Moversight.h"
#include "ms/MembershipService.h"
#include "ms/MemberRegister.h"
#include "fd/partition/NeighborhoodDetector.h"
#include "fd/partition/timer/PartitionTimer.h"
#include "fd/partition/msg/NDMessage.h"
#include "fd/partition/msg/NDMessageConfirm.h"
#include "fd/partition/events/PartitionDetectedEvent.h"
#include "fd/partition/events/NeighborReachableAgainEvent.h"
namespace ubeeme {
namespace moversight {
#undef DEBUG
// Debug trace for this service: prints "PD@<local id> <msg>" through MOV_DEBUG,
// but only when module.isPrintDebugNFD() is true. The body is wrapped in
// do { } while (0) so that DEBUG(...); is a single statement and cannot
// capture the else-branch of a surrounding if/else.
#define DEBUG(msg) \
    do { \
        if (module.isPrintDebugNFD()) { \
            MOV_DEBUG << "PD@" << getLocalID() << " " << msg << endl; \
        } \
    } while (0)
/**
 * @brief Constructs the partition detector.
 *
 * Hands the dispatcher and the service name "PartitionDetector" to the
 * MoversightService base and starts without a partition timer and without a
 * neighborhood detector; both are allocated later in initialise().
 *
 * @param d The dispatcher this service works with.
 */
PartitionDetector::PartitionDetector(Dispatcher & d)
    : MoversightService(d, "PartitionDetector"),
      partitionTimer(NULL),
      nd(NULL) {}
/**
 * @brief Destructor.
 *
 * Releases nothing itself; the partition timer and the neighborhood detector
 * are stopped and deleted in finalise().
 */
PartitionDetector::~PartitionDetector() {}
/**
 * @brief Initialises the PartitionDetector.
 *
 * Creates and initialises a fresh NeighborhoodDetector and a fresh
 * PartitionTimer. A detector or timer left over from an earlier call is
 * finalised/stopped and deleted first, so the method may be called
 * repeatedly.
 */
void
PartitionDetector::initialise() {
    if (nd != NULL) {
        nd->finalise();
        delete nd;
        // Reset right away: if the allocation below throws, nd must not keep
        // pointing at the freed object (finalise() would delete it again).
        nd = NULL;
    }//End if
    nd = new NeighborhoodDetector(dispatcher);
    nd->initialise();

    if (partitionTimer != NULL) {
        stopAndDeletePartitionTimer();
    }//End if
    partitionTimer = new PartitionTimer(*this);
}
/**
 * @brief Shuts the PartitionDetector down.
 *
 * Removes all dispatcher subscriptions of this service, stops and deletes the
 * partition timer and, if a neighborhood detector exists, finalises and
 * deletes it as well.
 */
void
PartitionDetector::finalise() {
    dispatcher.unsubscribeAll(this);
    DEBUG("finalise - stop PartitionDetector");
    stopAndDeletePartitionTimer();

    // nothing more to do when no neighborhood detector was created
    if (nd == NULL) {
        return;
    }

    nd->finalise();
    delete nd;
    nd = NULL;
}
/**
 * @brief Entry point of the partition detection.
 *
 * If checkIfConnectionLost() reports a peer that is already marked as
 * disconnected, the timer is stopped and a PartitionDetectedEvent carrying the
 * timer's non-reachable peer list is signalled immediately.
 *
 * Otherwise, unless a detection round is already running, the timer's
 * non-reachable list is filled from the member register, the round is marked
 * as mandatory, the timer is started and ND messages are sent to the peers on
 * that list.
 */
void
PartitionDetector::detectPartition() {
    const bool connectionLossKnown = checkIfConnectionLost();
    if (connectionLossKnown) {
        partitionTimer->stop();
        dispatcher.signal(new PartitionDetectedEvent(partitionTimer->getNonReachablePeerIDList()));
        return;
    }

    // a detection round is already in progress - do not start a second one
    if (partitionTimer->isRunning()) {
        return;
    }

    partitionTimer->setNonReachablePeersFromMR();
    // the first round of asking the neighbors is always mandatory
    partitionTimer->setIsMandatory(true);
    partitionTimer->start();
    nd->sendNDMessage(partitionTimer->getNonReachablePeerIDList());
}
/**
 * @brief Assignment operator.
 *
 * Copies only the partitionTimer pointer of @p other; the neighborhood
 * detector pointer is left as it is. Self-assignment is a no-op.
 *
 * NOTE(review): after the assignment both instances refer to the same timer
 * object, which stopAndDeletePartitionTimer() deletes - confirm that
 * assigned-to instances are never finalised independently.
 *
 * @param other The instance to assign from.
 * @return A reference to the local instance.
 */
PartitionDetector &
PartitionDetector::operator =(PartitionDetector & other) {
    if (this != &other) {
        partitionTimer = other.partitionTimer;
    }
    return *this;
}//End
/**
 * @brief Convenience accessor for the membership service.
 * @return The membership service provided by the dispatcher.
 */
MembershipService &
PartitionDetector::getMembershipService() {
    MembershipService & service = dispatcher.getMembershipService();
    return service;
}
/**
 * @brief Reacts to an expired partition timer.
 *
 * When the expired round was mandatory, the detection is extended via
 * determineAndSendToMissingMastersSlaves(). Otherwise the timer is stopped and
 * a PartitionDetectedEvent with the timer's non-reachable peer list is
 * signalled.
 *
 * @param timer The expired timer; only its mandatory flag is read here, all
 *              further operations use the partitionTimer member.
 */
void
PartitionDetector::handlePartitionTimer(PartitionTimer* timer) {
    if (!timer->isMandatory()) {
        partitionTimer->stop();
        dispatcher.signal(new PartitionDetectedEvent(partitionTimer->getNonReachablePeerIDList()));
        return;
    }

    determineAndSendToMissingMastersSlaves();
}
/**
* @brief Handle each incoming nd message.
* @param ndm A received nd message.
*/
void
PartitionDetector::handleNDMessage(const NDMessage * ndm) {
DEBUG("handleNDMessage");
if (getLocalSubState() == REJOIN_IN_PROGRESS) {
//drop
return;
}//end if
PeerID senderID = ndm->getSourceID();
// equal whether its in the detection or in the rejoin part
nd->sendNDMessageConfirm(senderID);
// set the peer as reReachable to make sure we're contacting the right one afterwards
partitionTimer->setReReachablePeerID(senderID);
}
/**
 * @brief Processes a received ND confirm (NDC) message.
 *
 * - REJOIN_IN_PROGRESS: the message is dropped.
 * - WAITING_FOR_REJOIN: the sender must have been a member of the last member
 *   register. If it is the recorded re-reachable peer and the local ID is the
 *   smaller one, or if the recorded re-reachable peer ID is 0 (presumably
 *   "none recorded" - TODO confirm), the local sub state switches to
 *   REJOIN_IN_PROGRESS and a NeighborReachableAgainEvent is signalled.
 * - any other sub state (neighborhood detection): the sender is marked as
 *   reachable. If no non-reachable peer is left and the current round is not
 *   the mandatory one, the timer is stopped and all peer states are reset.
 *
 * The last check used to be the third branch of an if / else-if chain whose
 * first two conditions (sub state != / == WAITING_FOR_REJOIN) were exhaustive,
 * so it was never executed. It now runs directly after markAsReachable().
 *
 * @param ndmc The received NDC message.
 * @throws PeerNotFoundException if the sender was not part of the old group
 *         while waiting for a rejoin.
 */
void
PartitionDetector::handleNDCMessage(const NDMessageConfirm * ndmc) {
    DEBUG("handleNDCMessage");

    if (getLocalSubState() == REJOIN_IN_PROGRESS) {
        //drop
        return;
    }//end if

    PeerID senderID = ndmc->getSourceID();

    if (getLocalSubState() != WAITING_FOR_REJOIN) {
        // updating lists only during neighborhood detection, not while
        // searching for the old group again
        partitionTimer->markAsReachable(senderID);

        // partition case: success if all the peers answered with NDC
        if (partitionTimer->isNonReachableQueueEmpty() && !partitionTimer->isMandatory()) {
            partitionTimer->stop();
            resetAllStates();
        }
        return;
    }

    // from here on the local sub state is WAITING_FOR_REJOIN
    if (!isPeerMemberOfOldGroup(senderID)) {
        throw PeerNotFoundException("The Peer sending us a ND wasn't member of the old group!");
    }

    // success: primary group could be reached again!
    if ((partitionTimer->getReReachablePeerID() == senderID && getLocalID() < senderID)
            || partitionTimer->getReReachablePeerID() == 0) {
        DEBUG("yippie - another group reachable again! ");
        setLocalSubState(REJOIN_IN_PROGRESS);
        dispatcher.signal(new NeighborReachableAgainEvent(senderID, partitionTimer->getDisconnectedPeers()));
    }
}
/**
* @brief Method to clean up the whole PeerState to (JOINED,NO_SUBSTATE,NO_STATE_OPERATION).
*/
void
PartitionDetector::resetAllStates() {
MemberRegister & mr = dispatcher.getMembershipService().getCurrentMemberRegister();
PeerState state(JOINED);
mr.setPeerStateOfAllPeers(state);
}
/**
* @brief Method checks if signal came directly from the dispatcher, that the connection
* was lost for sure - no searching for the partition needed, we know which
* peer has lost its connection.
* @return true - if it was a signal from the dispatcher, and one of the peers
* has the state "DISCONNECTED", false otherwise
*/
bool
PartitionDetector::checkIfConnectionLost() {
PeerIDList allPeers = getMembershipService().getClusterAndMasterPeerIDList(getLocalPeer());
for (size_t i = 0; i < allPeers.size(); i++) {
PeerID pId = allPeers.get(i);
Peer p = getMembershipService().getPeer(pId);
if (p.getPeerState() == DISCONNECTED) {
partitionTimer->updateDueToConnectionLoss(pId);
return true;
}
}
return false;
}
/**
 * @brief Runs the follow-up round after the mandatory round has expired.
 *
 * Stops the timer, extends the non-reachable list by the slaves of every
 * master that is still non-reachable and by pending peers found in the
 * clusters of the current member register, sends ND messages to everybody on
 * the list, restarts the timer and marks the new round as non-mandatory.
 */
void
PartitionDetector::determineAndSendToMissingMastersSlaves() {
    partitionTimer->stop();

    const bool mastersMissing = partitionTimer->getNonReachableMasters().size() != 0;
    if (mastersMissing) {
        addingMissingMastersSlaves();
    }
    addingPendingSlavesFromOtherClusters();

    nd->sendNDMessage(partitionTimer->getNonReachablePeerIDList());

    partitionTimer->start();
    partitionTimer->setIsMandatory(false);
}
/**
* @brief Adding the slaves from a master we can't reach to the unreachable list.
*/
void
PartitionDetector::addingMissingMastersSlaves() {
PeerIDList nonReachableMasters = partitionTimer->getNonReachableMasters();
PeerIDList otherClustersSlaves;
for (size_t i = 0; i < nonReachableMasters.size(); i++) {
PeerID masterID = nonReachableMasters.get(i);
ClusterID cID = getMembershipService().findPeer(masterID);
if (getLocalMasterID() != masterID) {
otherClustersSlaves.add(getMembershipService().getClusterPeerIDListSlavesOnly(cID));
}
}
partitionTimer->addingPeerSetToNonReachableList(otherClustersSlaves);
}
/**
* @brief Also slaves from other clusters where the master is joined need to be checked whether they are pending.
* And if they are Pending we need to put them on the unreachable list of the partitionTimer!
*/
void
PartitionDetector::addingPendingSlavesFromOtherClusters() {
ClusterList & cList = dispatcher.getMembershipService().getCurrentMemberRegister().getClusters();
PeerIDList otherClustersSlaves = partitionTimer->getNonReachablePeerIDList();
PeerIDList missedPeers;
for (size_t i = 0; i < cList.size(); i++) {
const PeerList & pList = cList.get(i).getPeerList();
for (size_t j = 0; j < pList.size(); j++) {
if (pList.get(j).getState() == PENDING && !otherClustersSlaves.contains(pList.get(j).getPeerID())) {
missedPeers.add(pList.get(j).getPeerID());
}
}
}
if (missedPeers.size() != 0) {
partitionTimer->addingPeerSetToNonReachableList(missedPeers);
}
}
/**
 * @brief Tells whether a peer belonged to the group before the partition.
 *
 * The lookup is done on a copy of the last member register of the membership
 * service.
 *
 * @param searchedPeer The ID of the peer to look for.
 * @return true if the last member register contains the peer, false otherwise.
 */
bool
PartitionDetector::isPeerMemberOfOldGroup(PeerID searchedPeer) {
    MemberRegister previousRegister = dispatcher.getMembershipService().getLastMemberRegister();
    return previousRegister.contains(searchedPeer);
}
/**
 * @brief Stops and deletes the partition timer, if there is one.
 *
 * Afterwards the partitionTimer member is NULL. Calling the method without an
 * existing timer only emits the debug trace.
 */
void
PartitionDetector::stopAndDeletePartitionTimer() {
    DEBUG("stopTimer- try to stop the timer PartitionTimer");

    if (partitionTimer == NULL) {
        return;
    }

    partitionTimer->stop();
    delete partitionTimer;
    partitionTimer = NULL;
}
}
}