diff options
author | Benoit Foucher <benoit@zeroc.com> | 2012-05-25 14:20:45 +0200 |
---|---|---|
committer | Benoit Foucher <benoit@zeroc.com> | 2012-05-25 14:20:45 +0200 |
commit | 60a630deeffc0e847dbcabcc0ac6e34bdc9aa068 (patch) | |
tree | 267506bf88b16a268014b0cc67dee1e23a72ebb5 /cpp/src | |
parent | vsaddin bug fixes: (diff) | |
download | ice-60a630deeffc0e847dbcabcc0ac6e34bdc9aa068.tar.bz2 ice-60a630deeffc0e847dbcabcc0ac6e34bdc9aa068.tar.xz ice-60a630deeffc0e847dbcabcc0ac6e34bdc9aa068.zip |
Fixed ICE-4835, round-robin policy bug when nodes are down
Diffstat (limited to 'cpp/src')
-rw-r--r-- | cpp/src/IceGrid/AdapterCache.cpp | 114 | ||||
-rw-r--r-- | cpp/src/IceGrid/AdapterCache.h | 3 |
2 files changed, 75 insertions, 42 deletions
diff --git a/cpp/src/IceGrid/AdapterCache.cpp b/cpp/src/IceGrid/AdapterCache.cpp index 28572d48dd5..5a1f3d6528c 100644 --- a/cpp/src/IceGrid/AdapterCache.cpp +++ b/cpp/src/IceGrid/AdapterCache.cpp @@ -437,7 +437,8 @@ ReplicaGroupEntry::ReplicaGroupEntry(AdapterCache& cache, const string& application, const LoadBalancingPolicyPtr& policy) : AdapterEntry(cache, id, application), - _lastReplica(0) + _lastReplica(0), + _requestInProgress(false) { update(policy); } @@ -572,6 +573,12 @@ ReplicaGroupEntry::getLocatorAdapterInfo(LocatorAdapterInfoSeq& adapters, int& n replicas.reserve(_replicas.size()); if(RoundRobinLoadBalancingPolicyPtr::dynamicCast(_loadBalancing)) { + // Serialize round-robin requests + while(_requestInProgress) + { + wait(); + } + _requestInProgress = true; for(unsigned int i = 0; i < _replicas.size(); ++i) { replicas.push_back(_replicas[(_lastReplica + i) % _replicas.size()]); @@ -600,53 +607,78 @@ ReplicaGroupEntry::getLocatorAdapterInfo(LocatorAdapterInfoSeq& adapters, int& n } } - if(adaptive) + int unreachable = 0; + bool synchronizing = false; + try { + if(adaptive) + { + // + // This must be done outside the synchronization block since + // the trasnform() might call and lock each server adapter + // entry. We also can't sort directly as the load of each + // server adapter is not stable so we first take a snapshot of + // each adapter and sort the snapshot. + // + vector<pair<float, ServerAdapterEntryPtr> > rl; + transform(replicas.begin(), replicas.end(), back_inserter(rl), TransformToReplicaLoad(loadSample)); + sort(rl.begin(), rl.end(), ReplicaLoadComp()); + replicas.clear(); + transform(rl.begin(), rl.end(), back_inserter(replicas), TransformToReplica()); + } + // - // This must be done outside the synchronization block since - // the trasnform() might call and lock each server adapter - // entry. We also can't sort directly as the load of each - // server adapter is not stable so we first take a snapshot of - // each adapter and sort the snapshot. + // Retrieve the proxy of each adapter from the server. The adapter + // might not exist anymore at this time or the node might not be + // reachable. // - vector<pair<float, ServerAdapterEntryPtr> > rl; - transform(replicas.begin(), replicas.end(), back_inserter(rl), TransformToReplicaLoad(loadSample)); - sort(rl.begin(), rl.end(), ReplicaLoadComp()); - replicas.clear(); - transform(rl.begin(), rl.end(), back_inserter(replicas), TransformToReplica()); + set<string> emptyExcludes; + bool firstUnreachable = true; + for(vector<ServerAdapterEntryPtr>::const_iterator p = replicas.begin(); p != replicas.end(); ++p) + { + if(!roundRobin || excludes.find((*p)->getId()) == excludes.end()) + { + try + { + int dummy; + bool dummy2; + bool dummy3; + (*p)->getLocatorAdapterInfo(adapters, dummy, dummy2, dummy3, emptyExcludes); + firstUnreachable = false; + } + catch(const SynchronizationException&) + { + synchronizing = true; + } + catch(const Ice::UserException&) + { + if(firstUnreachable) + { + ++unreachable; // Count the number of un-reachable nodes. + } + } + } + } + } + catch(...) + { + if(roundRobin) + { + Lock sync(*this); + assert(_requestInProgress); + _requestInProgress = false; + } + throw; } - // - // Retrieve the proxy of each adapter from the server. The adapter - // might not exist anymore at this time or the node might not be - // reachable. - // - bool synchronizing = false; - set<string> emptyExcludes; - for(vector<ServerAdapterEntryPtr>::const_iterator p = replicas.begin(); p != replicas.end(); ++p) + if(roundRobin) { - if(!roundRobin || excludes.find((*p)->getId()) == excludes.end()) + Lock sync(*this); + assert(_requestInProgress); + _requestInProgress = false; + if(unreachable > 0) { - try - { - int dummy; - bool dummy2; - bool dummy3; - (*p)->getLocatorAdapterInfo(adapters, dummy, dummy2, dummy3, emptyExcludes); - } - catch(const AdapterNotExistException&) - { - } - catch(const NodeUnreachableException&) - { - } - catch(const DeploymentException&) - { - } - catch(const SynchronizationException&) - { - synchronizing = true; - } + _lastReplica = (_lastReplica + unreachable) % static_cast<int>(_replicas.size()); } } diff --git a/cpp/src/IceGrid/AdapterCache.h b/cpp/src/IceGrid/AdapterCache.h index 723a85ebbe4..9a42d62b645 100644 --- a/cpp/src/IceGrid/AdapterCache.h +++ b/cpp/src/IceGrid/AdapterCache.h @@ -91,7 +91,7 @@ private: }; typedef IceUtil::Handle<ServerAdapterEntry> ServerAdapterEntryPtr; -class ReplicaGroupEntry : public AdapterEntry, public IceUtil::Mutex +class ReplicaGroupEntry : public AdapterEntry, public IceUtil::Monitor<IceUtil::Mutex> { public: @@ -117,6 +117,7 @@ private: LoadSample _loadSample; std::vector<ServerAdapterEntryPtr> _replicas; int _lastReplica; + bool _requestInProgress; }; typedef IceUtil::Handle<ReplicaGroupEntry> ReplicaGroupEntryPtr; |