diff options
author | Benoit Foucher <benoit@zeroc.com> | 2005-12-16 15:20:55 +0000 |
---|---|---|
committer | Benoit Foucher <benoit@zeroc.com> | 2005-12-16 15:20:55 +0000 |
commit | 47eced97827ba72557f219cb6caa98232a504f74 (patch) | |
tree | 561156f48ef103bee7ebfe6babca14e99ea01152 /cpp/src | |
parent | Fixed bug wrt hash calculation which did not take into effect hosts can be (diff) | |
download | ice-47eced97827ba72557f219cb6caa98232a504f74.tar.bz2 ice-47eced97827ba72557f219cb6caa98232a504f74.tar.xz ice-47eced97827ba72557f219cb6caa98232a504f74.zip |
Added a feature that allows disabling a server if it exits with a
non-zero exit code or if it crashes.
Diffstat (limited to 'cpp/src')
-rw-r--r-- | cpp/src/IceGrid/Activator.cpp | 128 | ||||
-rw-r--r-- | cpp/src/IceGrid/Activator.h | 2 | ||||
-rw-r--r-- | cpp/src/IceGrid/IceGridNode.cpp | 46 | ||||
-rw-r--r-- | cpp/src/IceGrid/ServerAdapterI.cpp | 1 | ||||
-rw-r--r-- | cpp/src/IceGrid/ServerI.cpp | 62 | ||||
-rw-r--r-- | cpp/src/IceGrid/ServerI.h | 5 |
6 files changed, 140 insertions, 104 deletions
diff --git a/cpp/src/IceGrid/Activator.cpp b/cpp/src/IceGrid/Activator.cpp index a3c3b804573..0dc94443d97 100644 --- a/cpp/src/IceGrid/Activator.cpp +++ b/cpp/src/IceGrid/Activator.cpp @@ -20,9 +20,13 @@ #include <sys/types.h> #include <sys/stat.h> -#include <signal.h> #include <fcntl.h> +#ifndef _WIN32 +# include <sys/wait.h> +# include <signal.h> +#endif + using namespace std; using namespace Ice; using namespace IceGrid; @@ -54,7 +58,7 @@ private: #define ICE_STRING(X) #X -namespace +namespace IceGrid { #ifndef _WIN32 @@ -742,7 +746,7 @@ Activator::deactivate(const string& name, const Ice::ProcessPrx& process) // return; } -#endif +#endif // // Try to shut down the server gracefully using the process proxy. @@ -820,17 +824,10 @@ Activator::kill(const string& name) throw ex; } - BOOL b = TerminateProcess(hnd, 1); + BOOL b = TerminateProcess(hnd, 0); // We use 0 for the exit code to make sure it's not considered as a crash. CloseHandle(hnd); - if(!b) - { - SyscallException ex(__FILE__, __LINE__); - ex.error = getSystemErrno(); - throw ex; - } - if(_traceLevels->activator > 1) { Ice::Trace out(_traceLevels->logger, _traceLevels->activatorCat); @@ -962,19 +959,23 @@ Activator::destroy() void Activator::runTerminationListener() { - try - { - terminationListener(); - } - catch(const Exception& ex) - { - Error out(_traceLevels->logger); - out << "exception in process termination listener:\n" << ex; - } - catch(...) + while(true) { - Error out(_traceLevels->logger); - out << "unknown exception in process termination listener"; + try + { + terminationListener(); + break; + } + catch(const Exception& ex) + { + Error out(_traceLevels->logger); + out << "exception in process termination listener:\n" << ex; + } + catch(...) 
+ { + Error out(_traceLevels->logger); + out << "unknown exception in process termination listener"; + } } } @@ -1052,7 +1053,7 @@ Activator::terminationListener() assert(pos < handles.size()); HANDLE hnd = handles[pos]; - vector<ServerIPtr> terminated; + vector<Process> terminated; bool deactivated = false; { IceUtil::Monitor< IceUtil::Mutex>::Lock sync(*this); @@ -1067,15 +1068,7 @@ Activator::terminationListener() { if(p->second.hnd == hnd) { - if(_traceLevels->activator > 0) - { - Ice::Trace out(_traceLevels->logger, _traceLevels->activatorCat); - out << "detected termination of server `" << p->first << "'"; - } - - terminated.push_back(p->second.server); - - CloseHandle(hnd); + terminated.push_back(p->second); _processes.erase(p); break; } @@ -1085,16 +1078,31 @@ Activator::terminationListener() deactivated = _deactivating && _processes.empty(); } - for(vector<ServerIPtr>::const_iterator p = terminated.begin(); p != terminated.end(); ++p) + for(vector<Process>::const_iterator p = terminated.begin(); p != terminated.end(); ++p) { + DWORD status; + BOOL b = GetExitCodeProcess(p->hnd, &status); + CloseHandle(p->hnd); + assert(status != STILL_ACTIVE); + + if(_traceLevels->activator > 0) + { + Ice::Trace out(_traceLevels->logger, _traceLevels->activatorCat); + out << "detected termination of server `" << p->server->getId() << "'"; + if(status != 0) + { + out << "\nexit code = " << status; + } + } + try { - (*p)->terminated(); + p->server->terminated("", status); } catch(const Ice::LocalException& ex) { Ice::Warning out(_traceLevels->logger); - out << "unexpected exception raised by server `" << (*p)->getId() << "' termination:\n" << ex; + out << "unexpected exception raised by server `" << p->server->getId() << "' termination:\n" << ex; } } @@ -1148,7 +1156,7 @@ Activator::terminationListener() throw ex; } - vector<pair<std::string, ServerIPtr> > terminated; + vector<Process> terminated; bool deactivated = false; { IceUtil::Monitor< IceUtil::Mutex>::Lock 
sync(*this); @@ -1210,17 +1218,7 @@ Activator::terminationListener() // If the pipe was closed, the process has terminated. // - if(_traceLevels->activator > 0) - { - Ice::Trace out(_traceLevels->logger, _traceLevels->activatorCat); - out << "detected termination of server `" << p->first << "':"; - if(!p->second.msg.empty()) - { - out << "\n" << p->second.msg; - } - } - - terminated.push_back(make_pair(p->second.msg, p->second.server)); + terminated.push_back(p->second); close(p->second.pipeFd); _processes.erase(p++); @@ -1233,16 +1231,44 @@ Activator::terminationListener() deactivated = _deactivating && _processes.empty(); } - for(vector<pair<string, ServerIPtr> >::const_iterator p = terminated.begin(); p != terminated.end(); ++p) + for(vector<Process>::const_iterator p = terminated.begin(); p != terminated.end(); ++p) { + int status; + pid_t pid = waitpid(p->pid, &status, 0); + if(pid < 0) + { + SyscallException ex(__FILE__, __LINE__); + ex.error = getSystemErrno(); + throw ex; + } + assert(pid == p->pid); + + if(_traceLevels->activator > 0) + { + Ice::Trace out(_traceLevels->logger, _traceLevels->activatorCat); + out << "detected termination of server `" << p->server->getId() << "'"; + if(!p->msg.empty()) + { + out << "\nreason = " << p->msg; + } + if(WIFEXITED(status) && status != 0) + { + out << "\nexit code = " << WEXITSTATUS(status); + } + else if(WIFSIGNALED(status)) + { + out << "\nsignal = " << signalToString(WTERMSIG(status)); + } + } + try { - p->second->terminated(p->first); + p->server->terminated(p->msg, status); } catch(const Ice::LocalException& ex) { Ice::Warning out(_traceLevels->logger); - out << "unexpected exception raised by server `" << p->second->getId() << "' termination:\n" << ex; + out << "unexpected exception raised by server `" << p->server->getId() << "' termination:\n" << ex; } } diff --git a/cpp/src/IceGrid/Activator.h b/cpp/src/IceGrid/Activator.h index 85133192e08..7ea4267ccf4 100644 --- a/cpp/src/IceGrid/Activator.h +++ 
b/cpp/src/IceGrid/Activator.h @@ -22,6 +22,8 @@ typedef IceUtil::Handle<TraceLevels> TraceLevelsPtr; class ServerI; typedef IceUtil::Handle<ServerI> ServerIPtr; +std::string signalToString(int); + class Activator : public IceUtil::Monitor< IceUtil::Mutex>, public IceUtil::Shared { public: diff --git a/cpp/src/IceGrid/IceGridNode.cpp b/cpp/src/IceGrid/IceGridNode.cpp index 1e51dd6d1a7..3535cf3ab4c 100644 --- a/cpp/src/IceGrid/IceGridNode.cpp +++ b/cpp/src/IceGrid/IceGridNode.cpp @@ -27,12 +27,7 @@ # define S_ISDIR(mode) ((mode) & _S_IFDIR) # define S_ISREG(mode) ((mode) & _S_IFREG) #else -# include <csignal> -# include <signal.h> -# include <sys/wait.h> -# include <sys/types.h> # include <sys/stat.h> -# include <unistd.h> #endif using namespace std; @@ -142,35 +137,6 @@ private: } // End of namespace IceGrid -#ifndef _WIN32 -extern "C" -{ - -static void -childHandler(int) -{ - // - // Call wait to de-allocate any resources allocated for the child - // process and avoid zombie processes. See man wait or waitpid for - // more information. - // - int olderrno = errno; - - pid_t pid; - do - { - pid = waitpid(-1, 0, WNOHANG); - } - while(pid > 0); - - assert(pid != -1 || errno == ECHILD); - - errno = olderrno; -} - -} -#endif - CollocatedRegistry::CollocatedRegistry(const CommunicatorPtr& communicator, const ActivatorPtr& activator) : RegistryI(communicator), _activator(activator) @@ -226,18 +192,6 @@ NodeService::shutdown() bool NodeService::start(int argc, char* argv[]) { -#ifndef _WIN32 - // - // This application forks, so we need a signal handler for child termination. 
- // - struct sigaction action; - action.sa_handler = childHandler; - sigemptyset(&action.sa_mask); - sigaddset(&action.sa_mask, SIGCHLD); - action.sa_flags = 0; - sigaction(SIGCHLD, &action, 0); -#endif - bool nowarn = false; bool checkdb = false; string desc; diff --git a/cpp/src/IceGrid/ServerAdapterI.cpp b/cpp/src/IceGrid/ServerAdapterI.cpp index 1bf8ab729c3..f016b95aa0e 100644 --- a/cpp/src/IceGrid/ServerAdapterI.cpp +++ b/cpp/src/IceGrid/ServerAdapterI.cpp @@ -208,6 +208,7 @@ ServerAdapterI::activationFailed(bool destroyed) } } + Lock sync(*this); for(vector<AMD_Adapter_activatePtr>::const_iterator p = _activateCB.begin(); p != _activateCB.end(); ++p) { (*p)->ice_response(0); diff --git a/cpp/src/IceGrid/ServerI.cpp b/cpp/src/IceGrid/ServerI.cpp index 8c7bb230b25..f08dfeeaf84 100644 --- a/cpp/src/IceGrid/ServerI.cpp +++ b/cpp/src/IceGrid/ServerI.cpp @@ -24,6 +24,7 @@ #ifdef _WIN32 # include <direct.h> +# include <signal.h> #else # include <unistd.h> # include <dirent.h> @@ -493,11 +494,14 @@ ServerI::ServerI(const NodeIPtr& node, const ServerPrx& proxy, const string& ser _id(id), _waitTime(wt), _serversDir(serversDir), + _disableOnFailure(0), _state(ServerI::Inactive), _activation(ServerI::Manual), _pid(0) { assert(_node->getActivator()); + const_cast<int&>(_disableOnFailure) = + _node->getCommunicator()->getProperties()->getPropertyAsIntWithDefault("IceGrid.Node.DisableOnFailure", 0); } ServerI::~ServerI() @@ -517,6 +521,21 @@ ServerI::start_async(const AMD_Server_startPtr& amdCB, const Ice::Current&) } // + // The server is disabled because it failed and if the time of + // the failure is now past the configured duration or if the + // server is manualy started, we re-enable the server. 
+ // + if(_activation == Disabled && + _failureTime != IceUtil::Time() && + (amdCB || + (_disableOnFailure > 0 && + (_failureTime + IceUtil::Time::seconds(_disableOnFailure) < IceUtil::Time::now())))) + { + _failureTime = IceUtil::Time(); + _activation = _previousActivation; + } + + // // If the amd callback is set, it's a remote start call to // manually activate the server. Otherwise it's a call to // activate the server on demand (called from ServerAdapterI). @@ -629,6 +648,8 @@ ServerI::setEnabled(bool enabled, const ::Ice::Current&) { return; } + + _failureTime = IceUtil::Time(); _activation = enabled ? (_desc->activation == "on-demand" ? OnDemand : Manual) : Disabled; } @@ -688,7 +709,7 @@ ServerI::ServerActivation ServerI::getActivationMode() const { Lock sync(*this); - return _activation; + return _desc->activation == "on-demand" ? OnDemand : Manual; } const string& @@ -1140,7 +1161,7 @@ ServerI::destroy() } void -ServerI::terminated(const string& msg) +ServerI::terminated(const string& msg, int status) { ServerAdapterDict adpts; { @@ -1175,16 +1196,45 @@ ServerI::terminated(const string& msg) _process = 0; _pid = 0; + if(_disableOnFailure != 0 && _activation != Disabled) + { + bool failed = false; +#ifndef _WIN32 + failed = WIFEXITED(status) && WEXITSTATUS(status) != 0; + if(WIFSIGNALED(status)) + { + int s = WTERMSIG(status); + failed = s == SIGABRT || s == SIGILL || s == SIGBUS || s == SIGFPE || s == SIGSEGV; + } +#else + failed = status != 0; +#endif + if(failed) + { + _previousActivation = _activation; + _activation = Disabled; + _failureTime = IceUtil::Time::now(); + } + } + if(_state != ServerI::Destroying) { - if(msg.empty()) + ostringstream os; + os << "The server terminated unexpectedly"; +#ifndef _WIN32 + if(WIFEXITED(status)) { - setStateNoSync(ServerI::Inactive, "The server terminated unexpectedly."); + os << " with exit code " << WEXITSTATUS(status); } - else + else if(WIFSIGNALED(status)) { - setStateNoSync(ServerI::Inactive, "The server 
terminated unexpectedly:\n" + msg); + os << " with signal " << signalToString(WTERMSIG(status)); } +#else + os << " with exit code " << status; +#endif + os << (msg.empty() ? "." : ":\n" + msg); + setStateNoSync(ServerI::Inactive, os.str()); command = nextCommand(); } else diff --git a/cpp/src/IceGrid/ServerI.h b/cpp/src/IceGrid/ServerI.h index 7de46709cd2..5d50ad740c5 100644 --- a/cpp/src/IceGrid/ServerI.h +++ b/cpp/src/IceGrid/ServerI.h @@ -100,7 +100,7 @@ public: void deactivate(); void update(); void destroy(); - void terminated(const std::string& = std::string()); + void terminated(const std::string&, int); private: @@ -124,6 +124,7 @@ private: const std::string _id; const Ice::Int _waitTime; const std::string _serversDir; + const int _disableOnFailure; std::string _serverDir; std::string _application; @@ -137,6 +138,8 @@ private: bool _processRegistered; Ice::ProcessPrx _process; std::set<std::string> _activeAdapters; + IceUtil::Time _failureTime; + ServerActivation _previousActivation; DestroyCommandPtr _destroy; StopCommandPtr _stop; |