summaryrefslogtreecommitdiff
path: root/cpp/src
diff options
context:
space:
mode:
authorBenoit Foucher <benoit@zeroc.com>2005-12-16 15:20:55 +0000
committerBenoit Foucher <benoit@zeroc.com>2005-12-16 15:20:55 +0000
commit47eced97827ba72557f219cb6caa98232a504f74 (patch)
tree561156f48ef103bee7ebfe6babca14e99ea01152 /cpp/src
parentFixed bug wrt hash calculation which did not take into effect hosts can be (diff)
downloadice-47eced97827ba72557f219cb6caa98232a504f74.tar.bz2
ice-47eced97827ba72557f219cb6caa98232a504f74.tar.xz
ice-47eced97827ba72557f219cb6caa98232a504f74.zip
Added a feature to automatically disable a server if it exits with a
non-zero exit code or if it crashes.
Diffstat (limited to 'cpp/src')
-rw-r--r--cpp/src/IceGrid/Activator.cpp128
-rw-r--r--cpp/src/IceGrid/Activator.h2
-rw-r--r--cpp/src/IceGrid/IceGridNode.cpp46
-rw-r--r--cpp/src/IceGrid/ServerAdapterI.cpp1
-rw-r--r--cpp/src/IceGrid/ServerI.cpp62
-rw-r--r--cpp/src/IceGrid/ServerI.h5
6 files changed, 140 insertions, 104 deletions
diff --git a/cpp/src/IceGrid/Activator.cpp b/cpp/src/IceGrid/Activator.cpp
index a3c3b804573..0dc94443d97 100644
--- a/cpp/src/IceGrid/Activator.cpp
+++ b/cpp/src/IceGrid/Activator.cpp
@@ -20,9 +20,13 @@
#include <sys/types.h>
#include <sys/stat.h>
-#include <signal.h>
#include <fcntl.h>
+#ifndef _WIN32
+# include <sys/wait.h>
+# include <signal.h>
+#endif
+
using namespace std;
using namespace Ice;
using namespace IceGrid;
@@ -54,7 +58,7 @@ private:
#define ICE_STRING(X) #X
-namespace
+namespace IceGrid
{
#ifndef _WIN32
@@ -742,7 +746,7 @@ Activator::deactivate(const string& name, const Ice::ProcessPrx& process)
//
return;
}
-#endif
+#endif
//
// Try to shut down the server gracefully using the process proxy.
@@ -820,17 +824,10 @@ Activator::kill(const string& name)
throw ex;
}
- BOOL b = TerminateProcess(hnd, 1);
+ BOOL b = TerminateProcess(hnd, 0); // We use 0 for the exit code to make sure it's not considered as a crash.
CloseHandle(hnd);
- if(!b)
- {
- SyscallException ex(__FILE__, __LINE__);
- ex.error = getSystemErrno();
- throw ex;
- }
-
if(_traceLevels->activator > 1)
{
Ice::Trace out(_traceLevels->logger, _traceLevels->activatorCat);
@@ -962,19 +959,23 @@ Activator::destroy()
void
Activator::runTerminationListener()
{
- try
- {
- terminationListener();
- }
- catch(const Exception& ex)
- {
- Error out(_traceLevels->logger);
- out << "exception in process termination listener:\n" << ex;
- }
- catch(...)
+ while(true)
{
- Error out(_traceLevels->logger);
- out << "unknown exception in process termination listener";
+ try
+ {
+ terminationListener();
+ break;
+ }
+ catch(const Exception& ex)
+ {
+ Error out(_traceLevels->logger);
+ out << "exception in process termination listener:\n" << ex;
+ }
+ catch(...)
+ {
+ Error out(_traceLevels->logger);
+ out << "unknown exception in process termination listener";
+ }
}
}
@@ -1052,7 +1053,7 @@ Activator::terminationListener()
assert(pos < handles.size());
HANDLE hnd = handles[pos];
- vector<ServerIPtr> terminated;
+ vector<Process> terminated;
bool deactivated = false;
{
IceUtil::Monitor< IceUtil::Mutex>::Lock sync(*this);
@@ -1067,15 +1068,7 @@ Activator::terminationListener()
{
if(p->second.hnd == hnd)
{
- if(_traceLevels->activator > 0)
- {
- Ice::Trace out(_traceLevels->logger, _traceLevels->activatorCat);
- out << "detected termination of server `" << p->first << "'";
- }
-
- terminated.push_back(p->second.server);
-
- CloseHandle(hnd);
+ terminated.push_back(p->second);
_processes.erase(p);
break;
}
@@ -1085,16 +1078,31 @@ Activator::terminationListener()
deactivated = _deactivating && _processes.empty();
}
- for(vector<ServerIPtr>::const_iterator p = terminated.begin(); p != terminated.end(); ++p)
+ for(vector<Process>::const_iterator p = terminated.begin(); p != terminated.end(); ++p)
{
+ DWORD status;
+ BOOL b = GetExitCodeProcess(p->hnd, &status);
+ CloseHandle(p->hnd);
+ assert(status != STILL_ACTIVE);
+
+ if(_traceLevels->activator > 0)
+ {
+ Ice::Trace out(_traceLevels->logger, _traceLevels->activatorCat);
+ out << "detected termination of server `" << p->server->getId() << "'";
+ if(status != 0)
+ {
+ out << "\nexit code = " << status;
+ }
+ }
+
try
{
- (*p)->terminated();
+ p->server->terminated("", status);
}
catch(const Ice::LocalException& ex)
{
Ice::Warning out(_traceLevels->logger);
- out << "unexpected exception raised by server `" << (*p)->getId() << "' termination:\n" << ex;
+ out << "unexpected exception raised by server `" << p->server->getId() << "' termination:\n" << ex;
}
}
@@ -1148,7 +1156,7 @@ Activator::terminationListener()
throw ex;
}
- vector<pair<std::string, ServerIPtr> > terminated;
+ vector<Process> terminated;
bool deactivated = false;
{
IceUtil::Monitor< IceUtil::Mutex>::Lock sync(*this);
@@ -1210,17 +1218,7 @@ Activator::terminationListener()
// If the pipe was closed, the process has terminated.
//
- if(_traceLevels->activator > 0)
- {
- Ice::Trace out(_traceLevels->logger, _traceLevels->activatorCat);
- out << "detected termination of server `" << p->first << "':";
- if(!p->second.msg.empty())
- {
- out << "\n" << p->second.msg;
- }
- }
-
- terminated.push_back(make_pair(p->second.msg, p->second.server));
+ terminated.push_back(p->second);
close(p->second.pipeFd);
_processes.erase(p++);
@@ -1233,16 +1231,44 @@ Activator::terminationListener()
deactivated = _deactivating && _processes.empty();
}
- for(vector<pair<string, ServerIPtr> >::const_iterator p = terminated.begin(); p != terminated.end(); ++p)
+ for(vector<Process>::const_iterator p = terminated.begin(); p != terminated.end(); ++p)
{
+ int status;
+ pid_t pid = waitpid(p->pid, &status, 0);
+ if(pid < 0)
+ {
+ SyscallException ex(__FILE__, __LINE__);
+ ex.error = getSystemErrno();
+ throw ex;
+ }
+ assert(pid == p->pid);
+
+ if(_traceLevels->activator > 0)
+ {
+ Ice::Trace out(_traceLevels->logger, _traceLevels->activatorCat);
+ out << "detected termination of server `" << p->server->getId() << "'";
+ if(!p->msg.empty())
+ {
+ out << "\nreason = " << p->msg;
+ }
+ if(WIFEXITED(status) && status != 0)
+ {
+ out << "\nexit code = " << WEXITSTATUS(status);
+ }
+ else if(WIFSIGNALED(status))
+ {
+ out << "\nsignal = " << signalToString(WTERMSIG(status));
+ }
+ }
+
try
{
- p->second->terminated(p->first);
+ p->server->terminated(p->msg, status);
}
catch(const Ice::LocalException& ex)
{
Ice::Warning out(_traceLevels->logger);
- out << "unexpected exception raised by server `" << p->second->getId() << "' termination:\n" << ex;
+ out << "unexpected exception raised by server `" << p->server->getId() << "' termination:\n" << ex;
}
}
diff --git a/cpp/src/IceGrid/Activator.h b/cpp/src/IceGrid/Activator.h
index 85133192e08..7ea4267ccf4 100644
--- a/cpp/src/IceGrid/Activator.h
+++ b/cpp/src/IceGrid/Activator.h
@@ -22,6 +22,8 @@ typedef IceUtil::Handle<TraceLevels> TraceLevelsPtr;
class ServerI;
typedef IceUtil::Handle<ServerI> ServerIPtr;
+std::string signalToString(int);
+
class Activator : public IceUtil::Monitor< IceUtil::Mutex>, public IceUtil::Shared
{
public:
diff --git a/cpp/src/IceGrid/IceGridNode.cpp b/cpp/src/IceGrid/IceGridNode.cpp
index 1e51dd6d1a7..3535cf3ab4c 100644
--- a/cpp/src/IceGrid/IceGridNode.cpp
+++ b/cpp/src/IceGrid/IceGridNode.cpp
@@ -27,12 +27,7 @@
# define S_ISDIR(mode) ((mode) & _S_IFDIR)
# define S_ISREG(mode) ((mode) & _S_IFREG)
#else
-# include <csignal>
-# include <signal.h>
-# include <sys/wait.h>
-# include <sys/types.h>
# include <sys/stat.h>
-# include <unistd.h>
#endif
using namespace std;
@@ -142,35 +137,6 @@ private:
} // End of namespace IceGrid
-#ifndef _WIN32
-extern "C"
-{
-
-static void
-childHandler(int)
-{
- //
- // Call wait to de-allocate any resources allocated for the child
- // process and avoid zombie processes. See man wait or waitpid for
- // more information.
- //
- int olderrno = errno;
-
- pid_t pid;
- do
- {
- pid = waitpid(-1, 0, WNOHANG);
- }
- while(pid > 0);
-
- assert(pid != -1 || errno == ECHILD);
-
- errno = olderrno;
-}
-
-}
-#endif
-
CollocatedRegistry::CollocatedRegistry(const CommunicatorPtr& communicator, const ActivatorPtr& activator) :
RegistryI(communicator),
_activator(activator)
@@ -226,18 +192,6 @@ NodeService::shutdown()
bool
NodeService::start(int argc, char* argv[])
{
-#ifndef _WIN32
- //
- // This application forks, so we need a signal handler for child termination.
- //
- struct sigaction action;
- action.sa_handler = childHandler;
- sigemptyset(&action.sa_mask);
- sigaddset(&action.sa_mask, SIGCHLD);
- action.sa_flags = 0;
- sigaction(SIGCHLD, &action, 0);
-#endif
-
bool nowarn = false;
bool checkdb = false;
string desc;
diff --git a/cpp/src/IceGrid/ServerAdapterI.cpp b/cpp/src/IceGrid/ServerAdapterI.cpp
index 1bf8ab729c3..f016b95aa0e 100644
--- a/cpp/src/IceGrid/ServerAdapterI.cpp
+++ b/cpp/src/IceGrid/ServerAdapterI.cpp
@@ -208,6 +208,7 @@ ServerAdapterI::activationFailed(bool destroyed)
}
}
+ Lock sync(*this);
for(vector<AMD_Adapter_activatePtr>::const_iterator p = _activateCB.begin(); p != _activateCB.end(); ++p)
{
(*p)->ice_response(0);
diff --git a/cpp/src/IceGrid/ServerI.cpp b/cpp/src/IceGrid/ServerI.cpp
index 8c7bb230b25..f08dfeeaf84 100644
--- a/cpp/src/IceGrid/ServerI.cpp
+++ b/cpp/src/IceGrid/ServerI.cpp
@@ -24,6 +24,7 @@
#ifdef _WIN32
# include <direct.h>
+# include <signal.h>
#else
# include <unistd.h>
# include <dirent.h>
@@ -493,11 +494,14 @@ ServerI::ServerI(const NodeIPtr& node, const ServerPrx& proxy, const string& ser
_id(id),
_waitTime(wt),
_serversDir(serversDir),
+ _disableOnFailure(0),
_state(ServerI::Inactive),
_activation(ServerI::Manual),
_pid(0)
{
assert(_node->getActivator());
+ const_cast<int&>(_disableOnFailure) =
+ _node->getCommunicator()->getProperties()->getPropertyAsIntWithDefault("IceGrid.Node.DisableOnFailure", 0);
}
ServerI::~ServerI()
@@ -517,6 +521,21 @@ ServerI::start_async(const AMD_Server_startPtr& amdCB, const Ice::Current&)
}
//
+ // If the server was disabled because it failed, re-enable it
+ // when it is started manually or once the configured disable
+ // duration has elapsed since the failure.
+ //
+ if(_activation == Disabled &&
+ _failureTime != IceUtil::Time() &&
+ (amdCB ||
+ (_disableOnFailure > 0 &&
+ (_failureTime + IceUtil::Time::seconds(_disableOnFailure) < IceUtil::Time::now()))))
+ {
+ _failureTime = IceUtil::Time();
+ _activation = _previousActivation;
+ }
+
+ //
// If the amd callback is set, it's a remote start call to
// manually activate the server. Otherwise it's a call to
// activate the server on demand (called from ServerAdapterI).
@@ -629,6 +648,8 @@ ServerI::setEnabled(bool enabled, const ::Ice::Current&)
{
return;
}
+
+ _failureTime = IceUtil::Time();
_activation = enabled ? (_desc->activation == "on-demand" ? OnDemand : Manual) : Disabled;
}
@@ -688,7 +709,7 @@ ServerI::ServerActivation
ServerI::getActivationMode() const
{
Lock sync(*this);
- return _activation;
+ return _desc->activation == "on-demand" ? OnDemand : Manual;
}
const string&
@@ -1140,7 +1161,7 @@ ServerI::destroy()
}
void
-ServerI::terminated(const string& msg)
+ServerI::terminated(const string& msg, int status)
{
ServerAdapterDict adpts;
{
@@ -1175,16 +1196,45 @@ ServerI::terminated(const string& msg)
_process = 0;
_pid = 0;
+ if(_disableOnFailure != 0 && _activation != Disabled)
+ {
+ bool failed = false;
+#ifndef _WIN32
+ failed = WIFEXITED(status) && WEXITSTATUS(status) != 0;
+ if(WIFSIGNALED(status))
+ {
+ int s = WTERMSIG(status);
+ failed = s == SIGABRT || s == SIGILL || s == SIGBUS || s == SIGFPE || s == SIGSEGV;
+ }
+#else
+ failed = status != 0;
+#endif
+ if(failed)
+ {
+ _previousActivation = _activation;
+ _activation = Disabled;
+ _failureTime = IceUtil::Time::now();
+ }
+ }
+
if(_state != ServerI::Destroying)
{
- if(msg.empty())
+ ostringstream os;
+ os << "The server terminated unexpectedly";
+#ifndef _WIN32
+ if(WIFEXITED(status))
{
- setStateNoSync(ServerI::Inactive, "The server terminated unexpectedly.");
+ os << " with exit code " << WEXITSTATUS(status);
}
- else
+ else if(WIFSIGNALED(status))
{
- setStateNoSync(ServerI::Inactive, "The server terminated unexpectedly:\n" + msg);
+ os << " with signal " << signalToString(WTERMSIG(status));
}
+#else
+ os << " with exit code " << status;
+#endif
+ os << (msg.empty() ? "." : ":\n" + msg);
+ setStateNoSync(ServerI::Inactive, os.str());
command = nextCommand();
}
else
diff --git a/cpp/src/IceGrid/ServerI.h b/cpp/src/IceGrid/ServerI.h
index 7de46709cd2..5d50ad740c5 100644
--- a/cpp/src/IceGrid/ServerI.h
+++ b/cpp/src/IceGrid/ServerI.h
@@ -100,7 +100,7 @@ public:
void deactivate();
void update();
void destroy();
- void terminated(const std::string& = std::string());
+ void terminated(const std::string&, int);
private:
@@ -124,6 +124,7 @@ private:
const std::string _id;
const Ice::Int _waitTime;
const std::string _serversDir;
+ const int _disableOnFailure;
std::string _serverDir;
std::string _application;
@@ -137,6 +138,8 @@ private:
bool _processRegistered;
Ice::ProcessPrx _process;
std::set<std::string> _activeAdapters;
+ IceUtil::Time _failureTime;
+ ServerActivation _previousActivation;
DestroyCommandPtr _destroy;
StopCommandPtr _stop;