Commit 69b2ddbd authored by Simon Morlat

Handle database connection failures, so that the current query is retried automatically.

This was already done for "getPassword", but not for the queries used by the presence server.
Factor the retry logic out into a shared helper.
parent 9439c16a
Pipeline #4584 passed with stages
in 24 minutes and 11 seconds
......@@ -279,7 +279,7 @@ list(APPEND FLEXISIP_INCLUDES ${OPENSSL_INCLUDE_DIR})
if(ENABLE_SOCI)
add_definitions(-DENABLE_SOCI)
list(APPEND FLEXISIP_SOURCES authdb-soci.cc)
list(APPEND FLEXISIP_SOURCES authdb-soci.cc soci-helper.cc soci-helper.hh)
list(APPEND FLEXISIP_LIBS ${SOCI_LIBRARY})
list(APPEND FLEXISIP_INCLUDES ${SOCI_INCLUDE_DIRS} ${SOCI_MYSQL_INCLUDES})
endif()
......
This diff is collapsed.
......@@ -310,7 +310,6 @@ private:
void getPasswordWithPool(const std::string &id, const std::string &domain,
const std::string &authid, AuthDbListener *listener, AuthDbListener *listener_ref);
void reconnectSession( soci::session &session );
void notifyAllListeners(std::list<std::tuple<std::string, std::string, AuthDbListener *>> &creds, const std::set<std::pair<std::string, std::string>> &presences);
......
......@@ -20,8 +20,8 @@
#include <thread>
#include "belle-sip/message.h"
#include "soci/mysql/soci-mysql.h"
#include "soci-helper.hh"
#include "bellesip-signaling-exception.hh"
#include "external-list-subscription.hh"
#include <flexisip/logmanager.hh>
......@@ -49,44 +49,25 @@ ExternalListSubscription::ExternalListSubscription(
SLOGE << "[SOCI] Auth queue is full, cannot fullfil user request for list subscription";
}
// Elapsed time between two time points, truncated to whole milliseconds and cast to unsigned long.
// Expands to unqualified duration_cast/milliseconds, so those names must be visible at the point of use.
#define DURATION_MS(start, stop) (unsigned long) duration_cast<milliseconds>((stop) - (start)).count()
/*
 * Close the given soci session and open it again.
 * Failures are logged and swallowed: the caller is already handling a database error and
 * only needs a best-effort attempt to get a working session back.
 */
void ExternalListSubscription::reconnectSession(soci::session &session) {
	try {
		SLOGE << "[SOCI] Trying close/reconnect session";
		session.close();
		session.reconnect();
		SLOGD << "[SOCI] Session " << session.get_backend_name() << " successfully reconnected";
	} catch (soci::mysql_soci_error const &e) {
		// No trailing endl: log statements in this file do not append one.
		SLOGE << "[SOCI] reconnectSession MySQL error: " << e.err_num_ << " " << e.what();
	} catch (exception const &e) {
		SLOGE << "[SOCI] reconnectSession error: " << e.what();
	}
}
void ExternalListSubscription::getUsersList(const string &sqlRequest, belle_sip_server_transaction_t *ist) {
steady_clock::time_point start;
steady_clock::time_point stop;
soci::session *sql = nullptr;
try {
start = steady_clock::now();
// will grab a connection from the pool. This is thread safe
sql = new soci::session(*mConnPool); //this may raise a soci_error exception, so keep it in the try block.
stop = steady_clock::now();
SLOGD << "[SOCI] Pool acquired in " << DURATION_MS(start, stop) << "ms";
start = stop;
SociHelper sociHelper(*mConnPool);
belle_sip_request_t *request = belle_sip_transaction_get_request(BELLE_SIP_TRANSACTION(ist));
belle_sip_header_to_t *toHeader = belle_sip_message_get_header_by_type(BELLE_SIP_MESSAGE(request), belle_sip_header_to_t);
belle_sip_header_from_t *fromHeader = belle_sip_message_get_header_by_type(BELLE_SIP_MESSAGE(request), belle_sip_header_from_t);
char *toUri = belle_sip_uri_to_string(belle_sip_header_address_get_uri(BELLE_SIP_HEADER_ADDRESS(toHeader)));
char *fromUri = belle_sip_uri_to_string(belle_sip_header_address_get_uri(BELLE_SIP_HEADER_ADDRESS(fromHeader)));
soci::rowset<soci::row> ret = (sql->prepare << sqlRequest, soci::use(string(fromUri), "from"), soci::use(string(toUri), "to"));
belle_sip_free(toUri);
belle_sip_free(fromUri);
char *c_toUri = belle_sip_uri_to_string(belle_sip_header_address_get_uri(BELLE_SIP_HEADER_ADDRESS(toHeader)));
char *c_fromUri = belle_sip_uri_to_string(belle_sip_header_address_get_uri(BELLE_SIP_HEADER_ADDRESS(fromHeader)));
string fromUri(c_fromUri);
string toUri(c_toUri);
belle_sip_free(c_fromUri);
belle_sip_free(c_toUri);
soci::rowset<soci::row> ret = sociHelper.execute([&](soci::session &sql){
return (sql.prepare << sqlRequest, soci::use(fromUri, "from"), soci::use(toUri, "to"));
});
string addrStr;
for (const auto &row : ret) {
......@@ -111,24 +92,9 @@ void ExternalListSubscription::getUsersList(const string &sqlRequest, belle_sip_
mListeners.push_back(make_shared<PresentityResourceListener>(*this, uri, name ? name : ""));
belle_sip_object_unref(uri); // Because PresentityResourceListener takes its own ref
}
stop = steady_clock::now();
} catch (soci::mysql_soci_error const &e) {
stop = steady_clock::now();
SLOGE << "[SOCI] getUsersList MySQL error after " << DURATION_MS(start, stop) << "ms : " << e.err_num_ << " " << e.what();
if (sql)
reconnectSession(*sql);
} catch (exception const &e) {
stop = steady_clock::now();
SLOGE << "[SOCI] getUsersList error after " << DURATION_MS(start, stop) << "ms : " << e.what();
if (sql)
reconnectSession(*sql);
} catch (SociHelper::DatabaseException &e) {
}
if (sql)
delete sql;
finishCreation(ist);
}
......
......@@ -47,7 +47,6 @@ public:
private:
void getUsersList(const std::string &sqlRequest, belle_sip_server_transaction_t *ist);
void reconnectSession(soci::session &session);
soci::connection_pool *mConnPool;
};
......
/*
Flexisip, a flexible SIP proxy server with media capabilities.
Copyright (C) 2019 Belledonne Communications SARL, All rights reserved.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "flexisip/logmanager.hh"
#include "soci-helper.hh"
namespace flexisip{
/*
 * Best-effort recovery of a session after a failed query: close it, then reconnect it.
 * Any error raised while doing so is logged and not propagated.
 */
void SociHelper::reconnectSession(soci::session &session) {
	try {
		SLOGE << "[SOCI] Trying close/reconnect session";

		session.close();
		session.reconnect();

		const auto backendName = session.get_backend_name();
		SLOGD << "[SOCI] Session " << backendName << " successfully reconnected";
	} catch (const soci::mysql_soci_error &mysqlError) {
		// MySQL-specific failure: also report the client error number.
		SLOGE << "[SOCI] reconnectSession MySQL error: " << mysqlError.err_num_ << " " << mysqlError.what();
	} catch (const std::exception &error) {
		SLOGE << "[SOCI] reconnectSession error: " << error.what();
	}
}
}
/*
Flexisip, a flexible SIP proxy server with media capabilities.
Copyright (C) 2019 Belledonne Communications SARL, All rights reserved.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <chrono>
#include <exception>
#include <memory>
#include <stdexcept>
#include "soci/soci.h"
#include "soci/mysql/soci-mysql.h"
#include "flexisip/logmanager.hh"
namespace flexisip{
/*
* This is a helper class to execute soci queries in a reliable way, ie
* it will take care automatically to reconnect the session if it has been disconnected.
* Indeed, inactive connections are dropped by mariadb/mysqld after some time, so this case
* happens quite frequently on a system where there are few users.
*/
class SociHelper{
public:
class DatabaseException : std::exception{
virtual const char* what() const noexcept override{
return "Database failure"; //The great thing about exception.
}
};
// Initialize the SociHelper by giving the connection pool.
SociHelper(soci::connection_pool &pool) : mPool(pool){};
// Execute the database query safely. The code to execute the query shall be provided in the lambda argument.
template <typename _lambda>
soci::rowset<soci::row> execute(_lambda requestLambda){
std::chrono::steady_clock::time_point start;
std::chrono::steady_clock::time_point stop;
soci::session *sql = nullptr;
int errorCount = 0;
bool retry;
do{
retry = false;
try{
start = std::chrono::steady_clock::now();
// will grab a connection from the pool. This is thread safe.
sql = new soci::session(mPool);
stop = std::chrono::steady_clock::now();
LOGD("[SOCI] Session acquired from pool in %lu ms", durationMs(start, stop));
start = stop;
auto ret = requestLambda(*sql);
stop = std::chrono::steady_clock::now();
LOGD("[SOCI] statement successfully executed in %lu ms", durationMs(start, stop));
return ret;
} catch (soci::mysql_soci_error const &e) {
errorCount++;
stop = std::chrono::steady_clock::now();
SLOGE << "[SOCI] MySQL error after " << durationMs(start, stop) << " ms : " << e.err_num_ << " " << e.what();
if (sql) reconnectSession(*sql);
if ((e.err_num_ == 2014 || e.err_num_ == 2006) && errorCount == 1){
/* 2014 is the infamous "Commands out of sync; you can't run this command now" mysql error,
* which is retryable.
* At this time we don't know if it is a soci or mysql bug, or bug with the sql request being executed.
*
* 2006 is "MySQL server has gone away" which is also retryable.
*/
SLOGE << "[SOCI] retryable mysql error ["<< e.err_num_<<"], so trying statement execution again...";
retry = true;
}
} catch (const std::runtime_error &e) {
errorCount++;
stop = std::chrono::steady_clock::now();
SLOGE << "[SOCI] error after " << durationMs(start, stop) << " ms : " << e.what();
if (sql) reconnectSession(*sql);
}
} while (retry);
if (sql) delete sql;
throw DatabaseException();
}
// Variant of the previous method for the case where no rowset is needed as return value.
// Probably it is possible to merge the two methods thanks std::enable_if (TODO later).
template <typename _lambda>
void executeNoReturn(_lambda requestLambda){
std::chrono::steady_clock::time_point start;
std::chrono::steady_clock::time_point stop;
soci::session *sql = nullptr;
int errorCount = 0;
bool retry;
do{
retry = false;
try{
// will grab a connection from the pool. This is thread safe.
if (!sql) {
start = std::chrono::steady_clock::now();
sql = new soci::session(mPool);
stop = std::chrono::steady_clock::now();
LOGD("[SOCI] Session acquired from pool in %lu ms", durationMs(start, stop));
start = stop;
}else{
start = std::chrono::steady_clock::now();
}
requestLambda(*sql);
stop = std::chrono::steady_clock::now();
LOGD("[SOCI] statement successfully executed in %lu ms", durationMs(start, stop));
return;
} catch (soci::mysql_soci_error const &e) {
errorCount++;
stop = std::chrono::steady_clock::now();
SLOGE << "[SOCI] MySQL error after " << durationMs(start, stop) << " ms : " << e.err_num_ << " " << e.what();
if (sql) reconnectSession(*sql);
if ((e.err_num_ == 2014 || e.err_num_ == 2006) && errorCount == 1){
/* 2014 is the infamous "Commands out of sync; you can't run this command now" mysql error,
* which is retryable.
* At this time we don't know if it is a soci or mysql bug, or bug with the sql request being executed.
*
* 2006 is "MySQL server has gone away" which is also retryable.
*/
SLOGE << "[SOCI] retryable mysql error ["<< e.err_num_<<"], so trying statement execution again...";
retry = true;
}
} catch (const std::runtime_error &e) {
errorCount++;
stop = std::chrono::steady_clock::now();
SLOGE << "[SOCI] error after " << durationMs(start, stop) << " ms : " << e.what();
if (sql) reconnectSession(*sql);
}
} while (retry);
if (sql) delete sql;
throw DatabaseException();
}
private:
void reconnectSession(soci::session &session);
unsigned long durationMs(std::chrono::steady_clock::time_point start, std::chrono::steady_clock::time_point stop){
return (unsigned long) std::chrono::duration_cast<std::chrono::milliseconds>((stop) - (start)).count();
}
soci::connection_pool &mPool;
};
} //end of namespace
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment