fix: send transaction retry code

This commit is contained in:
Timo Kösters 2021-04-24 18:01:05 +02:00
parent dd0b2597b7
commit 3dfc245633
No known key found for this signature in database
GPG key ID: 24DA7517711A2BA4

View file

@ -10,7 +10,7 @@ use crate::{
appservice_server, database::pusher, server_server, utils, Database, Error, PduEvent, Result,
};
use federation::transactions::send_transaction_message;
use log::warn;
use log::{error, warn};
use ring::digest;
use rocket::futures::stream::{FuturesUnordered, StreamExt};
use ruma::{
@ -32,10 +32,16 @@ pub enum OutgoingKind {
pub struct Sending {
/// The state for a given state hash.
pub(super) servernamepduids: sled::Tree, // ServernamePduId = (+ / $)SenderKey / ServerName / UserId + PduId
pub(super) servercurrentpdus: sled::Tree, // ServerCurrentPdus = (+ / $)ServerName / UserId + PduId (pduid can be empty for reservation)
pub(super) servercurrentpdus: sled::Tree, // ServerCurrentPdus = (+ / $)ServerName / UserId + PduId
pub(super) maximum_requests: Arc<Semaphore>,
}
enum TransactionStatus {
Running,
Failed(u32, Instant), // number of times failed, time of last failure
Retrying(u32), // number of times failed
}
impl Sending {
pub fn start_handler(&self, db: &Database) {
let servernamepduids = self.servernamepduids.clone();
@ -47,8 +53,10 @@ impl Sending {
let mut futures = FuturesUnordered::new();
// Retry requests we could not finish yet
let mut current_transactions = HashMap::<OutgoingKind, Vec<Vec<u8>>>::new();
let mut subscriber = servernamepduids.watch_prefix(b"");
let mut current_transaction_status = HashMap::<Vec<u8>, TransactionStatus>::new();
let mut initial_transactions = HashMap::<OutgoingKind, Vec<Vec<u8>>>::new();
for (key, outgoing_kind, pdu) in servercurrentpdus
.iter()
.filter_map(|r| r.ok())
@ -58,18 +66,15 @@ impl Sending {
.map(|(k, p)| (key, k, p.to_vec()))
})
{
if pdu.is_empty() {
// Remove old reservation key
servercurrentpdus.remove(key).unwrap();
continue;
}
let entry = current_transactions
.entry(outgoing_kind)
let entry = initial_transactions
.entry(outgoing_kind.clone())
.or_insert_with(Vec::new);
if entry.len() > 30 {
warn!("Dropping some current pdus because too many were queued. This should not happen.");
warn!(
"Dropping some current pdu: {:?} {:?} {:?}",
key, outgoing_kind, pdu
);
servercurrentpdus.remove(key).unwrap();
continue;
}
@ -77,8 +82,7 @@ impl Sending {
entry.push(pdu);
}
for (outgoing_kind, pdus) in current_transactions {
// Create new reservation
for (outgoing_kind, pdus) in initial_transactions {
let mut prefix = match &outgoing_kind {
OutgoingKind::Appservice(server) => {
let mut p = b"+".to_vec();
@ -99,14 +103,10 @@ impl Sending {
}
};
prefix.push(0xff);
servercurrentpdus.insert(prefix, &[]).unwrap();
current_transaction_status.insert(prefix, TransactionStatus::Running);
futures.push(Self::handle_event(outgoing_kind.clone(), pdus, &db));
}
let mut last_failed_try: HashMap<OutgoingKind, (u32, Instant)> = HashMap::new();
let mut subscriber = servernamepduids.watch_prefix(b"");
loop {
select! {
Some(response) = futures.next() => {
@ -138,10 +138,7 @@ impl Sending {
.keys()
.filter_map(|r| r.ok())
{
// Don't remove reservation yet
if prefix.len() != key.len() {
servercurrentpdus.remove(key).unwrap();
}
servercurrentpdus.remove(key).unwrap();
}
// Find events that have been added since starting the last request
@ -171,8 +168,7 @@ impl Sending {
)
);
} else {
servercurrentpdus.remove(&prefix).unwrap();
// servercurrentpdus with the prefix should be empty now
current_transaction_status.remove(&prefix);
}
}
Err((outgoing_kind, _)) => {
@ -198,15 +194,14 @@ impl Sending {
prefix.push(0xff);
last_failed_try.insert(outgoing_kind.clone(), match last_failed_try.get(&outgoing_kind) {
Some(last_failed) => {
(last_failed.0+1, Instant::now())
current_transaction_status.entry(prefix).and_modify(|e| *e = match e {
TransactionStatus::Running => TransactionStatus::Failed(1, Instant::now()),
TransactionStatus::Retrying(n) => TransactionStatus::Failed(*n+1, Instant::now()),
TransactionStatus::Failed(_, _) => {
error!("Request that was not even running failed?!");
return
},
None => {
(1, Instant::now())
}
});
servercurrentpdus.remove(&prefix).unwrap();
}
};
},
@ -220,24 +215,12 @@ impl Sending {
let servernamepduid = key.clone();
let exponential_backoff = |(tries, instant): &(u32, Instant)| {
// Fail if a request has failed recently (exponential backoff)
let mut min_elapsed_duration = Duration::from_secs(30) * (*tries) * (*tries);
if min_elapsed_duration > Duration::from_secs(60*60*24) {
min_elapsed_duration = Duration::from_secs(60*60*24);
}
let mut retry = false;
instant.elapsed() < min_elapsed_duration
};
if let Some((outgoing_kind, pdu_id)) = Self::parse_servercurrentpdus(&servernamepduid)
if let Some((outgoing_kind, prefix, pdu_id)) = Self::parse_servercurrentpdus(&servernamepduid)
.ok()
.filter(|(outgoing_kind, _)| {
if last_failed_try.get(outgoing_kind).map_or(false, exponential_backoff) {
return false;
}
let mut prefix = match outgoing_kind {
.map(|(outgoing_kind, pdu_id)| {
let mut prefix = match &outgoing_kind {
OutgoingKind::Appservice(serv) => {
let mut p = b"+".to_vec();
p.extend_from_slice(serv.as_bytes());
@ -258,20 +241,59 @@ impl Sending {
};
prefix.push(0xff);
servercurrentpdus
.compare_and_swap(prefix, Option::<&[u8]>::None, Some(&[])) // Try to reserve
== Ok(Ok(()))
(outgoing_kind, prefix, pdu_id)
})
.filter(|(_, prefix, _)| {
let entry = current_transaction_status.entry(prefix.clone());
let mut allow = true;
entry.and_modify(|e| match e {
TransactionStatus::Running | TransactionStatus::Retrying(_) => {
allow = false; // already running
},
TransactionStatus::Failed(tries, time) => {
// Fail if a request has failed recently (exponential backoff)
let mut min_elapsed_duration = Duration::from_secs(30) * (*tries) * (*tries);
if min_elapsed_duration > Duration::from_secs(60*60*24) {
min_elapsed_duration = Duration::from_secs(60*60*24);
}
if time.elapsed() < min_elapsed_duration {
allow = false;
} else {
retry = true;
*e = TransactionStatus::Retrying(*tries);
}
}
}).or_insert(TransactionStatus::Running);
allow
})
{
servercurrentpdus.insert(&key, &[]).unwrap();
servernamepduids.remove(&key).unwrap();
last_failed_try.remove(&outgoing_kind);
let mut pdus = Vec::new();
if retry {
// We retry the previous transaction
for pdu in servercurrentpdus
.scan_prefix(&prefix)
.filter_map(|r| r.ok())
.filter_map(|(key, _)| {
Self::parse_servercurrentpdus(&key)
.ok()
.map(|(_, p)| p.to_vec())
})
{
pdus.push(pdu);
}
} else {
servercurrentpdus.insert(&key, &[]).unwrap();
servernamepduids.remove(&key).unwrap();
pdus.push(pdu_id.to_vec());
}
futures.push(
Self::handle_event(
outgoing_kind,
vec![pdu_id.to_vec()],
pdus,
&db,
)
);
@ -342,7 +364,7 @@ impl Sending {
(
server.clone(),
Error::bad_database(
"[Appservice] Event in servernamepduids not found in ",
"[Appservice] Event in servernamepduids not found in db.",
),
)
})?