feat: lazy loading

This commit is contained in:
Timo Kösters 2022-01-04 14:30:13 +01:00
parent 5bcc1324ed
commit 68e910bb77
No known key found for this signature in database
GPG key ID: 356E705610F626D5
5 changed files with 321 additions and 42 deletions

View file

@ -1,5 +1,9 @@
use crate::{database::DatabaseGuard, ConduitResult, Error, Ruma}; use crate::{database::DatabaseGuard, ConduitResult, Error, Ruma};
use ruma::api::client::{error::ErrorKind, r0::context::get_context}; use ruma::{
api::client::{error::ErrorKind, r0::context::get_context},
events::EventType,
};
use std::collections::HashSet;
use std::convert::TryFrom; use std::convert::TryFrom;
#[cfg(feature = "conduit_bin")] #[cfg(feature = "conduit_bin")]
@ -21,6 +25,7 @@ pub async fn get_context_route(
body: Ruma<get_context::Request<'_>>, body: Ruma<get_context::Request<'_>>,
) -> ConduitResult<get_context::Response> { ) -> ConduitResult<get_context::Response> {
let sender_user = body.sender_user.as_ref().expect("user is authenticated"); let sender_user = body.sender_user.as_ref().expect("user is authenticated");
let sender_device = body.sender_device.as_ref().expect("user is authenticated");
if !db.rooms.is_joined(sender_user, &body.room_id)? { if !db.rooms.is_joined(sender_user, &body.room_id)? {
return Err(Error::BadRequest( return Err(Error::BadRequest(
@ -29,6 +34,8 @@ pub async fn get_context_route(
)); ));
} }
let mut lazy_loaded = HashSet::new();
let base_pdu_id = db let base_pdu_id = db
.rooms .rooms
.get_pdu_id(&body.event_id)? .get_pdu_id(&body.event_id)?
@ -45,8 +52,18 @@ pub async fn get_context_route(
.ok_or(Error::BadRequest( .ok_or(Error::BadRequest(
ErrorKind::NotFound, ErrorKind::NotFound,
"Base event not found.", "Base event not found.",
))? ))?;
.to_room_event();
if !db.rooms.lazy_load_was_sent_before(
&sender_user,
&sender_device,
&body.room_id,
&base_event.sender,
)? {
lazy_loaded.insert(base_event.sender.clone());
}
let base_event = base_event.to_room_event();
let events_before: Vec<_> = db let events_before: Vec<_> = db
.rooms .rooms
@ -60,6 +77,17 @@ pub async fn get_context_route(
.filter_map(|r| r.ok()) // Remove buggy events .filter_map(|r| r.ok()) // Remove buggy events
.collect(); .collect();
for (_, event) in &events_before {
if !db.rooms.lazy_load_was_sent_before(
&sender_user,
&sender_device,
&body.room_id,
&event.sender,
)? {
lazy_loaded.insert(event.sender.clone());
}
}
let start_token = events_before let start_token = events_before
.last() .last()
.and_then(|(pdu_id, _)| db.rooms.pdu_count(pdu_id).ok()) .and_then(|(pdu_id, _)| db.rooms.pdu_count(pdu_id).ok())
@ -82,6 +110,17 @@ pub async fn get_context_route(
.filter_map(|r| r.ok()) // Remove buggy events .filter_map(|r| r.ok()) // Remove buggy events
.collect(); .collect();
for (_, event) in &events_after {
if !db.rooms.lazy_load_was_sent_before(
&sender_user,
&sender_device,
&body.room_id,
&event.sender,
)? {
lazy_loaded.insert(event.sender.clone());
}
}
let end_token = events_after let end_token = events_after
.last() .last()
.and_then(|(pdu_id, _)| db.rooms.pdu_count(pdu_id).ok()) .and_then(|(pdu_id, _)| db.rooms.pdu_count(pdu_id).ok())
@ -92,18 +131,23 @@ pub async fn get_context_route(
.map(|(_, pdu)| pdu.to_room_event()) .map(|(_, pdu)| pdu.to_room_event())
.collect(); .collect();
let mut state = Vec::new();
for ll_id in &lazy_loaded {
if let Some(member_event) =
db.rooms
.room_state_get(&body.room_id, &EventType::RoomMember, ll_id.as_str())?
{
state.push(member_event.to_state_event());
}
}
let resp = get_context::Response { let resp = get_context::Response {
start: start_token, start: start_token,
end: end_token, end: end_token,
events_before, events_before,
event: Some(base_event), event: Some(base_event),
events_after, events_after,
state: db // TODO: State at event state,
.rooms
.room_state_full(&body.room_id)?
.values()
.map(|pdu| pdu.to_state_event())
.collect(),
}; };
Ok(resp.into()) Ok(resp.into())

View file

@ -6,7 +6,11 @@ use ruma::{
}, },
events::EventType, events::EventType,
}; };
use std::{collections::BTreeMap, convert::TryInto, sync::Arc}; use std::{
collections::{BTreeMap, HashSet},
convert::TryInto,
sync::Arc,
};
#[cfg(feature = "conduit_bin")] #[cfg(feature = "conduit_bin")]
use rocket::{get, put}; use rocket::{get, put};
@ -117,6 +121,7 @@ pub async fn get_message_events_route(
body: Ruma<get_message_events::Request<'_>>, body: Ruma<get_message_events::Request<'_>>,
) -> ConduitResult<get_message_events::Response> { ) -> ConduitResult<get_message_events::Response> {
let sender_user = body.sender_user.as_ref().expect("user is authenticated"); let sender_user = body.sender_user.as_ref().expect("user is authenticated");
let sender_device = body.sender_device.as_ref().expect("user is authenticated");
if !db.rooms.is_joined(sender_user, &body.room_id)? { if !db.rooms.is_joined(sender_user, &body.room_id)? {
return Err(Error::BadRequest( return Err(Error::BadRequest(
@ -136,6 +141,12 @@ pub async fn get_message_events_route(
// Use limit or else 10 // Use limit or else 10
let limit = body.limit.try_into().map_or(10_usize, |l: u32| l as usize); let limit = body.limit.try_into().map_or(10_usize, |l: u32| l as usize);
let next_token;
let mut resp = get_message_events::Response::new();
let mut lazy_loaded = HashSet::new();
match body.dir { match body.dir {
get_message_events::Direction::Forward => { get_message_events::Direction::Forward => {
let events_after: Vec<_> = db let events_after: Vec<_> = db
@ -152,21 +163,27 @@ pub async fn get_message_events_route(
.take_while(|&(k, _)| Some(Ok(k)) != to) // Stop at `to` .take_while(|&(k, _)| Some(Ok(k)) != to) // Stop at `to`
.collect(); .collect();
let end_token = events_after.last().map(|(count, _)| count.to_string()); for (_, event) in &events_after {
if !db.rooms.lazy_load_was_sent_before(
&sender_user,
&sender_device,
&body.room_id,
&event.sender,
)? {
lazy_loaded.insert(event.sender.clone());
}
}
next_token = events_after.last().map(|(count, _)| count).copied();
let events_after: Vec<_> = events_after let events_after: Vec<_> = events_after
.into_iter() .into_iter()
.map(|(_, pdu)| pdu.to_room_event()) .map(|(_, pdu)| pdu.to_room_event())
.collect(); .collect();
let resp = get_message_events::Response { resp.start = body.from.to_owned();
start: body.from.to_owned(), resp.end = next_token.map(|count| count.to_string());
end: end_token, resp.chunk = events_after;
chunk: events_after,
state: Vec::new(),
};
Ok(resp.into())
} }
get_message_events::Direction::Backward => { get_message_events::Direction::Backward => {
let events_before: Vec<_> = db let events_before: Vec<_> = db
@ -183,21 +200,51 @@ pub async fn get_message_events_route(
.take_while(|&(k, _)| Some(Ok(k)) != to) // Stop at `to` .take_while(|&(k, _)| Some(Ok(k)) != to) // Stop at `to`
.collect(); .collect();
let start_token = events_before.last().map(|(count, _)| count.to_string()); for (_, event) in &events_before {
if !db.rooms.lazy_load_was_sent_before(
&sender_user,
&sender_device,
&body.room_id,
&event.sender,
)? {
lazy_loaded.insert(event.sender.clone());
}
}
next_token = events_before.last().map(|(count, _)| count).copied();
let events_before: Vec<_> = events_before let events_before: Vec<_> = events_before
.into_iter() .into_iter()
.map(|(_, pdu)| pdu.to_room_event()) .map(|(_, pdu)| pdu.to_room_event())
.collect(); .collect();
let resp = get_message_events::Response { resp.start = body.from.to_owned();
start: body.from.to_owned(), resp.end = next_token.map(|count| count.to_string());
end: start_token, resp.chunk = events_before;
chunk: events_before, }
state: Vec::new(), }
};
db.rooms
.lazy_load_confirm_delivery(&sender_user, &sender_device, &body.room_id, from)?;
resp.state = Vec::new();
for ll_id in &lazy_loaded {
if let Some(member_event) =
db.rooms
.room_state_get(&body.room_id, &EventType::RoomMember, ll_id.as_str())?
{
resp.state.push(member_event.to_state_event());
}
}
if let Some(next_token) = next_token {
db.rooms.lazy_load_mark_sent(
&sender_user,
&sender_device,
&body.room_id,
lazy_loaded.into_iter().collect(),
next_token,
);
}
Ok(resp.into()) Ok(resp.into())
}
}
} }

View file

@ -264,6 +264,14 @@ async fn sync_helper(
// limited unless there are events in non_timeline_pdus // limited unless there are events in non_timeline_pdus
let limited = non_timeline_pdus.next().is_some(); let limited = non_timeline_pdus.next().is_some();
let mut timeline_users = HashSet::new();
for (_, event) in &timeline_pdus {
timeline_users.insert(event.sender.as_str().to_owned());
}
db.rooms
.lazy_load_confirm_delivery(&sender_user, &sender_device, &room_id, since)?;
// Database queries: // Database queries:
let current_shortstatehash = db let current_shortstatehash = db
@ -344,14 +352,51 @@ async fn sync_helper(
state_events, state_events,
) = if since_shortstatehash.is_none() { ) = if since_shortstatehash.is_none() {
// Probably since = 0, we will do an initial sync // Probably since = 0, we will do an initial sync
let (joined_member_count, invited_member_count, heroes) = calculate_counts()?; let (joined_member_count, invited_member_count, heroes) = calculate_counts()?;
let current_state_ids = db.rooms.state_full_ids(current_shortstatehash)?; let current_state_ids = db.rooms.state_full_ids(current_shortstatehash)?;
let state_events: Vec<_> = current_state_ids
.iter() let mut state_events = Vec::new();
.map(|(_, id)| db.rooms.get_pdu(id)) let mut lazy_loaded = Vec::new();
.filter_map(|r| r.ok().flatten())
.collect(); for (_, id) in current_state_ids {
let pdu = match db.rooms.get_pdu(&id)? {
Some(pdu) => pdu,
None => {
error!("Pdu in state not found: {}", id);
continue;
}
};
let state_key = pdu
.state_key
.as_ref()
.expect("state events have state keys");
if pdu.kind != EventType::RoomMember {
state_events.push(pdu);
} else if full_state || timeline_users.contains(state_key) {
// TODO: check filter: is ll enabled?
lazy_loaded.push(
UserId::parse(state_key.as_ref())
.expect("they are in timeline_users, so they should be correct"),
);
state_events.push(pdu);
}
}
// Reset lazy loading because this is an initial sync
db.rooms
.lazy_load_reset(&sender_user, &sender_device, &room_id)?;
// The state_events above should contain all timeline_users, let's mark them as lazy
// loaded.
db.rooms.lazy_load_mark_sent(
&sender_user,
&sender_device,
&room_id,
lazy_loaded,
next_batch,
);
( (
heroes, heroes,
@ -387,21 +432,67 @@ async fn sync_helper(
let since_state_ids = db.rooms.state_full_ids(since_shortstatehash)?; let since_state_ids = db.rooms.state_full_ids(since_shortstatehash)?;
let state_events = if joined_since_last_sync { /*
let state_events = if joined_since_last_sync || full_state {
current_state_ids current_state_ids
.iter() .iter()
.map(|(_, id)| db.rooms.get_pdu(id)) .map(|(_, id)| db.rooms.get_pdu(id))
.filter_map(|r| r.ok().flatten()) .filter_map(|r| r.ok().flatten())
.collect::<Vec<_>>() .collect::<Vec<_>>()
} else { } else {
current_state_ids */
.iter() let mut state_events = Vec::new();
.filter(|(key, id)| since_state_ids.get(key) != Some(id)) let mut lazy_loaded = Vec::new();
.map(|(_, id)| db.rooms.get_pdu(id))
.filter_map(|r| r.ok().flatten()) for (key, id) in current_state_ids {
.collect() let pdu = match db.rooms.get_pdu(&id)? {
Some(pdu) => pdu,
None => {
error!("Pdu in state not found: {}", id);
continue;
}
}; };
let state_key = pdu
.state_key
.as_ref()
.expect("state events have state keys");
if pdu.kind != EventType::RoomMember {
if full_state || since_state_ids.get(&key) != Some(&id) {
state_events.push(pdu);
}
continue;
}
// Pdu has to be a member event
let state_key_userid = UserId::parse(state_key.as_ref())
.expect("they are in timeline_users, so they should be correct");
if full_state || since_state_ids.get(&key) != Some(&id) {
lazy_loaded.push(state_key_userid);
state_events.push(pdu);
} else if timeline_users.contains(state_key)
&& !db.rooms.lazy_load_was_sent_before(
&sender_user,
&sender_device,
&room_id,
&state_key_userid,
)?
{
lazy_loaded.push(state_key_userid);
state_events.push(pdu);
}
}
db.rooms.lazy_load_mark_sent(
&sender_user,
&sender_device,
&room_id,
lazy_loaded,
next_batch,
);
let encrypted_room = db let encrypted_room = db
.rooms .rooms
.state_get(current_shortstatehash, &EventType::RoomEncryption, "")? .state_get(current_shortstatehash, &EventType::RoomEncryption, "")?

View file

@ -288,6 +288,8 @@ impl Database {
userroomid_leftstate: builder.open_tree("userroomid_leftstate")?, userroomid_leftstate: builder.open_tree("userroomid_leftstate")?,
roomuserid_leftcount: builder.open_tree("roomuserid_leftcount")?, roomuserid_leftcount: builder.open_tree("roomuserid_leftcount")?,
lazyloadedids: builder.open_tree("lazyloadedids")?,
userroomid_notificationcount: builder.open_tree("userroomid_notificationcount")?, userroomid_notificationcount: builder.open_tree("userroomid_notificationcount")?,
userroomid_highlightcount: builder.open_tree("userroomid_highlightcount")?, userroomid_highlightcount: builder.open_tree("userroomid_highlightcount")?,
@ -323,6 +325,7 @@ impl Database {
statekeyshort_cache: Mutex::new(LruCache::new(1_000_000)), statekeyshort_cache: Mutex::new(LruCache::new(1_000_000)),
our_real_users_cache: RwLock::new(HashMap::new()), our_real_users_cache: RwLock::new(HashMap::new()),
appservice_in_room_cache: RwLock::new(HashMap::new()), appservice_in_room_cache: RwLock::new(HashMap::new()),
lazy_load_waiting: Mutex::new(HashMap::new()),
stateinfo_cache: Mutex::new(LruCache::new(1000)), stateinfo_cache: Mutex::new(LruCache::new(1000)),
}, },
account_data: account_data::AccountData { account_data: account_data::AccountData {

View file

@ -28,7 +28,7 @@ use ruma::{
push::{Action, Ruleset, Tweak}, push::{Action, Ruleset, Tweak},
serde::{CanonicalJsonObject, CanonicalJsonValue, Raw}, serde::{CanonicalJsonObject, CanonicalJsonValue, Raw},
state_res::{self, RoomVersion, StateMap}, state_res::{self, RoomVersion, StateMap},
uint, EventId, RoomAliasId, RoomId, RoomVersionId, ServerName, UserId, uint, DeviceId, EventId, RoomAliasId, RoomId, RoomVersionId, ServerName, UserId,
}; };
use serde::Deserialize; use serde::Deserialize;
use serde_json::value::to_raw_value; use serde_json::value::to_raw_value;
@ -79,6 +79,8 @@ pub struct Rooms {
pub(super) userroomid_leftstate: Arc<dyn Tree>, pub(super) userroomid_leftstate: Arc<dyn Tree>,
pub(super) roomuserid_leftcount: Arc<dyn Tree>, pub(super) roomuserid_leftcount: Arc<dyn Tree>,
pub(super) lazyloadedids: Arc<dyn Tree>, // LazyLoadedIds = UserId + DeviceId + RoomId + LazyLoadedUserId
pub(super) userroomid_notificationcount: Arc<dyn Tree>, // NotifyCount = u64 pub(super) userroomid_notificationcount: Arc<dyn Tree>, // NotifyCount = u64
pub(super) userroomid_highlightcount: Arc<dyn Tree>, // HightlightCount = u64 pub(super) userroomid_highlightcount: Arc<dyn Tree>, // HightlightCount = u64
@ -117,6 +119,8 @@ pub struct Rooms {
pub(super) shortstatekey_cache: Mutex<LruCache<u64, (EventType, String)>>, pub(super) shortstatekey_cache: Mutex<LruCache<u64, (EventType, String)>>,
pub(super) our_real_users_cache: RwLock<HashMap<Box<RoomId>, Arc<HashSet<Box<UserId>>>>>, pub(super) our_real_users_cache: RwLock<HashMap<Box<RoomId>, Arc<HashSet<Box<UserId>>>>>,
pub(super) appservice_in_room_cache: RwLock<HashMap<Box<RoomId>, HashMap<String, bool>>>, pub(super) appservice_in_room_cache: RwLock<HashMap<Box<RoomId>, HashMap<String, bool>>>,
pub(super) lazy_load_waiting:
Mutex<HashMap<(Box<UserId>, Box<DeviceId>, Box<RoomId>, u64), Vec<Box<UserId>>>>,
pub(super) stateinfo_cache: Mutex< pub(super) stateinfo_cache: Mutex<
LruCache< LruCache<
u64, u64,
@ -3466,4 +3470,94 @@ impl Rooms {
Ok(()) Ok(())
} }
/// Reports whether a lazy-loading marker is stored for `ll_user` under the
/// given user, device and room.
///
/// The lookup key is the byte representation of `user_id`, `device_id`,
/// `room_id` and `ll_user`, in that order, separated by single `0xff` bytes.
///
/// # Errors
///
/// Propagates any error returned by the `lazyloadedids` tree lookup.
#[tracing::instrument(skip(self))]
pub fn lazy_load_was_sent_before(
    &self,
    user_id: &UserId,
    device_id: &DeviceId,
    room_id: &RoomId,
    ll_user: &UserId,
) -> Result<bool> {
    // Joining the four components with 0xff yields exactly
    // user 0xff device 0xff room 0xff ll_user.
    let lookup_key: Vec<u8> = [
        user_id.as_bytes(),
        device_id.as_bytes(),
        room_id.as_bytes(),
        ll_user.as_bytes(),
    ]
    .join(&0xff_u8);

    let stored = self.lazyloadedids.get(&lookup_key)?;
    Ok(stored.is_some())
}
/// Records `lazy_load` in the in-memory `lazy_load_waiting` map under the key
/// `(user_id, device_id, room_id, count)`.
///
/// Nothing is written to the `lazyloadedids` tree here; this only stores the
/// batch in memory. An existing entry under the same key is replaced.
///
/// # Panics
///
/// Panics if locking `lazy_load_waiting` returns an error.
#[tracing::instrument(skip(self))]
pub fn lazy_load_mark_sent(
    &self,
    user_id: &UserId,
    device_id: &DeviceId,
    room_id: &RoomId,
    lazy_load: Vec<Box<UserId>>,
    count: u64,
) {
    let pending_key = (
        user_id.to_owned(),
        device_id.to_owned(),
        room_id.to_owned(),
        count,
    );

    let mut waiting = self.lazy_load_waiting.lock().unwrap();
    waiting.insert(pending_key, lazy_load);
}
/// Persists the batch waiting under `(user_id, device_id, room_id, since)`,
/// if there is one.
///
/// The matching entry is removed from the in-memory `lazy_load_waiting` map.
/// For every user id in it, a key made of
/// `user_id 0xff device_id 0xff room_id 0xff <lazy loaded user id>` is
/// inserted into the `lazyloadedids` tree with an empty value. When no entry
/// matches, nothing is written.
///
/// # Errors
///
/// Propagates the first error returned by a `lazyloadedids` insert. The
/// waiting entry has already been removed at that point.
///
/// # Panics
///
/// Panics if locking `lazy_load_waiting` returns an error.
#[tracing::instrument(skip(self))]
pub fn lazy_load_confirm_delivery(
    &self,
    user_id: &UserId,
    device_id: &DeviceId,
    room_id: &RoomId,
    since: u64,
) -> Result<()> {
    // Take the entry in its own statement: the mutex guard is a temporary
    // that is dropped at the end of this `let`, so the lock is not held
    // while the database inserts below run. (As an `if let` scrutinee the
    // guard would stay alive for the entire body.)
    let pending = self.lazy_load_waiting.lock().unwrap().remove(&(
        user_id.to_owned(),
        device_id.to_owned(),
        room_id.to_owned(),
        since,
    ));

    let user_ids = match pending {
        Some(user_ids) => user_ids,
        // Nothing was waiting under this key.
        None => return Ok(()),
    };

    // Shared key prefix: user 0xff device 0xff room 0xff
    let mut prefix = user_id.as_bytes().to_vec();
    prefix.push(0xff);
    prefix.extend_from_slice(device_id.as_bytes());
    prefix.push(0xff);
    prefix.extend_from_slice(room_id.as_bytes());
    prefix.push(0xff);

    for ll_id in user_ids {
        let mut key = prefix.clone();
        key.extend_from_slice(ll_id.as_bytes());
        self.lazyloadedids.insert(&key, &[])?;
    }

    Ok(())
}
/// Removes every `lazyloadedids` entry whose key starts with
/// `user_id 0xff device_id 0xff room_id 0xff`.
///
/// The parameters are plain references, matching the other `lazy_load_*`
/// methods. Call sites that pass `&Box<UserId>` (and likewise for the device
/// and room ids) still compile through deref coercion.
///
/// # Errors
///
/// Propagates the first error returned by a `lazyloadedids` removal; entries
/// removed before the failure stay removed.
#[tracing::instrument(skip(self))]
pub fn lazy_load_reset(
    &self,
    user_id: &UserId,
    device_id: &DeviceId,
    room_id: &RoomId,
) -> Result<()> {
    // Key prefix: user 0xff device 0xff room 0xff
    let mut prefix = user_id.as_bytes().to_vec();
    prefix.push(0xff);
    prefix.extend_from_slice(device_id.as_bytes());
    prefix.push(0xff);
    prefix.extend_from_slice(room_id.as_bytes());
    prefix.push(0xff);

    for (key, _) in self.lazyloadedids.scan_prefix(prefix) {
        self.lazyloadedids.remove(&key)?;
    }

    Ok(())
}
} }