fix: de-index pdus when redacted

bit of code dedupe as well

Co-authored-by: strawberry <strawberry@puppygock.gay>
Signed-off-by: strawberry <strawberry@puppygock.gay>
This commit is contained in:
Benjamin Lee 2024-06-12 00:33:12 -04:00 committed by June 🍓🦴
parent 20a54aacd6
commit eb73d8c669
3 changed files with 43 additions and 13 deletions

View file

@ -7,6 +7,8 @@ type SearchPdusResult<'a> = Result<Option<(Box<dyn Iterator<Item = Vec<u8>> + 'a
pub trait Data: Send + Sync {
fn index_pdu(&self, shortroomid: u64, pdu_id: &[u8], message_body: &str) -> Result<()>;
fn deindex_pdu(&self, shortroomid: u64, pdu_id: &[u8], message_body: &str) -> Result<()>;
fn search_pdus<'a>(&'a self, room_id: &RoomId, search_string: &str) -> SearchPdusResult<'a>;
}
@ -34,6 +36,22 @@ impl Data for KeyValueDatabase {
self.tokenids.insert_batch(&mut batch)
}
fn deindex_pdu(&self, shortroomid: u64, pdu_id: &[u8], message_body: &str) -> Result<()> {
let batch = tokenize(message_body).map(|word| {
let mut key = shortroomid.to_be_bytes().to_vec();
key.extend_from_slice(word.as_bytes());
key.push(0xFF);
key.extend_from_slice(pdu_id); // TODO: currently we save the room id a second time here
key
});
for token in batch {
self.tokenids.remove(&token)?;
}
Ok(())
}
fn search_pdus<'a>(&'a self, room_id: &RoomId, search_string: &str) -> SearchPdusResult<'a> {
let prefix = services()
.rooms

View file

@ -17,6 +17,11 @@ impl Service {
self.db.index_pdu(shortroomid, pdu_id, message_body)
}
#[tracing::instrument(skip(self))]
pub fn deindex_pdu(&self, shortroomid: u64, pdu_id: &[u8], message_body: &str) -> Result<()> {
self.db.deindex_pdu(shortroomid, pdu_id, message_body)
}
#[tracing::instrument(skip(self))]
pub fn search_pdus<'a>(
&'a self, room_id: &RoomId, search_string: &str,

View file

@ -68,6 +68,11 @@ struct ExtractRelatesToEventId {
relates_to: ExtractEventId,
}
#[derive(Deserialize)]
struct ExtractBody {
body: Option<String>,
}
pub struct Service {
pub db: Arc<dyn Data>,
@ -397,7 +402,7 @@ impl Service {
| RoomVersionId::V9
| RoomVersionId::V10 => {
if let Some(redact_id) = &pdu.redacts {
self.redact_pdu(redact_id, pdu)?;
self.redact_pdu(redact_id, pdu, shortroomid)?;
}
},
RoomVersionId::V11 => {
@ -407,7 +412,7 @@ impl Service {
Error::bad_database("Invalid content in redaction pdu.")
})?;
if let Some(redact_id) = &content.redacts {
self.redact_pdu(redact_id, pdu)?;
self.redact_pdu(redact_id, pdu, shortroomid)?;
}
},
_ => {
@ -463,11 +468,6 @@ impl Service {
}
},
TimelineEventType::RoomMessage => {
#[derive(Deserialize)]
struct ExtractBody {
body: Option<String>,
}
let content = serde_json::from_str::<ExtractBody>(pdu.content.get())
.map_err(|_| Error::bad_database("Invalid content in pdu."))?;
@ -984,14 +984,26 @@ impl Service {
/// Replace a PDU with the redacted form.
#[tracing::instrument(skip(self, reason))]
pub fn redact_pdu(&self, event_id: &EventId, reason: &PduEvent) -> Result<()> {
pub fn redact_pdu(&self, event_id: &EventId, reason: &PduEvent, shortroomid: u64) -> Result<()> {
// TODO: Don't reserialize, keep original json
if let Some(pdu_id) = self.get_pdu_id(event_id)? {
let mut pdu = self
.get_pdu_from_id(&pdu_id)?
.ok_or_else(|| Error::bad_database("PDU ID points to invalid PDU."))?;
if let Ok(content) = serde_json::from_str::<ExtractBody>(pdu.content.get()) {
if let Some(body) = content.body {
services()
.rooms
.search
.deindex_pdu(shortroomid, &pdu_id, &body)?;
}
}
let room_version_id = services().rooms.state.get_room_version(&pdu.room_id)?;
pdu.redact(room_version_id, reason)?;
self.replace_pdu(
&pdu_id,
&utils::to_canonical_object(&pdu).map_err(|e| {
@ -1188,11 +1200,6 @@ impl Service {
drop(insert_lock);
if pdu.kind == TimelineEventType::RoomMessage {
#[derive(Deserialize)]
struct ExtractBody {
body: Option<String>,
}
let content = serde_json::from_str::<ExtractBody>(pdu.content.get())
.map_err(|_| Error::bad_database("Invalid content in pdu."))?;