split media related migrations into service dir

Signed-off-by: Jason Volk <jason@zemos.net>
This commit is contained in:
Jason Volk 2024-08-27 01:55:09 +00:00
parent 73af171830
commit 47f9c69eff
3 changed files with 164 additions and 154 deletions

View file

@ -1,15 +1,12 @@
use std::{
collections::{HashMap, HashSet},
ffi::{OsStr, OsString},
fs::{self},
io::Write,
mem::size_of,
path::PathBuf,
sync::Arc,
time::Instant,
};
use conduit::{debug, debug_info, debug_warn, error, info, utils, warn, Config, Error, Result};
use conduit::{debug, debug_info, debug_warn, error, info, utils, warn, Error, Result};
use itertools::Itertools;
use ruma::{
events::{push_rules::PushRulesEvent, room::member::MembershipState, GlobalAccountDataEventType},
@ -17,7 +14,7 @@ use ruma::{
EventId, OwnedRoomId, RoomId, UserId,
};
use crate::Services;
use crate::{media, Services};
/// The current schema version.
/// - If database is opened at greater version we reject with error. The
@ -25,7 +22,7 @@ use crate::Services;
/// - If database is opened at lesser version we apply migrations up to this.
/// Note that named-feature migrations may also be performed when opening at
/// equal or lesser version. These are expected to be backward-compatible.
const DATABASE_VERSION: u64 = 13;
pub(crate) const DATABASE_VERSION: u64 = 13;
pub(crate) async fn migrations(services: &Services) -> Result<()> {
// Matrix resource ownership is based on the server name; changing it
@ -131,9 +128,9 @@ async fn migrate(services: &Services) -> Result<()> {
}
if db["global"].get(b"feat_sha256_media")?.is_none() {
migrate_sha256_media(services).await?;
media::migrations::migrate_sha256_media(services).await?;
} else if config.media_startup_check {
checkup_sha256_media(services).await?;
media::migrations::checkup_sha256_media(services).await?;
}
if db["global"]
@ -738,152 +735,6 @@ async fn db_lt_13(services: &Services) -> Result<()> {
Ok(())
}
/// Migrates a media directory from legacy base64 file names to sha2 file names.
/// All errors are fatal. Upon success the database is keyed to not perform this
/// again.
async fn migrate_sha256_media(services: &Services) -> Result<()> {
let db = &services.db;
let config = &services.server.config;
warn!("Migrating legacy base64 file names to sha256 file names");
let mediaid_file = &db["mediaid_file"];
// Move old media files to new names
let mut changes = Vec::<(PathBuf, PathBuf)>::new();
for (key, _) in mediaid_file.iter() {
let old = services.media.get_media_file_b64(&key);
let new = services.media.get_media_file_sha256(&key);
debug!(?key, ?old, ?new, num = changes.len(), "change");
changes.push((old, new));
}
// move the file to the new location
for (old_path, path) in changes {
if old_path.exists() {
tokio::fs::rename(&old_path, &path).await?;
if config.media_compat_file_link {
tokio::fs::symlink(&path, &old_path).await?;
}
}
}
// Apply fix from when sha256_media was backward-incompat and bumped the schema
// version from 13 to 14. For users satisfying these conditions we can go back.
if services.globals.db.database_version()? == 14 && DATABASE_VERSION == 13 {
services.globals.db.bump_database_version(13)?;
}
db["global"].insert(b"feat_sha256_media", &[])?;
info!("Finished applying sha256_media");
Ok(())
}
/// Check is run on startup for prior-migrated media directories. This handles:
/// - Going back and forth to non-sha256 legacy binaries (e.g. upstream).
/// - Deletion of artifacts in the media directory which will then fall out of
/// sync with the database.
async fn checkup_sha256_media(services: &Services) -> Result<()> {
use crate::media::encode_key;
debug!("Checking integrity of media directory");
let db = &services.db;
let media = &services.media;
let config = &services.server.config;
let mediaid_file = &db["mediaid_file"];
let mediaid_user = &db["mediaid_user"];
let dbs = (mediaid_file, mediaid_user);
let timer = Instant::now();
let dir = media.get_media_dir();
let files: HashSet<OsString> = fs::read_dir(dir)?
.filter_map(|ent| ent.map_or(None, |ent| Some(ent.path().into_os_string())))
.collect();
for key in media.db.get_all_media_keys() {
let new_path = media.get_media_file_sha256(&key).into_os_string();
let old_path = media.get_media_file_b64(&key).into_os_string();
if let Err(e) = handle_media_check(&dbs, config, &files, &key, &new_path, &old_path).await {
error!(
media_id = ?encode_key(&key), ?new_path, ?old_path,
"Failed to resolve media check failure: {e}"
);
}
}
debug_info!(
elapsed = ?timer.elapsed(),
"Finished checking media directory"
);
Ok(())
}
async fn handle_media_check(
dbs: &(&Arc<database::Map>, &Arc<database::Map>), config: &Config, files: &HashSet<OsString>, key: &[u8],
new_path: &OsStr, old_path: &OsStr,
) -> Result<()> {
use crate::media::encode_key;
let (mediaid_file, mediaid_user) = dbs;
let new_exists = files.contains(new_path);
let old_exists = files.contains(old_path);
let old_is_symlink = || async {
tokio::fs::symlink_metadata(old_path)
.await
.map_or(false, |md| md.is_symlink())
};
if config.prune_missing_media && !old_exists && !new_exists {
error!(
media_id = ?encode_key(key), ?new_path, ?old_path,
"Media is missing at all paths. Removing from database..."
);
mediaid_file.remove(key)?;
mediaid_user.remove(key)?;
}
if config.media_compat_file_link && !old_exists && new_exists {
debug_warn!(
media_id = ?encode_key(key), ?new_path, ?old_path,
"Media found but missing legacy link. Fixing..."
);
tokio::fs::symlink(&new_path, &old_path).await?;
}
if config.media_compat_file_link && !new_exists && old_exists {
debug_warn!(
media_id = ?encode_key(key), ?new_path, ?old_path,
"Legacy media found without sha256 migration. Fixing..."
);
debug_assert!(
old_is_symlink().await,
"Legacy media not expected to be a symlink without an existing sha256 migration."
);
tokio::fs::rename(&old_path, &new_path).await?;
tokio::fs::symlink(&new_path, &old_path).await?;
}
if !config.media_compat_file_link && old_exists && old_is_symlink().await {
debug_warn!(
media_id = ?encode_key(key), ?new_path, ?old_path,
"Legacy link found but compat disabled. Cleansing symlink..."
);
debug_assert!(
new_exists,
"sha256 migration into new file expected prior to cleaning legacy symlink here."
);
tokio::fs::remove_file(&old_path).await?;
}
Ok(())
}
async fn fix_bad_double_separator_in_state_cache(services: &Services) -> Result<()> {
warn!("Fixing bad double separator in state_cache roomuserid_joined");

View file

@ -0,0 +1,158 @@
use std::{
collections::HashSet,
ffi::{OsStr, OsString},
fs::{self},
path::PathBuf,
sync::Arc,
time::Instant,
};
use conduit::{debug, debug_info, debug_warn, error, info, warn, Config, Result};
use crate::{globals, Services};
/// Migrates a media directory from legacy base64 file names to sha2 file names.
/// All errors are fatal. Upon success the database is keyed to not perform this
/// again.
pub(crate) async fn migrate_sha256_media(services: &Services) -> Result<()> {
let db = &services.db;
let config = &services.server.config;
warn!("Migrating legacy base64 file names to sha256 file names");
let mediaid_file = &db["mediaid_file"];
// Move old media files to new names
let mut changes = Vec::<(PathBuf, PathBuf)>::new();
for (key, _) in mediaid_file.iter() {
let old = services.media.get_media_file_b64(&key);
let new = services.media.get_media_file_sha256(&key);
debug!(?key, ?old, ?new, num = changes.len(), "change");
changes.push((old, new));
}
// move the file to the new location
for (old_path, path) in changes {
if old_path.exists() {
tokio::fs::rename(&old_path, &path).await?;
if config.media_compat_file_link {
tokio::fs::symlink(&path, &old_path).await?;
}
}
}
// Apply fix from when sha256_media was backward-incompat and bumped the schema
// version from 13 to 14. For users satisfying these conditions we can go back.
if services.globals.db.database_version()? == 14 && globals::migrations::DATABASE_VERSION == 13 {
services.globals.db.bump_database_version(13)?;
}
db["global"].insert(b"feat_sha256_media", &[])?;
info!("Finished applying sha256_media");
Ok(())
}
/// Check is run on startup for prior-migrated media directories. This handles:
/// - Going back and forth to non-sha256 legacy binaries (e.g. upstream).
/// - Deletion of artifacts in the media directory which will then fall out of
/// sync with the database.
pub(crate) async fn checkup_sha256_media(services: &Services) -> Result<()> {
use crate::media::encode_key;
debug!("Checking integrity of media directory");
let db = &services.db;
let media = &services.media;
let config = &services.server.config;
let mediaid_file = &db["mediaid_file"];
let mediaid_user = &db["mediaid_user"];
let dbs = (mediaid_file, mediaid_user);
let timer = Instant::now();
let dir = media.get_media_dir();
let files: HashSet<OsString> = fs::read_dir(dir)?
.filter_map(|ent| ent.map_or(None, |ent| Some(ent.path().into_os_string())))
.collect();
for key in media.db.get_all_media_keys() {
let new_path = media.get_media_file_sha256(&key).into_os_string();
let old_path = media.get_media_file_b64(&key).into_os_string();
if let Err(e) = handle_media_check(&dbs, config, &files, &key, &new_path, &old_path).await {
error!(
media_id = ?encode_key(&key), ?new_path, ?old_path,
"Failed to resolve media check failure: {e}"
);
}
}
debug_info!(
elapsed = ?timer.elapsed(),
"Finished checking media directory"
);
Ok(())
}
async fn handle_media_check(
dbs: &(&Arc<database::Map>, &Arc<database::Map>), config: &Config, files: &HashSet<OsString>, key: &[u8],
new_path: &OsStr, old_path: &OsStr,
) -> Result<()> {
use crate::media::encode_key;
let (mediaid_file, mediaid_user) = dbs;
let new_exists = files.contains(new_path);
let old_exists = files.contains(old_path);
let old_is_symlink = || async {
tokio::fs::symlink_metadata(old_path)
.await
.map_or(false, |md| md.is_symlink())
};
if config.prune_missing_media && !old_exists && !new_exists {
error!(
media_id = ?encode_key(key), ?new_path, ?old_path,
"Media is missing at all paths. Removing from database..."
);
mediaid_file.remove(key)?;
mediaid_user.remove(key)?;
}
if config.media_compat_file_link && !old_exists && new_exists {
debug_warn!(
media_id = ?encode_key(key), ?new_path, ?old_path,
"Media found but missing legacy link. Fixing..."
);
tokio::fs::symlink(&new_path, &old_path).await?;
}
if config.media_compat_file_link && !new_exists && old_exists {
debug_warn!(
media_id = ?encode_key(key), ?new_path, ?old_path,
"Legacy media found without sha256 migration. Fixing..."
);
debug_assert!(
old_is_symlink().await,
"Legacy media not expected to be a symlink without an existing sha256 migration."
);
tokio::fs::rename(&old_path, &new_path).await?;
tokio::fs::symlink(&new_path, &old_path).await?;
}
if !config.media_compat_file_link && old_exists && old_is_symlink().await {
debug_warn!(
media_id = ?encode_key(key), ?new_path, ?old_path,
"Legacy link found but compat disabled. Cleansing symlink..."
);
debug_assert!(
new_exists,
"sha256 migration into new file expected prior to cleaning legacy symlink here."
);
tokio::fs::remove_file(&old_path).await?;
}
Ok(())
}

View file

@ -1,4 +1,5 @@
mod data;
pub(super) mod migrations;
mod preview;
mod remote;
mod tests;