diff --git a/Cargo.lock b/Cargo.lock index 9935aa21..6944d8d1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -615,6 +615,7 @@ dependencies = [ "ruma-identifiers-validation", "rusqlite", "rust-rocksdb", + "sanitize-filename", "sd-notify", "sentry", "sentry-tower", @@ -3054,6 +3055,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "sanitize-filename" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ed72fbaf78e6f2d41744923916966c4fbe3d7c74e3037a8ee482f1115572603" +dependencies = [ + "lazy_static", + "regex", +] + [[package]] name = "schannel" version = "0.1.23" diff --git a/Cargo.toml b/Cargo.toml index 681057a0..f24789f6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -79,6 +79,7 @@ url = { version = "2.5.0", features = ["serde"] } async-trait = "0.1.80" lru-cache = "0.1.2" +sanitize-filename = "0.5.0" # standard date and time tools [dependencies.chrono] diff --git a/src/api/client_server/media.rs b/src/api/client_server/media.rs index 53091d3a..e4e7376d 100644 --- a/src/api/client_server/media.rs +++ b/src/api/client_server/media.rs @@ -1,7 +1,6 @@ use std::{io::Cursor, sync::Arc, time::Duration}; use image::io::Reader as ImgReader; -use infer::MatcherType; use ipaddress::IPAddress; use reqwest::Url; use ruma::api::client::{ @@ -18,7 +17,11 @@ use crate::{ debug_warn, service::media::{FileMeta, UrlPreviewData}, services, - utils::{self, server_name::server_is_ours}, + utils::{ + self, + content_disposition::{content_disposition_type, make_content_disposition, sanitise_filename}, + server_name::server_is_ours, + }, Error, Result, Ruma, RumaResponse, }; @@ -131,7 +134,13 @@ pub(crate) async fn create_content_route( mxc.clone(), body.filename .as_ref() - .map(|filename| format!("attachment; filename={filename}")) + .map(|filename| { + format!( + "{}; filename={}", + content_disposition_type(&body.file, &body.content_type), + sanitise_filename(filename.to_owned()) + ) + }) .as_deref(), body.content_type.as_deref(), &body.file, @@ -176,12 +185,11 @@ pub(crate) async fn get_content_route(body: Ruma) -> R if let Some(FileMeta { content_type, file, - .. + content_disposition, }) = services().media.get(mxc.clone()).await? { - let content_disposition = Some(String::from(content_disposition_type(&file, &content_type))); + let content_disposition = Some(make_content_disposition(&file, &content_type, content_disposition)); - // TODO: safely sanitise filename to be included in the content-disposition Ok(get_content::v3::Response { file, content_type, @@ -190,7 +198,7 @@ pub(crate) async fn get_content_route(body: Ruma) -> R cache_control: Some(CACHE_CONTROL_IMMUTABLE.into()), }) } else if !server_is_ours(&body.server_name) && body.allow_remote { - get_remote_content( + let response = get_remote_content( &mxc, &body.server_name, body.media_id.clone(), @@ -201,6 +209,20 @@ pub(crate) async fn get_content_route(body: Ruma) -> R .map_err(|e| { debug_warn!("Fetching media `{}` failed: {:?}", mxc, e); Error::BadRequest(ErrorKind::NotFound, "Remote media error.") + })?; + + let content_disposition = Some(make_content_disposition( + &response.file, + &response.content_type, + response.content_disposition, + )); + + Ok(get_content::v3::Response { + file: response.file, + content_type: response.content_type, + content_disposition, + cross_origin_resource_policy: Some(CORP_CROSS_ORIGIN.to_owned()), + cache_control: Some(CACHE_CONTROL_IMMUTABLE.to_owned()), }) } else { Err(Error::BadRequest(ErrorKind::NotFound, "Media not found.")) @@ -241,10 +263,10 @@ pub(crate) async fn get_content_as_filename_route( if let Some(FileMeta { content_type, file, - .. + content_disposition, }) = services().media.get(mxc.clone()).await? { - let content_disposition = Some(String::from(content_disposition_type(&file, &content_type))); + let content_disposition = Some(make_content_disposition(&file, &content_type, content_disposition)); Ok(get_content_as_filename::v3::Response { file, @@ -264,10 +286,11 @@ pub(crate) async fn get_content_as_filename_route( .await { Ok(remote_content_response) => { - let content_disposition = Some(String::from(content_disposition_type( + let content_disposition = Some(make_content_disposition( &remote_content_response.file, &remote_content_response.content_type, - ))); + remote_content_response.content_disposition, + )); Ok(get_content_as_filename::v3::Response { content_disposition, @@ -321,7 +344,7 @@ pub(crate) async fn get_content_thumbnail_route( if let Some(FileMeta { content_type, file, - .. + content_disposition, }) = services() .media .get_thumbnail( @@ -335,7 +358,7 @@ pub(crate) async fn get_content_thumbnail_route( ) .await? { - let content_disposition = Some(String::from(content_disposition_type(&file, &content_type))); + let content_disposition = Some(make_content_disposition(&file, &content_type, content_disposition)); Ok(get_content_thumbnail::v3::Response { file, @@ -387,10 +410,11 @@ pub(crate) async fn get_content_thumbnail_route( ) .await?; - let content_disposition = Some(String::from(content_disposition_type( + let content_disposition = Some(make_content_disposition( &get_thumbnail_response.file, &get_thumbnail_response.content_type, - ))); + get_thumbnail_response.content_disposition, + )); Ok(get_content_thumbnail::v3::Response { file: get_thumbnail_response.file, @@ -456,12 +480,18 @@ async fn get_remote_content( ) .await?; + let content_disposition = Some(make_content_disposition( + &content_response.file, + &content_response.content_type, + content_response.content_disposition, + )); + services() .media .create( None, mxc.to_owned(), - Some("attachment"), + content_disposition.as_deref(), content_response.content_type.as_deref(), &content_response.file, ) @@ -470,7 +500,7 @@ async fn get_remote_content( Ok(get_content::v3::Response { file: content_response.file, content_type: content_response.content_type, - content_disposition: Some("attachment".to_owned()), + content_disposition, cross_origin_resource_policy: Some(CORP_CROSS_ORIGIN.to_owned()), cache_control: Some(CACHE_CONTROL_IMMUTABLE.to_owned()), }) @@ -707,24 +737,3 @@ fn url_preview_allowed(url_str: &str) -> bool { false } - -/// Returns a Content-Disposition of `attachment` or `inline`, depending on the -/// *parsed* contents of the file uploaded via format magic keys using `infer` -/// crate (basically libmagic without needing libmagic). -/// -/// This forbids trusting what the client or remote server says the file is from -/// their `Content-Type` and we try to detect it ourselves. Also returns -/// `attachment` if the Content-Type does not match what we detected. -/// -/// TODO: add a "strict" function for comparing the Content-Type with what we -/// detected: `file_type.mime_type() != content_type` -fn content_disposition_type(buf: &[u8], _content_type: &Option) -> &'static str { - let Some(file_type) = infer::get(buf) else { - return "attachment"; - }; - - match file_type.matcher_type() { - MatcherType::IMAGE | MatcherType::AUDIO | MatcherType::TEXT | MatcherType::VIDEO => "inline", - _ => "attachment", - } -} diff --git a/src/utils/content_disposition.rs b/src/utils/content_disposition.rs new file mode 100644 index 00000000..6df0e14f --- /dev/null +++ b/src/utils/content_disposition.rs @@ -0,0 +1,88 @@ +use infer::MatcherType; + +/// Returns a Content-Disposition of `attachment` or `inline`, depending on the +/// *parsed* contents of the file uploaded via format magic keys using `infer` +/// crate (basically libmagic without needing libmagic). +/// +/// This forbids trusting what the client or remote server says the file is from +/// their `Content-Type` and we try to detect it ourselves. Also returns +/// `attachment` if the Content-Type does not match what we detected. +/// +/// TODO: add a "strict" function for comparing the Content-Type with what we +/// detected: `file_type.mime_type() != content_type` +pub(crate) fn content_disposition_type(buf: &[u8], _content_type: &Option) -> &'static str { + let Some(file_type) = infer::get(buf) else { + return "attachment"; + }; + + match file_type.matcher_type() { + MatcherType::IMAGE | MatcherType::AUDIO | MatcherType::TEXT | MatcherType::VIDEO => "inline", + _ => "attachment", + } +} + +/// sanitises the file name for the Content-Disposition using +/// `sanitize_filename` crate +#[tracing::instrument] +pub(crate) fn sanitise_filename(filename: String) -> String { + let options = sanitize_filename::Options { + truncate: false, + ..Default::default() + }; + + sanitize_filename::sanitize_with_options(filename, options) +} + +/// creates the final Content-Disposition based on whether the filename exists +/// or not. +/// +/// if filename exists: `Content-Disposition: attachment/inline; +/// filename=filename.ext` else: `Content-Disposition: attachment/inline` +#[tracing::instrument(skip(file))] +pub(crate) fn make_content_disposition( + file: &[u8], content_type: &Option, content_disposition: Option, +) -> String { + let filename = content_disposition.map_or_else(String::new, |content_disposition| { + let (_, filename) = content_disposition + .split_once("filename=") + .unwrap_or(("", "")); + + if filename.is_empty() { + String::new() + } else { + sanitise_filename(filename.to_owned()) + } + }); + + if !filename.is_empty() { + // Content-Disposition: attachment/inline; filename=filename.ext + format!("{}; filename={}", content_disposition_type(file, content_type), filename) + } else { + // Content-Disposition: attachment/inline + String::from(content_disposition_type(file, content_type)) + } +} + +#[cfg(test)] +mod tests { + #[test] + fn string_sanitisation() { + const SAMPLE: &str = + "🏳️‍⚧️this\\r\\n įs \r\\n ä \\r\nstrïng 🥴that\n\r ../../../../../../../may be\r\n malicious🏳️‍⚧️"; + const SANITISED: &str = "🏳️‍⚧️thisrn įs n ä rstrïng 🥴that ..............may be malicious🏳️‍⚧️"; + + let options = sanitize_filename::Options { + windows: true, + truncate: true, + replacement: "", + }; + + // cargo test -- --nocapture + println!("{}", SAMPLE); + println!("{}", sanitize_filename::sanitize_with_options(SAMPLE, options.clone())); + println!("{:?}", SAMPLE); + println!("{:?}", sanitize_filename::sanitize_with_options(SAMPLE, options.clone())); + + assert_eq!(SANITISED, sanitize_filename::sanitize_with_options(SAMPLE, options.clone())); + } +} diff --git a/src/utils/mod.rs b/src/utils/mod.rs index dbcef82d..2ceb0ff5 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -1,4 +1,5 @@ pub(crate) mod clap; +pub(crate) mod content_disposition; pub(crate) mod debug; pub(crate) mod error; pub(crate) mod server_name;