feat: URL preview support

from upstream MR https://gitlab.com/famedly/conduit/-/merge_requests/347
with the following changes (so far):
- remove hardcoded list of allowed hosts (strongly disagree with this,
even if it is desired, it should not be harcoded)
- add more allow config options for granularity via URL contains,
host contains, and domain is (explicit match) for security
- warn if a user is allowing all URLs to be previewed for security reasons
- replace an expect with proper error handling
- bump webpage to 2.0
- improved code style a tad

Co-authored-by: rooot <hey@rooot.gay>
Signed-off-by: rooot <hey@rooot.gay>
Signed-off-by: strawberry <strawberry@puppygock.gay>
This commit is contained in:
Reiner Herrmann 2024-02-09 23:16:06 -05:00 committed by June
parent 6f26be1c6e
commit c0dd5b1cc2
13 changed files with 821 additions and 41 deletions

242
Cargo.lock generated
View file

@ -102,7 +102,7 @@ checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.48",
]
[[package]]
@ -234,7 +234,7 @@ dependencies = [
"regex",
"rustc-hash",
"shlex",
"syn",
"syn 2.0.48",
]
[[package]]
@ -340,9 +340,9 @@ dependencies = [
[[package]]
name = "clap"
version = "4.4.18"
version = "4.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e578d6ec4194633722ccf9544794b71b1385c3c027efe0c55db226fc880865c"
checksum = "80c21025abd42669a92efc996ef13cfb2c5c627858421ea58d5c3b331a6c134f"
dependencies = [
"clap_builder",
"clap_derive",
@ -350,9 +350,9 @@ dependencies = [
[[package]]
name = "clap_builder"
version = "4.4.18"
version = "4.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4df4df40ec50c46000231c914968278b1eb05098cf8f1b3a518a95030e71d1c7"
checksum = "458bf1f341769dfcf849846f65dffdf9146daa56bcd2a47cb4e1de9915567c99"
dependencies = [
"anstyle",
"clap_lex",
@ -360,21 +360,21 @@ dependencies = [
[[package]]
name = "clap_derive"
version = "4.4.7"
version = "4.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442"
checksum = "307bc0538d5f0f83b8248db3087aa92fe504e4691294d0c96c0eabc33f47ba47"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
"syn 2.0.48",
]
[[package]]
name = "clap_lex"
version = "0.6.0"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1"
checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce"
[[package]]
name = "color_quant"
@ -440,6 +440,7 @@ dependencies = [
"tracing-opentelemetry",
"tracing-subscriber",
"trust-dns-resolver",
"webpage",
]
[[package]]
@ -538,7 +539,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.48",
]
[[package]]
@ -647,7 +648,7 @@ dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
"syn 2.0.48",
]
[[package]]
@ -722,6 +723,16 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "futf"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
dependencies = [
"mac",
"new_debug_unreachable",
]
[[package]]
name = "futures-channel"
version = "0.3.30"
@ -762,7 +773,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.48",
]
[[package]]
@ -937,6 +948,20 @@ dependencies = [
"winapi",
]
[[package]]
name = "html5ever"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7"
dependencies = [
"log",
"mac",
"markup5ever",
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "http"
version = "0.2.11"
@ -1335,12 +1360,44 @@ dependencies = [
"libc",
]
[[package]]
name = "mac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]]
name = "maplit"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
[[package]]
name = "markup5ever"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016"
dependencies = [
"log",
"phf",
"phf_codegen",
"string_cache",
"string_cache_codegen",
"tendril",
]
[[package]]
name = "markup5ever_rcdom"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9521dd6750f8e80ee6c53d65e2e4656d7de37064f3a7a5d2d11d05df93839c2"
dependencies = [
"html5ever",
"markup5ever",
"tendril",
"xml5ever",
]
[[package]]
name = "match_cfg"
version = "0.1.0"
@ -1401,6 +1458,12 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "new_debug_unreachable"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54"
[[package]]
name = "nix"
version = "0.27.1"
@ -1692,7 +1755,7 @@ dependencies = [
"proc-macro2",
"proc-macro2-diagnostics",
"quote",
"syn",
"syn 2.0.48",
]
[[package]]
@ -1717,6 +1780,44 @@ version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "phf"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259"
dependencies = [
"phf_shared",
]
[[package]]
name = "phf_codegen"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd"
dependencies = [
"phf_generator",
"phf_shared",
]
[[package]]
name = "phf_generator"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
dependencies = [
"phf_shared",
"rand",
]
[[package]]
name = "phf_shared"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
dependencies = [
"siphasher",
]
[[package]]
name = "pin-project"
version = "1.1.4"
@ -1734,7 +1835,7 @@ checksum = "266c042b60c9c76b8d53061e52b2e0d1116abc57cefc8c5cd671619a56ac3690"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.48",
]
[[package]]
@ -1796,6 +1897,12 @@ version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
[[package]]
name = "precomputed-hash"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
name = "prettyplease"
version = "0.2.16"
@ -1803,7 +1910,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5"
dependencies = [
"proc-macro2",
"syn",
"syn 2.0.48",
]
[[package]]
@ -1833,7 +1940,7 @@ checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.48",
"version_check",
"yansi",
]
@ -2165,7 +2272,7 @@ dependencies = [
"quote",
"ruma-identifiers-validation",
"serde",
"syn",
"syn 2.0.48",
"toml",
]
@ -2378,7 +2485,7 @@ checksum = "33c85360c95e7d137454dc81d9a4ed2b8efd8fbe19cee57357b32b9771fccb67"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.48",
]
[[package]]
@ -2543,6 +2650,12 @@ dependencies = [
"time",
]
[[package]]
name = "siphasher"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
[[package]]
name = "slab"
version = "0.4.9"
@ -2584,6 +2697,32 @@ dependencies = [
"der",
]
[[package]]
name = "string_cache"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b"
dependencies = [
"new_debug_unreachable",
"once_cell",
"parking_lot",
"phf_shared",
"precomputed-hash",
"serde",
]
[[package]]
name = "string_cache_codegen"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988"
dependencies = [
"phf_generator",
"phf_shared",
"proc-macro2",
"quote",
]
[[package]]
name = "subslice"
version = "0.2.3"
@ -2599,6 +2738,17 @@ version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "syn"
version = "2.0.48"
@ -2637,6 +2787,17 @@ dependencies = [
"libc",
]
[[package]]
name = "tendril"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
dependencies = [
"futf",
"mac",
"utf-8",
]
[[package]]
name = "thiserror"
version = "1.0.56"
@ -2654,7 +2815,7 @@ checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.48",
]
[[package]]
@ -2790,7 +2951,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.48",
]
[[package]]
@ -2944,7 +3105,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.48",
]
[[package]]
@ -3147,6 +3308,12 @@ version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
[[package]]
name = "utf-8"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
name = "uuid"
version = "1.7.0"
@ -3210,7 +3377,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
"syn",
"syn 2.0.48",
"wasm-bindgen-shared",
]
@ -3244,7 +3411,7 @@ checksum = "bae1abb6806dc1ad9e560ed242107c0f6c84335f1749dd4e8ddb012ebd5e25a7"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.48",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
@ -3275,6 +3442,18 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "webpage"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fb86b12e58d490a99867f561ce8466ffa7b73e24d015a8e7f5bc111d4424ba2"
dependencies = [
"html5ever",
"markup5ever_rcdom",
"serde_json",
"url",
]
[[package]]
name = "weezl"
version = "0.1.8"
@ -3466,6 +3645,17 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "xml5ever"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4034e1d05af98b51ad7214527730626f019682d797ba38b51689212118d8e650"
dependencies = [
"log",
"mac",
"markup5ever",
]
[[package]]
name = "yansi"
version = "1.0.0-rc.1"
@ -3489,7 +3679,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.48",
]
[[package]]

View file

@ -91,7 +91,7 @@ hmac = "0.12.1"
sha-1 = "0.10.1"
sha2 = { version = "0.10.8" }
# used for conduit's CLI and admin room command parsing
clap = { version = "4.4.17", default-features = false, features = ["std", "derive", "help", "usage", "error-context"] }
clap = { version = "4.5.0", default-features = false, features = ["std", "derive", "help", "usage", "error-context"] }
futures-util = { version = "0.3.30", default-features = false }
# Used for reading the configuration from conduit.toml & environment variables
figment = { version = "0.10.14", features = ["env", "toml"] }
@ -106,6 +106,8 @@ ipaddress = "0.1.3"
sd-notify = { version = "0.4.1", optional = true }
webpage = { version = "2.0", default-features = false }
[target.'cfg(unix)'.dependencies]
nix = { version = "0.27.1", features = ["resource"] }

View file

@ -113,6 +113,8 @@ ip_range_denylist = [
"fec0::/10",
]
### Moderation / Privacy / Security
# Set to true to allow user type "guest" registrations. Element attempts to register guest users automatically.
@ -163,6 +165,21 @@ allow_public_room_directory_without_auth = false
# If federation is disabled entirely (`allow_federation`), this is inherently false. For privacy, this is best disabled.
allow_device_name_federation = false
# Vector list of domains allowed to send requests to for URL previews. Defaults to none.
# Note: this is a *contains* match, not an explicit match. Putting "google.com" will match "https://google.com" and "http://mymaliciousdomainexamplegoogle.com"
# Setting this to "*" will allow all URL previews. Please note that this opens up significant attack surface to your server, you are expected to be aware of the risks by doing so.
url_preview_domain_contains_allowlist = []
# Vector list of explicit domains allowed to send requests to for URL previews. Defaults to none.
# Note: This is an *explicit* match, not a ccontains match. Putting "google.com" will match "https://google.com", "http://google.com", but not "https://mymaliciousdomainexamplegoogle.com"
# Setting this to "*" will allow all URL previews. Please note that this opens up significant attack surface to your server, you are expected to be aware of the risks by doing so.
url_preview_domain_explicit_allowlist = []
# Vector list of URLs allowed to send requests to for URL previews. Defaults to none.
# Note that this is a *contains* match, not an explicit match. Putting "https://google.com" will match "https://google.com/" and "https://google.com/url?q=https://mymaliciousdomainexample.com"
# Setting this to "*" will allow all URL previews. Please note that this opens up significant attack surface to your server, you are expected to be aware of the risks by doing so.
url_preview_url_contains_allowlist = []
### Misc

34
debian/postinst vendored
View file

@ -92,12 +92,20 @@ port = ${CONDUIT_PORT}
# likely need this to be 0.0.0.0.
address = "${CONDUIT_ADDRESS}"
# How many requests conduwuit sends to other servers at the same time. Default is 100
# Note that because conduwuit is very fast unlike other homeserver implementations,
# setting this too high could inadvertently result in ratelimits kicking in, or
# overloading lower-end homeservers out there. Recommended to leave this alone unless you
# have a valid reason to. No this will not speed up room joins.
#max_concurrent_requests = 100
# How many requests conduwuit sends to other servers at the same time concurrently. Default is 500
# Note that because conduwuit is very fast unlike other homeserver implementations, setting this too
# high could inadvertently result in ratelimits kicking in, or overloading lower-end homeservers out there.
#
# A valid use-case for enabling this is if you have a significant amount of overall federation activity
# such as many rooms joined/tracked, and many servers in the true destination cache caused by that. Upon
# rebooting conduwuit, depending on how fast your resources are, client and incoming federation requests
# may timeout or be "stalled" for a period of time due to hitting the max concurrent requests limit from
# refreshing federation/destination caches and such.
#
# If you have a lot of active users on your homeserver, you will definitely need to raise this.
#
# No this will not speed up room joins.
#max_concurrent_requests = 500
# Max request size for file uploads
max_request_size = 20_000_000 # in bytes
@ -142,6 +150,8 @@ ip_range_denylist = [
"fec0::/10",
]
### Moderation / Privacy / Security
# Set to true to allow user type "guest" registrations. Element attempts to register guest users automatically.
@ -192,6 +202,18 @@ allow_public_room_directory_without_auth = false
# If federation is disabled entirely (`allow_federation`), this is inherently false. For privacy, this is best disabled.
allow_device_name_federation = false
# Vector list of domains allowed to send requests to for URL previews. Defaults to none.
# Note: this is a *contains* match, not an explicit match. Putting "google.com" will match "https://google.com" and "http://mymaliciousdomainexamplegoogle.com"
url_preview_domain_contains_allowlist = []
# Vector list of explicit domains allowed to send requests to for URL previews. Defaults to none.
# Note: This is an *explicit* match, not a ccontains match. Putting "google.com" will match "https://google.com", "http://google.com", but not "https://mymaliciousdomainexamplegoogle.com"
url_preview_domain_explicit_allowlist = []
# Vector list of URLs allowed to send requests to for URL previews. Defaults to none.
# Note that this is a *contains* match, not an explicit match. Putting "https://google.com" will match "https://google.com/" and "https://google.com/url?q=https://mymaliciousdomainexample.com"
url_preview_url_contains_allowlist = []
### Misc

View file

@ -1,14 +1,21 @@
use std::time::Duration;
use std::{io::Cursor, net::IpAddr, sync::Arc, time::Duration};
use crate::{service::media::FileMeta, services, utils, Error, Result, Ruma};
use crate::{
service::media::{FileMeta, UrlPreviewData},
services, utils, Error, Result, Ruma,
};
use image::io::Reader as ImgReader;
use reqwest::Url;
use ruma::api::client::{
error::ErrorKind,
media::{
create_content, get_content, get_content_as_filename, get_content_thumbnail,
get_media_config,
get_media_config, get_media_preview,
},
};
use tracing::info;
use tracing::{debug, error, info};
use webpage::HTML;
/// generated MXC ID (`media-id`) length
const MXC_LENGTH: usize = 32;
@ -24,6 +31,43 @@ pub async fn get_media_config_route(
})
}
/// # `GET /_matrix/media/v3/preview_url`
///
/// Returns URL preview.
pub async fn get_media_preview_route(
body: Ruma<get_media_preview::v3::Request>,
) -> Result<get_media_preview::v3::Response> {
let url = &body.url;
if !url_preview_allowed(url) {
return Err(Error::BadRequest(
ErrorKind::Forbidden,
"URL is not allowed to be previewed",
));
}
if let Ok(preview) = get_url_preview(url).await {
let res = serde_json::value::to_raw_value(&preview).map_err(|e| {
error!(
"Failed to convert UrlPreviewData into a serde json value: {}",
e
);
Error::BadRequest(
ErrorKind::Unknown,
"Unknown error occurred parsing URL preview",
)
})?;
return Ok(get_media_preview::v3::Response::from_raw_value(res));
}
Err(Error::BadRequest(
ErrorKind::LimitExceeded {
retry_after_ms: Some(Duration::from_secs(5)),
},
"Retry later",
))
}
/// # `POST /_matrix/media/v3/upload`
///
/// Permanently save media in the server.
@ -266,3 +310,264 @@ pub async fn get_content_thumbnail_route(
Err(Error::BadRequest(ErrorKind::NotFound, "Media not found."))
}
}
async fn download_image(client: &reqwest::Client, url: &str) -> Result<UrlPreviewData> {
let image = client.get(url).send().await?.bytes().await?;
let mxc = format!(
"mxc://{}/{}",
services().globals.server_name(),
utils::random_string(MXC_LENGTH)
);
services()
.media
.create(mxc.clone(), None, None, &image)
.await?;
let (width, height) = match ImgReader::new(Cursor::new(&image)).with_guessed_format() {
Err(_) => (None, None),
Ok(reader) => match reader.into_dimensions() {
Err(_) => (None, None),
Ok((width, height)) => (Some(width), Some(height)),
},
};
Ok(UrlPreviewData {
image: Some(mxc),
image_size: Some(image.len()),
image_width: width,
image_height: height,
..Default::default()
})
}
async fn download_html(client: &reqwest::Client, url: &str) -> Result<UrlPreviewData> {
let max_download_size = 300_000; // TODO: is this bytes? kilobytes? megabytes?
let mut response = client.get(url).send().await?;
let mut bytes: Vec<u8> = Vec::new();
while let Some(chunk) = response.chunk().await? {
bytes.extend_from_slice(&chunk);
if bytes.len() > max_download_size {
break;
}
}
let body = String::from_utf8_lossy(&bytes);
let html = match HTML::from_string(body.to_string(), Some(url.to_owned())) {
Ok(html) => html,
Err(_) => {
return Err(Error::BadRequest(
ErrorKind::Unknown,
"Failed to parse HTML",
))
}
};
let mut data = match html.opengraph.images.first() {
None => UrlPreviewData::default(),
Some(obj) => download_image(client, &obj.url).await?,
};
let props = html.opengraph.properties;
/* use OpenGraph title/description, but fall back to HTML if not available */
data.title = props.get("title").cloned().or(html.title);
data.description = props.get("description").cloned().or(html.description);
Ok(data)
}
fn url_request_allowed(addr: &IpAddr) -> bool {
// TODO: make this check ip_range_denylist
// could be implemented with reqwest when it supports IP filtering:
// https://github.com/seanmonstar/reqwest/issues/1515
// These checks have been taken from the Rust core/net/ipaddr.rs crate,
// IpAddr::V4.is_global() and IpAddr::V6.is_global(), as .is_global is not
// yet stabilized. TODO: Once this is stable, this match can be simplified.
match addr {
IpAddr::V4(ip4) => {
!(ip4.octets()[0] == 0 // "This network"
|| ip4.is_private()
|| (ip4.octets()[0] == 100 && (ip4.octets()[1] & 0b1100_0000 == 0b0100_0000)) // is_shared()
|| ip4.is_loopback()
|| ip4.is_link_local()
// addresses reserved for future protocols (`192.0.0.0/24`)
|| (ip4.octets()[0] == 192 && ip4.octets()[1] == 0 && ip4.octets()[2] == 0)
|| ip4.is_documentation()
|| (ip4.octets()[0] == 198 && (ip4.octets()[1] & 0xfe) == 18) // is_benchmarking()
|| (ip4.octets()[0] & 240 == 240 && !ip4.is_broadcast()) // is_reserved()
|| ip4.is_broadcast())
}
IpAddr::V6(ip6) => {
!(ip6.is_unspecified()
|| ip6.is_loopback()
// IPv4-mapped Address (`::ffff:0:0/96`)
|| matches!(ip6.segments(), [0, 0, 0, 0, 0, 0xffff, _, _])
// IPv4-IPv6 Translat. (`64:ff9b:1::/48`)
|| matches!(ip6.segments(), [0x64, 0xff9b, 1, _, _, _, _, _])
// Discard-Only Address Block (`100::/64`)
|| matches!(ip6.segments(), [0x100, 0, 0, 0, _, _, _, _])
// IETF Protocol Assignments (`2001::/23`)
|| (matches!(ip6.segments(), [0x2001, b, _, _, _, _, _, _] if b < 0x200)
&& !(
// Port Control Protocol Anycast (`2001:1::1`)
u128::from_be_bytes(ip6.octets()) == 0x2001_0001_0000_0000_0000_0000_0000_0001
// Traversal Using Relays around NAT Anycast (`2001:1::2`)
|| u128::from_be_bytes(ip6.octets()) == 0x2001_0001_0000_0000_0000_0000_0000_0002
// AMT (`2001:3::/32`)
|| matches!(ip6.segments(), [0x2001, 3, _, _, _, _, _, _])
// AS112-v6 (`2001:4:112::/48`)
|| matches!(ip6.segments(), [0x2001, 4, 0x112, _, _, _, _, _])
// ORCHIDv2 (`2001:20::/28`)
|| matches!(ip6.segments(), [0x2001, b, _, _, _, _, _, _] if (0x20..=0x2F).contains(&b))
))
|| ((ip6.segments()[0] == 0x2001) && (ip6.segments()[1] == 0xdb8)) // is_documentation()
|| ((ip6.segments()[0] & 0xfe00) == 0xfc00) // is_unique_local()
|| ((ip6.segments()[0] & 0xffc0) == 0xfe80)) // is_unicast_link_local
}
}
}
async fn request_url_preview(url: &str) -> Result<UrlPreviewData> {
let client = services().globals.default_client();
let response = client.head(url).send().await?;
if !response
.remote_addr()
.map_or(false, |a| url_request_allowed(&a.ip()))
{
return Err(Error::BadRequest(
ErrorKind::Forbidden,
"Requesting from this address is forbidden",
));
}
let content_type = match response
.headers()
.get(reqwest::header::CONTENT_TYPE)
.and_then(|x| x.to_str().ok())
{
Some(ct) => ct,
None => {
return Err(Error::BadRequest(
ErrorKind::Unknown,
"Unknown Content-Type",
))
}
};
let data = match content_type {
html if html.starts_with("text/html") => download_html(&client, url).await?,
img if img.starts_with("image/") => download_image(&client, url).await?,
_ => {
return Err(Error::BadRequest(
ErrorKind::Unknown,
"Unsupported Content-Type",
))
}
};
services().media.set_url_preview(url, &data).await?;
Ok(data)
}
async fn get_url_preview(url: &str) -> Result<UrlPreviewData> {
if let Some(preview) = services().media.get_url_preview(url).await {
return Ok(preview);
}
// ensure that only one request is made per URL
let mutex_request = Arc::clone(
services()
.media
.url_preview_mutex
.write()
.unwrap()
.entry(url.to_owned())
.or_default(),
);
let _request_lock = mutex_request.lock().await;
match services().media.get_url_preview(url).await {
Some(preview) => Ok(preview),
None => request_url_preview(url).await,
}
}
fn url_preview_allowed(url_str: &str) -> bool {
let url: Url = match Url::parse(url_str) {
Ok(u) => u,
Err(_) => return false,
};
if ["http", "https"]
.iter()
.all(|&scheme| scheme != url.scheme().to_lowercase())
{
debug!("Ignoring non-HTTP/HTTPS URL to preview: {}", url);
return false;
}
let host = match url.host_str() {
None => {
debug!(
"Ignoring URL preview for a URL that does not have a host (?): {}",
url
);
return false;
}
Some(h) => h.to_owned(),
};
let allowlist_domain_contains = services().globals.url_preview_domain_contains_allowlist();
let allowlist_domain_explicit = services().globals.url_preview_domain_explicit_allowlist();
let allowlist_url_contains = services().globals.url_preview_url_contains_allowlist();
if allowlist_domain_contains.contains(&"*".to_owned())
|| allowlist_domain_explicit.contains(&"*".to_owned())
|| allowlist_url_contains.contains(&"*".to_owned())
{
debug!(
"Config key contains * which is allowing all URL previews. Allowing URL {}",
url
);
return true;
}
if !host.is_empty() {
if allowlist_domain_explicit.contains(&host) {
return true;
}
debug!(
"Host {} is allowed by url_preview_domain_explicit_allowlist (check 1/3)",
&host
);
if allowlist_domain_contains
.iter()
.any(|domain_s| domain_s.contains(&host.clone()))
{
return true;
}
debug!(
"Host {} is allowed by url_preview_domain_contains_allowlist (check 2/3)",
&host
);
if allowlist_url_contains
.iter()
.any(|url_s| url_s.contains(&url.to_string()))
{
return true;
}
debug!(
"URL {} is allowed by url_preview_url_contains_allowlist (check 3/3)",
&host
);
}
false
}

View file

@ -134,6 +134,15 @@ pub struct Config {
#[serde(default = "default_ip_range_denylist")]
pub ip_range_denylist: Vec<String>,
#[serde(default = "Vec::new")]
pub url_preview_domain_contains_allowlist: Vec<String>,
#[serde(default = "Vec::new")]
pub url_preview_domain_explicit_allowlist: Vec<String>,
#[serde(default = "Vec::new")]
pub url_preview_url_contains_allowlist: Vec<String>,
#[serde(default = "RegexSet::empty")]
#[serde(with = "serde_regex")]
pub forbidden_room_names: RegexSet,
@ -349,6 +358,18 @@ impl fmt::Display for Config {
("Forbidden room names", {
&self.forbidden_room_names.patterns().iter().join(", ")
}),
(
"URL preview domain contains allowlist",
&self.url_preview_domain_contains_allowlist.join(", "),
),
(
"URL preview domain explicit allowlist",
&self.url_preview_domain_explicit_allowlist.join(", "),
),
(
"URL preview URL contains allowlist",
&self.url_preview_url_contains_allowlist.join(", "),
),
];
let mut msg: String = "Active config values:\n\n".to_owned();

View file

@ -1,6 +1,10 @@
use ruma::api::client::error::ErrorKind;
use crate::{database::KeyValueDatabase, service, utils, Error, Result};
use crate::{
database::KeyValueDatabase,
service::{self, media::UrlPreviewData},
utils, Error, Result,
};
impl service::media::Data for KeyValueDatabase {
fn create_file_metadata(
@ -79,4 +83,112 @@ impl service::media::Data for KeyValueDatabase {
};
Ok((content_disposition, content_type, key))
}
fn remove_url_preview(&self, url: &str) -> Result<()> {
self.url_previews.remove(url.as_bytes())
}
fn set_url_preview(
&self,
url: &str,
data: &UrlPreviewData,
timestamp: std::time::Duration,
) -> Result<()> {
let mut value = Vec::<u8>::new();
value.extend_from_slice(&timestamp.as_secs().to_be_bytes());
value.push(0xff);
value.extend_from_slice(
data.title
.as_ref()
.map(|t| t.as_bytes())
.unwrap_or_default(),
);
value.push(0xff);
value.extend_from_slice(
data.description
.as_ref()
.map(|d| d.as_bytes())
.unwrap_or_default(),
);
value.push(0xff);
value.extend_from_slice(
data.image
.as_ref()
.map(|i| i.as_bytes())
.unwrap_or_default(),
);
value.push(0xff);
value.extend_from_slice(&data.image_size.unwrap_or(0).to_be_bytes());
value.push(0xff);
value.extend_from_slice(&data.image_width.unwrap_or(0).to_be_bytes());
value.push(0xff);
value.extend_from_slice(&data.image_height.unwrap_or(0).to_be_bytes());
self.url_previews.insert(url.as_bytes(), &value)
}
fn get_url_preview(&self, url: &str) -> Option<UrlPreviewData> {
let values = self.url_previews.get(url.as_bytes()).ok()??;
let mut values = values.split(|&b| b == 0xff);
let _ts = match values
.next()
.map(|b| u64::from_be_bytes(b.try_into().expect("valid BE array")))
{
Some(0) => None,
x => x,
};
let title = match values
.next()
.and_then(|b| String::from_utf8(b.to_vec()).ok())
{
Some(s) if s.is_empty() => None,
x => x,
};
let description = match values
.next()
.and_then(|b| String::from_utf8(b.to_vec()).ok())
{
Some(s) if s.is_empty() => None,
x => x,
};
let image = match values
.next()
.and_then(|b| String::from_utf8(b.to_vec()).ok())
{
Some(s) if s.is_empty() => None,
x => x,
};
let image_size = match values
.next()
.map(|b| usize::from_be_bytes(b.try_into().expect("valid BE array")))
{
Some(0) => None,
x => x,
};
let image_width = match values
.next()
.map(|b| u32::from_be_bytes(b.try_into().expect("valid BE array")))
{
Some(0) => None,
x => x,
};
let image_height = match values
.next()
.map(|b| u32::from_be_bytes(b.try_into().expect("valid BE array")))
{
Some(0) => None,
x => x,
};
Some(UrlPreviewData {
title,
description,
image,
image_size,
image_width,
image_height,
})
}
}

View file

@ -147,6 +147,7 @@ pub struct KeyValueDatabase {
//pub media: media::Media,
pub(super) mediaid_file: Arc<dyn KvTree>, // MediaId = MXC + WidthHeight + ContentDisposition + ContentType
pub(super) url_previews: Arc<dyn KvTree>,
//pub key_backups: key_backups::KeyBackups,
pub(super) backupid_algorithm: Arc<dyn KvTree>, // BackupId = UserId + Version(Count)
pub(super) backupid_etag: Arc<dyn KvTree>, // BackupId = UserId + Version(Count)
@ -350,6 +351,7 @@ impl KeyValueDatabase {
roomuserdataid_accountdata: builder.open_tree("roomuserdataid_accountdata")?,
roomusertype_roomuserdataid: builder.open_tree("roomusertype_roomuserdataid")?,
mediaid_file: builder.open_tree("mediaid_file")?,
url_previews: builder.open_tree("url_previews")?,
backupid_algorithm: builder.open_tree("backupid_algorithm")?,
backupid_etag: builder.open_tree("backupid_etag")?,
backupkeyid_backup: builder.open_tree("backupkeyid_backup")?,

View file

@ -148,8 +148,11 @@ async fn main() {
error!(?error, "The database couldn't be loaded or created");
return;
};
let config = &services().globals.config;
/* ad-hoc config validation/checks */
// check if user specified valid IP CIDR ranges on startup
for cidr in services().globals.ip_range_denylist() {
let _ = ipaddress::IPAddress::parse(cidr)
@ -179,6 +182,27 @@ async fn main() {
warn!("! Outgoing federated presence is not spec compliant due to relying on PDUs and EDUs combined.\nOutgoing presence will not be very reliable due to this and any issues with federated outgoing presence are very likely attributed to this issue.\nIncoming presence and local presence are unaffected.");
}
if config
.url_preview_domain_contains_allowlist
.contains(&"*".to_owned())
{
warn!("All URLs are allowed for URL previews via setting \"url_preview_domain_contains_allowlist\" to \"*\". This opens up significant attack surface to your server. You are expected to be aware of the risks by doing this.");
}
if config
.url_preview_domain_explicit_allowlist
.contains(&"*".to_owned())
{
warn!("All URLs are allowed for URL previews via setting \"url_preview_domain_explicit_allowlist\" to \"*\". This opens up significant attack surface to your server. You are expected to be aware of the risks by doing this.");
}
if config
.url_preview_url_contains_allowlist
.contains(&"*".to_owned())
{
warn!("All URLs are allowed for URL previews via setting \"url_preview_url_contains_allowlist\" to \"*\". This opens up significant attack surface to your server. You are expected to be aware of the risks by doing this.");
}
/* end ad-hoc config validation/checks */
info!("Starting server");
if let Err(e) = run_server().await {
error!("Critical error running server: {}", e);
@ -464,6 +488,7 @@ fn routes() -> Router {
.ruma_route(client_server::turn_server_route)
.ruma_route(client_server::send_event_to_device_route)
.ruma_route(client_server::get_media_config_route)
.ruma_route(client_server::get_media_preview_route)
.ruma_route(client_server::create_content_route)
.ruma_route(client_server::get_content_route)
.ruma_route(client_server::get_content_as_filename_route)

View file

@ -390,6 +390,18 @@ impl Service<'_> {
&self.config.emergency_password
}
pub fn url_preview_domain_contains_allowlist(&self) -> &Vec<String> {
&self.config.url_preview_domain_contains_allowlist
}
pub fn url_preview_domain_explicit_allowlist(&self) -> &Vec<String> {
&self.config.url_preview_domain_explicit_allowlist
}
pub fn url_preview_url_contains_allowlist(&self) -> &Vec<String> {
&self.config.url_preview_url_contains_allowlist
}
pub fn forbidden_room_names(&self) -> &RegexSet {
&self.config.forbidden_room_names
}

View file

@ -17,4 +17,15 @@ pub trait Data: Send + Sync {
width: u32,
height: u32,
) -> Result<(Option<String>, Option<String>, Vec<u8>)>;
fn remove_url_preview(&self, url: &str) -> Result<()>;
fn set_url_preview(
&self,
url: &str,
data: &super::UrlPreviewData,
timestamp: std::time::Duration,
) -> Result<()>;
fn get_url_preview(&self, url: &str) -> Option<super::UrlPreviewData>;
}

View file

@ -1,7 +1,13 @@
mod data;
use std::io::Cursor;
use std::{
collections::HashMap,
io::Cursor,
sync::{Arc, RwLock},
time::SystemTime,
};
pub(crate) use data::Data;
use serde::Serialize;
use crate::{services, Result};
use image::imageops::FilterType;
@ -9,6 +15,7 @@ use image::imageops::FilterType;
use tokio::{
fs::File,
io::{AsyncReadExt, AsyncWriteExt, BufReader},
sync::Mutex,
};
pub struct FileMeta {
@ -17,8 +24,43 @@ pub struct FileMeta {
pub file: Vec<u8>,
}
#[derive(Serialize, Default)]
pub struct UrlPreviewData {
#[serde(
skip_serializing_if = "Option::is_none",
rename(serialize = "og:title")
)]
pub title: Option<String>,
#[serde(
skip_serializing_if = "Option::is_none",
rename(serialize = "og:description")
)]
pub description: Option<String>,
#[serde(
skip_serializing_if = "Option::is_none",
rename(serialize = "og:image")
)]
pub image: Option<String>,
#[serde(
skip_serializing_if = "Option::is_none",
rename(serialize = "matrix:image:size")
)]
pub image_size: Option<usize>,
#[serde(
skip_serializing_if = "Option::is_none",
rename(serialize = "og:image:width")
)]
pub image_width: Option<u32>,
#[serde(
skip_serializing_if = "Option::is_none",
rename(serialize = "og:image:height")
)]
pub image_height: Option<u32>,
}
pub struct Service {
pub db: &'static dyn Data,
pub url_preview_mutex: RwLock<HashMap<String, Arc<Mutex<()>>>>,
}
impl Service {
@ -260,6 +302,22 @@ impl Service {
Ok(None)
}
}
pub async fn get_url_preview(&self, url: &str) -> Option<UrlPreviewData> {
self.db.get_url_preview(url)
}
pub async fn remove_url_preview(&self, url: &str) -> Result<()> {
// TODO: also remove the downloaded image
self.db.remove_url_preview(url)
}
pub async fn set_url_preview(&self, url: &str, data: &UrlPreviewData) -> Result<()> {
let now = SystemTime::now()
.duration_since(SystemTime::UNIX_EPOCH)
.expect("valid system time");
self.db.set_url_preview(url, data, now)
}
}
#[cfg(test)]

View file

@ -1,6 +1,6 @@
use std::{
collections::{BTreeMap, HashMap},
sync::{Arc, Mutex},
sync::{Arc, Mutex, RwLock},
};
use lru_cache::LruCache;
@ -114,7 +114,10 @@ impl Services<'_> {
account_data: account_data::Service { db },
admin: admin::Service::build(),
key_backups: key_backups::Service { db },
media: media::Service { db },
media: media::Service {
db,
url_preview_mutex: RwLock::new(HashMap::new()),
},
sending: sending::Service::build(db, &config),
globals: globals::Service::load(db, config)?,