From bb7e885143868fc4c50437be77c602bcfcbc3535 Mon Sep 17 00:00:00 2001 From: Johann150 Date: Mon, 4 Jan 2021 21:13:45 +0100 Subject: [PATCH 01/10] run cargo fmt --- src/main.rs | 60 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 11 deletions(-) diff --git a/src/main.rs b/src/main.rs index ef4906f..75e62f6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,6 @@ use { once_cell::sync::Lazy, - percent_encoding::{AsciiSet, CONTROLS, percent_decode_str, percent_encode}, + percent_encoding::{percent_decode_str, percent_encode, AsciiSet, CONTROLS}, rustls::{ internal::pemfile::{certs, pkcs8_private_keys}, NoClientAuth, ServerConfig, @@ -20,7 +20,7 @@ use { net::{TcpListener, TcpStream}, runtime::Runtime, }, - tokio_rustls::{TlsAcceptor, server::TlsStream}, + tokio_rustls::{server::TlsStream, TlsAcceptor}, url::{Host, Url}, }; @@ -65,12 +65,42 @@ struct Args { fn args() -> Result { let args: Vec = std::env::args().collect(); let mut opts = getopts::Options::new(); - opts.optopt("", "content", "Root of the content directory (default ./content)", "DIR"); - opts.optopt("", "cert", "TLS certificate PEM file (default ./cert.pem)", "FILE"); - opts.optopt("", "key", "PKCS8 private key file (default ./key.rsa)", "FILE"); - opts.optmulti("", "addr", "Address to listen on (multiple occurences possible, default 0.0.0.0:1965 and [::]:1965)", "IP:PORT"); - opts.optopt("", "hostname", "Domain name of this Gemini server (optional)", "NAME"); - opts.optopt("", "lang", "RFC 4646 Language code(s) for text/gemini documents", "LANG"); + opts.optopt( + "", + "content", + "Root of the content directory (default ./content)", + "DIR", + ); + opts.optopt( + "", + "cert", + "TLS certificate PEM file (default ./cert.pem)", + "FILE", + ); + opts.optopt( + "", + "key", + "PKCS8 private key file (default ./key.rsa)", + "FILE", + ); + opts.optopt( + "", + "addr", + "Address to listen on (multiple occurences possible, default 0.0.0.0:1965 and [::]:1965)", + 
"IP:PORT", + ); + opts.optopt( + "", + "hostname", + "Domain name of this Gemini server (optional)", + "NAME", + ); + opts.optopt( + "", + "lang", + "RFC 4646 Language code(s) for text/gemini documents", + "LANG", + ); opts.optflag("s", "silent", "Disable logging output"); opts.optflag("h", "help", "Print this help menu"); opts.optflag("", "serve-secret", "Enable serving secret files (files/directories starting with a dot)"); @@ -142,7 +172,9 @@ fn acceptor() -> Result { } /// Return the URL requested by the client. -async fn parse_request(stream: &mut TlsStream) -> std::result::Result { +async fn parse_request( + stream: &mut TlsStream, +) -> std::result::Result { // Because requests are limited to 1024 bytes (plus 2 bytes for CRLF), we // can use a fixed-sized buffer on the stack, avoiding allocations and // copying, and stopping bad clients from making us use too much memory. @@ -152,7 +184,10 @@ async fn parse_request(stream: &mut TlsStream) -> std::result::Result // Read until CRLF, end-of-stream, or there's no buffer space left. loop { - let bytes_read = stream.read(buf).await.or(Err((59, "Request ended unexpectedly")))?; + let bytes_read = stream + .read(buf) + .await + .or(Err((59, "Request ended unexpectedly")))?; len += bytes_read; if request[..len].ends_with(b"\r\n") { break; @@ -251,7 +286,10 @@ async fn list_directory(stream: &mut TlsStream, path: &Path) -> Resul let mut entries = tokio::fs::read_dir(path).await?; let mut lines = vec![]; while let Some(entry) = entries.next_entry().await? 
{ - let mut name = entry.file_name().into_string().or(Err("Non-Unicode filename"))?; + let mut name = entry + .file_name() + .into_string() + .or(Err("Non-Unicode filename"))?; if name.starts_with('.') { continue; } From c916827709a10d428cb55d30dc9a06a83954289d Mon Sep 17 00:00:00 2001 From: Johann150 Date: Mon, 4 Jan 2021 21:19:59 +0100 Subject: [PATCH 02/10] add metadata database --- src/main.rs | 2 + src/metadata.rs | 114 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+) create mode 100644 src/metadata.rs diff --git a/src/main.rs b/src/main.rs index 75e62f6..3f1efe1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,5 @@ +mod metadata; + use { once_cell::sync::Lazy, percent_encoding::{percent_decode_str, percent_encode, AsciiSet, CONTROLS}, diff --git a/src/metadata.rs b/src/metadata.rs new file mode 100644 index 0000000..650d0fa --- /dev/null +++ b/src/metadata.rs @@ -0,0 +1,114 @@ +use std::collections::BTreeMap; +use std::io::{BufRead, BufReader}; +use std::path::PathBuf; +use std::time::SystemTime; + +/// A struct to store a string of metadata for each file retrieved from +/// sidecar files called `.lang`. +/// +/// These sidecar file's lines should have the format +/// ```text +/// :\n +/// ``` +/// where `` is only a filename (not a path) of a file that resides +/// in the same directory and `` is the metadata to be stored. +/// Lines that start with optional whitespace and `#` are ignored, as are lines +/// that do not fit the basic format. +/// Both parts are stripped of any leading and/or trailing whitespace. +pub(crate) struct FileOptions { + /// Stores the paths of the side files and when they were last read. + /// By comparing this to the last write time, we can know if the file + /// has changed. 
+ databases_read: BTreeMap, + /// Stores the metadata for each file + file_meta: BTreeMap, + /// The default value to return + default: String, +} + +impl FileOptions { + pub(crate) fn new(default: &String) -> Self { + Self { + databases_read: BTreeMap::new(), + file_meta: BTreeMap::new(), + default: default.clone(), + } + } + + /// Checks wether the database for the respective directory is still + /// up to date. + /// Will only return true if the database should be (re)read, i.e. it will + /// return false if there is no database file in the specified directory. + fn check_outdated(&self, db_dir: &PathBuf) -> bool { + let mut db = db_dir.clone(); + db.push(".lang"); + let db = db.as_path(); + + if let Ok(metadata) = db.metadata() { + if !metadata.is_file() { + // it exists, but it is a directory + false + } else if let (Ok(modified), Some(last_read)) = + (metadata.modified(), self.databases_read.get(db)) + { + // check that it was last modified before the read + // if the times are the same, we might have read the old file + &modified < last_read + } else { + // either the filesystem does not support last modified + // metadata, so we have to read it again every time; or the + // file exists but was not read before, so we have to read it + true + } + } else { + // the file probably does not exist + false + } + } + + /// (Re)reads a specific sidecar file that resides in the specified + /// directory. The function takes a directory to minimize path + /// alterations "on the fly". + /// This function will allways try to read the file, even if it is current. 
+ fn read_database(&mut self, db_dir: &PathBuf) { + let mut db = db_dir.clone(); + db.push(".lang"); + let db = db.as_path(); + + if let Ok(file) = std::fs::File::open(db) { + let r = BufReader::new(file); + r.lines() + // discard any I/O errors + .filter_map(|line| line.ok()) + // filter out comment lines + .filter(|line| !line.trim_start().starts_with("#")) + .for_each(|line| { + // split line at colon + let parts = line.splitn(2, ':').collect::>(); + // only continue if line fits the format + if parts.len() == 2 { + // generate workspace-unique path + let mut path = db_dir.clone(); + path.push(parts[0].trim()); + self.file_meta.insert(path, parts[1].trim().to_string()); + } + }); + self.databases_read + .insert(db_dir.clone(), SystemTime::now()); + } + } + + /// Get the metadata for the specified file. This might need to (re)load a + /// single sidecar file. + /// The file path should consistenly be either absolute or relative to the + /// working/content directory. If inconsisten file paths are used, this can + /// lead to loading and storing sidecar files multiple times. + pub fn get(&mut self, file: PathBuf) -> &str { + let dir = file.parent().expect("no parent directory").to_path_buf(); + if self.check_outdated(&dir) { + self.read_database(&dir); + } + + self.file_meta.get(&file).unwrap_or(&self.default) + } +} From f7ac1fe21bf279036df68e36d96ad1bf640c83fd Mon Sep 17 00:00:00 2001 From: Johann150 Date: Mon, 4 Jan 2021 21:53:12 +0100 Subject: [PATCH 03/10] update readme --- README.md | 11 +++++++++++ src/metadata.rs | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2aa3c40..1ef1597 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,17 @@ All of the command-line arguments are optional. Run `agate --help` to see the d When a client requests the URL `gemini://example.com/foo/bar`, Agate will respond with the file at `path/to/content/foo/bar`. 
If any segment of the requested path starts with a dot, agate will respond with a status code 52, even if the file does not exist (this behaviour can be disabled with `--serve-secret`). If there is a directory at that path, Agate will look for a file named `index.gmi` inside that directory. If there is no such file, but a file named `.directory-listing-ok` exists inside that directory, a basic directory listing is displayed. Files or directories whose name starts with a dot (e.g. the `.directory-listing-ok` file itself) are omitted from the list. +Agate will look for a file called `.lang` in the same directory as the file currently being served. If this file exists and has an entry for the current file, the respective data will be used to formulate the response header. +The lines of the file should have this format: + +```text +: +``` + +Where `` is just a filename (not a path) of a file in the same directory, and `` is the metadata to be stored. +Lines that start with optional whitespace and `#` are ignored, as are lines that do not fit the above basic format. +Both parts are stripped of any leading and/or trailing whitespace. + [Gemini]: https://gemini.circumlunar.space/ [Rust]: https://www.rust-lang.org/ [home]: gemini://gem.limpet.net/agate/ diff --git a/src/metadata.rs b/src/metadata.rs index 650d0fa..16f021d 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -8,7 +8,7 @@ use std::time::SystemTime; /// /// These sidecar file's lines should have the format /// ```text -/// :\n +/// : /// ``` /// where `` is only a filename (not a path) of a file that resides /// in the same directory and `` is the metadata to be stored. 
From e68637fb1b1547b87a28f3d1f2e5be858792758f Mon Sep 17 00:00:00 2001 From: Johann150 Date: Tue, 5 Jan 2021 10:57:14 +0100 Subject: [PATCH 04/10] fix check_outdated --- README.md | 2 +- src/metadata.rs | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 1ef1597..fe050e7 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ All of the command-line arguments are optional. Run `agate --help` to see the d When a client requests the URL `gemini://example.com/foo/bar`, Agate will respond with the file at `path/to/content/foo/bar`. If any segment of the requested path starts with a dot, agate will respond with a status code 52, even if the file does not exist (this behaviour can be disabled with `--serve-secret`). If there is a directory at that path, Agate will look for a file named `index.gmi` inside that directory. If there is no such file, but a file named `.directory-listing-ok` exists inside that directory, a basic directory listing is displayed. Files or directories whose name starts with a dot (e.g. the `.directory-listing-ok` file itself) are omitted from the list. -Agate will look for a file called `.lang` in the same directory as the file currently being served. If this file exists and has an entry for the current file, the respective data will be used to formulate the response header. +Agate will look for a file called `.mime` in the same directory as the file currently being served. If this file exists and has an entry for the current file, the respective data will be used to formulate the response header. 
The lines of the file should have this format: ```text diff --git a/src/metadata.rs b/src/metadata.rs index 16f021d..3c1dfba 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -26,12 +26,14 @@ pub(crate) struct FileOptions { default: String, } +static SIDECAR_FILENAME: &str = ".mime"; + impl FileOptions { - pub(crate) fn new(default: &String) -> Self { + pub(crate) fn new(default: &str) -> Self { Self { databases_read: BTreeMap::new(), file_meta: BTreeMap::new(), - default: default.clone(), + default: default.to_string(), } } @@ -41,7 +43,7 @@ impl FileOptions { /// return false if there is no database file in the specified directory. fn check_outdated(&self, db_dir: &PathBuf) -> bool { let mut db = db_dir.clone(); - db.push(".lang"); + db.push(SIDECAR_FILENAME); let db = db.as_path(); if let Ok(metadata) = db.metadata() { @@ -49,11 +51,11 @@ impl FileOptions { // it exists, but it is a directory false } else if let (Ok(modified), Some(last_read)) = - (metadata.modified(), self.databases_read.get(db)) + (metadata.modified(), self.databases_read.get(db_dir)) { // check that it was last modified before the read // if the times are the same, we might have read the old file - &modified < last_read + &modified >= last_read } else { // either the filesystem does not support last modified // metadata, so we have to read it again every time; or the @@ -71,8 +73,9 @@ impl FileOptions { /// alterations "on the fly". /// This function will allways try to read the file, even if it is current. fn read_database(&mut self, db_dir: &PathBuf) { + log::trace!("reading database for {:?}", db_dir); let mut db = db_dir.clone(); - db.push(".lang"); + db.push(SIDECAR_FILENAME); let db = db.as_path(); if let Ok(file) = std::fs::File::open(db) { @@ -103,12 +106,12 @@ impl FileOptions { /// The file path should consistenly be either absolute or relative to the /// working/content directory. 
If inconsisten file paths are used, this can /// lead to loading and storing sidecar files multiple times. - pub fn get(&mut self, file: PathBuf) -> &str { + pub fn get(&mut self, file: &PathBuf) -> &str { let dir = file.parent().expect("no parent directory").to_path_buf(); if self.check_outdated(&dir) { self.read_database(&dir); } - self.file_meta.get(&file).unwrap_or(&self.default) + self.file_meta.get(file).unwrap_or(&self.default) } } From 6bbbb5ed4b4307582f90763507e1e0a12a616234 Mon Sep 17 00:00:00 2001 From: Johann150 Date: Tue, 5 Jan 2021 10:59:27 +0100 Subject: [PATCH 05/10] use sidecar files --- Cargo.toml | 2 +- README.md | 5 ++--- src/main.rs | 48 +++++++++++++++++++++++++++++++----------------- 3 files changed, 34 insertions(+), 21 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3c57022..0786031 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ edition = "2018" [dependencies] tokio-rustls = "0.22.0" -tokio = { version = "1.0", features = ["fs", "io-util", "net", "rt-multi-thread"] } +tokio = { version = "1.0", features = ["fs", "io-util", "net", "rt-multi-thread", "sync"] } env_logger = { version = "0.8", default-features = false, features = ["atty", "humantime", "termcolor"] } getopts = "0.2.21" log = "0.4" diff --git a/README.md b/README.md index fe050e7..68af2d4 100644 --- a/README.md +++ b/README.md @@ -50,9 +50,8 @@ The lines of the file should have this format: : ``` -Where `` is just a filename (not a path) of a file in the same directory, and `` is the metadata to be stored. -Lines that start with optional whitespace and `#` are ignored, as are lines that do not fit the above basic format. -Both parts are stripped of any leading and/or trailing whitespace. +Where `` is just a filename (not a path) of a file in the same directory, and `` is the MIME type to be stored. If `` starts with a semicolon, agate will use the usual mechanism to determine the mime type of the file and append the specified parameters. 
+Lines that start with optional whitespace and `#` are ignored, as are lines that do not contain a `:`. Both parts are stripped of any leading and/or trailing whitespace. [Gemini]: https://gemini.circumlunar.space/ [Rust]: https://www.rust-lang.org/ diff --git a/src/main.rs b/src/main.rs index 3f1efe1..cb9a494 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ mod metadata; +use metadata::FileOptions; use { once_cell::sync::Lazy, @@ -21,6 +22,7 @@ use { io::{AsyncReadExt, AsyncWriteExt}, net::{TcpListener, TcpStream}, runtime::Runtime, + sync::RwLock, }, tokio_rustls::{server::TlsStream, TlsAcceptor}, url::{Host, Url}, @@ -31,12 +33,19 @@ fn main() -> Result { env_logger::Builder::new().parse_filters("info").init(); } Runtime::new()?.block_on(async { + let mimetypes = Arc::new(RwLock::new(FileOptions::new( + &ARGS + .language + .as_ref() + .map_or(String::new(), |lang| format!(";lang={}", lang)), + ))); let listener = TcpListener::bind(&ARGS.addrs[..]).await?; log::info!("Listening on {:?}...", ARGS.addrs); loop { let (stream, _) = listener.accept().await?; + let arc = mimetypes.clone(); tokio::spawn(async { - if let Err(e) = handle_request(stream).await { + if let Err(e) = handle_request(stream, arc).await { log::error!("{:?}", e); } }); @@ -147,11 +156,11 @@ fn check_path(s: String) -> Result { } /// Handle a single client session (request + response). -async fn handle_request(stream: TcpStream) -> Result { +async fn handle_request(stream: TcpStream, mimetypes: Arc>) -> Result { let stream = &mut TLS.accept(stream).await?; match parse_request(stream).await { - Ok(url) => send_response(url, stream).await?, + Ok(url) => send_response(url, stream, mimetypes).await?, Err((status, msg)) => send_header(stream, status, &[msg]).await?, } stream.shutdown().await?; @@ -223,7 +232,11 @@ async fn parse_request( } /// Send the client the file located at the requested URL. 
-async fn send_response(url: Url, stream: &mut TlsStream) -> Result { +async fn send_response( + url: Url, + stream: &mut TlsStream, + mimetypes: Arc>, +) -> Result { let mut path = std::path::PathBuf::from(&ARGS.content_dir); if let Some(segments) = url.path_segments() { for segment in segments { @@ -265,12 +278,21 @@ async fn send_response(url: Url, stream: &mut TlsStream) -> Result { }; // Send header. - if path.extension() == Some(OsStr::new("gmi")) { - send_text_gemini_header(stream).await?; + let mut locked = mimetypes.write().await; + let data = locked.get(&path); + if data.is_empty() || data.starts_with(";") { + // guess MIME type + if path.extension() == Some(OsStr::new("gmi")) { + send_header(stream, 20, &["text/gemini", data]).await?; + } else { + let mime = mime_guess::from_path(&path).first_or_octet_stream(); + send_header(stream, 20, &[mime.essence_str(), data]).await?; + }; } else { - let mime = mime_guess::from_path(&path).first_or_octet_stream(); - send_header(stream, 20, &[mime.essence_str()]).await?; + // this must be a full MIME type + send_header(stream, 20, &[data]).await?; } + drop(locked); // Send body. tokio::io::copy(&mut file, stream).await?; @@ -284,7 +306,7 @@ async fn list_directory(stream: &mut TlsStream, path: &Path) -> Resul .add(b'?').add(b'`').add(b'{').add(b'}'); log::info!("Listing directory {:?}", path); - send_text_gemini_header(stream).await?; + send_header(stream, 20, &["text/gemini"]).await?; let mut entries = tokio::fs::read_dir(path).await?; let mut lines = vec![]; while let Some(entry) = entries.next_entry().await? 
{ @@ -321,11 +343,3 @@ async fn send_header(stream: &mut TlsStream, status: u8, meta: &[&str stream.write_all(response.as_bytes()).await?; Ok(()) } - -async fn send_text_gemini_header(stream: &mut TlsStream) -> Result { - if let Some(lang) = ARGS.language.as_deref() { - send_header(stream, 20, &["text/gemini;lang=", lang]).await - } else { - send_header(stream, 20, &["text/gemini"]).await - } -} From aa713a2dea89e1f07369e72b7f0ad43ee8fa353a Mon Sep 17 00:00:00 2001 From: Johann150 Date: Thu, 4 Feb 2021 16:56:17 +0100 Subject: [PATCH 06/10] formatting --- src/main.rs | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/main.rs b/src/main.rs index 7774e8b..f3db194 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,15 +9,8 @@ use { NoClientAuth, ServerConfig, }, std::{ - borrow::Cow, - error::Error, - ffi::OsStr, - fmt::Write, - fs::File, - io::BufReader, - net::SocketAddr, - path::Path, - sync::Arc, + borrow::Cow, error::Error, ffi::OsStr, fmt::Write, fs::File, io::BufReader, + net::SocketAddr, path::Path, sync::Arc, }, tokio::{ io::{AsyncReadExt, AsyncWriteExt}, @@ -122,7 +115,11 @@ fn args() -> Result { ); opts.optflag("s", "silent", "Disable logging output"); opts.optflag("h", "help", "Print this help menu"); - opts.optflag("", "serve-secret", "Enable serving secret files (files/directories starting with a dot)"); + opts.optflag( + "", + "serve-secret", + "Enable serving secret files (files/directories starting with a dot)", + ); opts.optflag("", "log-ip", "Output IP addresses when logging"); let matches = opts.parse(&args[1..]).map_err(|f| f.to_string())?; @@ -153,7 +150,7 @@ fn args() -> Result { language: matches.opt_str("lang"), silent: matches.opt_present("s"), serve_secret: matches.opt_present("serve-secret"), - log_ips: matches.opt_present("log-ip"), + log_ips: matches.opt_present("log-ip"), }) } @@ -205,7 +202,11 @@ impl RequestHandle { ); match TLS.accept(stream).await { - Ok(stream) => Ok(Self { stream, 
log_line, metadata }), + Ok(stream) => Ok(Self { + stream, + log_line, + metadata, + }), Err(e) => Err(format!("{} error:{}", log_line, e)), } } From 548e0f401f35637932f845948d6b4fd789cb652d Mon Sep 17 00:00:00 2001 From: Johann150 Date: Thu, 4 Feb 2021 17:00:33 +0100 Subject: [PATCH 07/10] cache parsing, new syntax Changed the parsing and also parse once when reading the config file and then cache the parse result, rather than checking the line format every time the file is served. --- src/main.rs | 45 ++++++++++++++++------------ src/metadata.rs | 80 ++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 98 insertions(+), 27 deletions(-) diff --git a/src/main.rs b/src/main.rs index f3db194..6320f84 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,5 @@ mod metadata; -use metadata::FileOptions; +use metadata::{FileOptions, PresetMeta}; use { once_cell::sync::Lazy, @@ -27,12 +27,11 @@ fn main() -> Result { env_logger::Builder::new().parse_filters("info").init(); } Runtime::new()?.block_on(async { - let mimetypes = Arc::new(RwLock::new(FileOptions::new( - &ARGS - .language + let mimetypes = Arc::new(RwLock::new(FileOptions::new(PresetMeta::Parameters( + ARGS.language .as_ref() .map_or(String::new(), |lang| format!(";lang={}", lang)), - ))); + )))); let listener = TcpListener::bind(&ARGS.addrs[..]).await?; log::info!("Listening on {:?}...", ARGS.addrs); loop { @@ -317,7 +316,15 @@ impl RequestHandle { } } - // Make sure the file opens successfully before sending the success header. + let data = self.metadata.write().await.get(&path); + + if let PresetMeta::FullHeader(status, meta) = data { + self.send_header(status, &meta).await?; + // do not try to access the file + return Ok(()); + } + + // Make sure the file opens successfully before sending a success header. let mut file = match tokio::fs::File::open(&path).await { Ok(file) => file, Err(e) => { @@ -327,21 +334,21 @@ impl RequestHandle { }; // Send header. 
- let mut locked = self.metadata.write().await; - let data = locked.get(&path); - let mime = if data.is_empty() || data.starts_with(';') { - // guess MIME type - if path.extension() == Some(OsStr::new("gmi")) { - format!("text/gemini{}", data) - } else { - let mime = mime_guess::from_path(&path).first_or_octet_stream(); - format!("{}{}", mime.essence_str(), data) + let mime = match data { + // this was already handled before opening the file + PresetMeta::FullHeader(..) => unreachable!(), + // treat this as the full MIME type + PresetMeta::FullMime(mime) => mime.clone(), + // guess the MIME type and add the parameters + PresetMeta::Parameters(params) => { + if path.extension() == Some(OsStr::new("gmi")) { + format!("text/gemini{}", params) + } else { + let mime = mime_guess::from_path(&path).first_or_octet_stream(); + format!("{}{}", mime.essence_str(), params) + } } - } else { - // this must be a full MIME type - data.to_owned() }; - drop(locked); self.send_header(20, &mime).await?; // Send body. diff --git a/src/metadata.rs b/src/metadata.rs index 433571f..9d6334a 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -3,6 +3,8 @@ use std::io::{BufRead, BufReader}; use std::path::PathBuf; use std::time::SystemTime; +static SIDECAR_FILENAME: &str = ".meta"; + /// A struct to store a string of metadata for each file retrieved from /// sidecar files called `.lang`. /// @@ -21,19 +23,47 @@ pub(crate) struct FileOptions { /// has changed. databases_read: BTreeMap, /// Stores the metadata for each file - file_meta: BTreeMap, + file_meta: BTreeMap, /// The default value to return - default: String, + default: PresetMeta, } -static SIDECAR_FILENAME: &str = ".mime"; +/// A struct to store the different alternatives that a line in the sidecar +/// file can have. +#[derive(Clone, Debug)] +pub(crate) enum PresetMeta { + /// A line that starts with a semicolon in the sidecar file, or an + /// empty line (to overwrite the default language command line flag). 
+ /// ```text + /// index.gmi: ;lang=en-GB + /// ``` + /// The content is interpreted as MIME parameters and are appended to what + /// agate guesses as the MIME type if the respective file can be found. + Parameters(String), + /// A line that is neither a `Parameters` line nor a `FullHeader` line. + /// ```text + /// strange.file: text/plain; lang=ee + /// ``` + /// Agate will send the complete line as the MIME type of the request if + /// the respective file can be found (i.e. a `20` status code). + FullMime(String), + /// A line that starts with a digit between 1 and 6 inclusive followed by + /// another digit and a space (U+0020). In the categories defined by the + /// Gemini specification you can pick a defined or non-defined status code. + /// ```text + /// gone.gmi: 52 This file is no longer available. + /// ``` + /// Agate will send this header line, CR, LF, and nothing else. Agate will + /// not try to access the requested file. + FullHeader(u8, String), +} impl FileOptions { - pub(crate) fn new(default: &str) -> Self { + pub(crate) fn new(default: PresetMeta) -> Self { Self { databases_read: BTreeMap::new(), file_meta: BTreeMap::new(), - default: default.to_string(), + default, } } @@ -93,7 +123,41 @@ impl FileOptions { // generate workspace-unique path let mut path = db_dir.clone(); path.push(parts[0].trim()); - self.file_meta.insert(path, parts[1].trim().to_string()); + // parse the line + let header = parts[1].trim(); + + let preset = if header.is_empty() || header.starts_with(';') { + PresetMeta::Parameters(header.to_string()) + } else if matches!(header.chars().next(), Some('1'..='6')) { + if header.len() < 3 + || !header.chars().nth(1).unwrap().is_ascii_digit() + || !header.chars().nth(2).unwrap().is_whitespace() + { + log::error!("Line for {:?} starts like a full header line, but it is incorrect; ignoring it.", path); + return; + } + let separator = header.chars().nth(2).unwrap(); + if separator != ' ' { + // the Gemini specification says that the 
third + // character has to be a space, so correct any + // other whitespace to it (e.g. tabs) + log::warn!("Full Header line for {:?} has an invalid character, treating {:?} as a space.", path, separator); + } + let status = header.chars() + .take(2) + .collect::() + .parse::() + // unwrap since we alread checked it's a number + .unwrap(); + // not taking a slice here because the separator + // might be a whitespace wider than a byte + let meta = header.chars().skip(3).collect::(); + PresetMeta::FullHeader(status, meta) + } else { + // must be a MIME type, but without status code + PresetMeta::FullMime(header.to_string()) + }; + self.file_meta.insert(path, preset); } }); self.databases_read @@ -106,12 +170,12 @@ impl FileOptions { /// The file path should consistenly be either absolute or relative to the /// working/content directory. If inconsisten file paths are used, this can /// lead to loading and storing sidecar files multiple times. - pub fn get(&mut self, file: &PathBuf) -> &str { + pub fn get(&mut self, file: &PathBuf) -> PresetMeta { let dir = file.parent().expect("no parent directory").to_path_buf(); if self.check_outdated(&dir) { self.read_database(&dir); } - self.file_meta.get(file).unwrap_or(&self.default) + self.file_meta.get(file).unwrap_or(&self.default).clone() } } From ac7af67cc540eb8e4750b6b8b0404c9510cd0dc3 Mon Sep 17 00:00:00 2001 From: Johann150 Date: Thu, 4 Feb 2021 17:53:32 +0100 Subject: [PATCH 08/10] reformat configuration README --- README.md | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 68af2d4..f87872b 100644 --- a/README.md +++ b/README.md @@ -41,18 +41,43 @@ agate --content path/to/content/ \ All of the command-line arguments are optional. Run `agate --help` to see the default values used when arguments are omitted. -When a client requests the URL `gemini://example.com/foo/bar`, Agate will respond with the file at `path/to/content/foo/bar`. 
If any segment of the requested path starts with a dot, agate will respond with a status code 52, even if the file does not exist (this behaviour can be disabled with `--serve-secret`). If there is a directory at that path, Agate will look for a file named `index.gmi` inside that directory. If there is no such file, but a file named `.directory-listing-ok` exists inside that directory, a basic directory listing is displayed. Files or directories whose name starts with a dot (e.g. the `.directory-listing-ok` file itself) are omitted from the list. +When a client requests the URL `gemini://example.com/foo/bar`, Agate will respond with the file at `path/to/content/foo/bar`. If any segment of the requested path starts with a dot, agate will respond with a status code 52, wether the file exists or not (this behaviour can be disabled with `--serve-secret`). If there is a directory at that path, Agate will look for a file named `index.gmi` inside that directory. -Agate will look for a file called `.mime` in the same directory as the file currently being served. If this file exists and has an entry for the current file, the respective data will be used to formulate the response header. -The lines of the file should have this format: +## Configuration +### Directory listing + +You can enable a basic directory listing for a directory by putting a file called `.directory-listing-ok` in that directory. This does not have an effect on subdirectories. +The directory listing will hide files and directories whose name starts with a dot (e.g. the `.directory-listing-ok` file itself or also the `.meta` configuration file). + +A file called `index.gmi` will always take precedence over a directory listing. + +### Meta-Presets + +You can put a file called `.meta` in a directory that stores some metadata about these files which Agate will use when serving these files. The file should be UTF-8 encoded. 
Like the `.directory-listing-ok` file, this file does not have an effect on subdirectories. +Lines starting with a `#` are comments and will be ignored like empty lines. All other lines must start with a file name (not a path), followed by a colon and then the metadata. + +The metadata can take one of four possible forms: +1. empty + Agate will not send a default language parameter, even if it was specified on the command line. +2. starting with a semicolon followed by MIME parameters + Agate will append the specified string onto the MIME type, if the file is found. +3. starting with a gemini status code (i.e. a digit 1-6 inclusive followed by another digit) and a space + Agate will send the metadata wether the file exists or not. The file will not be sent or accessed. +4. a MIME type, may include parameters + Agate will use this MIME type instead of what it would guess, if the file is found. + The default language parameter will not be used, even if it was specified on the command line. + +If a line violates the format or looks like case 3, but is incorrect, it might be ignored. You should check your logs. Please know that this configuration file is first read when a file from the respective directory is accessed. So no log messages after startup does not mean the `.meta` file is okay. + +Such a configuration file might look like this: ```text -: +# This line will be ignored. +index.gmi:;lang=en-UK +LICENSE:text/plain;charset=UTF-8 +gone.gmi:52 This file is no longer here, sorry. ``` -Where `` is just a filename (not a path) of a file in the same directory, and `` is the MIME type to be stored. If `` starts with a semicolon, agate will use the usual mechanism to determine the mime type of the file and append the specified parameters. -Lines that start with optional whitespace and `#` are ignored, as are lines that do not contain a `:`. Both parts are stripped of any leading and/or trailing whitespace. 
- [Gemini]: https://gemini.circumlunar.space/ [Rust]: https://www.rust-lang.org/ [home]: gemini://gem.limpet.net/agate/ From f1ff0fe0fe0d9ffb3a39cb9a5e2e3eace0ff8ddf Mon Sep 17 00:00:00 2001 From: Johann150 Date: Thu, 4 Feb 2021 17:56:50 +0100 Subject: [PATCH 09/10] use Mutex instead of RwLock Since we only ever need to access it mutably, there is no need to differentiate between mutable and immutable access. --- src/main.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main.rs b/src/main.rs index 6320f84..0a6f019 100644 --- a/src/main.rs +++ b/src/main.rs @@ -16,7 +16,7 @@ use { io::{AsyncReadExt, AsyncWriteExt}, net::{TcpListener, TcpStream}, runtime::Runtime, - sync::RwLock, + sync::Mutex, }, tokio_rustls::{server::TlsStream, TlsAcceptor}, url::{Host, Url}, @@ -27,7 +27,7 @@ fn main() -> Result { env_logger::Builder::new().parse_filters("info").init(); } Runtime::new()?.block_on(async { - let mimetypes = Arc::new(RwLock::new(FileOptions::new(PresetMeta::Parameters( + let mimetypes = Arc::new(Mutex::new(FileOptions::new(PresetMeta::Parameters( ARGS.language .as_ref() .map_or(String::new(), |lang| format!(";lang={}", lang)), @@ -179,13 +179,13 @@ fn acceptor() -> Result { struct RequestHandle { stream: TlsStream, log_line: String, - metadata: Arc>, + metadata: Arc>, } impl RequestHandle { /// Creates a new request handle for the given stream. If establishing the TLS /// session fails, returns a corresponding log line.
- async fn new(stream: TcpStream, metadata: Arc>) -> Result { + async fn new(stream: TcpStream, metadata: Arc>) -> Result { let log_line = format!( "{} {}", stream.local_addr().unwrap(), @@ -316,7 +316,7 @@ impl RequestHandle { } } - let data = self.metadata.write().await.get(&path); + let data = self.metadata.lock().await.get(&path); if let PresetMeta::FullHeader(status, meta) = data { self.send_header(status, &meta).await?; From c7485f6553f8cefd328a905d870e76adb107c55e Mon Sep 17 00:00:00 2001 From: Johann150 <20990607+Johann150@users.noreply.github.com> Date: Thu, 4 Feb 2021 18:02:17 +0100 Subject: [PATCH 10/10] small formatting fix added some intended line breaks --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f87872b..44f32ac 100644 --- a/README.md +++ b/README.md @@ -58,13 +58,13 @@ You can put a file called `.meta` in a directory that stores some metadata about Lines starting with a `#` are comments and will be ignored like empty lines. All other lines must start with a file name (not a path), followed by a colon and then the metadata. The metadata can take one of four possible forms: -1. empty +1. empty Agate will not send a default language parameter, even if it was specified on the command line. -2. starting with a semicolon followed by MIME parameters +2. starting with a semicolon followed by MIME parameters Agate will append the specified string onto the MIME type, if the file is found. -3. starting with a gemini status code (i.e. a digit 1-6 inclusive followed by another digit) and a space +3. starting with a gemini status code (i.e. a digit 1-6 inclusive followed by another digit) and a space Agate will send the metadata wether the file exists or not. The file will not be sent or accessed. -4. a MIME type, may include parameters +4. a MIME type, may include parameters Agate will use this MIME type instead of what it would guess, if the file is found. 
The default language parameter will not be used, even if it was specified on the command line.