From f6d169567c922683be52f48164a2eabd7291107b Mon Sep 17 00:00:00 2001 From: Ana Gelez Date: Fri, 24 Jul 2020 17:27:12 +0200 Subject: [PATCH] Introduce Searcher::new This function does what was previously done in main: create a search index, and try to recover for errors if possible. This commit also fixes plume-cli to use the new Searcher API (that depends on a DbPool, not on a single Connection). --- plume-cli/src/main.rs | 36 +++++---- plume-cli/src/search.rs | 18 ++--- plume-models/src/db_conn.rs | 16 +++- plume-models/src/search/searcher.rs | 114 +++++++++++++++++++++++++++- src/main.rs | 93 +++-------------------- src/routes/search.rs | 3 +- 6 files changed, 164 insertions(+), 116 deletions(-) diff --git a/plume-cli/src/main.rs b/plume-cli/src/main.rs index 8dc11692..4828fbba 100644 --- a/plume-cli/src/main.rs +++ b/plume-cli/src/main.rs @@ -1,8 +1,7 @@ use dotenv; use clap::App; -use diesel::Connection; -use plume_models::{instance::Instance, Connection as Conn, CONFIG}; +use plume_models::{db_conn::init_pool, instance::Instance}; use std::io::{self, prelude::*}; mod instance; @@ -26,22 +25,27 @@ fn main() { Err(ref e) if e.not_found() => eprintln!("no .env was found"), e => e.map(|_| ()).unwrap(), } - let conn = Conn::establish(CONFIG.database_url.as_str()); - let _ = conn.as_ref().map(|conn| Instance::cache_local(conn)); + let db_pool = init_pool() + .expect("Couldn't create a database pool, please check DATABASE_URL in your .env"); + let _ = db_pool + .get() + .as_ref() + .map(|conn| Instance::cache_local(conn)); match matches.subcommand() { - ("instance", Some(args)) => { - instance::run(args, &conn.expect("Couldn't connect to the database.")) - } - ("migration", Some(args)) => { - migration::run(args, &conn.expect("Couldn't connect to the database.")) - } - ("search", Some(args)) => { - search::run(args, &conn.expect("Couldn't connect to the database.")) - } - ("users", Some(args)) => { - users::run(args, &conn.expect("Couldn't connect to the database.")) - } + ("instance", Some(args)) => instance::run( + args, + &db_pool.get().expect("Couldn't connect to the database."), + ), + ("migration", Some(args)) => migration::run( + args, + &db_pool.get().expect("Couldn't connect to the database."), + ), + ("search", Some(args)) => search::run(args, db_pool), + ("users", Some(args)) => users::run( + args, + &db_pool.get().expect("Couldn't connect to the database."), + ), _ => app.print_help().expect("Couldn't print help"), }; } diff --git a/plume-cli/src/search.rs b/plume-cli/src/search.rs index 7afa05d9..48c8a3c4 100644 --- a/plume-cli/src/search.rs +++ b/plume-cli/src/search.rs @@ -1,6 +1,6 @@ use clap::{App, Arg, ArgMatches, SubCommand}; -use plume_models::{search::Searcher, Connection, CONFIG}; +use plume_models::{db_conn::DbPool, search::Searcher, CONFIG}; use std::fs::{read_dir, remove_file}; use std::io::ErrorKind; use std::path::Path; @@ -52,7 +52,7 @@ pub fn command<'a, 'b>() -> App<'a, 'b> { ) } -pub fn run<'a>(args: &ArgMatches<'a>, conn: &Connection) { +pub fn run<'a>(args: &ArgMatches<'a>, conn: DbPool) { let conn = conn; match args.subcommand() { ("init", Some(x)) => init(x, conn), @@ -63,7 +63,7 @@ pub fn run<'a>(args: &ArgMatches<'a>, conn: &Connection) { } } -fn init<'a>(args: &ArgMatches<'a>, conn: &Connection) { +fn init<'a>(args: &ArgMatches<'a>, db_pool: DbPool) { let path = args .value_of("path") .map(|p| Path::new(p).join("search_index")) @@ -82,8 +82,8 @@ fn init<'a>(args: &ArgMatches<'a>, conn: &Connection) { } }; if can_do || force { - let searcher = Searcher::create(&path, &CONFIG.search_tokenizers).unwrap(); - refill(args, conn, Some(searcher)); + let searcher = Searcher::create(&path, db_pool.clone(), &CONFIG.search_tokenizers).unwrap(); + refill(args, db_pool, Some(searcher)); } else { eprintln!( "Can't create new index, {} exist and is not empty", @@ -92,16 +92,16 @@ fn init<'a>(args: &ArgMatches<'a>, conn: &Connection) { } } -fn refill<'a>(args: &ArgMatches<'a>, conn: &Connection, searcher: Option) { +fn refill<'a>(args: &ArgMatches<'a>, db_pool: DbPool, searcher: Option) { let path = args.value_of("path"); let path = match path { Some(path) => Path::new(path).join("search_index"), None => Path::new(&CONFIG.search_index).to_path_buf(), }; - let searcher = - searcher.unwrap_or_else(|| Searcher::open(&path, &CONFIG.search_tokenizers).unwrap()); + let searcher = searcher + .unwrap_or_else(|| Searcher::open(&path, db_pool, &CONFIG.search_tokenizers).unwrap()); - searcher.fill(conn).expect("Couldn't import post"); + searcher.fill().expect("Couldn't import post"); println!("Commiting result"); searcher.commit(); } diff --git a/plume-models/src/db_conn.rs b/plume-models/src/db_conn.rs index 5e461b18..dbff1661 100644 --- a/plume-models/src/db_conn.rs +++ b/plume-models/src/db_conn.rs @@ -1,4 +1,4 @@ -use crate::Connection; +use crate::{instance::Instance, Connection, CONFIG}; use diesel::r2d2::{ ConnectionManager, CustomizeConnection, Error as ConnError, Pool, PooledConnection, }; @@ -13,6 +13,20 @@ use std::ops::Deref; pub type DbPool = Pool>; +/// Initializes a database pool. +pub fn init_pool() -> Option { + let manager = ConnectionManager::::new(CONFIG.database_url.as_str()); + let mut builder = DbPool::builder() + .connection_customizer(Box::new(PragmaForeignKey)) + .min_idle(CONFIG.db_min_idle); + if let Some(max_size) = CONFIG.db_max_size { + builder = builder.max_size(max_size); + }; + let pool = builder.build(manager).ok()?; + Instance::cache_local(&pool.get().unwrap()); + Some(pool) +} + // From rocket documentation // Connection request guard type: a wrapper around an r2d2 pooled connection. diff --git a/plume-models/src/search/searcher.rs b/plume-models/src/search/searcher.rs index 4ed42f29..dda6cf82 100644 --- a/plume-models/src/search/searcher.rs +++ b/plume-models/src/search/searcher.rs @@ -1,11 +1,17 @@ use crate::{ config::SearchTokenizerConfig, db_conn::DbPool, instance::Instance, posts::Post, schema::posts, - search::query::PlumeQuery, tags::Tag, Error, Result, + search::query::PlumeQuery, tags::Tag, Error, Result, CONFIG, }; -use chrono::Datelike; +use chrono::{Datelike, Utc}; use diesel::{ExpressionMethods, QueryDsl, RunQueryDsl}; use itertools::Itertools; -use std::{cmp, fs::create_dir_all, io, path::Path, sync::Mutex}; +use std::{ + cmp, + fs::create_dir_all, + io, + path::Path, + sync::{Arc, Mutex}, +}; use tantivy::{ collector::TopDocs, directory::MmapDirectory, schema::*, Index, IndexReader, IndexWriter, ReloadPolicy, TantivyError, Term, @@ -29,6 +35,106 @@ pub struct Searcher { } impl Searcher { + /// Initializes a new `Searcher`, ready to be used by + /// Plume. + /// + /// The main task of this function is to try everything + /// to get a valid `Searcher`: + /// + /// - first it tries to open the search index normally (using the options from `CONFIG`) + /// - if it fails, it makes a back-up of the index files, deletes the original ones, + /// and recreate the whole index. It removes the backup only if the re-creation + /// succeeds. + /// + /// # Panics + /// + /// This function panics if it needs to create a backup and it can't, or if it fails + /// to recreate the search index. + /// + /// After that, it can also panic if there are still errors remaining. + /// + /// The panic messages are normally explicit enough for a human to + /// understand how to fix the issue when they see it. + pub fn new(db_pool: DbPool) -> Arc { + // We try to open the index a first time + let searcher = match Self::open( + &CONFIG.search_index, + db_pool.clone(), + &CONFIG.search_tokenizers, + ) { + // The index may be corrupted, inexistent or use an older format. + // In this case, we can easily recover by deleting and re-creating it. + Err(Error::Search(SearcherError::InvalidIndexDataError)) => { + if Self::create( + &CONFIG.search_index, + db_pool.clone(), + &CONFIG.search_tokenizers, + ) + .is_err() + { + let current_path = Path::new(&CONFIG.search_index); + let backup_path = + format!("{}.{}", ¤t_path.display(), Utc::now().timestamp()); + let backup_path = Path::new(&backup_path); + std::fs::rename(current_path, backup_path) + .expect("Error while backing up search index directory for re-creation"); + if Self::create( + &CONFIG.search_index, + db_pool.clone(), + &CONFIG.search_tokenizers, + ) + .is_ok() + { + if std::fs::remove_dir_all(backup_path).is_err() { + eprintln!( + "error on removing backup directory: {}. it remains", + backup_path.display() + ); + } + } else { + panic!("Error while re-creating search index in new index format. Remove search index and run `plm search init` manually."); + } + } + Self::open(&CONFIG.search_index, db_pool, &CONFIG.search_tokenizers) + } + // If it opened successfully or if it was another kind of + // error (that we don't know how to handle), don't do anything more + other => other, + }; + + // At this point, if there are still errors, we just panic + #[allow(clippy::match_wild_err_arm)] + match searcher { + Err(Error::Search(e)) => match e { + SearcherError::WriteLockAcquisitionError => panic!( + r#" +Your search index is locked. Plume can't start. To fix this issue +make sure no other Plume instance is started, and run: + + plm search unlock + +Then try to restart Plume. +"# + ), + SearcherError::IndexOpeningError => panic!( + r#" +Plume was unable to open the search index. If you created the index +before, make sure to run Plume in the same directory it was created in, or +to set SEARCH_INDEX accordingly. If you did not yet create the search +index, run this command: + + plm search init + +Then try to restart Plume +"# + ), + e => Err(e).unwrap(), + }, + Err(_) => panic!("Unexpected error while opening search index"), + Ok(s) => Arc::new(s), + } + } + pub fn schema() -> Schema { let tag_indexing = TextOptions::default().set_indexing_options( TextFieldIndexing::default() @@ -234,7 +340,7 @@ impl Searcher { let conn = match self.dbpool.get() { Ok(c) => c, - Err(_) => return Err(Error::DbPool), + Err(_) => return Vec::new(), }; res.get(min as usize..) diff --git a/src/main.rs b/src/main.rs index 4ae2f88f..71a90970 100755 --- a/src/main.rs +++ b/src/main.rs @@ -10,20 +10,10 @@ extern crate serde_json; #[macro_use] extern crate validator_derive; -use chrono::Utc; use clap::App; -use diesel::r2d2::ConnectionManager; -use plume_models::{ - db_conn::{DbPool, PragmaForeignKey}, - instance::Instance, - migrations::IMPORTED_MIGRATIONS, - search::{Searcher as UnmanagedSearcher, SearcherError}, - Connection, Error, CONFIG, -}; +use plume_models::{db_conn::init_pool, migrations::IMPORTED_MIGRATIONS, search::Searcher, CONFIG}; use rocket_csrf::CsrfFairingBuilder; use scheduled_thread_pool::ScheduledThreadPool; -use std::fs; -use std::path::Path; use std::process::exit; use std::sync::{Arc, Mutex}; use std::time::Duration; @@ -48,26 +38,6 @@ include!(concat!(env!("OUT_DIR"), "/templates.rs")); compile_i18n!(); -/// Initializes a database pool. -fn init_pool() -> Option { - match dotenv::dotenv() { - Ok(path) => println!("Configuration read from {}", path.display()), - Err(ref e) if e.not_found() => eprintln!("no .env was found"), - e => e.map(|_| ()).unwrap(), - } - - let manager = ConnectionManager::::new(CONFIG.database_url.as_str()); - let mut builder = DbPool::builder() - .connection_customizer(Box::new(PragmaForeignKey)) - .min_idle(CONFIG.db_min_idle); - if let Some(max_size) = CONFIG.db_max_size { - builder = builder.max_size(max_size); - }; - let pool = builder.build(manager).ok()?; - Instance::cache_local(&pool.get().unwrap()); - Some(pool) -} - fn main() { App::new("Plume") .bin_name("plume") @@ -82,6 +52,13 @@ and https://docs.joinplu.me/installation/init for more info. "#, ) .get_matches(); + + match dotenv::dotenv() { + Ok(path) => println!("Configuration read from {}", path.display()), + Err(ref e) if e.not_found() => eprintln!("no .env was found"), + e => e.map(|_| ()).unwrap(), + } + let dbpool = init_pool().expect("main: database pool initialization error"); if IMPORTED_MIGRATIONS .is_pending(&dbpool.get().unwrap()) @@ -100,60 +77,8 @@ Then try to restart Plume. ) } let workpool = ScheduledThreadPool::with_name("worker {}", num_cpus::get()); - // we want a fast exit here, so - let mut open_searcher = - UnmanagedSearcher::open(&CONFIG.search_index, &CONFIG.search_tokenizers); - if let Err(Error::Search(SearcherError::InvalidIndexDataError)) = open_searcher { - if UnmanagedSearcher::create(&CONFIG.search_index, &CONFIG.search_tokenizers).is_err() { - let current_path = Path::new(&CONFIG.search_index); - let backup_path = format!("{}.{}", ¤t_path.display(), Utc::now().timestamp()); - let backup_path = Path::new(&backup_path); - fs::rename(current_path, backup_path) - .expect("main: error on backing up search index directory for recreating"); - if UnmanagedSearcher::create(&CONFIG.search_index, &CONFIG.search_tokenizers).is_ok() { - if fs::remove_dir_all(backup_path).is_err() { - eprintln!( - "error on removing backup directory: {}. it remains", - backup_path.display() - ); - } - } else { - panic!("main: error on recreating search index in new index format. remove search index and run `plm search init` manually"); - } - } - open_searcher = UnmanagedSearcher::open(&CONFIG.search_index, &CONFIG.search_tokenizers); - } - #[allow(clippy::match_wild_err_arm)] - let searcher = match open_searcher { - Err(Error::Search(e)) => match e { - SearcherError::WriteLockAcquisitionError => panic!( - r#" -Your search index is locked. Plume can't start. To fix this issue -make sure no other Plume instance is started, and run: - - plm search unlock - -Then try to restart Plume. -"# - ), - SearcherError::IndexOpeningError => panic!( - r#" -Plume was unable to open the search index. If you created the index -before, make sure to run Plume in the same directory it was created in, or -to set SEARCH_INDEX accordingly. If you did not yet create the search -index, run this command: - - plm search init - -Then try to restart Plume -"# - ), - e => Err(e).unwrap(), - }, - Err(_) => panic!("Unexpected error while opening search index"), - Ok(s) => Arc::new(s), - }; + let searcher = Searcher::new(dbpool.clone()); let commiter = searcher.clone(); workpool.execute_with_fixed_delay( Duration::from_secs(5), diff --git a/src/routes/search.rs b/src/routes/search.rs index 0341bf8f..5def8263 100644 --- a/src/routes/search.rs +++ b/src/routes/search.rs @@ -51,7 +51,6 @@ macro_rules! param_to_query { #[get("/search?")] pub fn search(query: Option>, rockets: PlumeRocket) -> Ructe { - let conn = &*rockets.conn; let query = query.map(Form::into_inner).unwrap_or_default(); let page = query.page.unwrap_or_default(); let mut parsed_query = @@ -72,7 +71,7 @@ pub fn search(query: Option>, rockets: PlumeRocket) -> Ructe { } else { let res = rockets .searcher - .search_document(&conn, parsed_query, page.limits()); + .search_document(parsed_query, page.limits()); let next_page = if res.is_empty() { 0 } else { page.0 + 1 }; render!(search::result( &rockets.to_context(),