Browse Source

Introduce Searcher::new

This function does what was previously done in main:
create a search index, and try to recover for errors
if possible.

This commit also fixes plume-cli to use the new Searcher
API (that depends on a DbPool, not on a single Connection).
pull/813/head
Ana Gelez 4 months ago
parent
commit
f6d169567c
Signed by: igalic <me+github@igalic.co> GPG Key ID: ACFEFF7F6A123A86
6 changed files with 164 additions and 116 deletions
  1. +20
    -16
      plume-cli/src/main.rs
  2. +9
    -9
      plume-cli/src/search.rs
  3. +15
    -1
      plume-models/src/db_conn.rs
  4. +110
    -4
      plume-models/src/search/searcher.rs
  5. +9
    -84
      src/main.rs
  6. +1
    -2
      src/routes/search.rs

+ 20
- 16
plume-cli/src/main.rs View File

@@ -1,8 +1,7 @@
use dotenv;

use clap::App;
use diesel::Connection;
use plume_models::{instance::Instance, Connection as Conn, CONFIG};
use plume_models::{db_conn::init_pool, instance::Instance};
use std::io::{self, prelude::*};

mod instance;
@@ -26,22 +25,27 @@ fn main() {
Err(ref e) if e.not_found() => eprintln!("no .env was found"),
e => e.map(|_| ()).unwrap(),
}
let conn = Conn::establish(CONFIG.database_url.as_str());
let _ = conn.as_ref().map(|conn| Instance::cache_local(conn));
let db_pool = init_pool()
.expect("Couldn't create a database pool, please check DATABASE_URL in your .env");
let _ = db_pool
.get()
.as_ref()
.map(|conn| Instance::cache_local(conn));

match matches.subcommand() {
("instance", Some(args)) => {
instance::run(args, &conn.expect("Couldn't connect to the database."))
}
("migration", Some(args)) => {
migration::run(args, &conn.expect("Couldn't connect to the database."))
}
("search", Some(args)) => {
search::run(args, &conn.expect("Couldn't connect to the database."))
}
("users", Some(args)) => {
users::run(args, &conn.expect("Couldn't connect to the database."))
}
("instance", Some(args)) => instance::run(
args,
&db_pool.get().expect("Couldn't connect to the database."),
),
("migration", Some(args)) => migration::run(
args,
&db_pool.get().expect("Couldn't connect to the database."),
),
("search", Some(args)) => search::run(args, db_pool),
("users", Some(args)) => users::run(
args,
&db_pool.get().expect("Couldn't connect to the database."),
),
_ => app.print_help().expect("Couldn't print help"),
};
}


+ 9
- 9
plume-cli/src/search.rs View File

@@ -1,6 +1,6 @@
use clap::{App, Arg, ArgMatches, SubCommand};

use plume_models::{search::Searcher, Connection, CONFIG};
use plume_models::{db_conn::DbPool, search::Searcher, CONFIG};
use std::fs::{read_dir, remove_file};
use std::io::ErrorKind;
use std::path::Path;
@@ -52,7 +52,7 @@ pub fn command<'a, 'b>() -> App<'a, 'b> {
)
}

pub fn run<'a>(args: &ArgMatches<'a>, conn: &Connection) {
pub fn run<'a>(args: &ArgMatches<'a>, conn: DbPool) {
let conn = conn;
match args.subcommand() {
("init", Some(x)) => init(x, conn),
@@ -63,7 +63,7 @@ pub fn run<'a>(args: &ArgMatches<'a>, conn: &Connection) {
}
}

fn init<'a>(args: &ArgMatches<'a>, conn: &Connection) {
fn init<'a>(args: &ArgMatches<'a>, db_pool: DbPool) {
let path = args
.value_of("path")
.map(|p| Path::new(p).join("search_index"))
@@ -82,8 +82,8 @@ fn init<'a>(args: &ArgMatches<'a>, conn: &Connection) {
}
};
if can_do || force {
let searcher = Searcher::create(&path, &CONFIG.search_tokenizers).unwrap();
refill(args, conn, Some(searcher));
let searcher = Searcher::create(&path, db_pool.clone(), &CONFIG.search_tokenizers).unwrap();
refill(args, db_pool, Some(searcher));
} else {
eprintln!(
"Can't create new index, {} exist and is not empty",
@@ -92,16 +92,16 @@ fn init<'a>(args: &ArgMatches<'a>, conn: &Connection) {
}
}

fn refill<'a>(args: &ArgMatches<'a>, conn: &Connection, searcher: Option<Searcher>) {
fn refill<'a>(args: &ArgMatches<'a>, db_pool: DbPool, searcher: Option<Searcher>) {
let path = args.value_of("path");
let path = match path {
Some(path) => Path::new(path).join("search_index"),
None => Path::new(&CONFIG.search_index).to_path_buf(),
};
let searcher =
searcher.unwrap_or_else(|| Searcher::open(&path, &CONFIG.search_tokenizers).unwrap());
let searcher = searcher
.unwrap_or_else(|| Searcher::open(&path, db_pool, &CONFIG.search_tokenizers).unwrap());

searcher.fill(conn).expect("Couldn't import post");
searcher.fill().expect("Couldn't import post");
println!("Commiting result");
searcher.commit();
}


+ 15
- 1
plume-models/src/db_conn.rs View File

@@ -1,4 +1,4 @@
use crate::Connection;
use crate::{instance::Instance, Connection, CONFIG};
use diesel::r2d2::{
ConnectionManager, CustomizeConnection, Error as ConnError, Pool, PooledConnection,
};
@@ -13,6 +13,20 @@ use std::ops::Deref;

pub type DbPool = Pool<ConnectionManager<Connection>>;

/// Initializes a database pool.
pub fn init_pool() -> Option<DbPool> {
let manager = ConnectionManager::<Connection>::new(CONFIG.database_url.as_str());
let mut builder = DbPool::builder()
.connection_customizer(Box::new(PragmaForeignKey))
.min_idle(CONFIG.db_min_idle);
if let Some(max_size) = CONFIG.db_max_size {
builder = builder.max_size(max_size);
};
let pool = builder.build(manager).ok()?;
Instance::cache_local(&pool.get().unwrap());
Some(pool)
}

// From rocket documentation

// Connection request guard type: a wrapper around an r2d2 pooled connection.


+ 110
- 4
plume-models/src/search/searcher.rs View File

@@ -1,11 +1,17 @@
use crate::{
config::SearchTokenizerConfig, db_conn::DbPool, instance::Instance, posts::Post, schema::posts,
search::query::PlumeQuery, tags::Tag, Error, Result,
search::query::PlumeQuery, tags::Tag, Error, Result, CONFIG,
};
use chrono::Datelike;
use chrono::{Datelike, Utc};
use diesel::{ExpressionMethods, QueryDsl, RunQueryDsl};
use itertools::Itertools;
use std::{cmp, fs::create_dir_all, io, path::Path, sync::Mutex};
use std::{
cmp,
fs::create_dir_all,
io,
path::Path,
sync::{Arc, Mutex},
};
use tantivy::{
collector::TopDocs, directory::MmapDirectory, schema::*, Index, IndexReader, IndexWriter,
ReloadPolicy, TantivyError, Term,
@@ -29,6 +35,106 @@ pub struct Searcher {
}

impl Searcher {
/// Initializes a new `Searcher`, ready to be used by
/// Plume.
///
/// The main task of this function is to try everything
/// to get a valid `Searcher`:
///
/// - first it tries to open the search index normally (using the options from `CONFIG`)
/// - if it fails, it makes a back-up of the index files, deletes the original ones,
/// and recreate the whole index. It removes the backup only if the re-creation
/// succeeds.
///
/// # Panics
///
/// This function panics if it needs to create a backup and it can't, or if it fails
/// to recreate the search index.
///
/// After that, it can also panic if there are still errors remaining.
///
/// The panic messages are normally explicit enough for a human to
/// understand how to fix the issue when they see it.
pub fn new(db_pool: DbPool) -> Arc<Self> {
// We try to open the index a first time
let searcher = match Self::open(
&CONFIG.search_index,
db_pool.clone(),
&CONFIG.search_tokenizers,
) {
// The index may be corrupted, inexistent or use an older format.
// In this case, we can easily recover by deleting and re-creating it.
Err(Error::Search(SearcherError::InvalidIndexDataError)) => {
if Self::create(
&CONFIG.search_index,
db_pool.clone(),
&CONFIG.search_tokenizers,
)
.is_err()
{
let current_path = Path::new(&CONFIG.search_index);
let backup_path =
format!("{}.{}", &current_path.display(), Utc::now().timestamp());
let backup_path = Path::new(&backup_path);
std::fs::rename(current_path, backup_path)
.expect("Error while backing up search index directory for re-creation");
if Self::create(
&CONFIG.search_index,
db_pool.clone(),
&CONFIG.search_tokenizers,
)
.is_ok()
{
if std::fs::remove_dir_all(backup_path).is_err() {
eprintln!(
"error on removing backup directory: {}. it remains",
backup_path.display()
);
}
} else {
panic!("Error while re-creating search index in new index format. Remove search index and run `plm search init` manually.");
}
}
Self::open(&CONFIG.search_index, db_pool, &CONFIG.search_tokenizers)
}
// If it opened successfully or if it was another kind of
// error (that we don't know how to handle), don't do anything more
other => other,
};

// At this point, if there are still errors, we just panic
#[allow(clippy::match_wild_err_arm)]
match searcher {
Err(Error::Search(e)) => match e {
SearcherError::WriteLockAcquisitionError => panic!(
r#"
Your search index is locked. Plume can't start. To fix this issue
make sure no other Plume instance is started, and run:

plm search unlock

Then try to restart Plume.
"#
),
SearcherError::IndexOpeningError => panic!(
r#"
Plume was unable to open the search index. If you created the index
before, make sure to run Plume in the same directory it was created in, or
to set SEARCH_INDEX accordingly. If you did not yet create the search
index, run this command:

plm search init

Then try to restart Plume
"#
),
e => Err(e).unwrap(),
},
Err(_) => panic!("Unexpected error while opening search index"),
Ok(s) => Arc::new(s),
}
}

pub fn schema() -> Schema {
let tag_indexing = TextOptions::default().set_indexing_options(
TextFieldIndexing::default()
@@ -234,7 +340,7 @@ impl Searcher {

let conn = match self.dbpool.get() {
Ok(c) => c,
Err(_) => return Err(Error::DbPool),
Err(_) => return Vec::new(),
};

res.get(min as usize..)


+ 9
- 84
src/main.rs View File

@@ -10,20 +10,10 @@ extern crate serde_json;
#[macro_use]
extern crate validator_derive;

use chrono::Utc;
use clap::App;
use diesel::r2d2::ConnectionManager;
use plume_models::{
db_conn::{DbPool, PragmaForeignKey},
instance::Instance,
migrations::IMPORTED_MIGRATIONS,
search::{Searcher as UnmanagedSearcher, SearcherError},
Connection, Error, CONFIG,
};
use plume_models::{db_conn::init_pool, migrations::IMPORTED_MIGRATIONS, search::Searcher, CONFIG};
use rocket_csrf::CsrfFairingBuilder;
use scheduled_thread_pool::ScheduledThreadPool;
use std::fs;
use std::path::Path;
use std::process::exit;
use std::sync::{Arc, Mutex};
use std::time::Duration;
@@ -48,26 +38,6 @@ include!(concat!(env!("OUT_DIR"), "/templates.rs"));

compile_i18n!();

/// Initializes a database pool.
fn init_pool() -> Option<DbPool> {
match dotenv::dotenv() {
Ok(path) => println!("Configuration read from {}", path.display()),
Err(ref e) if e.not_found() => eprintln!("no .env was found"),
e => e.map(|_| ()).unwrap(),
}

let manager = ConnectionManager::<Connection>::new(CONFIG.database_url.as_str());
let mut builder = DbPool::builder()
.connection_customizer(Box::new(PragmaForeignKey))
.min_idle(CONFIG.db_min_idle);
if let Some(max_size) = CONFIG.db_max_size {
builder = builder.max_size(max_size);
};
let pool = builder.build(manager).ok()?;
Instance::cache_local(&pool.get().unwrap());
Some(pool)
}

fn main() {
App::new("Plume")
.bin_name("plume")
@@ -82,6 +52,13 @@ and https://docs.joinplu.me/installation/init for more info.
"#,
)
.get_matches();

match dotenv::dotenv() {
Ok(path) => println!("Configuration read from {}", path.display()),
Err(ref e) if e.not_found() => eprintln!("no .env was found"),
e => e.map(|_| ()).unwrap(),
}

let dbpool = init_pool().expect("main: database pool initialization error");
if IMPORTED_MIGRATIONS
.is_pending(&dbpool.get().unwrap())
@@ -100,60 +77,8 @@ Then try to restart Plume.
)
}
let workpool = ScheduledThreadPool::with_name("worker {}", num_cpus::get());
// we want a fast exit here, so
let mut open_searcher =
UnmanagedSearcher::open(&CONFIG.search_index, &CONFIG.search_tokenizers);
if let Err(Error::Search(SearcherError::InvalidIndexDataError)) = open_searcher {
if UnmanagedSearcher::create(&CONFIG.search_index, &CONFIG.search_tokenizers).is_err() {
let current_path = Path::new(&CONFIG.search_index);
let backup_path = format!("{}.{}", &current_path.display(), Utc::now().timestamp());
let backup_path = Path::new(&backup_path);
fs::rename(current_path, backup_path)
.expect("main: error on backing up search index directory for recreating");
if UnmanagedSearcher::create(&CONFIG.search_index, &CONFIG.search_tokenizers).is_ok() {
if fs::remove_dir_all(backup_path).is_err() {
eprintln!(
"error on removing backup directory: {}. it remains",
backup_path.display()
);
}
} else {
panic!("main: error on recreating search index in new index format. remove search index and run `plm search init` manually");
}
}
open_searcher = UnmanagedSearcher::open(&CONFIG.search_index, &CONFIG.search_tokenizers);
}
#[allow(clippy::match_wild_err_arm)]
let searcher = match open_searcher {
Err(Error::Search(e)) => match e {
SearcherError::WriteLockAcquisitionError => panic!(
r#"
Your search index is locked. Plume can't start. To fix this issue
make sure no other Plume instance is started, and run:

plm search unlock

Then try to restart Plume.
"#
),
SearcherError::IndexOpeningError => panic!(
r#"
Plume was unable to open the search index. If you created the index
before, make sure to run Plume in the same directory it was created in, or
to set SEARCH_INDEX accordingly. If you did not yet create the search
index, run this command:

plm search init

Then try to restart Plume
"#
),
e => Err(e).unwrap(),
},
Err(_) => panic!("Unexpected error while opening search index"),
Ok(s) => Arc::new(s),
};

let searcher = Searcher::new(dbpool.clone());
let commiter = searcher.clone();
workpool.execute_with_fixed_delay(
Duration::from_secs(5),


+ 1
- 2
src/routes/search.rs View File

@@ -51,7 +51,6 @@ macro_rules! param_to_query {

#[get("/search?<query..>")]
pub fn search(query: Option<Form<SearchQuery>>, rockets: PlumeRocket) -> Ructe {
let conn = &*rockets.conn;
let query = query.map(Form::into_inner).unwrap_or_default();
let page = query.page.unwrap_or_default();
let mut parsed_query =
@@ -72,7 +71,7 @@ pub fn search(query: Option<Form<SearchQuery>>, rockets: PlumeRocket) -> Ructe {
} else {
let res = rockets
.searcher
.search_document(&conn, parsed_query, page.limits());
.search_document(parsed_query, page.limits());
let next_page = if res.is_empty() { 0 } else { page.0 + 1 };
render!(search::result(
&rockets.to_context(),


Loading…
Cancel
Save