|
|
|
@@ -1,10 +1,11 @@
|
|
|
|
|
use crate::{
|
|
|
|
|
config::SearchTokenizerConfig, instance::Instance, posts::Post, schema::posts,
|
|
|
|
|
search::query::PlumeQuery, tags::Tag, Connection, Result,
|
|
|
|
|
config::SearchTokenizerConfig, db_conn::DbPool, instance::Instance, posts::Post, schema::posts,
|
|
|
|
|
search::query::PlumeQuery, tags::Tag, Error, Result, CONFIG,
|
|
|
|
|
};
|
|
|
|
|
use chrono::Datelike;
|
|
|
|
|
use chrono::{Datelike, Utc};
|
|
|
|
|
use diesel::{ExpressionMethods, QueryDsl, RunQueryDsl};
|
|
|
|
|
use itertools::Itertools;
|
|
|
|
|
use rocket::request::{self, FromRequest, Outcome, Request, State};
|
|
|
|
|
use std::{cmp, fs::create_dir_all, io, path::Path, sync::Mutex};
|
|
|
|
|
use tantivy::{
|
|
|
|
|
collector::TopDocs, directory::MmapDirectory, schema::*, Index, IndexReader, IndexWriter,
|
|
|
|
@@ -25,9 +26,110 @@ pub struct Searcher {
|
|
|
|
|
index: Index,
|
|
|
|
|
reader: IndexReader,
|
|
|
|
|
writer: Mutex<Option<IndexWriter>>,
|
|
|
|
|
dbpool: DbPool,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl Searcher {
|
|
|
|
|
/// Initializes a new `Searcher`, ready to be used by
|
|
|
|
|
/// Plume.
|
|
|
|
|
///
|
|
|
|
|
/// The main task of this function is to try everything
|
|
|
|
|
/// to get a valid `Searcher`:
|
|
|
|
|
///
|
|
|
|
|
/// - first it tries to open the search index normally (using the options from `CONFIG`)
|
|
|
|
|
/// - if it fails, it makes a back-up of the index files, deletes the original ones,
|
|
|
|
|
/// and recreate the whole index. It removes the backup only if the re-creation
|
|
|
|
|
/// succeeds.
|
|
|
|
|
///
|
|
|
|
|
/// # Panics
|
|
|
|
|
///
|
|
|
|
|
/// This function panics if it needs to create a backup and it can't, or if it fails
|
|
|
|
|
/// to recreate the search index.
|
|
|
|
|
///
|
|
|
|
|
/// After that, it can also panic if there are still errors remaining.
|
|
|
|
|
///
|
|
|
|
|
/// The panic messages are normally explicit enough for a human to
|
|
|
|
|
/// understand how to fix the issue when they see it.
|
|
|
|
|
pub fn new(db_pool: DbPool) -> Self {
|
|
|
|
|
// We try to open the index a first time
|
|
|
|
|
let searcher = match Self::open(
|
|
|
|
|
&CONFIG.search_index,
|
|
|
|
|
db_pool.clone(),
|
|
|
|
|
&CONFIG.search_tokenizers,
|
|
|
|
|
) {
|
|
|
|
|
// The index may be corrupted, inexistent or use an older format.
|
|
|
|
|
// In this case, we can easily recover by deleting and re-creating it.
|
|
|
|
|
Err(Error::Search(SearcherError::InvalidIndexDataError)) => {
|
|
|
|
|
if Self::create(
|
|
|
|
|
&CONFIG.search_index,
|
|
|
|
|
db_pool.clone(),
|
|
|
|
|
&CONFIG.search_tokenizers,
|
|
|
|
|
)
|
|
|
|
|
.is_err()
|
|
|
|
|
{
|
|
|
|
|
let current_path = Path::new(&CONFIG.search_index);
|
|
|
|
|
let backup_path =
|
|
|
|
|
format!("{}.{}", &current_path.display(), Utc::now().timestamp());
|
|
|
|
|
let backup_path = Path::new(&backup_path);
|
|
|
|
|
std::fs::rename(current_path, backup_path)
|
|
|
|
|
.expect("Error while backing up search index directory for re-creation");
|
|
|
|
|
if Self::create(
|
|
|
|
|
&CONFIG.search_index,
|
|
|
|
|
db_pool.clone(),
|
|
|
|
|
&CONFIG.search_tokenizers,
|
|
|
|
|
)
|
|
|
|
|
.is_ok()
|
|
|
|
|
{
|
|
|
|
|
if std::fs::remove_dir_all(backup_path).is_err() {
|
|
|
|
|
eprintln!(
|
|
|
|
|
"error on removing backup directory: {}. it remains",
|
|
|
|
|
backup_path.display()
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
panic!("Error while re-creating search index in new index format. Remove search index and run `plm search init` manually.");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
Self::open(&CONFIG.search_index, db_pool, &CONFIG.search_tokenizers)
|
|
|
|
|
}
|
|
|
|
|
// If it opened successfully or if it was another kind of
|
|
|
|
|
// error (that we don't know how to handle), don't do anything more
|
|
|
|
|
other => other,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// At this point, if there are still errors, we just panic
|
|
|
|
|
#[allow(clippy::match_wild_err_arm)]
|
|
|
|
|
match searcher {
|
|
|
|
|
Err(Error::Search(e)) => match e {
|
|
|
|
|
SearcherError::WriteLockAcquisitionError => panic!(
|
|
|
|
|
r#"
|
|
|
|
|
Your search index is locked. Plume can't start. To fix this issue
|
|
|
|
|
make sure no other Plume instance is started, and run:
|
|
|
|
|
|
|
|
|
|
plm search unlock
|
|
|
|
|
|
|
|
|
|
Then try to restart Plume.
|
|
|
|
|
"#
|
|
|
|
|
),
|
|
|
|
|
SearcherError::IndexOpeningError => panic!(
|
|
|
|
|
r#"
|
|
|
|
|
Plume was unable to open the search index. If you created the index
|
|
|
|
|
before, make sure to run Plume in the same directory it was created in, or
|
|
|
|
|
to set SEARCH_INDEX accordingly. If you did not yet create the search
|
|
|
|
|
index, run this command:
|
|
|
|
|
|
|
|
|
|
plm search init
|
|
|
|
|
|
|
|
|
|
Then try to restart Plume
|
|
|
|
|
"#
|
|
|
|
|
),
|
|
|
|
|
e => Err(e).unwrap(),
|
|
|
|
|
},
|
|
|
|
|
Err(_) => panic!("Unexpected error while opening search index"),
|
|
|
|
|
Ok(s) => s,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn schema() -> Schema {
|
|
|
|
|
let tag_indexing = TextOptions::default().set_indexing_options(
|
|
|
|
|
TextFieldIndexing::default()
|
|
|
|
@@ -67,7 +169,11 @@ impl Searcher {
|
|
|
|
|
schema_builder.build()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn create(path: &dyn AsRef<Path>, tokenizers: &SearchTokenizerConfig) -> Result<Self> {
|
|
|
|
|
pub fn create(
|
|
|
|
|
path: &dyn AsRef<Path>,
|
|
|
|
|
dbpool: DbPool,
|
|
|
|
|
tokenizers: &SearchTokenizerConfig,
|
|
|
|
|
) -> Result<Self> {
|
|
|
|
|
let schema = Self::schema();
|
|
|
|
|
|
|
|
|
|
create_dir_all(path).map_err(|_| SearcherError::IndexCreationError)?;
|
|
|
|
@@ -95,10 +201,15 @@ impl Searcher {
|
|
|
|
|
.try_into()
|
|
|
|
|
.map_err(|_| SearcherError::IndexCreationError)?,
|
|
|
|
|
index,
|
|
|
|
|
dbpool,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn open(path: &dyn AsRef<Path>, tokenizers: &SearchTokenizerConfig) -> Result<Self> {
|
|
|
|
|
pub fn open(
|
|
|
|
|
path: &dyn AsRef<Path>,
|
|
|
|
|
dbpool: DbPool,
|
|
|
|
|
tokenizers: &SearchTokenizerConfig,
|
|
|
|
|
) -> Result<Self> {
|
|
|
|
|
let mut index =
|
|
|
|
|
Index::open(MmapDirectory::open(path).map_err(|_| SearcherError::IndexOpeningError)?)
|
|
|
|
|
.map_err(|_| SearcherError::IndexOpeningError)?;
|
|
|
|
@@ -150,10 +261,11 @@ impl Searcher {
|
|
|
|
|
}
|
|
|
|
|
})?,
|
|
|
|
|
index,
|
|
|
|
|
dbpool,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn add_document(&self, conn: &Connection, post: &Post) -> Result<()> {
|
|
|
|
|
pub fn add_document(&self, post: &Post) -> Result<()> {
|
|
|
|
|
if !post.published {
|
|
|
|
|
return Ok(());
|
|
|
|
|
}
|
|
|
|
@@ -175,15 +287,19 @@ impl Searcher {
|
|
|
|
|
let lang = schema.get_field("lang").unwrap();
|
|
|
|
|
let license = schema.get_field("license").unwrap();
|
|
|
|
|
|
|
|
|
|
let conn = match self.dbpool.get() {
|
|
|
|
|
Ok(c) => c,
|
|
|
|
|
Err(_) => return Err(Error::DbPool),
|
|
|
|
|
};
|
|
|
|
|
let mut writer = self.writer.lock().unwrap();
|
|
|
|
|
let writer = writer.as_mut().unwrap();
|
|
|
|
|
writer.add_document(doc!(
|
|
|
|
|
post_id => i64::from(post.id),
|
|
|
|
|
author => post.get_authors(conn)?.into_iter().map(|u| u.fqn).join(" "),
|
|
|
|
|
author => post.get_authors(&conn)?.into_iter().map(|u| u.fqn).join(" "),
|
|
|
|
|
creation_date => i64::from(post.creation_date.num_days_from_ce()),
|
|
|
|
|
instance => Instance::get(conn, post.get_blog(conn)?.instance_id)?.public_domain,
|
|
|
|
|
tag => Tag::for_post(conn, post.id)?.into_iter().map(|t| t.tag).join(" "),
|
|
|
|
|
blog_name => post.get_blog(conn)?.title,
|
|
|
|
|
instance => Instance::get(&conn, post.get_blog(&conn)?.instance_id)?.public_domain,
|
|
|
|
|
tag => Tag::for_post(&conn, post.id)?.into_iter().map(|t| t.tag).join(" "),
|
|
|
|
|
blog_name => post.get_blog(&conn)?.title,
|
|
|
|
|
content => post.content.get().clone(),
|
|
|
|
|
subtitle => post.subtitle.clone(),
|
|
|
|
|
title => post.title.clone(),
|
|
|
|
@@ -203,17 +319,12 @@ impl Searcher {
|
|
|
|
|
writer.delete_term(doc_id);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn update_document(&self, conn: &Connection, post: &Post) -> Result<()> {
|
|
|
|
|
pub fn update_document(&self, post: &Post) -> Result<()> {
|
|
|
|
|
self.delete_document(post);
|
|
|
|
|
self.add_document(conn, post)
|
|
|
|
|
self.add_document(post)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn search_document(
|
|
|
|
|
&self,
|
|
|
|
|
conn: &Connection,
|
|
|
|
|
query: PlumeQuery,
|
|
|
|
|
(min, max): (i32, i32),
|
|
|
|
|
) -> Vec<Post> {
|
|
|
|
|
pub fn search_document(&self, query: PlumeQuery, (min, max): (i32, i32)) -> Vec<Post> {
|
|
|
|
|
let schema = self.index.schema();
|
|
|
|
|
let post_id = schema.get_field("post_id").unwrap();
|
|
|
|
|
|
|
|
|
@@ -222,24 +333,33 @@ impl Searcher {
|
|
|
|
|
let searcher = self.reader.searcher();
|
|
|
|
|
let res = searcher.search(&query.into_query(), &collector).unwrap();
|
|
|
|
|
|
|
|
|
|
let conn = match self.dbpool.get() {
|
|
|
|
|
Ok(c) => c,
|
|
|
|
|
Err(_) => return Vec::new(),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
res.get(min as usize..)
|
|
|
|
|
.unwrap_or(&[])
|
|
|
|
|
.iter()
|
|
|
|
|
.filter_map(|(_, doc_add)| {
|
|
|
|
|
let doc = searcher.doc(*doc_add).ok()?;
|
|
|
|
|
let id = doc.get_first(post_id)?;
|
|
|
|
|
Post::get(conn, id.i64_value() as i32).ok()
|
|
|
|
|
Post::get(&conn, id.i64_value() as i32).ok()
|
|
|
|
|
//borrow checker don't want me to use filter_map or and_then here
|
|
|
|
|
})
|
|
|
|
|
.collect()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn fill(&self, conn: &Connection) -> Result<()> {
|
|
|
|
|
pub fn fill(&self) -> Result<()> {
|
|
|
|
|
let conn = match self.dbpool.get() {
|
|
|
|
|
Ok(c) => c,
|
|
|
|
|
Err(_) => return Err(Error::DbPool),
|
|
|
|
|
};
|
|
|
|
|
for post in posts::table
|
|
|
|
|
.filter(posts::published.eq(true))
|
|
|
|
|
.load::<Post>(conn)?
|
|
|
|
|
.load::<Post>(&conn)?
|
|
|
|
|
{
|
|
|
|
|
self.update_document(conn, &post)?
|
|
|
|
|
self.update_document(&post)?
|
|
|
|
|
}
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
@@ -254,3 +374,12 @@ impl Searcher {
|
|
|
|
|
self.writer.lock().unwrap().take();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl<'a, 'r> FromRequest<'a, 'r> for Searcher {
|
|
|
|
|
type Error = ();
|
|
|
|
|
|
|
|
|
|
fn from_request(request: &'a Request<'r>) -> request::Outcome<Searcher, Self::Error> {
|
|
|
|
|
let searcher = request.guard::<State<'_, Searcher>>()?;
|
|
|
|
|
Outcome::Success(*searcher.inner())
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|