@ -1,10 +1,11 @@
use crate ::{
config ::SearchTokenizerConfig , instance::Instance , posts ::Post , schema ::posts ,
search ::query ::PlumeQuery , tags ::Tag , Connection, Result ,
config ::SearchTokenizerConfig , db_conn::DbPool , instance::Instance , posts ::Post , schema ::posts ,
search ::query ::PlumeQuery , tags ::Tag , Error, Result , CONFIG ,
} ;
use chrono ::Datelike ;
use chrono ::{ Datelike , Utc } ;
use diesel ::{ ExpressionMethods , QueryDsl , RunQueryDsl } ;
use itertools ::Itertools ;
use rocket ::request ::{ self , FromRequest , Outcome , Request , State } ;
use std ::{ cmp , fs ::create_dir_all , io , path ::Path , sync ::Mutex } ;
use tantivy ::{
collector ::TopDocs , directory ::MmapDirectory , schema ::* , Index , IndexReader , IndexWriter ,
@ -25,9 +26,110 @@ pub struct Searcher {
index : Index ,
reader : IndexReader ,
writer : Mutex < Option < IndexWriter > > ,
dbpool : DbPool ,
}
impl Searcher {
/// Builds the `Searcher` used by Plume, recovering the on-disk index
/// when that is possible.
///
/// Steps, in order:
///
/// - open the index at `CONFIG.search_index` with `CONFIG.search_tokenizers`;
/// - if that fails with `SearcherError::InvalidIndexDataError`, try to create
///   the index again at the same path;
/// - if that creation fails too, rename the index directory to
///   `<path>.<timestamp>` as a backup and create the index once more.
///   The backup is removed only if this second creation succeeds; a failed
///   removal is only reported on stderr;
/// - after a recovery attempt, open the index again.
///
/// Any other outcome of the first `open` (success, or a different error)
/// is handed unchanged to the final check.
///
/// # Panics
///
/// - if the index directory can't be renamed to its backup path;
/// - if the index can't be re-created once the backup has been made;
/// - if the final result is still an error: a locked index and an index that
///   can't be opened each get a dedicated message explaining how to fix the
///   issue, other search errors panic through `unwrap`, and any non-search
///   error panics with a generic message.
pub fn new ( db_pool : DbPool ) -> Self {
// First attempt: open the index with the configured path and tokenizers.
let searcher = match Self ::open (
& CONFIG . search_index ,
db_pool . clone ( ) ,
& CONFIG . search_tokenizers ,
) {
// The index may be corrupted, missing or use an older format.
// In this case, we can easily recover by deleting and re-creating it.
Err ( Error ::Search ( SearcherError ::InvalidIndexDataError ) ) = > {
// Cheapest recovery first: re-create the index at the same path.
// Only the success/failure of `create` is used; the value it returns is dropped.
if Self ::create (
& CONFIG . search_index ,
db_pool . clone ( ) ,
& CONFIG . search_tokenizers ,
)
. is_err ( )
{
// In-place creation failed: move the existing directory out of the way.
// The backup name is the index path followed by `.` and the current UTC timestamp.
let current_path = Path ::new ( & CONFIG . search_index ) ;
let backup_path =
format! ( "{}.{}" , & current_path . display ( ) , Utc ::now ( ) . timestamp ( ) ) ;
let backup_path = Path ::new ( & backup_path ) ;
std ::fs ::rename ( current_path , backup_path )
. expect ( "Error while backing up search index directory for re-creation" ) ;
// Second creation attempt, now that the original path has been freed.
if Self ::create (
& CONFIG . search_index ,
db_pool . clone ( ) ,
& CONFIG . search_tokenizers ,
)
. is_ok ( )
{
// The new index exists, so the backup is no longer needed.
// Failing to delete it is not fatal: warn and keep going.
if std ::fs ::remove_dir_all ( backup_path ) . is_err ( ) {
eprintln! (
"error on removing backup directory: {}. it remains" ,
backup_path . display ( )
) ;
}
} else {
// The backup directory is left on disk in this case.
panic! ( "Error while re-creating search index in new index format. Remove search index and run `plm search init` manually." ) ;
}
}
// Whichever recovery path ran, open the index again; any error from
// this call is handled by the final `match` below.
Self ::open ( & CONFIG . search_index , db_pool , & CONFIG . search_tokenizers )
}
// If it opened successfully or if it was another kind of
// error (that we don't know how to handle), don't do anything more
other = > other ,
} ;
// At this point, if there are still errors, we just panic
// (the wildcard `Err ( _ )` arm below is deliberate, hence the clippy allow).
#[ allow(clippy::match_wild_err_arm) ]
match searcher {
Err ( Error ::Search ( e ) ) = > match e {
SearcherError ::WriteLockAcquisitionError = > panic! (
r #"
Your search index is locked . Plume can ' t start . To fix this issue
make sure no other Plume instance is started , and run :
plm search unlock
Then try to restart Plume .
" #
) ,
SearcherError ::IndexOpeningError = > panic! (
r #"
Plume was unable to open the search index . If you created the index
before , make sure to run Plume in the same directory it was created in , or
to set SEARCH_INDEX accordingly . If you did not yet create the search
index , run this command :
plm search init
Then try to restart Plume
" #
) ,
// Any other search error: `unwrap` on an `Err` panics and prints the error's `Debug` form.
e = > Err ( e ) . unwrap ( ) ,
} ,
// Errors that are not search errors get a generic message.
Err ( _ ) = > panic! ( "Unexpected error while opening search index" ) ,
Ok ( s ) = > s ,
}
}
pub fn schema ( ) -> Schema {
let tag_indexing = TextOptions ::default ( ) . set_indexing_options (
TextFieldIndexing ::default ( )
@ -67,7 +169,11 @@ impl Searcher {
schema_builder . build ( )
}
pub fn create ( path : & dyn AsRef < Path > , tokenizers : & SearchTokenizerConfig ) -> Result < Self > {
pub fn create (
path : & dyn AsRef < Path > ,
dbpool : DbPool ,
tokenizers : & SearchTokenizerConfig ,
) -> Result < Self > {
let schema = Self ::schema ( ) ;
create_dir_all ( path ) . map_err ( | _ | SearcherError ::IndexCreationError ) ? ;
@ -95,10 +201,15 @@ impl Searcher {
. try_into ( )
. map_err ( | _ | SearcherError ::IndexCreationError ) ? ,
index ,
dbpool ,
} )
}
pub fn open ( path : & dyn AsRef < Path > , tokenizers : & SearchTokenizerConfig ) -> Result < Self > {
pub fn open (
path : & dyn AsRef < Path > ,
dbpool : DbPool ,
tokenizers : & SearchTokenizerConfig ,
) -> Result < Self > {
let mut index =
Index ::open ( MmapDirectory ::open ( path ) . map_err ( | _ | SearcherError ::IndexOpeningError ) ? )
. map_err ( | _ | SearcherError ::IndexOpeningError ) ? ;
@ -150,10 +261,11 @@ impl Searcher {
}
} ) ? ,
index ,
dbpool ,
} )
}
pub fn add_document ( & self , conn: & Connection , post: & Post ) -> Result < ( ) > {
pub fn add_document ( & self , post: & Post ) -> Result < ( ) > {
if ! post . published {
return Ok ( ( ) ) ;
}
@ -175,15 +287,19 @@ impl Searcher {
let lang = schema . get_field ( "lang" ) . unwrap ( ) ;
let license = schema . get_field ( "license" ) . unwrap ( ) ;
let conn = match self . dbpool . get ( ) {
Ok ( c ) = > c ,
Err ( _ ) = > return Err ( Error ::DbPool ) ,
} ;
let mut writer = self . writer . lock ( ) . unwrap ( ) ;
let writer = writer . as_mut ( ) . unwrap ( ) ;
writer . add_document ( doc ! (
post_id = > i64 ::from ( post . id ) ,
author = > post . get_authors ( conn ) ? . into_iter ( ) . map ( | u | u . fqn ) . join ( " " ) ,
author = > post . get_authors ( & conn ) ? . into_iter ( ) . map ( | u | u . fqn ) . join ( " " ) ,
creation_date = > i64 ::from ( post . creation_date . num_days_from_ce ( ) ) ,
instance = > Instance ::get ( conn , post . get_blog ( conn ) ? . instance_id ) ? . public_domain ,
tag = > Tag ::for_post ( conn , post . id ) ? . into_iter ( ) . map ( | t | t . tag ) . join ( " " ) ,
blog_name = > post . get_blog ( conn ) ? . title ,
instance = > Instance ::get ( & conn , post . get_blog ( & conn ) ? . instance_id ) ? . public_domain ,
tag = > Tag ::for_post ( & conn , post . id ) ? . into_iter ( ) . map ( | t | t . tag ) . join ( " " ) ,
blog_name = > post . get_blog ( & conn ) ? . title ,
content = > post . content . get ( ) . clone ( ) ,
subtitle = > post . subtitle . clone ( ) ,
title = > post . title . clone ( ) ,
@ -203,17 +319,12 @@ impl Searcher {
writer . delete_term ( doc_id ) ;
}
/// Replaces the indexed document of `post`: the existing entry is deleted,
/// then the post is added again through `add_document`.
///
/// Returns whatever `add_document` returns. The deletion happens first and its
/// call is not fallible here, so when `add_document` fails the previous entry
/// has already been asked to be removed and is not restored by this function.
///
/// NOTE(review): `add_document` returns `Ok` early for unpublished posts, so
/// calling this on an unpublished post presumably just drops it from the
/// index. Confirm that this is the intended behavior.
pub fn update_document ( & self , conn: & Connection , post: & Post ) -> Result < ( ) > {
pub fn update_document ( & self , post: & Post ) -> Result < ( ) > {
// Order matters: delete first so the post is not indexed twice.
self . delete_document ( post ) ;
self . add_document ( conn, post)
self . add_document ( post)
}
pub fn search_document (
& self ,
conn : & Connection ,
query : PlumeQuery ,
( min , max ) : ( i32 , i32 ) ,
) -> Vec < Post > {
pub fn search_document ( & self , query : PlumeQuery , ( min , max ) : ( i32 , i32 ) ) -> Vec < Post > {
let schema = self . index . schema ( ) ;
let post_id = schema . get_field ( "post_id" ) . unwrap ( ) ;
@ -222,24 +333,33 @@ impl Searcher {
let searcher = self . reader . searcher ( ) ;
let res = searcher . search ( & query . into_query ( ) , & collector ) . unwrap ( ) ;
let conn = match self . dbpool . get ( ) {
Ok ( c ) = > c ,
Err ( _ ) = > return Vec ::new ( ) ,
} ;
res . get ( min as usize .. )
. unwrap_or ( & [ ] )
. iter ( )
. filter_map ( | ( _ , doc_add ) | {
let doc = searcher . doc ( * doc_add ) . ok ( ) ? ;
let id = doc . get_first ( post_id ) ? ;
Post ::get ( conn , id . i64_value ( ) as i32 ) . ok ( )
Post ::get ( & conn , id . i64_value ( ) as i32 ) . ok ( )
//borrow checker don't want me to use filter_map or and_then here
} )
. collect ( )
}
/// Indexes every published post found in the `posts` table.
///
/// Loads all rows whose `published` column is `true` and calls
/// `update_document` on each of them, stopping at the first error.
///
/// # Errors
///
/// - `Error::DbPool` if no connection can be obtained from the pool;
/// - any error raised while loading the posts or updating a document,
///   propagated with `?`.
pub fn fill ( & self , conn : & Connection ) -> Result < ( ) > {
pub fn fill ( & self ) -> Result < ( ) > {
// Get a connection from the searcher's own pool; the underlying pool error
// is discarded and replaced by `Error::DbPool`.
let conn = match self . dbpool . get ( ) {
Ok ( c ) = > c ,
Err ( _ ) = > return Err ( Error ::DbPool ) ,
} ;
// NOTE(review): `conn` stays alive until the end of this function, while
// `add_document` (reached through `update_document`) takes another
// connection from the same pool for each post. This presumably needs at
// least two free connections in the pool; confirm the pool size.
for post in posts ::table
. filter ( posts ::published . eq ( true ) )
. load ::< Post > ( conn ) ?
. load ::< Post > ( & conn ) ?
{
self . update_document ( conn , & post ) ?
self . update_document ( & post ) ?
}
Ok ( ( ) )
}
@ -254,3 +374,12 @@ impl Searcher {
self . writer . lock ( ) . unwrap ( ) . take ( ) ;
}
}
/// Request guard giving handlers access to the `Searcher` by resolving the
/// `State < Searcher >` guard on the incoming request.
///
/// A non-success outcome of the `State` guard is propagated by `?`.
impl < ' a , ' r > FromRequest < ' a , ' r > for Searcher {
// No error payload: the guard carries no details when it does not succeed.
type Error = ( ) ;
fn from_request ( request : & ' a Request < ' r > ) -> request ::Outcome < Searcher , Self ::Error > {
let searcher = request . guard ::< State < ' _ , Searcher > > ( ) ? ;
// NOTE(review): `inner ( )` presumably returns a reference (Rocket 0.4 `State`),
// so the `*` moves a `Searcher` out of a borrow. That needs `Searcher: Copy`,
// which a struct holding a `Mutex` cannot be, so this likely does not compile.
// Consider a guard over `& Searcher` or `Arc < Searcher >` instead. Confirm.
Outcome ::Success ( * searcher . inner ( ) )
}
}