Plume/plume-models/src/search/mod.rs

mod query;
mod searcher;
mod tokenizer;
pub use self::query::PlumeQuery as Query;
pub use self::searcher::*;
pub use self::tokenizer::TokenizerKind;
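
// A rough usage sketch of the re-exported API, mirroring the tests below
// (a `searcher` built with `Searcher::create`/`Searcher::open` is assumed;
// the exact signatures live in `query.rs` and `searcher.rs`):
//
//     use std::str::FromStr;
//     let query = Query::from_str("title:plume after:2020-01-01").unwrap();
//     let first_ten = searcher.search_document(query, (0, 10));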
#[cfg(test)]
pub(crate) mod tests {
    use super::{Query, Searcher};
    #[cfg(feature = "search-lindera")]
    use super::TokenizerKind;
    use diesel::Connection;
    use plume_common::utils::random_hex;
    use std::env::temp_dir;
    use std::str::FromStr;

    use crate::{
        blogs::tests::fill_database,
        config::SearchTokenizerConfig,
        post_authors::*,
        posts::{NewPost, Post},
        safe_string::SafeString,
        tests::db,
        tests::pool,
        CONFIG,
    };

    pub(crate) fn get_searcher(tokenizers: &SearchTokenizerConfig) -> Searcher {
        let dir = temp_dir().join(format!("plume-test-{}", random_hex()));
        if dir.exists() {
            Searcher::open(&dir, pool(), tokenizers)
        } else {
            Searcher::create(&dir, pool(), tokenizers)
        }
        .unwrap()
    }
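
    // Note: each call builds its index under a fresh `plume-test-<hex>` temp
    // directory, so `create` is the expected path; `open` is only reached if
    // the random name happens to collide with a leftover directory.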
    #[test]
    fn get_first_token() {
        let vector = vec![
            ("+\"my token\" other", ("+\"my token\"", " other")),
            ("-\"my token\" other", ("-\"my token\"", " other")),
            (" \"my token\" other", ("\"my token\"", " other")),
            ("\"my token\" other", ("\"my token\"", " other")),
            ("+my token other", ("+my", " token other")),
            ("-my token other", ("-my", " token other")),
            (" my token other", ("my", " token other")),
            ("my token other", ("my", " token other")),
            ("+\"my token other", ("+\"my token other", "")),
            ("-\"my token other", ("-\"my token other", "")),
            (" \"my token other", ("\"my token other", "")),
            ("\"my token other", ("\"my token other", "")),
        ];
        for (source, res) in vector {
            assert_eq!(Query::get_first_token(source), res);
        }
    }
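
    // The vectors above pin down how `get_first_token` splits a query: a
    // leading `+` or `-` binds to the token that follows it, a quoted phrase
    // is consumed as a single token (quotes included), leading whitespace is
    // skipped, and an unterminated quote swallows the rest of the input.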
    #[test]
    fn from_str() {
        let vector = vec![
            ("", ""),
            ("a query", "a query"),
            ("\"a query\"", "\"a query\""),
            ("+a -\"query\"", "+a -query"),
            ("title:\"something\" a query", "a query title:something"),
            ("-title:\"something\" a query", "a query -title:something"),
            ("author:user@domain", "author:user@domain"),
            ("-author:@user@domain", "-author:user@domain"),
            ("before:2017-11-05 before:2018-01-01", "before:2017-11-05"),
            ("after:2017-11-05 after:2018-01-01", "after:2018-01-01"),
        ];
        for (source, res) in vector {
            assert_eq!(&Query::from_str(source).unwrap().to_string(), res);
            assert_eq!(Query::new().parse_query(source).to_string(), res);
        }
    }
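
    // Round-tripping through `from_str` normalizes the query: quotes around
    // single-word values are dropped, a leading `@` in author values is
    // stripped, field filters are reordered after plain terms, and duplicate
    // date bounds collapse to the tightest one (the earliest `before`, the
    // latest `after`).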
    #[test]
    fn setters() {
        let vector = vec![
            ("something", "title:something"),
            ("+something", "+title:something"),
            ("-something", "-title:something"),
            ("+\"something\"", "+title:something"),
            ("+some thing", "+title:\"some thing\""),
        ];
        for (source, res) in vector {
            assert_eq!(&Query::new().title(source, None).to_string(), res);
        }

        let vector = vec![
            ("something", "author:something"),
            ("+something", "+author:something"),
            ("-something", "-author:something"),
            ("+\"something\"", "+author:something"),
            ("+@someone@somewhere", "+author:someone@somewhere"),
        ];
        for (source, res) in vector {
            assert_eq!(&Query::new().author(source, None).to_string(), res);
        }
    }
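
    // The setters apply the same normalization per field: quotes are dropped
    // from single words, `@` is trimmed from author handles, and a multi-word
    // title is re-quoted so that it stays a single term
    // (`+some thing` becomes `+title:"some thing"`).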
    #[test]
    fn open() {
        let dir = temp_dir().join(format!("plume-test-{}", random_hex()));
        {
            Searcher::create(&dir, pool(), &CONFIG.search_tokenizers).unwrap();
        }
        Searcher::open(&dir, pool(), &CONFIG.search_tokenizers).unwrap();
    }

    #[test]
    fn create() {
        let dir = temp_dir().join(format!("plume-test-{}", random_hex()));
        assert!(Searcher::open(&dir, pool(), &CONFIG.search_tokenizers).is_err());
        {
            Searcher::create(&dir, pool(), &CONFIG.search_tokenizers).unwrap();
        }
        Searcher::open(&dir, pool(), &CONFIG.search_tokenizers).unwrap(); // verify the index was properly created
    }
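
    // Taken together, `open` and `create` pin down the index lifecycle:
    // `open` fails on a directory that was never indexed, `create` builds a
    // fresh index, and the inner block ends the first searcher's scope
    // (presumably releasing its writer lock) before the directory is opened
    // again.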
    #[test]
    fn search() {
        let conn = &db();
        conn.test_transaction::<_, (), _>(|| {
            let searcher = get_searcher(&CONFIG.search_tokenizers);
            let blog = &fill_database(conn).1[0];
            let author = &blog.list_authors(conn).unwrap()[0];

            let title = random_hex()[..8].to_owned();
            let mut post = Post::insert(
                conn,
                NewPost {
                    blog_id: blog.id,
                    slug: title.clone(),
                    title: title.clone(),
                    content: SafeString::new(""),
                    published: true,
                    license: "CC-BY-SA".to_owned(),
                    ap_url: "".to_owned(),
                    creation_date: None,
                    subtitle: "".to_owned(),
                    source: "".to_owned(),
                    cover_id: None,
                },
                &searcher,
            )
            .unwrap();
            PostAuthor::insert(
                conn,
                NewPostAuthor {
                    post_id: post.id,
                    author_id: author.id,
                },
            )
            .unwrap();

            searcher.commit();
            assert_eq!(
                searcher.search_document(Query::from_str(&title).unwrap(), (0, 1))[0].id,
                post.id
            );

            let newtitle = random_hex()[..8].to_owned();
            post.title = newtitle.clone();
            post.update(conn).unwrap();
            searcher.commit();
            assert_eq!(
                searcher.search_document(Query::from_str(&newtitle).unwrap(), (0, 1))[0].id,
                post.id
            );
            assert!(searcher
                .search_document(Query::from_str(&title).unwrap(), (0, 1))
                .is_empty());
            post.delete(conn, &searcher).unwrap();
            searcher.commit();
            assert!(searcher
                .search_document(Query::from_str(&newtitle).unwrap(), (0, 1))
                .is_empty());

            Ok(())
        });
    }
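
    // The test commits after every write: the insertion, the title update,
    // and the deletion each become visible to `search_document` only once
    // `searcher.commit()` has run, which is why the assertions are
    // interleaved with commits.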
    #[cfg(feature = "search-lindera")]
    #[test]
    fn search_japanese() {
        let conn = &db();
        conn.test_transaction::<_, (), _>(|| {
            let tokenizers = SearchTokenizerConfig {
                tag_tokenizer: TokenizerKind::Lindera,
                content_tokenizer: TokenizerKind::Lindera,
                property_tokenizer: TokenizerKind::Ngram,
            };
            let searcher = get_searcher(&tokenizers);
            let blog = &fill_database(conn).1[0];

            let title = random_hex()[..8].to_owned();
            let post = Post::insert(
                conn,
                NewPost {
                    blog_id: blog.id,
                    slug: title.clone(),
                    title: title.clone(),
                    content: SafeString::new("ブログエンジンPlumeです。"),
                    published: true,
                    license: "CC-BY-SA".to_owned(),
                    ap_url: "".to_owned(),
                    creation_date: None,
                    subtitle: "".to_owned(),
                    source: "".to_owned(),
                    cover_id: None,
                },
                &searcher,
            )
            .unwrap();
            searcher.commit();
            assert_eq!(
                searcher.search_document(Query::from_str("ブログエンジン").unwrap(), (0, 1))[0].id,
                post.id
            );
            assert_eq!(
                searcher.search_document(Query::from_str("Plume").unwrap(), (0, 1))[0].id,
                post.id
            );
            assert_eq!(
                searcher.search_document(Query::from_str("です").unwrap(), (0, 1))[0].id,
                post.id
            );
            assert_eq!(
                searcher.search_document(Query::from_str("。").unwrap(), (0, 1))[0].id,
                post.id
            );

            Ok(())
        });
    }
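
    // Lindera is a Japanese morphological analyzer: it can split
    // "ブログエンジンPlumeです。" into words despite the absence of
    // whitespace, which is what lets the sub-phrase "ブログエンジン" match.
    // The property tokenizer stays on `Ngram` here.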
    #[test]
    fn drop_writer() {
        let searcher = get_searcher(&CONFIG.search_tokenizers);
        searcher.drop_writer();
        get_searcher(&CONFIG.search_tokenizers);
    }
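
    // `drop_writer` releases the searcher's index writer; the follow-up
    // `get_searcher` call then checks that a fresh searcher can still be
    // set up afterwards.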
}