AppFlowy/frontend/rust-lib/flowy-search/tests/tantivy_test.rs

use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::*;
use tantivy::{doc, DocAddress, Index, Score};

/// Smoke-tests tantivy's fuzzy query parsing against a single indexed document.
///
/// A throwaway index with an `id` field and a stored `title` field is created
/// in a temporary directory, one document is committed, and the misspelled
/// query `"sewhals"` is run against `title` with fuzzy matching enabled. The
/// test passes only if that query returns exactly the one indexed document.
#[test]
fn search_folder_test() {
  // Title of the only document in the index; reused below to verify the hit.
  const TITLE: &str = "The Old Man and the Seawhale";

  let mut schema_builder = Schema::builder();
  let id = schema_builder.add_text_field("id", TEXT);
  // `title` is STORED so its value can be read back from a search hit.
  let title = schema_builder.add_text_field("title", TEXT | STORED);
  let schema = schema_builder.build();

  // Indexing documents
  let index = Index::create_from_tempdir(schema.clone()).unwrap();

  // Here we use a buffer of 100MB that will be split
  // between indexing threads.
  let mut index_writer = index.writer(100_000_000).unwrap();

  // Let's index one document!
  index_writer
    .add_document(doc!(
        id => "123456789",
        title => TITLE,
    ))
    .unwrap();

  // We need to call .commit() explicitly to force the
  // index_writer to finish processing the documents in the queue,
  // flush the current index to the disk, and advertise
  // the existence of new documents.
  index_writer.commit().unwrap();

  // # Searching
  let reader = index.reader().unwrap();

  let searcher = reader.searcher();

  // Only `title` is searched by default. Fuzzy matching is enabled on it with
  // the arguments (prefix = true, distance = 2, transpose_cost_one = true).
  // The misspelling "sewhals" is two edits away from "seawhale" (ignoring
  // case), so it is expected to match the indexed title.
  let mut query_parser = QueryParser::for_index(&index, vec![title]);
  query_parser.set_field_fuzzy(title, true, 2, true);
  let query = query_parser.parse_query("sewhals").unwrap();

  // Perform search.
  // `top_docs` contains the 10 most relevant doc ids, sorted by decreasing scores...
  let top_docs: Vec<(Score, DocAddress)> =
    searcher.search(&query, &TopDocs::with_limit(10)).unwrap();

  // Without this check the test would pass even if fuzzy search found nothing.
  assert_eq!(
    top_docs.len(),
    1,
    "fuzzy query should match exactly the one indexed document"
  );

  for (_score, doc_address) in top_docs {
    // Retrieve the actual content of documents given its `doc_address`.
    let retrieved_doc = searcher.doc(doc_address).unwrap();
    let doc_json = schema.to_json(&retrieved_doc);
    println!("{}", doc_json);
    // The stored title must round-trip through the index unchanged.
    assert!(
      doc_json.contains(TITLE),
      "retrieved document should carry the stored title, got: {}",
      doc_json
    );
  }
}
feat: search mvp (#5064) * feat: implement folder indexer * feat: sqlite search views using fts5 * feat: add view indexing to user manager * feat: implement folder indexer * feat: add sqlite search documents * feat: add document indexing to user manager * feat: add document indexing to folder indexer * chore: update collab rev * feat: search frontend integration * refactor: search index * test: add event test * chore: fix ci * feat: initial command palette overlay impl (#4619) * chore: test search engine * chore: initial structure * chore: replace old search request * chore: enable log for lib-dispatch * chore: move search manager to core * feat: move traits and responsibility to search crate * feat: move search to search crate * feat: replace sqlite with tantivy * feat: deserialize tantivy documents * chore: fixes after rebase * chore: clean code * feat: fetch and sort results * fix: code review + cleaning * feat: support custom icons * feat: support view layout icons * feat: rename bloc and fix indexing * fix: prettify dialog * feat: score results * chore: update collab rev * feat: add recent view history to command palette * test: add integration_tests * fix: clippy changes * fix: focus traversal in cmd palette * fix: remove file after merging main * chore: code review and panic-safe * feat: index all views if index does not exist * chore: improve logic with conditional * chore: add is_empty check * chore: abstract logic from folder manager init * chore: update collab rev * chore: code review * chore: fixes after merge + update lock file * chore: revert cargo lock * fix: set icon type when removing icon * fix: code review + dependency inversion * fix: remove icon fix for not persisting icon type * test: simple tests manipulating views * test: create 100 views * fix: tauri build * chore: create 1000 views * chore: create util methods * chore: test * chore: test * chore: remove logs * chore: fix build.rs * chore: export models * chore: enable clear cache on 
Rust-CI * fix: navigate to newly created views * fix: force disable setting workspace listener on rebuilds * fix: remove late final * fix: missing returns * fix: localization and minor fixes * test: add index assert to large test * fix: missing section param after merging main * chore: try fix unzip file error * chore: lower the test * feat: show hint when result is in trash * feat: one index_writer per index * fix: minor changes after merge * fix: make create_log_filter public after merge * chore: fix test * chore: fix test * chore: flutter analyze * chore: flutter analyze * chore: fix tauri build --------- Co-authored-by: nathan <nathan@appflowy.io> Co-authored-by: Lucas.Xu <lucas.xu@appflowy.io> Co-authored-by: Nathan.fooo <86001920+appflowy@users.noreply.github.com> 2024-04-12 08:21:41 +00:00			`use tantivy::collector::TopDocs;`
			`use tantivy::query::QueryParser;`
			`use tantivy::schema::*;`
			`use tantivy::{doc, DocAddress, Index, Score};`

			`#[test]`
			`fn search_folder_test() {`
			`let mut schema_builder = Schema::builder();`
			`let id = schema_builder.add_text_field("id", TEXT);`
			`let title = schema_builder.add_text_field("title", TEXT \| STORED);`
			`let schema = schema_builder.build();`

			`// Indexing documents`
			`let index = Index::create_from_tempdir(schema.clone()).unwrap();`

			`// Here we use a buffer of 100MB that will be split`
			`// between indexing threads.`
			`let mut index_writer = index.writer(100_000_000).unwrap();`

			`// Let's index one document!`
			`index_writer`
			`.add_document(doc!(`
			`id => "123456789",`
			`title => "The Old Man and the Seawhale",`
			`))`
			`.unwrap();`

			`// We need to call .commit() explicitly to force the`
			`// index_writer to finish processing the documents in the queue,`
			`// flush the current index to the disk, and advertise`
			`// the existence of new documents.`
			`index_writer.commit().unwrap();`

			`// # Searching`
			`let reader = index.reader().unwrap();`

			`let searcher = reader.searcher();`

			`let mut query_parser = QueryParser::for_index(&index, vec![title]);`
			`query_parser.set_field_fuzzy(title, true, 2, true);`
			`let query = query_parser.parse_query("sewhals").unwrap();`

			`// Perform search.`
			``// `topdocs` contains the 10 most relevant doc ids, sorted by decreasing scores...``
			`let top_docs: Vec<(Score, DocAddress)> =`
			`searcher.search(&query, &TopDocs::with_limit(10)).unwrap();`

			`for (_score, doc_address) in top_docs {`
			``// Retrieve the actual content of documents given its `doc_address`.``
			`let retrieved_doc = searcher.doc(doc_address).unwrap();`
			`println!("{}", schema.to_json(&retrieved_doc));`
			`}`
			`}`