fix: search improvements (#5473)

* fix: search workspace sync indexing

* chore: update collab rev temporarily

* feat: revert comparison and implement index check

* chore: fixes after merge

* feat: clean code + support delete workspace

* fix: improve code

* fix: improvements after merge

* fix: cargo fmt

* fix: remove indices for workspace method

* fix: clippy errors

* fix: clippy too many arguments
This commit is contained in:
Mathias Mogensen
2024-06-05 13:44:32 +02:00
committed by GitHub
parent 6e7d044208
commit bd7977d8ba
16 changed files with 240 additions and 124 deletions

View File

@ -15,15 +15,15 @@ use crate::{
},
};
use collab::core::collab::{IndexContent, IndexContentReceiver};
use collab_folder::{View, ViewIcon, ViewIndexContent, ViewLayout};
use collab_folder::{folder_diff::FolderViewChange, View, ViewIcon, ViewIndexContent, ViewLayout};
use flowy_error::{FlowyError, FlowyResult};
use flowy_search_pub::entities::{FolderIndexManager, IndexManager, IndexableData};
use flowy_user::services::authenticate_user::AuthenticateUser;
use lib_dispatch::prelude::af_spawn;
use strsim::levenshtein;
use tantivy::{
collector::TopDocs, directory::MmapDirectory, doc, query::QueryParser, Index, IndexReader,
IndexWriter, Term,
collector::TopDocs, directory::MmapDirectory, doc, query::QueryParser, schema::Field, Index,
IndexReader, IndexWriter, Term,
};
use super::entities::FolderIndexData;
@ -40,7 +40,6 @@ const FOLDER_INDEX_DIR: &str = "folder_index";
impl FolderIndexManagerImpl {
pub fn new(auth_user: Option<Weak<AuthenticateUser>>) -> Self {
// TODO(Mathias): Temporarily disable search
let auth_user = match auth_user {
Some(auth_user) => auth_user,
None => {
@ -74,52 +73,45 @@ impl FolderIndexManagerImpl {
}
}
// We open the existing or newly created folder_index directory
// This is required by the Tantivy Index, as it will use it to store
// and read index data
let dir = MmapDirectory::open(index_path);
if let Err(e) = dir {
tracing::error!("FolderIndexManager failed to open index directory: {:?}", e);
return FolderIndexManagerImpl::empty();
}
// The folder schema is used to define the fields of the index along
// with how they are stored and if the field is indexed
let folder_schema = FolderSchema::new();
// We open or create an index that takes the directory r/w and the schema.
let index_res = Index::open_or_create(dir.unwrap(), folder_schema.schema.clone());
if let Err(e) = index_res {
tracing::error!("FolderIndexManager failed to open index: {:?}", e);
return FolderIndexManagerImpl::empty();
}
// We open the existing or newly created folder_index directory
// This is required by the Tantivy Index, as it will use it to store
// and read index data
let index = match MmapDirectory::open(index_path) {
// We open or create an index that takes the directory r/w and the schema.
Ok(dir) => match Index::open_or_create(dir, folder_schema.schema.clone()) {
Ok(index) => index,
Err(e) => {
tracing::error!("FolderIndexManager failed to open index: {:?}", e);
return FolderIndexManagerImpl::empty();
},
},
Err(e) => {
tracing::error!("FolderIndexManager failed to open index directory: {:?}", e);
return FolderIndexManagerImpl::empty();
},
};
let index = index_res.unwrap();
// We read the index reader, we only need one IndexReader per index
// We only need one IndexReader per index
let index_reader = index.reader();
if let Err(e) = index_reader {
tracing::error!(
"FolderIndexManager failed to instantiate index reader: {:?}",
e
);
return FolderIndexManagerImpl::empty();
}
let index_writer = index.writer(50_000_000);
if let Err(e) = index_writer {
tracing::error!(
"FolderIndexManager failed to instantiate index writer: {:?}",
e
);
return FolderIndexManagerImpl::empty();
}
let (index_reader, index_writer) = match (index_reader, index_writer) {
(Ok(reader), Ok(writer)) => (reader, writer),
_ => {
tracing::error!("FolderIndexManager failed to instantiate index writer and/or reader");
return FolderIndexManagerImpl::empty();
},
};
Self {
folder_schema: Some(folder_schema),
index: Some(index),
index_reader: Some(index_reader.unwrap()),
index_writer: Some(Arc::new(Mutex::new(index_writer.unwrap()))),
index_reader: Some(index_reader),
index_writer: Some(Arc::new(Mutex::new(index_writer))),
}
}
@ -129,7 +121,6 @@ impl FolderIndexManagerImpl {
}
let mut index_writer = self.get_index_writer()?;
let folder_schema = self.get_folder_schema()?;
let id_field = folder_schema.schema.get_field(FOLDER_ID_FIELD_NAME)?;
@ -223,15 +214,11 @@ impl FolderIndexManagerImpl {
) -> Result<Vec<SearchResultPB>, FlowyError> {
let folder_schema = self.get_folder_schema()?;
let index = match &self.index {
Some(index) => index,
None => return Err(FlowyError::folder_index_manager_unavailable()),
};
let index_reader = match &self.index_reader {
Some(index_reader) => index_reader,
None => return Err(FlowyError::folder_index_manager_unavailable()),
};
let (index, index_reader) = self
.index
.as_ref()
.zip(self.index_reader.as_ref())
.ok_or_else(FlowyError::folder_index_manager_unavailable)?;
let title_field = folder_schema.schema.get_field(FOLDER_TITLE_FIELD_NAME)?;
@ -272,6 +259,29 @@ impl FolderIndexManagerImpl {
let distance = levenshtein(query, term) as f64;
1.0 / (distance + 1.0)
}
/// Looks up the id, title, icon, icon type and workspace id fields in the
/// folder schema.
///
/// The fields are returned in the order
/// `(id, title, icon, icon_ty, workspace_id)`.
///
/// # Errors
///
/// Returns `FlowyError::folder_index_manager_unavailable()` when
/// `self.folder_schema` is `None`, and propagates the error from
/// `get_field` when one of the field names cannot be resolved.
fn get_schema_fields(&self) -> Result<(Field, Field, Field, Field, Field), FlowyError> {
  // Borrow the schema rather than cloning it: only field handles are read
  // from it, and they are returned by value.
  let schema = &self
    .folder_schema
    .as_ref()
    .ok_or_else(FlowyError::folder_index_manager_unavailable)?
    .schema;

  // Tuple elements are evaluated left to right, so the first missing field
  // is the one whose error is reported.
  Ok((
    schema.get_field(FOLDER_ID_FIELD_NAME)?,
    schema.get_field(FOLDER_TITLE_FIELD_NAME)?,
    schema.get_field(FOLDER_ICON_FIELD_NAME)?,
    schema.get_field(FOLDER_ICON_TY_FIELD_NAME)?,
    schema.get_field(FOLDER_WORKSPACE_ID_FIELD_NAME)?,
  ))
}
}
impl IndexManager for FolderIndexManagerImpl {
@ -288,9 +298,12 @@ impl IndexManager for FolderIndexManagerImpl {
let wid = workspace_id.clone();
af_spawn(async move {
while let Ok(msg) = rx.recv().await {
tracing::warn!("[Indexer] Message received: {:?}", msg);
match msg {
IndexContent::Create(value) => match serde_json::from_value::<ViewIndexContent>(value) {
Ok(view) => {
tracing::warn!("[Indexer] CREATE: {:?}", view);
let _ = indexer.add_index(IndexableData {
id: view.id,
data: view.name,
@ -303,6 +316,7 @@ impl IndexManager for FolderIndexManagerImpl {
},
IndexContent::Update(value) => match serde_json::from_value::<ViewIndexContent>(value) {
Ok(view) => {
tracing::warn!("[Indexer] UPDATE: {:?}", view);
let _ = indexer.update_index(IndexableData {
id: view.id,
data: view.name,
@ -314,6 +328,7 @@ impl IndexManager for FolderIndexManagerImpl {
Err(err) => tracing::error!("FolderIndexManager error deserialize: {:?}", err),
},
IndexContent::Delete(ids) => {
tracing::warn!("[Indexer] DELETE: {:?}", ids);
if let Err(e) = indexer.remove_indices(ids) {
tracing::error!("FolderIndexManager error deserialize: {:?}", e);
}
@ -326,15 +341,8 @@ impl IndexManager for FolderIndexManagerImpl {
fn update_index(&self, data: IndexableData) -> Result<(), FlowyError> {
let mut index_writer = self.get_index_writer()?;
let folder_schema = self.get_folder_schema()?;
let id_field = folder_schema.schema.get_field(FOLDER_ID_FIELD_NAME)?;
let title_field = folder_schema.schema.get_field(FOLDER_TITLE_FIELD_NAME)?;
let icon_field = folder_schema.schema.get_field(FOLDER_ICON_FIELD_NAME)?;
let icon_ty_field: tantivy::schema::Field =
folder_schema.schema.get_field(FOLDER_ICON_TY_FIELD_NAME)?;
let workspace_id_field = folder_schema
.schema
.get_field(FOLDER_WORKSPACE_ID_FIELD_NAME)?;
let (id_field, title_field, icon_field, icon_ty_field, workspace_id_field) =
self.get_schema_fields()?;
let delete_term = Term::from_field_text(id_field, &data.id.clone());
@ -361,7 +369,6 @@ impl IndexManager for FolderIndexManagerImpl {
let mut index_writer = self.get_index_writer()?;
let folder_schema = self.get_folder_schema()?;
let id_field = folder_schema.schema.get_field(FOLDER_ID_FIELD_NAME)?;
for id in ids {
let delete_term = Term::from_field_text(id_field, &id);
@ -376,15 +383,8 @@ impl IndexManager for FolderIndexManagerImpl {
fn add_index(&self, data: IndexableData) -> Result<(), FlowyError> {
let mut index_writer = self.get_index_writer()?;
let folder_schema = self.get_folder_schema()?;
let id_field = folder_schema.schema.get_field(FOLDER_ID_FIELD_NAME)?;
let title_field = folder_schema.schema.get_field(FOLDER_TITLE_FIELD_NAME)?;
let icon_field = folder_schema.schema.get_field(FOLDER_ICON_FIELD_NAME)?;
let icon_ty_field = folder_schema.schema.get_field(FOLDER_ICON_TY_FIELD_NAME)?;
let workspace_id_field = folder_schema
.schema
.get_field(FOLDER_WORKSPACE_ID_FIELD_NAME)?;
let (id_field, title_field, icon_field, icon_ty_field, workspace_id_field) =
self.get_schema_fields()?;
let (icon, icon_ty) = self.extract_icon(data.icon, data.layout);
@ -402,6 +402,24 @@ impl IndexManager for FolderIndexManagerImpl {
Ok(())
}
/// Deletes the index entries whose workspace id field matches
/// `workspace_id` and commits the change.
///
/// Useful for cleaning up the index when e.g. a workspace is removed or left.
fn remove_indices_for_workspace(&self, workspace_id: String) -> Result<(), FlowyError> {
  let mut writer = self.get_index_writer()?;
  let schema_holder = self.get_folder_schema()?;

  // The term targets the workspace id field, not the view id field.
  let workspace_id_field = schema_holder
    .schema
    .get_field(FOLDER_WORKSPACE_ID_FIELD_NAME)?;

  writer.delete_term(Term::from_field_text(workspace_id_field, &workspace_id));
  writer.commit()?;

  Ok(())
}
fn as_any(&self) -> &dyn Any {
self
}
@ -416,4 +434,35 @@ impl FolderIndexManager for FolderIndexManagerImpl {
let _ = self.index_all(indexable_data);
}
fn index_view_changes(
&self,
views: Vec<Arc<View>>,
changes: Vec<FolderViewChange>,
workspace_id: String,
) {
let mut views_iter = views.into_iter();
for change in changes {
match change {
FolderViewChange::Inserted { view_id } => {
let view = views_iter.find(|view| view.id == view_id);
if let Some(view) = view {
let indexable_data = IndexableData::from_view(view, workspace_id.clone());
let _ = self.add_index(indexable_data);
}
},
FolderViewChange::Updated { view_id } => {
let view = views_iter.find(|view| view.id == view_id);
if let Some(view) = view {
let indexable_data = IndexableData::from_view(view, workspace_id.clone());
let _ = self.update_index(indexable_data);
}
},
FolderViewChange::Deleted { view_ids } => {
tracing::warn!("[Indexer] ViewChange Reached Deleted: {:?}", view_ids);
let _ = self.remove_indices(view_ids);
},
};
}
}
}