// Revision management for a single object: traits for fetching, deserializing,
// serializing and merging revisions, plus `RevisionManager` and `RevisionLoader`.
//
// NOTE(review): the line breaks of this file appear to have been collapsed and
// generic parameter lists appear stripped (e.g. `Vec`, `Arc>`, `FlowyResult>`
// with no type arguments). As written, every `//` or `///` comment extends over
// the code that follows it on the same physical line. Restore the original
// formatting and type arguments from version control before building - TODO confirm.
//
// Overview of what the visible code does:
//
// * `RevisionCloudService::fetch_object` - takes a user id and an object id and
//   returns a `FutureResult`.
// * `RevisionObjectDeserializer` - `deserialize_revisions` builds `Self::Output`
//   from a list of revisions; `recover_from_revisions` optionally returns an
//   output together with an `i64` (used by `initialize` as the recovered rev id).
// * `RevisionObjectSerializer::combine_revisions` - turns a list of revisions
//   into a single result.
// * `RevisionMergeable::merge_revisions` (default method) - returns an internal
//   error for an empty list, returns the only element for a one-element list,
//   and otherwise creates a new `Revision` from the last revision's
//   `pair_rev_id()` and `md5` plus the bytes produced by `combine_revisions`.
// * `RevisionManager::new` - wraps the counter, persistence and merger in `Arc`s,
//   builds a `RevisionSnapshotController`, creates an `mpsc` channel with a
//   capacity of 1000 and spawns `RevisionQueue::run` with `tokio::spawn`.
// * `RevisionManager::initialize` - loads all records for `object_id` and calls
//   `De::deserialize_revisions`. On success it calls `sync_revision_records` and
//   sets the counter to the last revision's `rev_id` (0 when there are none).
//   On failure it first tries `restore_from_snapshot`; when that yields `None`
//   it falls back to `De::recover_from_revisions` and, if the recovered id is
//   smaller than the last id, asks persistence to delete the range
//   `recover_rev_id + 1 ..= last_rev_id` (the result of that call is ignored).
// * `RevisionManager::add_local_revision` - rejects empty data, calls
//   `generate_snapshot_if_need`, sends a `RevisionCommand::RevisionData` to the
//   queue and awaits the reply on a `oneshot` channel.
// * `RevisionManager::add_remote_revision` - rejects revisions with empty bytes,
//   stores the revision via `add_ack_revision` and sets the counter to its id.
// * `RevisionManager::ack_revision` - always returns `Ok(())`; a failing
//   `rev_persistence.ack_revision` is not reported to the caller.
//   NOTE(review): confirm that ignoring this error is intentional.
// * `RevisionManager::next_rev_id_pair` - reads the counter and then increments
//   it in two separate steps. NOTE(review): the pair is not taken atomically;
//   confirm callers never invoke this concurrently.
// * `RevisionLoader::load_revisions` - returns the revisions of all records
//   loaded from persistence; the `cloud` field is not read in the visible code.
use crate::rev_queue::{RevCommandSender, RevisionCommand, RevisionQueue}; use crate::{ RevisionPersistence, RevisionSnapshotController, RevisionSnapshotData, RevisionSnapshotPersistence, WSDataProviderDataSource, }; use bytes::Bytes; use flowy_error::{internal_error, FlowyError, FlowyResult}; use lib_infra::future::FutureResult; use lib_infra::util::md5; use revision_model::{Revision, RevisionRange}; use std::sync::atomic::AtomicI64; use std::sync::atomic::Ordering::SeqCst; use std::sync::Arc; use tokio::sync::{mpsc, oneshot}; pub trait RevisionCloudService: Send + Sync { /// Read the object's revision from remote /// Returns a list of revisions that used to build the object /// # Arguments /// /// * `user_id`: the id of the user /// * `object_id`: the id of the object /// fn fetch_object(&self, user_id: &str, object_id: &str) -> FutureResult, FlowyError>; } pub trait RevisionObjectDeserializer: Send + Sync { type Output; /// Deserialize the list of revisions into an concrete object type. 
/// /// # Arguments /// /// * `object_id`: the id of the object /// * `revisions`: a list of revisions that represent the object /// fn deserialize_revisions(object_id: &str, revisions: Vec) -> FlowyResult; fn recover_from_revisions(revisions: Vec) -> Option<(Self::Output, i64)>; } pub trait RevisionObjectSerializer: Send + Sync { /// Serialize a list of revisions into one in `Bytes` format /// /// * `revisions`: a list of revisions will be serialized to `Bytes` /// fn combine_revisions(revisions: Vec) -> FlowyResult; } /// `RevisionCompress` is used to compress multiple revisions into one revision /// pub trait RevisionMergeable: Send + Sync { fn merge_revisions( &self, _user_id: &str, object_id: &str, mut revisions: Vec, ) -> FlowyResult { if revisions.is_empty() { return Err(FlowyError::internal().context("Can't compact the empty revisions")); } if revisions.len() == 1 { return Ok(revisions.pop().unwrap()); } // Select the last version, making sure version numbers don't overlap let last_revision = revisions.last().unwrap(); let (base_rev_id, rev_id) = last_revision.pair_rev_id(); let md5 = last_revision.md5.clone(); let bytes = self.combine_revisions(revisions)?; Ok(Revision::new(object_id, base_rev_id, rev_id, bytes, md5)) } fn combine_revisions(&self, revisions: Vec) -> FlowyResult; } pub struct RevisionManager { pub object_id: String, user_id: String, rev_id_counter: Arc, rev_persistence: Arc>, rev_snapshot: Arc>, rev_compress: Arc, #[cfg(feature = "flowy_unit_test")] rev_ack_notifier: tokio::sync::broadcast::Sender, rev_queue: RevCommandSender, } impl RevisionManager { pub fn new( user_id: &str, object_id: &str, rev_persistence: RevisionPersistence, rev_compress: Compress, snapshot_persistence: Snapshot, ) -> Self where Snapshot: 'static + RevisionSnapshotPersistence, Compress: 'static + RevisionMergeable, { let rev_id_counter = Arc::new(RevIdCounter::new(0)); let rev_compress = Arc::new(rev_compress); let rev_persistence = Arc::new(rev_persistence); let 
rev_snapshot = RevisionSnapshotController::new( user_id, object_id, snapshot_persistence, rev_id_counter.clone(), rev_persistence.clone(), rev_compress.clone(), ); let (rev_queue, receiver) = mpsc::channel(1000); let queue = RevisionQueue::new( object_id.to_owned(), rev_id_counter.clone(), rev_persistence.clone(), rev_compress.clone(), receiver, ); tokio::spawn(queue.run()); Self { object_id: object_id.to_string(), user_id: user_id.to_owned(), rev_id_counter, rev_persistence, rev_snapshot: Arc::new(rev_snapshot), rev_compress, #[cfg(feature = "flowy_unit_test")] rev_ack_notifier: tokio::sync::broadcast::channel(1).0, rev_queue, } } #[tracing::instrument(name = "revision_manager_initialize", level = "trace", skip_all, fields(deserializer, object_id, deserialize_revisions) err)] pub async fn initialize( &mut self, _cloud: Option>, ) -> FlowyResult where De: RevisionObjectDeserializer, { let revision_records = self.rev_persistence.load_all_records(&self.object_id)?; tracing::Span::current().record("object_id", self.object_id.as_str()); tracing::Span::current().record("deserializer", std::any::type_name::()); let revisions: Vec = revision_records .iter() .map(|record| record.revision.clone()) .collect(); tracing::Span::current().record("deserialize_revisions", revisions.len()); let last_rev_id = revisions .last() .as_ref() .map(|revision| revision.rev_id) .unwrap_or(0); match De::deserialize_revisions(&self.object_id, revisions.clone()) { Ok(object) => { self .rev_persistence .sync_revision_records(&revision_records) .await?; self.rev_id_counter.set(last_rev_id); Ok(object) }, Err(e) => match self.rev_snapshot.restore_from_snapshot::(last_rev_id) { None => { tracing::info!("[Restore] iterate restore from each revision"); let (output, recover_rev_id) = De::recover_from_revisions(revisions).ok_or(e)?; tracing::info!( "[Restore] last_rev_id:{}, recover_rev_id: {}", last_rev_id, recover_rev_id ); self.rev_id_counter.set(recover_rev_id); // delete the revisions whose rev_id 
is greater than recover_rev_id if recover_rev_id < last_rev_id { let range = RevisionRange { start: recover_rev_id + 1, end: last_rev_id, }; tracing::info!("[Restore] delete revisions in range: {}", range); let _ = self.rev_persistence.delete_revisions_from_range(range); } Ok(output) }, Some((object, snapshot_rev)) => { let snapshot_rev_id = snapshot_rev.rev_id; let _ = self.rev_persistence.reset(vec![snapshot_rev]).await; // revision_records.retain(|record| record.revision.rev_id <= snapshot_rev_id); // let _ = self.rev_persistence.sync_revision_records(&revision_records).await?; self.rev_id_counter.set(snapshot_rev_id); Ok(object) }, }, } } pub async fn close(&self) { let _ = self .rev_persistence .merge_lagging_revisions(&self.rev_compress) .await; } pub async fn generate_snapshot(&self) { self.rev_snapshot.generate_snapshot().await; } pub async fn read_snapshot( &self, rev_id: Option, ) -> FlowyResult> { match rev_id { None => self.rev_snapshot.read_last_snapshot(), Some(rev_id) => self.rev_snapshot.read_snapshot(rev_id), } } pub async fn load_revisions(&self) -> FlowyResult> { let revisions = RevisionLoader { object_id: self.object_id.clone(), user_id: self.user_id.clone(), cloud: None, rev_persistence: self.rev_persistence.clone(), } .load_revisions() .await?; Ok(revisions) } #[tracing::instrument(level = "trace", skip(self, revisions), err)] pub async fn reset_object(&self, revisions: Vec) -> FlowyResult<()> { let rev_id = pair_rev_id_from_revisions(&revisions).1; self.rev_persistence.reset(revisions).await?; self.rev_id_counter.set(rev_id); Ok(()) } #[tracing::instrument(level = "debug", skip(self, revision), err)] pub async fn add_remote_revision(&self, revision: &Revision) -> Result<(), FlowyError> { if revision.bytes.is_empty() { return Err(FlowyError::internal().context("Remote revisions is empty")); } self.rev_persistence.add_ack_revision(revision).await?; self.rev_id_counter.set(revision.rev_id); Ok(()) } /// Adds the revision that generated by user 
editing // #[tracing::instrument(level = "trace", skip_all, err)] pub async fn add_local_revision( &self, data: Bytes, object_md5: String, ) -> Result { if data.is_empty() { return Err(FlowyError::internal().context("The data of the revisions is empty")); } self.rev_snapshot.generate_snapshot_if_need(); let (ret, rx) = oneshot::channel(); self .rev_queue .send(RevisionCommand::RevisionData { data, object_md5, ret, }) .await .map_err(internal_error)?; rx.await.map_err(internal_error)? } #[tracing::instrument(level = "debug", skip(self), err)] pub async fn ack_revision(&self, rev_id: i64) -> Result<(), FlowyError> { if self.rev_persistence.ack_revision(rev_id).await.is_ok() { #[cfg(feature = "flowy_unit_test")] let _ = self.rev_ack_notifier.send(rev_id); } Ok(()) } /// Returns the current revision id pub fn rev_id(&self) -> i64 { self.rev_id_counter.value() } pub async fn next_sync_rev_id(&self) -> Option { self.rev_persistence.next_sync_rev_id().await } pub fn next_rev_id_pair(&self) -> (i64, i64) { let cur = self.rev_id_counter.value(); let next = self.rev_id_counter.next_id(); (cur, next) } pub fn number_of_sync_revisions(&self) -> usize { self.rev_persistence.number_of_sync_records() } pub fn number_of_revisions_in_disk(&self) -> usize { self.rev_persistence.number_of_records_in_disk() } pub async fn get_revisions_in_range( &self, range: RevisionRange, ) -> Result, FlowyError> { let revisions = self.rev_persistence.revisions_in_range(&range).await?; Ok(revisions) } pub async fn next_sync_revision(&self) -> FlowyResult> { self.rev_persistence.next_sync_revision().await } pub async fn get_revision(&self, rev_id: i64) -> Option { self .rev_persistence .get(rev_id) .await .map(|record| record.revision) } } impl WSDataProviderDataSource for Arc> { fn next_revision(&self) -> FutureResult, FlowyError> { let rev_manager = self.clone(); FutureResult::new(async move { rev_manager.next_sync_revision().await }) } fn ack_revision(&self, rev_id: i64) -> FutureResult<(), 
FlowyError> { let rev_manager = self.clone(); FutureResult::new(async move { (*rev_manager).ack_revision(rev_id).await }) } fn current_rev_id(&self) -> i64 { self.rev_id() } } #[cfg(feature = "flowy_unit_test")] impl RevisionManager { pub async fn revision_cache(&self) -> Arc> { self.rev_persistence.clone() } pub fn ack_notify(&self) -> tokio::sync::broadcast::Receiver { self.rev_ack_notifier.subscribe() } pub fn get_all_revision_records( &self, ) -> FlowyResult> { self.rev_persistence.load_all_records(&self.object_id) } } pub struct RevisionLoader { pub object_id: String, pub user_id: String, pub cloud: Option>, pub rev_persistence: Arc>, } impl RevisionLoader { pub async fn load_revisions(&self) -> Result, FlowyError> { let records = self.rev_persistence.load_all_records(&self.object_id)?; let revisions = records .into_iter() .map(|record| record.revision) .collect::<_>(); Ok(revisions) } } /// Represents as the md5 of the revision object after applying the /// revision. For example, RevisionMD5 will be the md5 of the document /// content. 
#[derive(Debug, Clone)] pub struct RevisionMD5(String); impl RevisionMD5 { pub fn from_bytes>(bytes: T) -> Result { Ok(RevisionMD5(md5(bytes))) } pub fn into_inner(self) -> String { self.0 } pub fn is_equal(&self, s: &str) -> bool { self.0 == s } } impl std::convert::From for String { fn from(md5: RevisionMD5) -> Self { md5.0 } } impl std::convert::From<&str> for RevisionMD5 { fn from(s: &str) -> Self { Self(s.to_owned()) } } impl std::convert::From for RevisionMD5 { fn from(s: String) -> Self { Self(s) } } impl std::ops::Deref for RevisionMD5 { type Target = String; fn deref(&self) -> &Self::Target { &self.0 } } impl PartialEq for RevisionMD5 { fn eq(&self, other: &Self) -> bool { self.0 == other.0 } } impl std::cmp::Eq for RevisionMD5 {} fn pair_rev_id_from_revisions(revisions: &[Revision]) -> (i64, i64) { let mut rev_id = 0; revisions.iter().for_each(|revision| { if rev_id < revision.rev_id { rev_id = revision.rev_id; } }); if rev_id > 0 { (rev_id - 1, rev_id) } else { (0, rev_id) } } #[derive(Debug)] pub struct RevIdCounter(pub AtomicI64); impl RevIdCounter { pub fn new(n: i64) -> Self { Self(AtomicI64::new(n)) } pub fn next_id(&self) -> i64 { let _ = self.0.fetch_add(1, SeqCst); self.value() } pub fn value(&self) -> i64 { self.0.load(SeqCst) } pub fn set(&self, n: i64) { let _ = self.0.fetch_update(SeqCst, SeqCst, |_| Some(n)); } }