mirror of https://github.com/AppFlowy-IO/AppFlowy.git, synced 2024-08-30 18:12:39 +00:00
refactor: File upload (#5542)
* chore: rename service
* refactor: upload
* chore: save upload metadata
* chore: add sql test
* chore: uploader
* chore: fix upload
* chore: cache file and remove after finish
* chore: retry upload
* chore: pause when network unreachable
* chore: add event test
* chore: add test
* chore: clippy
* chore: update client-api commit id
* chore: fix flutter test
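Two items above, retrying and pausing while the network is unreachable, are implemented outside the files shown below. As a rough illustration of the idea only (the helpers are hypothetical stand-ins, not APIs from this commit, and tokio's "time" feature is assumed), a single part upload with pause-and-retry might look like:

use std::time::Duration;

// Hypothetical stand-ins for this sketch; the real commit wires these to a
// network reachability watcher and the cloud service's upload_part call.
async fn is_network_reachable() -> bool {
  true
}

async fn upload_part_once(_part_number: i32, _body: Vec<u8>) -> Result<(), anyhow::Error> {
  Ok(())
}

async fn upload_part_with_retry(part_number: i32, body: Vec<u8>) -> Result<(), anyhow::Error> {
  let mut attempts = 0u32;
  loop {
    // Pause instead of burning retries while the network is unreachable.
    while !is_network_reachable().await {
      tokio::time::sleep(Duration::from_secs(5)).await;
    }
    match upload_part_once(part_number, body.clone()).await {
      Ok(()) => return Ok(()),
      Err(_) if attempts < 3 => {
        attempts += 1;
        // Back off before the next attempt: 2s, 4s, 8s.
        tokio::time::sleep(Duration::from_secs(1u64 << attempts)).await;
      },
      Err(err) => return Err(err),
    }
  }
}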
frontend/rust-lib/flowy-storage-pub/Cargo.toml (Normal file, 19 lines added)
@@ -0,0 +1,19 @@
[package]
name = "flowy-storage-pub"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
lib-infra.workspace = true
serde_json.workspace = true
serde.workspace = true
async-trait.workspace = true
mime = "0.3.17"
flowy-error = { workspace = true, features = ["impl_from_reqwest"] }
bytes.workspace = true
mime_guess = "2.0.4"
client-api-entity = { workspace = true }
tokio = { workspace = true, features = ["sync", "io-util"] }
anyhow = "1.0.86"
frontend/rust-lib/flowy-storage-pub/src/chunked_byte.rs (Normal file, 239 lines added)
@@ -0,0 +1,239 @@
use anyhow::anyhow;
use bytes::Bytes;
use std::fmt::Display;
use std::ops::Deref;
use std::path::Path;

use tokio::io::AsyncReadExt;

/// In Amazon S3, the minimum chunk size for multipart uploads is 5 MB, except for the last part,
/// which can be smaller (https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html).
pub const MIN_CHUNK_SIZE: usize = 5 * 1024 * 1024; // Minimum chunk size: 5 MB

pub struct ChunkedBytes {
  pub data: Bytes,
  pub chunk_size: i32,
  pub offsets: Vec<(usize, usize)>,
  pub current_offset: i32,
}

impl Deref for ChunkedBytes {
  type Target = Bytes;

  fn deref(&self) -> &Self::Target {
    &self.data
  }
}

impl Display for ChunkedBytes {
  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    write!(
      f,
      "ChunkedBytes: chunk_size: {}, offsets: {:?}, current_offset: {}",
      self.chunk_size, self.offsets, self.current_offset
    )
  }
}

impl ChunkedBytes {
  pub fn from_bytes_with_chunk_size(data: Bytes, chunk_size: i32) -> Result<Self, anyhow::Error> {
    if chunk_size < MIN_CHUNK_SIZE as i32 {
      return Err(anyhow!(
        "Chunk size should be greater than or equal to {}",
        MIN_CHUNK_SIZE
      ));
    }

    let offsets = split_into_chunks(&data, chunk_size as usize);
    Ok(ChunkedBytes {
      data,
      offsets,
      chunk_size,
      current_offset: 0,
    })
  }

  /// Used to create a `ChunkedBytes` from a `Bytes` object. The default chunk size is 5 MB.
  pub fn from_bytes(data: Bytes) -> Self {
    let chunk_size = MIN_CHUNK_SIZE as i32;
    let offsets = split_into_chunks(&data, MIN_CHUNK_SIZE);
    ChunkedBytes {
      data,
      offsets,
      chunk_size,
      current_offset: 0,
    }
  }

  pub async fn from_file<P: AsRef<Path>>(
    file_path: P,
    chunk_size: i32,
  ) -> Result<Self, tokio::io::Error> {
    let mut file = tokio::fs::File::open(file_path).await?;
    let mut buffer = Vec::new();
    file.read_to_end(&mut buffer).await?;
    let data = Bytes::from(buffer);

    let offsets = split_into_chunks(&data, chunk_size as usize);
    Ok(ChunkedBytes {
      data,
      offsets,
      chunk_size,
      current_offset: 0,
    })
  }

  pub fn set_current_offset(&mut self, offset: i32) {
    self.current_offset = offset;
  }

  pub fn iter(&self) -> ChunkedBytesIterator {
    ChunkedBytesIterator {
      chunked_data: self,
      current_index: self.current_offset as usize,
    }
  }
}

pub struct ChunkedBytesIterator<'a> {
  chunked_data: &'a ChunkedBytes,
  current_index: usize,
}

impl<'a> Iterator for ChunkedBytesIterator<'a> {
  type Item = Bytes;

  fn next(&mut self) -> Option<Self::Item> {
    if self.current_index >= self.chunked_data.offsets.len() {
      None
    } else {
      let (start, end) = self.chunked_data.offsets[self.current_index];
      self.current_index += 1;
      Some(self.chunked_data.data.slice(start..end))
    }
  }
}

// Function to split input bytes into several chunks and return offsets
pub fn split_into_chunks(data: &Bytes, chunk_size: usize) -> Vec<(usize, usize)> {
  let mut offsets = Vec::new();
  let mut start = 0;

  while start < data.len() {
    let end = std::cmp::min(start + chunk_size, data.len());
    offsets.push((start, end));
    start = end;
  }
  offsets
}

// Function to get chunk data using chunk number
pub async fn get_chunk(
  data: Bytes,
  chunk_number: usize,
  offsets: &[(usize, usize)],
) -> Result<Bytes, anyhow::Error> {
  if chunk_number >= offsets.len() {
    return Err(anyhow!("Chunk number out of range"));
  }

  let (start, end) = offsets[chunk_number];
  let chunk = data.slice(start..end);

  Ok(chunk)
}

#[cfg(test)]
mod tests {
  use crate::chunked_byte::{ChunkedBytes, MIN_CHUNK_SIZE};
  use bytes::Bytes;
  use std::env::temp_dir;
  use tokio::io::AsyncWriteExt;

  #[tokio::test]
  async fn test_chunked_bytes_less_than_chunk_size() {
    let data = Bytes::from(vec![0; 1024 * 1024]); // 1 MB of zeroes
    let chunked_data =
      ChunkedBytes::from_bytes_with_chunk_size(data.clone(), MIN_CHUNK_SIZE as i32).unwrap();

    // Check if the offsets are correct
    assert_eq!(chunked_data.offsets.len(), 1); // Should have 1 chunk
    assert_eq!(chunked_data.offsets[0], (0, 1024 * 1024));

    // Check if the data can be iterated correctly
    let mut iter = chunked_data.iter();
    assert_eq!(iter.next().unwrap().len(), 1024 * 1024);
    assert!(iter.next().is_none());
  }

  #[tokio::test]
  async fn test_chunked_bytes_from_bytes() {
    let data = Bytes::from(vec![0; 15 * 1024 * 1024]); // 15 MB of zeroes
    let chunked_data =
      ChunkedBytes::from_bytes_with_chunk_size(data.clone(), MIN_CHUNK_SIZE as i32).unwrap();

    // Check if the offsets are correct
    assert_eq!(chunked_data.offsets.len(), 3); // Should have 3 chunks
    assert_eq!(chunked_data.offsets[0], (0, 5 * 1024 * 1024));
    assert_eq!(chunked_data.offsets[1], (5 * 1024 * 1024, 10 * 1024 * 1024));
    assert_eq!(
      chunked_data.offsets[2],
      (10 * 1024 * 1024, 15 * 1024 * 1024)
    );

    // Check if the data can be iterated correctly
    let mut iter = chunked_data.iter();
    assert_eq!(iter.next().unwrap().len(), 5 * 1024 * 1024);
    assert_eq!(iter.next().unwrap().len(), 5 * 1024 * 1024);
    assert_eq!(iter.next().unwrap().len(), 5 * 1024 * 1024);
    assert!(iter.next().is_none());
  }

  #[tokio::test]
  async fn test_chunked_bytes_from_file() {
    // Create a temporary file with 15 MB of zeroes
    let mut file_path = temp_dir();
    file_path.push("test_file");

    let mut file = tokio::fs::File::create(&file_path).await.unwrap();
    file.write_all(&vec![0; 15 * 1024 * 1024]).await.unwrap();
    file.flush().await.unwrap();

    // Read the file into ChunkedBytes
    let chunked_data = ChunkedBytes::from_file(&file_path, MIN_CHUNK_SIZE as i32)
      .await
      .unwrap();

    // Check if the offsets are correct
    assert_eq!(chunked_data.offsets.len(), 3); // Should have 3 chunks
    assert_eq!(chunked_data.offsets[0], (0, 5 * 1024 * 1024));
    assert_eq!(chunked_data.offsets[1], (5 * 1024 * 1024, 10 * 1024 * 1024));
    assert_eq!(
      chunked_data.offsets[2],
      (10 * 1024 * 1024, 15 * 1024 * 1024)
    );

    // Check if the data can be iterated correctly
    let mut iter = chunked_data.iter();
    assert_eq!(iter.next().unwrap().len(), 5 * 1024 * 1024);
    assert_eq!(iter.next().unwrap().len(), 5 * 1024 * 1024);
    assert_eq!(iter.next().unwrap().len(), 5 * 1024 * 1024);
    assert!(iter.next().is_none());

    // Clean up the temporary file
    tokio::fs::remove_file(file_path).await.unwrap();
  }

  #[tokio::test]
  async fn test_chunked_bytes_with_current_offset() {
    let data = Bytes::from(vec![0; 15 * 1024 * 1024]); // 15 MB of zeroes
    let mut chunked_data =
      ChunkedBytes::from_bytes_with_chunk_size(data.clone(), MIN_CHUNK_SIZE as i32).unwrap();

    // Set the current offset to the second chunk
    chunked_data.set_current_offset(1);

    // Check if the iterator starts from the second chunk
    let mut iter = chunked_data.iter();
    assert_eq!(iter.next().unwrap().len(), 5 * 1024 * 1024); // Second chunk
    assert_eq!(iter.next().unwrap().len(), 5 * 1024 * 1024); // Third chunk
    assert!(iter.next().is_none());
  }
}
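A minimal usage sketch of `ChunkedBytes` from a consumer of this crate (the data size is arbitrary; crate and module paths follow from the manifest above):

use bytes::Bytes;
use flowy_storage_pub::chunked_byte::{ChunkedBytes, MIN_CHUNK_SIZE};

fn main() -> Result<(), anyhow::Error> {
  // 12 MB of data splits into three parts: 5 MB, 5 MB, and a final 2 MB.
  let data = Bytes::from(vec![0u8; 12 * 1024 * 1024]);
  let mut chunked = ChunkedBytes::from_bytes_with_chunk_size(data, MIN_CHUNK_SIZE as i32)?;

  for (i, chunk) in chunked.iter().enumerate() {
    println!("part {}: {} bytes", i + 1, chunk.len());
  }

  // Resuming: skip the first chunk if it was already uploaded.
  chunked.set_current_offset(1);
  assert_eq!(chunked.iter().count(), 2); // only the 2nd and 3rd chunks remain
  Ok(())
}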
frontend/rust-lib/flowy-storage-pub/src/cloud.rs (Normal file, 176 lines added)
@@ -0,0 +1,176 @@
use crate::storage::{CompletedPartRequest, CreateUploadResponse, UploadPartResponse};
use async_trait::async_trait;
use bytes::Bytes;
use flowy_error::{FlowyError, FlowyResult};
use lib_infra::future::FutureResult;
use mime::Mime;

#[async_trait]
pub trait StorageCloudService: Send + Sync {
  /// Returns the URL of a storage object.
  ///
  /// # Parameters
  /// - `object_id`: identity of the object whose URL is requested.
  ///
  /// # Returns
  /// - `Ok(String)`: The URL of the object.
  /// - `Err(Error)`: An error occurred during the operation.
  fn get_object_url(&self, object_id: ObjectIdentity) -> FutureResult<String, FlowyError>;

  /// Creates a new storage object.
  ///
  /// # Parameters
  /// - `url`: url of the object to be created.
  ///
  /// # Returns
  /// - `Ok(())`
  /// - `Err(Error)`: An error occurred during the operation.
  fn put_object(&self, url: String, object_value: ObjectValue) -> FutureResult<(), FlowyError>;

  /// Deletes a storage object by its URL.
  ///
  /// # Parameters
  /// - `url`: url of the object to be deleted.
  ///
  /// # Returns
  /// - `Ok(())`
  /// - `Err(Error)`: An error occurred during the operation.
  fn delete_object(&self, url: &str) -> FutureResult<(), FlowyError>;

  /// Fetches a storage object by its URL.
  ///
  /// # Parameters
  /// - `url`: url of the object
  ///
  /// # Returns
  /// - `Ok(ObjectValue)`: The returned object value.
  /// - `Err(Error)`: An error occurred during the operation.
  fn get_object(&self, url: String) -> FutureResult<ObjectValue, FlowyError>;

  fn get_object_url_v1(
    &self,
    workspace_id: &str,
    parent_dir: &str,
    file_id: &str,
  ) -> FlowyResult<String>;

  async fn create_upload(
    &self,
    workspace_id: &str,
    parent_dir: &str,
    file_id: &str,
    content_type: &str,
  ) -> Result<CreateUploadResponse, FlowyError>;

  async fn upload_part(
    &self,
    workspace_id: &str,
    parent_dir: &str,
    upload_id: &str,
    file_id: &str,
    part_number: i32,
    body: Vec<u8>,
  ) -> Result<UploadPartResponse, FlowyError>;

  async fn complete_upload(
    &self,
    workspace_id: &str,
    parent_dir: &str,
    upload_id: &str,
    file_id: &str,
    parts: Vec<CompletedPartRequest>,
  ) -> Result<(), FlowyError>;
}

pub trait FileStoragePlan: Send + Sync + 'static {
  fn storage_size(&self) -> FutureResult<u64, FlowyError>;
  fn maximum_file_size(&self) -> FutureResult<u64, FlowyError>;

  fn check_upload_object(&self, object: &StorageObject) -> FutureResult<(), FlowyError>;
}

pub struct ObjectIdentity {
  pub workspace_id: String,
  pub file_id: String,
  pub ext: String,
}

#[derive(Clone)]
pub struct ObjectValue {
  pub raw: Bytes,
  pub mime: Mime,
}

pub struct StorageObject {
  pub workspace_id: String,
  pub file_name: String,
  pub value: ObjectValueSupabase,
}

pub enum ObjectValueSupabase {
  File { file_path: String },
  Bytes { bytes: Bytes, mime: String },
}

impl ObjectValueSupabase {
  pub fn mime_type(&self) -> String {
    match self {
      ObjectValueSupabase::File { file_path } => mime_guess::from_path(file_path)
        .first_or_octet_stream()
        .to_string(),
      ObjectValueSupabase::Bytes { mime, .. } => mime.clone(),
    }
  }
}

impl StorageObject {
  /// Creates a `StorageObject` from a file.
  ///
  /// # Parameters
  ///
  /// * `file_name`: The name of the storage object.
  /// * `file_path`: The file path to the storage object's data.
  ///
  pub fn from_file<T: ToString>(workspace_id: &str, file_name: &str, file_path: T) -> Self {
    Self {
      workspace_id: workspace_id.to_string(),
      file_name: file_name.to_string(),
      value: ObjectValueSupabase::File {
        file_path: file_path.to_string(),
      },
    }
  }

  /// Creates a `StorageObject` from bytes.
  ///
  /// # Parameters
  ///
  /// * `file_name`: The name of the storage object.
  /// * `bytes`: The byte data of the storage object.
  /// * `mime`: The MIME type of the storage object.
  ///
  pub fn from_bytes<B: Into<Bytes>>(
    workspace_id: &str,
    file_name: &str,
    bytes: B,
    mime: String,
  ) -> Self {
    let bytes = bytes.into();
    Self {
      workspace_id: workspace_id.to_string(),
      file_name: file_name.to_string(),
      value: ObjectValueSupabase::Bytes { bytes, mime },
    }
  }

  /// Gets the file size of the `StorageObject`.
  ///
  /// # Returns
  ///
  /// The file size in bytes.
  pub fn file_size(&self) -> u64 {
    match &self.value {
      ObjectValueSupabase::File { file_path } => std::fs::metadata(file_path).unwrap().len(),
      ObjectValueSupabase::Bytes { bytes, .. } => bytes.len() as u64,
    }
  }
}
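Taken together with `ChunkedBytes`, the async methods above map onto the standard S3-style multipart sequence: create the upload, push each chunk as a numbered part, then complete. A minimal sketch, assuming `CreateUploadResponse` exposes an `upload_id` and leaving the `UploadPartResponse` to `CompletedPartRequest` field mapping as a hypothetical helper:

use flowy_error::FlowyError;
use flowy_storage_pub::chunked_byte::ChunkedBytes;
use flowy_storage_pub::cloud::StorageCloudService;
use flowy_storage_pub::storage::{CompletedPartRequest, UploadPartResponse};

// Hypothetical conversion; the real field mapping lives in client-api-entity.
fn to_completed_part(_resp: UploadPartResponse) -> CompletedPartRequest {
  unimplemented!("map the e-tag/part-number fields from the part response")
}

async fn upload_in_parts(
  service: &dyn StorageCloudService,
  workspace_id: &str,
  parent_dir: &str,
  file_id: &str,
  chunks: &ChunkedBytes,
) -> Result<(), FlowyError> {
  let upload = service
    .create_upload(workspace_id, parent_dir, file_id, "application/octet-stream")
    .await?;

  let mut parts = Vec::new();
  for (i, chunk) in chunks.iter().enumerate() {
    // Part numbers are 1-based in S3-style multipart uploads.
    let resp = service
      .upload_part(
        workspace_id,
        parent_dir,
        &upload.upload_id,
        file_id,
        (i + 1) as i32,
        chunk.to_vec(),
      )
      .await?;
    parts.push(to_completed_part(resp));
  }

  service
    .complete_upload(workspace_id, parent_dir, &upload.upload_id, file_id, parts)
    .await
}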
frontend/rust-lib/flowy-storage-pub/src/lib.rs (Normal file, 3 lines added)
@@ -0,0 +1,3 @@
pub mod chunked_byte;
pub mod cloud;
pub mod storage;
frontend/rust-lib/flowy-storage-pub/src/storage.rs (Normal file, 53 lines added)
@@ -0,0 +1,53 @@
use crate::chunked_byte::ChunkedBytes;
use async_trait::async_trait;
pub use client_api_entity::{CompletedPartRequest, CreateUploadResponse, UploadPartResponse};
use flowy_error::{FlowyError, FlowyResult};
use lib_infra::box_any::BoxAny;
use lib_infra::future::FutureResult;

#[async_trait]
pub trait StorageService: Send + Sync {
  fn upload_object(
    &self,
    workspace_id: &str,
    local_file_path: &str,
  ) -> FutureResult<String, FlowyError>;

  fn delete_object(&self, url: String, local_file_path: String) -> FlowyResult<()>;

  fn download_object(&self, url: String, local_file_path: String) -> FlowyResult<()>;

  fn create_upload(
    &self,
    workspace_id: &str,
    parent_dir: &str,
    local_file_path: &str,
  ) -> FutureResult<CreatedUpload, FlowyError>;

  async fn start_upload(&self, chunks: &ChunkedBytes, record: &BoxAny) -> Result<(), FlowyError>;

  async fn resume_upload(
    &self,
    workspace_id: &str,
    parent_dir: &str,
    file_id: &str,
  ) -> Result<(), FlowyError>;
}

pub struct CreatedUpload {
  pub url: String,
  pub file_id: String,
}

#[derive(Debug, Clone)]
pub struct UploadResult {
  pub file_id: String,
  pub status: UploadStatus,
}

#[derive(Debug, Clone, Eq, PartialEq)]
pub enum UploadStatus {
  Finish,
  Failed,
  InProgress,
}
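As a hedged sketch of how a caller might drive `resume_upload` on startup, assuming some persisted source of pending upload records (the `pending_uploads` helper below is hypothetical; the commit message says this metadata is saved via SQL):

use flowy_error::FlowyError;
use flowy_storage_pub::storage::StorageService;

// Hypothetical: in the real commit, pending uploads would be read back from
// the persisted upload metadata.
fn pending_uploads() -> Vec<(String, String, String)> {
  Vec::new()
}

async fn resume_all(storage: &dyn StorageService) -> Result<(), FlowyError> {
  for (workspace_id, parent_dir, file_id) in pending_uploads() {
    if let Err(err) = storage
      .resume_upload(&workspace_id, &parent_dir, &file_id)
      .await
    {
      eprintln!("failed to resume upload {}: {}", file_id, err);
    }
  }
  Ok(())
}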