mirror of
https://github.com/AppFlowy-IO/AppFlowy.git
synced 2024-08-30 18:12:39 +00:00
1. Fix a potential infinite loop when composing deltas, caused by calculating the wrong UTF-16 code unit offset.
2. Add tests for code unit calculations with Chinese characters.
This commit is contained in:
parent
c456687a18
commit
7e7254b306
@ -44,7 +44,19 @@ async fn document_sync_insert_test() {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn document_sync_delete_test1() {
|
||||
async fn document_sync_insert_in_chinese() {
|
||||
let s = "好".to_owned();
|
||||
let offset = count_utf16_code_units(&s);
|
||||
let scripts = vec![
|
||||
InsertText("你", 0),
|
||||
InsertText("好", offset),
|
||||
AssertJson(r#"[{"insert":"你好\n"}]"#),
|
||||
];
|
||||
EditorTest::new().await.run_scripts(scripts).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn document_sync_delete_in_english() {
|
||||
let scripts = vec![
|
||||
InsertText("1", 0),
|
||||
InsertText("2", 1),
|
||||
@ -55,6 +67,19 @@ async fn document_sync_delete_test1() {
|
||||
EditorTest::new().await.run_scripts(scripts).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn document_sync_delete_in_chinese() {
|
||||
let s = "好".to_owned();
|
||||
let offset = count_utf16_code_units(&s);
|
||||
let scripts = vec![
|
||||
InsertText("你", 0),
|
||||
InsertText("好", offset),
|
||||
Delete(Interval::new(0, offset)),
|
||||
AssertJson(r#"[{"insert":"好\n"}]"#),
|
||||
];
|
||||
EditorTest::new().await.run_scripts(scripts).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn document_sync_replace_test() {
|
||||
let scripts = vec![
|
||||
|
@ -727,6 +727,7 @@ fn attributes_format_emoji() {
|
||||
let len = s.utf16_size();
|
||||
assert_eq!(3, len);
|
||||
assert_eq!(2, s.graphemes(true).count());
|
||||
|
||||
let ops = vec![
|
||||
Insert(0, emoji_s, 0),
|
||||
AssertDocJson(0, r#"[{"insert":"👋 \n"}]"#),
|
||||
|
@ -184,7 +184,7 @@ fn delta_get_ops_in_interval_7() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn delta_seek_1() {
|
||||
fn delta_op_seek() {
|
||||
let mut delta = RichTextDelta::default();
|
||||
let insert_a = OpBuilder::insert("12345").build();
|
||||
let retain_a = OpBuilder::retain(3).build();
|
||||
@ -196,41 +196,17 @@ fn delta_seek_1() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn delta_seek_2() {
|
||||
fn delta_utf16_code_unit_seek() {
|
||||
let mut delta = RichTextDelta::default();
|
||||
delta.add(OpBuilder::insert("12345").build());
|
||||
|
||||
let mut iter = DeltaIter::new(&delta);
|
||||
assert_eq!(iter.next_op_with_len(1).unwrap(), OpBuilder::insert("1").build());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn delta_seek_3() {
|
||||
let mut delta = RichTextDelta::default();
|
||||
delta.add(OpBuilder::insert("12345").build());
|
||||
|
||||
let mut iter = DeltaIter::new(&delta);
|
||||
assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("12").build());
|
||||
|
||||
assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("34").build());
|
||||
|
||||
assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("5").build());
|
||||
|
||||
assert_eq!(iter.next_op_with_len(1), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn delta_seek_4() {
|
||||
let mut delta = RichTextDelta::default();
|
||||
delta.add(OpBuilder::insert("12345").build());
|
||||
|
||||
let mut iter = DeltaIter::new(&delta);
|
||||
iter.seek::<CharMetric>(3);
|
||||
iter.seek::<Utf16CodeUnitMetric>(3);
|
||||
assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("45").build());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn delta_seek_5() {
|
||||
fn delta_utf16_code_unit_seek_with_attributes() {
|
||||
let mut delta = RichTextDelta::default();
|
||||
let attributes = AttributeBuilder::new()
|
||||
.add_attr(RichTextAttribute::Bold(true))
|
||||
@ -241,7 +217,7 @@ fn delta_seek_5() {
|
||||
delta.add(OpBuilder::insert("\n").build());
|
||||
|
||||
let mut iter = DeltaIter::new(&delta);
|
||||
iter.seek::<CharMetric>(0);
|
||||
iter.seek::<Utf16CodeUnitMetric>(0);
|
||||
|
||||
assert_eq!(
|
||||
iter.next_op_with_len(4).unwrap(),
|
||||
@ -250,12 +226,42 @@ fn delta_seek_5() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn delta_next_op_len_test() {
|
||||
fn delta_next_op_len() {
|
||||
let mut delta = RichTextDelta::default();
|
||||
delta.add(OpBuilder::insert("12345").build());
|
||||
let mut iter = DeltaIter::new(&delta);
|
||||
assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("12").build());
|
||||
assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("34").build());
|
||||
assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("5").build());
|
||||
assert_eq!(iter.next_op_with_len(1), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn delta_next_op_len_with_chinese() {
|
||||
let mut delta = RichTextDelta::default();
|
||||
delta.add(OpBuilder::insert("你好").build());
|
||||
|
||||
let mut iter = DeltaIter::new(&delta);
|
||||
iter.seek::<CharMetric>(3);
|
||||
assert_eq!(iter.next_op_len().unwrap(), 2);
|
||||
assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("你好").build());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn delta_next_op_len_with_english() {
|
||||
let mut delta = RichTextDelta::default();
|
||||
delta.add(OpBuilder::insert("ab").build());
|
||||
let mut iter = DeltaIter::new(&delta);
|
||||
assert_eq!(iter.next_op_len().unwrap(), 2);
|
||||
assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("ab").build());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn delta_next_op_len_after_seek() {
|
||||
let mut delta = RichTextDelta::default();
|
||||
delta.add(OpBuilder::insert("12345").build());
|
||||
let mut iter = DeltaIter::new(&delta);
|
||||
assert_eq!(iter.next_op_len().unwrap(), 5);
|
||||
iter.seek::<Utf16CodeUnitMetric>(3);
|
||||
assert_eq!(iter.next_op_len().unwrap(), 2);
|
||||
assert_eq!(iter.next_op_with_len(1).unwrap(), OpBuilder::insert("4").build());
|
||||
assert_eq!(iter.next_op_len().unwrap(), 1);
|
||||
@ -263,7 +269,7 @@ fn delta_next_op_len_test() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn delta_next_op_len_test2() {
|
||||
fn delta_next_op_len_none() {
|
||||
let mut delta = RichTextDelta::default();
|
||||
delta.add(OpBuilder::insert("12345").build());
|
||||
let mut iter = DeltaIter::new(&delta);
|
||||
@ -290,7 +296,7 @@ fn delta_next_op_with_len_cross_op_return_last() {
|
||||
delta.add(OpBuilder::insert("678").build());
|
||||
|
||||
let mut iter = DeltaIter::new(&delta);
|
||||
iter.seek::<CharMetric>(4);
|
||||
iter.seek::<Utf16CodeUnitMetric>(4);
|
||||
assert_eq!(iter.next_op_len().unwrap(), 1);
|
||||
assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::retain(1).build());
|
||||
}
|
||||
@ -475,7 +481,7 @@ fn transform_random_delta() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn transform_with_two_delta_test() {
|
||||
fn transform_with_two_delta() {
|
||||
let mut a = RichTextDelta::default();
|
||||
let mut a_s = String::new();
|
||||
a.insert(
|
||||
@ -515,7 +521,7 @@ fn transform_with_two_delta_test() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn transform_two_plain_delta_test() {
|
||||
fn transform_two_plain_delta() {
|
||||
let ops = vec![
|
||||
Insert(0, "123", 0),
|
||||
Insert(1, "456", 0),
|
||||
@ -527,7 +533,7 @@ fn transform_two_plain_delta_test() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn transform_two_plain_delta_test2() {
|
||||
fn transform_two_plain_delta2() {
|
||||
let ops = vec![
|
||||
Insert(0, "123", 0),
|
||||
Insert(1, "456", 0),
|
||||
@ -721,6 +727,16 @@ fn delta_invert_attribute_delta_with_attribute_delta() {
|
||||
TestBuilder::new().run_scripts::<PlainDoc>(ops);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn delta_compose_str() {
|
||||
let ops = vec![
|
||||
Insert(0, "1", 0),
|
||||
Insert(0, "2", 1),
|
||||
AssertDocJson(0, r#"[{"insert":"12\n"}]"#),
|
||||
];
|
||||
TestBuilder::new().run_scripts::<NewlineDoc>(ops);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn delta_compose_with_missing_delta() {
|
||||
|
@ -112,7 +112,6 @@ impl Document {
|
||||
let text = data.to_string();
|
||||
let interval = Interval::new(index, index);
|
||||
let _ = validate_interval(&self.delta, &interval)?;
|
||||
|
||||
let delta = self.view.insert(&self.delta, &text, interval)?;
|
||||
self.compose_delta(delta.clone())?;
|
||||
Ok(delta)
|
||||
|
@ -1,6 +1,6 @@
|
||||
use crate::{document::DeleteExt, util::is_newline};
|
||||
use lib_ot::{
|
||||
core::{Attributes, CharMetric, DeltaBuilder, DeltaIter, Interval, NEW_LINE},
|
||||
core::{Attributes, DeltaBuilder, DeltaIter, Interval, Utf16CodeUnitMetric, NEW_LINE},
|
||||
rich_text::{plain_attributes, RichTextDelta},
|
||||
};
|
||||
|
||||
@ -22,7 +22,7 @@ impl DeleteExt for PreserveLineFormatOnMerge {
|
||||
return None;
|
||||
}
|
||||
|
||||
iter.seek::<CharMetric>(interval.size() - 1);
|
||||
iter.seek::<Utf16CodeUnitMetric>(interval.size() - 1);
|
||||
let mut new_delta = DeltaBuilder::new()
|
||||
.retain(interval.start)
|
||||
.delete(interval.size())
|
||||
|
@ -1,6 +1,6 @@
|
||||
use crate::{document::InsertExt, util::is_newline};
|
||||
use lib_ot::{
|
||||
core::{CharMetric, DeltaBuilder, DeltaIter, NEW_LINE},
|
||||
core::{DeltaBuilder, DeltaIter, Utf16CodeUnitMetric, NEW_LINE},
|
||||
rich_text::{RichTextAttributeKey, RichTextAttributes, RichTextDelta},
|
||||
};
|
||||
|
||||
@ -14,7 +14,7 @@ impl InsertExt for ResetLineFormatOnNewLine {
|
||||
}
|
||||
|
||||
let mut iter = DeltaIter::new(delta);
|
||||
iter.seek::<CharMetric>(index);
|
||||
iter.seek::<Utf16CodeUnitMetric>(index);
|
||||
let next_op = iter.next_op()?;
|
||||
if !next_op.get_data().starts_with(NEW_LINE) {
|
||||
return None;
|
||||
|
@ -32,7 +32,7 @@ impl View {
|
||||
for ext in &self.insert_exts {
|
||||
if let Some(mut delta) = ext.apply(delta, interval.size(), text, interval.start) {
|
||||
trim(&mut delta);
|
||||
tracing::debug!("[{}]: applied, delta: {}", ext.ext_name(), delta);
|
||||
tracing::debug!("[{}]: process delta: {}", ext.ext_name(), delta);
|
||||
new_delta = Some(delta);
|
||||
break;
|
||||
}
|
||||
|
@ -35,13 +35,13 @@ where
|
||||
}
|
||||
|
||||
// get the next operation interval
|
||||
pub fn next_iv(&self) -> Interval { self.next_iv_before(None).unwrap_or_else(|| Interval::new(0, 0)) }
|
||||
pub fn next_iv(&self) -> Interval { self.next_iv_with_len(None).unwrap_or_else(|| Interval::new(0, 0)) }
|
||||
|
||||
pub fn next_op(&mut self) -> Option<Operation<T>> { self.next_with_len(None) }
|
||||
|
||||
// get the last operation before the end.
|
||||
// checkout the delta_next_op_with_len_cross_op_return_last test for more detail
|
||||
pub fn next_with_len(&mut self, force_end: Option<usize>) -> Option<Operation<T>> {
|
||||
pub fn next_with_len(&mut self, expected_len: Option<usize>) -> Option<Operation<T>> {
|
||||
let mut find_op = None;
|
||||
let holder = self.next_op.clone();
|
||||
let mut next_op = holder.as_ref();
|
||||
@ -53,7 +53,9 @@ where
|
||||
let mut consume_len = 0;
|
||||
while find_op.is_none() && next_op.is_some() {
|
||||
let op = next_op.take().unwrap();
|
||||
let interval = self.next_iv_before(force_end).unwrap_or_else(|| Interval::new(0, 0));
|
||||
let interval = self
|
||||
.next_iv_with_len(expected_len)
|
||||
.unwrap_or_else(|| Interval::new(0, 0));
|
||||
|
||||
// cache the op if the interval is empty. e.g. last_op_before(Some(0))
|
||||
if interval.is_empty() {
|
||||
@ -79,7 +81,7 @@ where
|
||||
}
|
||||
|
||||
if find_op.is_some() {
|
||||
if let Some(end) = force_end {
|
||||
if let Some(end) = expected_len {
|
||||
// try to find the next op before the index if consume_len less than index
|
||||
if end > consume_len && self.has_next() {
|
||||
return self.next_with_len(Some(end - consume_len));
|
||||
@ -111,12 +113,12 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
fn next_iv_before(&self, force_end: Option<usize>) -> Option<Interval> {
|
||||
fn next_iv_with_len(&self, expected_len: Option<usize>) -> Option<Interval> {
|
||||
let op = self.next_iter_op()?;
|
||||
let start = self.consume_count;
|
||||
let end = match force_end {
|
||||
let end = match expected_len {
|
||||
None => self.consume_count + op.len(),
|
||||
Some(index) => self.consume_count + min(index, op.len()),
|
||||
Some(expected_len) => self.consume_count + min(expected_len, op.len()),
|
||||
};
|
||||
|
||||
let intersect = Interval::new(start, end).intersect(self.consume_iv);
|
||||
@ -155,34 +157,34 @@ where
|
||||
|
||||
type SeekResult = Result<(), OTError>;
|
||||
pub trait Metric {
|
||||
fn seek<T: Attributes>(cursor: &mut OpCursor<T>, index: usize) -> SeekResult;
|
||||
fn seek<T: Attributes>(cursor: &mut OpCursor<T>, offset: usize) -> SeekResult;
|
||||
}
|
||||
|
||||
pub struct OpMetric();
|
||||
|
||||
impl Metric for OpMetric {
|
||||
fn seek<T: Attributes>(cursor: &mut OpCursor<T>, index: usize) -> SeekResult {
|
||||
let _ = check_bound(cursor.op_index, index)?;
|
||||
fn seek<T: Attributes>(cursor: &mut OpCursor<T>, offset: usize) -> SeekResult {
|
||||
let _ = check_bound(cursor.op_index, offset)?;
|
||||
let mut seek_cursor = OpCursor::new(cursor.delta, cursor.origin_iv);
|
||||
let mut offset = 0;
|
||||
let mut cur_offset = 0;
|
||||
while let Some((_, op)) = seek_cursor.iter.next() {
|
||||
offset += op.len();
|
||||
if offset > index {
|
||||
cur_offset += op.len();
|
||||
if cur_offset > offset {
|
||||
break;
|
||||
}
|
||||
}
|
||||
cursor.descend(offset);
|
||||
cursor.descend(cur_offset);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct CharMetric();
|
||||
pub struct Utf16CodeUnitMetric();
|
||||
|
||||
impl Metric for CharMetric {
|
||||
fn seek<T: Attributes>(cursor: &mut OpCursor<T>, index: usize) -> SeekResult {
|
||||
if index > 0 {
|
||||
let _ = check_bound(cursor.consume_count, index)?;
|
||||
let _ = cursor.next_with_len(Some(index));
|
||||
impl Metric for Utf16CodeUnitMetric {
|
||||
fn seek<T: Attributes>(cursor: &mut OpCursor<T>, offset: usize) -> SeekResult {
|
||||
if offset > 0 {
|
||||
let _ = check_bound(cursor.consume_count, offset)?;
|
||||
let _ = cursor.next_with_len(Some(offset));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
@ -187,7 +187,7 @@ where
|
||||
}
|
||||
},
|
||||
Operation::Insert(insert) => {
|
||||
inverted.delete(insert.count_of_utf16_code_units());
|
||||
inverted.delete(insert.utf16_size());
|
||||
},
|
||||
Operation::Delete(delete) => {
|
||||
inverted.insert(&chars.take(*delete as usize).collect::<String>(), op.get_attributes());
|
||||
@ -294,12 +294,12 @@ where
|
||||
(Some(Operation::Insert(insert)), _) => {
|
||||
// let composed_attrs = transform_attributes(&next_op1, &next_op2, true);
|
||||
a_prime.insert(&insert.s, insert.attributes.clone());
|
||||
b_prime.retain(insert.count_of_utf16_code_units(), insert.attributes.clone());
|
||||
b_prime.retain(insert.utf16_size(), insert.attributes.clone());
|
||||
next_op1 = ops1.next();
|
||||
},
|
||||
(_, Some(Operation::Insert(o_insert))) => {
|
||||
let composed_attrs = transform_op_attribute(&next_op1, &next_op2)?;
|
||||
a_prime.retain(o_insert.count_of_utf16_code_units(), composed_attrs.clone());
|
||||
a_prime.retain(o_insert.utf16_size(), composed_attrs.clone());
|
||||
b_prime.insert(&o_insert.s, composed_attrs);
|
||||
next_op2 = ops2.next();
|
||||
},
|
||||
|
@ -23,7 +23,7 @@ where
|
||||
pub fn from_offset(delta: &'a Delta<T>, offset: usize) -> Self {
|
||||
let interval = Interval::new(0, MAX_IV_LEN);
|
||||
let mut iter = Self::from_interval(delta, interval);
|
||||
iter.seek::<CharMetric>(offset);
|
||||
iter.seek::<Utf16CodeUnitMetric>(offset);
|
||||
iter
|
||||
}
|
||||
|
||||
@ -181,7 +181,7 @@ where
|
||||
Operation::<T>::Insert(insert) => {
|
||||
tracing::trace!("extend insert attributes with {} ", &insert.attributes);
|
||||
attributes.extend_other(insert.attributes.clone());
|
||||
length = insert.count_of_utf16_code_units();
|
||||
length = insert.utf16_size();
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
use serde::{de, de::Visitor, Deserialize, Deserializer, Serialize, Serializer};
|
||||
use std::{fmt, fmt::Formatter, slice};
|
||||
use std::{fmt, fmt::Formatter};
|
||||
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub struct FlowyStr(pub String);
|
||||
@ -10,18 +10,11 @@ impl FlowyStr {
|
||||
|
||||
pub fn utf16_code_unit_iter(&self) -> Utf16CodeUnitIterator { Utf16CodeUnitIterator::new(self) }
|
||||
|
||||
pub fn sub_str(&self, interval: Interval) -> String {
|
||||
match self.with_interval(interval) {
|
||||
None => "".to_owned(),
|
||||
Some(s) => s.0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_interval(&self, interval: Interval) -> Option<FlowyStr> {
|
||||
pub fn sub_str(&self, interval: Interval) -> Option<String> {
|
||||
let mut iter = Utf16CodeUnitIterator::new(self);
|
||||
let mut buf = vec![];
|
||||
while let Some((byte, _len)) = iter.next() {
|
||||
if interval.start < iter.code_unit_offset && interval.end >= iter.code_unit_offset {
|
||||
if iter.utf16_offset >= interval.start && iter.utf16_offset < interval.end {
|
||||
buf.extend_from_slice(byte);
|
||||
}
|
||||
}
|
||||
@ -31,7 +24,7 @@ impl FlowyStr {
|
||||
}
|
||||
|
||||
match str::from_utf8(&buf) {
|
||||
Ok(item) => Some(item.into()),
|
||||
Ok(item) => Some(item.to_owned()),
|
||||
Err(_e) => None,
|
||||
}
|
||||
}
|
||||
@ -40,69 +33,6 @@ impl FlowyStr {
|
||||
fn utf16_code_point_iter(&self) -> FlowyUtf16CodePointIterator { FlowyUtf16CodePointIterator::new(self, 0) }
|
||||
}
|
||||
|
||||
pub struct Utf16CodeUnitIterator<'a> {
|
||||
s: &'a FlowyStr,
|
||||
bytes_offset: usize,
|
||||
code_unit_offset: usize,
|
||||
iter_index: usize,
|
||||
iter: slice::Iter<'a, u8>,
|
||||
}
|
||||
|
||||
impl<'a> Utf16CodeUnitIterator<'a> {
|
||||
pub fn new(s: &'a FlowyStr) -> Self {
|
||||
Utf16CodeUnitIterator {
|
||||
s,
|
||||
bytes_offset: 0,
|
||||
code_unit_offset: 0,
|
||||
iter_index: 0,
|
||||
iter: s.as_bytes().iter(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Utf16CodeUnitIterator<'a> {
|
||||
type Item = (&'a [u8], usize);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let start = self.bytes_offset;
|
||||
let _end = start;
|
||||
|
||||
while let Some(&b) = self.iter.next() {
|
||||
self.iter_index += 1;
|
||||
|
||||
let mut code_unit_count = 0;
|
||||
if self.bytes_offset > self.iter_index {
|
||||
continue;
|
||||
}
|
||||
|
||||
if self.bytes_offset == self.iter_index {
|
||||
break;
|
||||
}
|
||||
|
||||
if (b as i8) >= -0x40 {
|
||||
code_unit_count += 1
|
||||
}
|
||||
if b >= 0xf0 {
|
||||
code_unit_count += 1
|
||||
}
|
||||
|
||||
self.bytes_offset += len_utf8_from_first_byte(b);
|
||||
self.code_unit_offset += code_unit_count;
|
||||
|
||||
if code_unit_count == 1 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if start == self.bytes_offset {
|
||||
return None;
|
||||
}
|
||||
|
||||
let byte = &self.s.as_bytes()[start..self.bytes_offset];
|
||||
Some((byte, self.bytes_offset - start))
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Deref for FlowyStr {
|
||||
type Target = String;
|
||||
|
||||
@ -170,6 +100,52 @@ impl<'de> Deserialize<'de> for FlowyStr {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Utf16CodeUnitIterator<'a> {
|
||||
s: &'a FlowyStr,
|
||||
byte_offset: usize,
|
||||
utf16_offset: usize,
|
||||
utf16_count: usize,
|
||||
}
|
||||
|
||||
impl<'a> Utf16CodeUnitIterator<'a> {
|
||||
pub fn new(s: &'a FlowyStr) -> Self {
|
||||
Utf16CodeUnitIterator {
|
||||
s,
|
||||
byte_offset: 0,
|
||||
utf16_offset: 0,
|
||||
utf16_count: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Utf16CodeUnitIterator<'a> {
|
||||
type Item = (&'a [u8], usize);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let _len = self.s.len();
|
||||
if self.byte_offset == self.s.len() {
|
||||
None
|
||||
} else {
|
||||
let b = self.s.as_bytes()[self.byte_offset];
|
||||
let start = self.byte_offset;
|
||||
let end = self.byte_offset + len_utf8_from_first_byte(b);
|
||||
if (b as i8) >= -0x40 {
|
||||
self.utf16_count += 1;
|
||||
}
|
||||
if b >= 0xf0 {
|
||||
self.utf16_count += 1;
|
||||
}
|
||||
|
||||
if self.utf16_count > 0 {
|
||||
self.utf16_offset = self.utf16_count - 1;
|
||||
}
|
||||
self.byte_offset = end;
|
||||
let byte = &self.s.as_bytes()[start..end];
|
||||
Some((byte, end - start))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct FlowyUtf16CodePointIterator<'a> {
|
||||
s: &'a FlowyStr,
|
||||
offset: usize,
|
||||
@ -230,38 +206,76 @@ pub fn len_utf8_from_first_byte(b: u8) -> usize {
|
||||
mod tests {
|
||||
use crate::core::{FlowyStr, Interval};
|
||||
|
||||
#[test]
|
||||
fn flowy_str_code_unit() {
|
||||
let size = FlowyStr::from("👋").utf16_size();
|
||||
assert_eq!(size, 2);
|
||||
|
||||
let s: FlowyStr = "👋 \n👋".into();
|
||||
let output = s.sub_str(Interval::new(0, size)).unwrap();
|
||||
assert_eq!(output, "👋");
|
||||
|
||||
let output = s.sub_str(Interval::new(2, 3)).unwrap();
|
||||
assert_eq!(output, " ");
|
||||
|
||||
let output = s.sub_str(Interval::new(3, 4)).unwrap();
|
||||
assert_eq!(output, "\n");
|
||||
|
||||
let output = s.sub_str(Interval::new(4, 4 + size)).unwrap();
|
||||
assert_eq!(output, "👋");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flowy_str_sub_str_in_chinese() {
|
||||
let s: FlowyStr = "你好\n😁".into();
|
||||
let size = s.utf16_size();
|
||||
assert_eq!(size, 5);
|
||||
|
||||
let output1 = s.sub_str(Interval::new(0, 2)).unwrap();
|
||||
let output2 = s.sub_str(Interval::new(2, 3)).unwrap();
|
||||
let output3 = s.sub_str(Interval::new(3, 5)).unwrap();
|
||||
assert_eq!(output1, "你好");
|
||||
assert_eq!(output2, "\n");
|
||||
assert_eq!(output3, "😁");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flowy_str_sub_str_in_chinese2() {
|
||||
let s: FlowyStr = "😁 \n".into();
|
||||
let size = s.utf16_size();
|
||||
assert_eq!(size, 4);
|
||||
|
||||
let output1 = s.sub_str(Interval::new(0, 3)).unwrap();
|
||||
let output2 = s.sub_str(Interval::new(3, 4)).unwrap();
|
||||
assert_eq!(output1, "😁 ");
|
||||
assert_eq!(output2, "\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flowy_str_sub_str_in_english() {
|
||||
let s: FlowyStr = "ab".into();
|
||||
let size = s.utf16_size();
|
||||
assert_eq!(size, 2);
|
||||
|
||||
let output = s.sub_str(Interval::new(0, 2)).unwrap();
|
||||
assert_eq!(output, "ab");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flowy_str_utf16_code_point_iter_test1() {
|
||||
let s: FlowyStr = "👋😁👋😁".into();
|
||||
let s: FlowyStr = "👋😁👋".into();
|
||||
let mut iter = s.utf16_code_point_iter();
|
||||
assert_eq!(iter.next().unwrap(), "👋".to_string());
|
||||
assert_eq!(iter.next().unwrap(), "😁".to_string());
|
||||
assert_eq!(iter.next().unwrap(), "👋".to_string());
|
||||
assert_eq!(iter.next().unwrap(), "😁".to_string());
|
||||
assert_eq!(iter.next(), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flowy_str_utf16_code_point_iter_test2() {
|
||||
let s: FlowyStr = "👋👋😁😁👋👋".into();
|
||||
let s: FlowyStr = "👋😁👋".into();
|
||||
let iter = s.utf16_code_point_iter();
|
||||
let result = iter.skip(2).take(2).collect::<String>();
|
||||
assert_eq!(result, "😁😁".to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flowy_str_code_unit_test() {
|
||||
let s: FlowyStr = "👋 \n👋".into();
|
||||
let output = s.with_interval(Interval::new(0, 2)).unwrap().0;
|
||||
assert_eq!(output, "👋");
|
||||
|
||||
let output = s.with_interval(Interval::new(2, 3)).unwrap().0;
|
||||
assert_eq!(output, " ");
|
||||
|
||||
let output = s.with_interval(Interval::new(3, 4)).unwrap().0;
|
||||
assert_eq!(output, "\n");
|
||||
|
||||
let output = s.with_interval(Interval::new(4, 6)).unwrap().0;
|
||||
assert_eq!(output, "👋");
|
||||
let result = iter.skip(1).take(1).collect::<String>();
|
||||
assert_eq!(result, "😁".to_string());
|
||||
}
|
||||
}
|
||||
|
@ -67,7 +67,7 @@ where
|
||||
match self {
|
||||
Operation::Delete(n) => *n,
|
||||
Operation::Retain(r) => r.n,
|
||||
Operation::Insert(i) => i.count_of_utf16_code_units(),
|
||||
Operation::Insert(i) => i.utf16_size(),
|
||||
}
|
||||
}
|
||||
|
||||
@ -95,7 +95,7 @@ where
|
||||
.build(),
|
||||
);
|
||||
right = Some(
|
||||
OpBuilder::<T>::insert(&insert.s[index..insert.count_of_utf16_code_units()])
|
||||
OpBuilder::<T>::insert(&insert.s[index..insert.utf16_size()])
|
||||
.attributes(attributes)
|
||||
.build(),
|
||||
);
|
||||
@ -112,17 +112,10 @@ where
|
||||
.attributes(retain.attributes.clone())
|
||||
.build(),
|
||||
Operation::Insert(insert) => {
|
||||
if interval.start > insert.count_of_utf16_code_units() {
|
||||
if interval.start > insert.utf16_size() {
|
||||
OpBuilder::insert("").build()
|
||||
} else {
|
||||
// let s = &insert
|
||||
// .s
|
||||
// .chars()
|
||||
// .skip(interval.start)
|
||||
// .take(min(interval.size(), insert.count_of_code_units()))
|
||||
// .collect::<String>();
|
||||
|
||||
let s = insert.s.sub_str(interval);
|
||||
let s = insert.s.sub_str(interval).unwrap_or_else(|| "".to_owned());
|
||||
OpBuilder::insert(&s).attributes(insert.attributes.clone()).build()
|
||||
}
|
||||
},
|
||||
@ -291,7 +284,7 @@ impl<T> Insert<T>
|
||||
where
|
||||
T: Attributes,
|
||||
{
|
||||
pub fn count_of_utf16_code_units(&self) -> usize { self.s.utf16_size() }
|
||||
pub fn utf16_size(&self) -> usize { self.s.utf16_size() }
|
||||
|
||||
pub fn merge_or_new_op(&mut self, s: &str, attributes: T) -> Option<Operation<T>> {
|
||||
if self.attributes == attributes {
|
||||
|
Loading…
Reference in New Issue
Block a user