1. Fix a potential infinite loop when composing deltas, caused by calculating the wrong UTF-16 code unit offset.

2. Add tests for offset calculation with Chinese characters.
This commit is contained in:
appflowy 2022-01-09 11:12:34 +08:00
parent c456687a18
commit 7e7254b306
12 changed files with 225 additions and 175 deletions

View File

@ -44,7 +44,19 @@ async fn document_sync_insert_test() {
} }
#[tokio::test] #[tokio::test]
async fn document_sync_delete_test1() { async fn document_sync_insert_in_chinese() {
let s = "你".to_owned();
let offset = count_utf16_code_units(&s);
let scripts = vec![
InsertText("你", 0),
InsertText("好", offset),
AssertJson(r#"[{"insert":"你好\n"}]"#),
];
EditorTest::new().await.run_scripts(scripts).await;
}
#[tokio::test]
async fn document_sync_delete_in_english() {
let scripts = vec![ let scripts = vec![
InsertText("1", 0), InsertText("1", 0),
InsertText("2", 1), InsertText("2", 1),
@ -55,6 +67,19 @@ async fn document_sync_delete_test1() {
EditorTest::new().await.run_scripts(scripts).await; EditorTest::new().await.run_scripts(scripts).await;
} }
/// Inserts two Chinese characters, deletes the first one by its UTF-16
/// length, and checks that only the second character is left in the document.
#[tokio::test]
async fn document_sync_delete_in_chinese() {
    // `offset` is the UTF-16 length of the first inserted text, i.e. the
    // position at which the second character is inserted and the end of the
    // range that is deleted afterwards.
    let s = "你".to_owned();
    let offset = count_utf16_code_units(&s);
    let scripts = vec![
        InsertText("你", 0),
        InsertText("好", offset),
        Delete(Interval::new(0, offset)),
        AssertJson(r#"[{"insert":"好\n"}]"#),
    ];
    EditorTest::new().await.run_scripts(scripts).await;
}
#[tokio::test] #[tokio::test]
async fn document_sync_replace_test() { async fn document_sync_replace_test() {
let scripts = vec![ let scripts = vec![

View File

@ -727,6 +727,7 @@ fn attributes_format_emoji() {
let len = s.utf16_size(); let len = s.utf16_size();
assert_eq!(3, len); assert_eq!(3, len);
assert_eq!(2, s.graphemes(true).count()); assert_eq!(2, s.graphemes(true).count());
let ops = vec![ let ops = vec![
Insert(0, emoji_s, 0), Insert(0, emoji_s, 0),
AssertDocJson(0, r#"[{"insert":"👋 \n"}]"#), AssertDocJson(0, r#"[{"insert":"👋 \n"}]"#),

View File

@ -184,7 +184,7 @@ fn delta_get_ops_in_interval_7() {
} }
#[test] #[test]
fn delta_seek_1() { fn delta_op_seek() {
let mut delta = RichTextDelta::default(); let mut delta = RichTextDelta::default();
let insert_a = OpBuilder::insert("12345").build(); let insert_a = OpBuilder::insert("12345").build();
let retain_a = OpBuilder::retain(3).build(); let retain_a = OpBuilder::retain(3).build();
@ -196,41 +196,17 @@ fn delta_seek_1() {
} }
#[test] #[test]
fn delta_seek_2() { fn delta_utf16_code_unit_seek() {
let mut delta = RichTextDelta::default(); let mut delta = RichTextDelta::default();
delta.add(OpBuilder::insert("12345").build()); delta.add(OpBuilder::insert("12345").build());
let mut iter = DeltaIter::new(&delta); let mut iter = DeltaIter::new(&delta);
assert_eq!(iter.next_op_with_len(1).unwrap(), OpBuilder::insert("1").build()); iter.seek::<Utf16CodeUnitMetric>(3);
}
#[test]
fn delta_seek_3() {
let mut delta = RichTextDelta::default();
delta.add(OpBuilder::insert("12345").build());
let mut iter = DeltaIter::new(&delta);
assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("12").build());
assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("34").build());
assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("5").build());
assert_eq!(iter.next_op_with_len(1), None);
}
#[test]
fn delta_seek_4() {
let mut delta = RichTextDelta::default();
delta.add(OpBuilder::insert("12345").build());
let mut iter = DeltaIter::new(&delta);
iter.seek::<CharMetric>(3);
assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("45").build()); assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("45").build());
} }
#[test] #[test]
fn delta_seek_5() { fn delta_utf16_code_unit_seek_with_attributes() {
let mut delta = RichTextDelta::default(); let mut delta = RichTextDelta::default();
let attributes = AttributeBuilder::new() let attributes = AttributeBuilder::new()
.add_attr(RichTextAttribute::Bold(true)) .add_attr(RichTextAttribute::Bold(true))
@ -241,7 +217,7 @@ fn delta_seek_5() {
delta.add(OpBuilder::insert("\n").build()); delta.add(OpBuilder::insert("\n").build());
let mut iter = DeltaIter::new(&delta); let mut iter = DeltaIter::new(&delta);
iter.seek::<CharMetric>(0); iter.seek::<Utf16CodeUnitMetric>(0);
assert_eq!( assert_eq!(
iter.next_op_with_len(4).unwrap(), iter.next_op_with_len(4).unwrap(),
@ -250,12 +226,42 @@ fn delta_seek_5() {
} }
#[test] #[test]
fn delta_next_op_len_test() { fn delta_next_op_len() {
let mut delta = RichTextDelta::default(); let mut delta = RichTextDelta::default();
delta.add(OpBuilder::insert("12345").build()); delta.add(OpBuilder::insert("12345").build());
let mut iter = DeltaIter::new(&delta);
assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("12").build());
assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("34").build());
assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("5").build());
assert_eq!(iter.next_op_with_len(1), None);
}
#[test]
fn delta_next_op_len_with_chinese() {
let mut delta = RichTextDelta::default();
delta.add(OpBuilder::insert("你好").build());
let mut iter = DeltaIter::new(&delta); let mut iter = DeltaIter::new(&delta);
iter.seek::<CharMetric>(3); assert_eq!(iter.next_op_len().unwrap(), 2);
assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("你好").build());
}
/// A two-letter ASCII insert reports a pending length of 2 and is returned
/// whole when exactly that length is requested.
#[test]
fn delta_next_op_len_with_english() {
    let mut delta = RichTextDelta::default();
    let insert_ab = OpBuilder::insert("ab").build();
    delta.add(insert_ab);

    let mut cursor = DeltaIter::new(&delta);
    let pending_len = cursor.next_op_len().unwrap();
    assert_eq!(pending_len, 2);

    let taken = cursor.next_op_with_len(2).unwrap();
    assert_eq!(taken, OpBuilder::insert("ab").build());
}
#[test]
fn delta_next_op_len_after_seek() {
let mut delta = RichTextDelta::default();
delta.add(OpBuilder::insert("12345").build());
let mut iter = DeltaIter::new(&delta);
assert_eq!(iter.next_op_len().unwrap(), 5);
iter.seek::<Utf16CodeUnitMetric>(3);
assert_eq!(iter.next_op_len().unwrap(), 2); assert_eq!(iter.next_op_len().unwrap(), 2);
assert_eq!(iter.next_op_with_len(1).unwrap(), OpBuilder::insert("4").build()); assert_eq!(iter.next_op_with_len(1).unwrap(), OpBuilder::insert("4").build());
assert_eq!(iter.next_op_len().unwrap(), 1); assert_eq!(iter.next_op_len().unwrap(), 1);
@ -263,7 +269,7 @@ fn delta_next_op_len_test() {
} }
#[test] #[test]
fn delta_next_op_len_test2() { fn delta_next_op_len_none() {
let mut delta = RichTextDelta::default(); let mut delta = RichTextDelta::default();
delta.add(OpBuilder::insert("12345").build()); delta.add(OpBuilder::insert("12345").build());
let mut iter = DeltaIter::new(&delta); let mut iter = DeltaIter::new(&delta);
@ -290,7 +296,7 @@ fn delta_next_op_with_len_cross_op_return_last() {
delta.add(OpBuilder::insert("678").build()); delta.add(OpBuilder::insert("678").build());
let mut iter = DeltaIter::new(&delta); let mut iter = DeltaIter::new(&delta);
iter.seek::<CharMetric>(4); iter.seek::<Utf16CodeUnitMetric>(4);
assert_eq!(iter.next_op_len().unwrap(), 1); assert_eq!(iter.next_op_len().unwrap(), 1);
assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::retain(1).build()); assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::retain(1).build());
} }
@ -475,7 +481,7 @@ fn transform_random_delta() {
} }
#[test] #[test]
fn transform_with_two_delta_test() { fn transform_with_two_delta() {
let mut a = RichTextDelta::default(); let mut a = RichTextDelta::default();
let mut a_s = String::new(); let mut a_s = String::new();
a.insert( a.insert(
@ -515,7 +521,7 @@ fn transform_with_two_delta_test() {
} }
#[test] #[test]
fn transform_two_plain_delta_test() { fn transform_two_plain_delta() {
let ops = vec![ let ops = vec![
Insert(0, "123", 0), Insert(0, "123", 0),
Insert(1, "456", 0), Insert(1, "456", 0),
@ -527,7 +533,7 @@ fn transform_two_plain_delta_test() {
} }
#[test] #[test]
fn transform_two_plain_delta_test2() { fn transform_two_plain_delta2() {
let ops = vec![ let ops = vec![
Insert(0, "123", 0), Insert(0, "123", 0),
Insert(1, "456", 0), Insert(1, "456", 0),
@ -721,6 +727,16 @@ fn delta_invert_attribute_delta_with_attribute_delta() {
TestBuilder::new().run_scripts::<PlainDoc>(ops); TestBuilder::new().run_scripts::<PlainDoc>(ops);
} }
/// Runs two `Insert` steps ("1", then "2") against document 0 and asserts
/// that the resulting document JSON contains the single insert `12\n`.
#[test]
fn delta_compose_str() {
    let expected_json = r#"[{"insert":"12\n"}]"#;
    let script = vec![
        Insert(0, "1", 0),
        Insert(0, "2", 1),
        AssertDocJson(0, expected_json),
    ];
    TestBuilder::new().run_scripts::<NewlineDoc>(script);
}
#[test] #[test]
#[should_panic] #[should_panic]
fn delta_compose_with_missing_delta() { fn delta_compose_with_missing_delta() {

View File

@ -112,7 +112,6 @@ impl Document {
let text = data.to_string(); let text = data.to_string();
let interval = Interval::new(index, index); let interval = Interval::new(index, index);
let _ = validate_interval(&self.delta, &interval)?; let _ = validate_interval(&self.delta, &interval)?;
let delta = self.view.insert(&self.delta, &text, interval)?; let delta = self.view.insert(&self.delta, &text, interval)?;
self.compose_delta(delta.clone())?; self.compose_delta(delta.clone())?;
Ok(delta) Ok(delta)

View File

@ -1,6 +1,6 @@
use crate::{document::DeleteExt, util::is_newline}; use crate::{document::DeleteExt, util::is_newline};
use lib_ot::{ use lib_ot::{
core::{Attributes, CharMetric, DeltaBuilder, DeltaIter, Interval, NEW_LINE}, core::{Attributes, DeltaBuilder, DeltaIter, Interval, Utf16CodeUnitMetric, NEW_LINE},
rich_text::{plain_attributes, RichTextDelta}, rich_text::{plain_attributes, RichTextDelta},
}; };
@ -22,7 +22,7 @@ impl DeleteExt for PreserveLineFormatOnMerge {
return None; return None;
} }
iter.seek::<CharMetric>(interval.size() - 1); iter.seek::<Utf16CodeUnitMetric>(interval.size() - 1);
let mut new_delta = DeltaBuilder::new() let mut new_delta = DeltaBuilder::new()
.retain(interval.start) .retain(interval.start)
.delete(interval.size()) .delete(interval.size())

View File

@ -1,6 +1,6 @@
use crate::{document::InsertExt, util::is_newline}; use crate::{document::InsertExt, util::is_newline};
use lib_ot::{ use lib_ot::{
core::{CharMetric, DeltaBuilder, DeltaIter, NEW_LINE}, core::{DeltaBuilder, DeltaIter, Utf16CodeUnitMetric, NEW_LINE},
rich_text::{RichTextAttributeKey, RichTextAttributes, RichTextDelta}, rich_text::{RichTextAttributeKey, RichTextAttributes, RichTextDelta},
}; };
@ -14,7 +14,7 @@ impl InsertExt for ResetLineFormatOnNewLine {
} }
let mut iter = DeltaIter::new(delta); let mut iter = DeltaIter::new(delta);
iter.seek::<CharMetric>(index); iter.seek::<Utf16CodeUnitMetric>(index);
let next_op = iter.next_op()?; let next_op = iter.next_op()?;
if !next_op.get_data().starts_with(NEW_LINE) { if !next_op.get_data().starts_with(NEW_LINE) {
return None; return None;

View File

@ -32,7 +32,7 @@ impl View {
for ext in &self.insert_exts { for ext in &self.insert_exts {
if let Some(mut delta) = ext.apply(delta, interval.size(), text, interval.start) { if let Some(mut delta) = ext.apply(delta, interval.size(), text, interval.start) {
trim(&mut delta); trim(&mut delta);
tracing::debug!("[{}]: applied, delta: {}", ext.ext_name(), delta); tracing::debug!("[{}]: process delta: {}", ext.ext_name(), delta);
new_delta = Some(delta); new_delta = Some(delta);
break; break;
} }

View File

@ -35,13 +35,13 @@ where
} }
// get the next operation interval // get the next operation interval
pub fn next_iv(&self) -> Interval { self.next_iv_before(None).unwrap_or_else(|| Interval::new(0, 0)) } pub fn next_iv(&self) -> Interval { self.next_iv_with_len(None).unwrap_or_else(|| Interval::new(0, 0)) }
pub fn next_op(&mut self) -> Option<Operation<T>> { self.next_with_len(None) } pub fn next_op(&mut self) -> Option<Operation<T>> { self.next_with_len(None) }
// get the last operation before the end. // get the last operation before the end.
// checkout the delta_next_op_with_len_cross_op_return_last test for more detail // checkout the delta_next_op_with_len_cross_op_return_last test for more detail
pub fn next_with_len(&mut self, force_end: Option<usize>) -> Option<Operation<T>> { pub fn next_with_len(&mut self, expected_len: Option<usize>) -> Option<Operation<T>> {
let mut find_op = None; let mut find_op = None;
let holder = self.next_op.clone(); let holder = self.next_op.clone();
let mut next_op = holder.as_ref(); let mut next_op = holder.as_ref();
@ -53,7 +53,9 @@ where
let mut consume_len = 0; let mut consume_len = 0;
while find_op.is_none() && next_op.is_some() { while find_op.is_none() && next_op.is_some() {
let op = next_op.take().unwrap(); let op = next_op.take().unwrap();
let interval = self.next_iv_before(force_end).unwrap_or_else(|| Interval::new(0, 0)); let interval = self
.next_iv_with_len(expected_len)
.unwrap_or_else(|| Interval::new(0, 0));
// cache the op if the interval is empty. e.g. last_op_before(Some(0)) // cache the op if the interval is empty. e.g. last_op_before(Some(0))
if interval.is_empty() { if interval.is_empty() {
@ -79,7 +81,7 @@ where
} }
if find_op.is_some() { if find_op.is_some() {
if let Some(end) = force_end { if let Some(end) = expected_len {
// try to find the next op before the index if consume_len less than index // try to find the next op before the index if consume_len less than index
if end > consume_len && self.has_next() { if end > consume_len && self.has_next() {
return self.next_with_len(Some(end - consume_len)); return self.next_with_len(Some(end - consume_len));
@ -111,12 +113,12 @@ where
} }
} }
fn next_iv_before(&self, force_end: Option<usize>) -> Option<Interval> { fn next_iv_with_len(&self, expected_len: Option<usize>) -> Option<Interval> {
let op = self.next_iter_op()?; let op = self.next_iter_op()?;
let start = self.consume_count; let start = self.consume_count;
let end = match force_end { let end = match expected_len {
None => self.consume_count + op.len(), None => self.consume_count + op.len(),
Some(index) => self.consume_count + min(index, op.len()), Some(expected_len) => self.consume_count + min(expected_len, op.len()),
}; };
let intersect = Interval::new(start, end).intersect(self.consume_iv); let intersect = Interval::new(start, end).intersect(self.consume_iv);
@ -155,34 +157,34 @@ where
type SeekResult = Result<(), OTError>; type SeekResult = Result<(), OTError>;
pub trait Metric { pub trait Metric {
fn seek<T: Attributes>(cursor: &mut OpCursor<T>, index: usize) -> SeekResult; fn seek<T: Attributes>(cursor: &mut OpCursor<T>, offset: usize) -> SeekResult;
} }
pub struct OpMetric(); pub struct OpMetric();
impl Metric for OpMetric { impl Metric for OpMetric {
fn seek<T: Attributes>(cursor: &mut OpCursor<T>, index: usize) -> SeekResult { fn seek<T: Attributes>(cursor: &mut OpCursor<T>, offset: usize) -> SeekResult {
let _ = check_bound(cursor.op_index, index)?; let _ = check_bound(cursor.op_index, offset)?;
let mut seek_cursor = OpCursor::new(cursor.delta, cursor.origin_iv); let mut seek_cursor = OpCursor::new(cursor.delta, cursor.origin_iv);
let mut offset = 0; let mut cur_offset = 0;
while let Some((_, op)) = seek_cursor.iter.next() { while let Some((_, op)) = seek_cursor.iter.next() {
offset += op.len(); cur_offset += op.len();
if offset > index { if cur_offset > offset {
break; break;
} }
} }
cursor.descend(offset); cursor.descend(cur_offset);
Ok(()) Ok(())
} }
} }
pub struct CharMetric(); pub struct Utf16CodeUnitMetric();
impl Metric for CharMetric { impl Metric for Utf16CodeUnitMetric {
fn seek<T: Attributes>(cursor: &mut OpCursor<T>, index: usize) -> SeekResult { fn seek<T: Attributes>(cursor: &mut OpCursor<T>, offset: usize) -> SeekResult {
if index > 0 { if offset > 0 {
let _ = check_bound(cursor.consume_count, index)?; let _ = check_bound(cursor.consume_count, offset)?;
let _ = cursor.next_with_len(Some(index)); let _ = cursor.next_with_len(Some(offset));
} }
Ok(()) Ok(())

View File

@ -187,7 +187,7 @@ where
} }
}, },
Operation::Insert(insert) => { Operation::Insert(insert) => {
inverted.delete(insert.count_of_utf16_code_units()); inverted.delete(insert.utf16_size());
}, },
Operation::Delete(delete) => { Operation::Delete(delete) => {
inverted.insert(&chars.take(*delete as usize).collect::<String>(), op.get_attributes()); inverted.insert(&chars.take(*delete as usize).collect::<String>(), op.get_attributes());
@ -294,12 +294,12 @@ where
(Some(Operation::Insert(insert)), _) => { (Some(Operation::Insert(insert)), _) => {
// let composed_attrs = transform_attributes(&next_op1, &next_op2, true); // let composed_attrs = transform_attributes(&next_op1, &next_op2, true);
a_prime.insert(&insert.s, insert.attributes.clone()); a_prime.insert(&insert.s, insert.attributes.clone());
b_prime.retain(insert.count_of_utf16_code_units(), insert.attributes.clone()); b_prime.retain(insert.utf16_size(), insert.attributes.clone());
next_op1 = ops1.next(); next_op1 = ops1.next();
}, },
(_, Some(Operation::Insert(o_insert))) => { (_, Some(Operation::Insert(o_insert))) => {
let composed_attrs = transform_op_attribute(&next_op1, &next_op2)?; let composed_attrs = transform_op_attribute(&next_op1, &next_op2)?;
a_prime.retain(o_insert.count_of_utf16_code_units(), composed_attrs.clone()); a_prime.retain(o_insert.utf16_size(), composed_attrs.clone());
b_prime.insert(&o_insert.s, composed_attrs); b_prime.insert(&o_insert.s, composed_attrs);
next_op2 = ops2.next(); next_op2 = ops2.next();
}, },

View File

@ -23,7 +23,7 @@ where
pub fn from_offset(delta: &'a Delta<T>, offset: usize) -> Self { pub fn from_offset(delta: &'a Delta<T>, offset: usize) -> Self {
let interval = Interval::new(0, MAX_IV_LEN); let interval = Interval::new(0, MAX_IV_LEN);
let mut iter = Self::from_interval(delta, interval); let mut iter = Self::from_interval(delta, interval);
iter.seek::<CharMetric>(offset); iter.seek::<Utf16CodeUnitMetric>(offset);
iter iter
} }
@ -181,7 +181,7 @@ where
Operation::<T>::Insert(insert) => { Operation::<T>::Insert(insert) => {
tracing::trace!("extend insert attributes with {} ", &insert.attributes); tracing::trace!("extend insert attributes with {} ", &insert.attributes);
attributes.extend_other(insert.attributes.clone()); attributes.extend_other(insert.attributes.clone());
length = insert.count_of_utf16_code_units(); length = insert.utf16_size();
}, },
} }

View File

@ -1,5 +1,5 @@
use serde::{de, de::Visitor, Deserialize, Deserializer, Serialize, Serializer}; use serde::{de, de::Visitor, Deserialize, Deserializer, Serialize, Serializer};
use std::{fmt, fmt::Formatter, slice}; use std::{fmt, fmt::Formatter};
#[derive(Clone, Debug, Eq, PartialEq)] #[derive(Clone, Debug, Eq, PartialEq)]
pub struct FlowyStr(pub String); pub struct FlowyStr(pub String);
@ -10,18 +10,11 @@ impl FlowyStr {
pub fn utf16_code_unit_iter(&self) -> Utf16CodeUnitIterator { Utf16CodeUnitIterator::new(self) } pub fn utf16_code_unit_iter(&self) -> Utf16CodeUnitIterator { Utf16CodeUnitIterator::new(self) }
pub fn sub_str(&self, interval: Interval) -> String { pub fn sub_str(&self, interval: Interval) -> Option<String> {
match self.with_interval(interval) {
None => "".to_owned(),
Some(s) => s.0,
}
}
pub fn with_interval(&self, interval: Interval) -> Option<FlowyStr> {
let mut iter = Utf16CodeUnitIterator::new(self); let mut iter = Utf16CodeUnitIterator::new(self);
let mut buf = vec![]; let mut buf = vec![];
while let Some((byte, _len)) = iter.next() { while let Some((byte, _len)) = iter.next() {
if interval.start < iter.code_unit_offset && interval.end >= iter.code_unit_offset { if iter.utf16_offset >= interval.start && iter.utf16_offset < interval.end {
buf.extend_from_slice(byte); buf.extend_from_slice(byte);
} }
} }
@ -31,7 +24,7 @@ impl FlowyStr {
} }
match str::from_utf8(&buf) { match str::from_utf8(&buf) {
Ok(item) => Some(item.into()), Ok(item) => Some(item.to_owned()),
Err(_e) => None, Err(_e) => None,
} }
} }
@ -40,69 +33,6 @@ impl FlowyStr {
fn utf16_code_point_iter(&self) -> FlowyUtf16CodePointIterator { FlowyUtf16CodePointIterator::new(self, 0) } fn utf16_code_point_iter(&self) -> FlowyUtf16CodePointIterator { FlowyUtf16CodePointIterator::new(self, 0) }
} }
pub struct Utf16CodeUnitIterator<'a> {
s: &'a FlowyStr,
bytes_offset: usize,
code_unit_offset: usize,
iter_index: usize,
iter: slice::Iter<'a, u8>,
}
impl<'a> Utf16CodeUnitIterator<'a> {
pub fn new(s: &'a FlowyStr) -> Self {
Utf16CodeUnitIterator {
s,
bytes_offset: 0,
code_unit_offset: 0,
iter_index: 0,
iter: s.as_bytes().iter(),
}
}
}
impl<'a> Iterator for Utf16CodeUnitIterator<'a> {
type Item = (&'a [u8], usize);
fn next(&mut self) -> Option<Self::Item> {
let start = self.bytes_offset;
let _end = start;
while let Some(&b) = self.iter.next() {
self.iter_index += 1;
let mut code_unit_count = 0;
if self.bytes_offset > self.iter_index {
continue;
}
if self.bytes_offset == self.iter_index {
break;
}
if (b as i8) >= -0x40 {
code_unit_count += 1
}
if b >= 0xf0 {
code_unit_count += 1
}
self.bytes_offset += len_utf8_from_first_byte(b);
self.code_unit_offset += code_unit_count;
if code_unit_count == 1 {
break;
}
}
if start == self.bytes_offset {
return None;
}
let byte = &self.s.as_bytes()[start..self.bytes_offset];
Some((byte, self.bytes_offset - start))
}
}
impl std::ops::Deref for FlowyStr { impl std::ops::Deref for FlowyStr {
type Target = String; type Target = String;
@ -170,6 +100,52 @@ impl<'de> Deserialize<'de> for FlowyStr {
} }
} }
/// Walks a [`FlowyStr`] one UTF-8 encoded character at a time while keeping
/// track of how many UTF-16 code units have been covered so far.
pub struct Utf16CodeUnitIterator<'a> {
/// The string being iterated.
s: &'a FlowyStr,
/// Byte index into `s` at which the next character starts.
byte_offset: usize,
/// UTF-16 index of the last code unit of the most recently yielded
/// character (`utf16_count - 1`); stays 0 until something has been yielded.
utf16_offset: usize,
/// Running total of UTF-16 code units of all characters yielded so far.
utf16_count: usize,
}
impl<'a> Utf16CodeUnitIterator<'a> {
pub fn new(s: &'a FlowyStr) -> Self {
Utf16CodeUnitIterator {
s,
byte_offset: 0,
utf16_offset: 0,
utf16_count: 0,
}
}
}
impl<'a> Iterator for Utf16CodeUnitIterator<'a> {
type Item = (&'a [u8], usize);
fn next(&mut self) -> Option<Self::Item> {
let _len = self.s.len();
if self.byte_offset == self.s.len() {
None
} else {
let b = self.s.as_bytes()[self.byte_offset];
let start = self.byte_offset;
let end = self.byte_offset + len_utf8_from_first_byte(b);
if (b as i8) >= -0x40 {
self.utf16_count += 1;
}
if b >= 0xf0 {
self.utf16_count += 1;
}
if self.utf16_count > 0 {
self.utf16_offset = self.utf16_count - 1;
}
self.byte_offset = end;
let byte = &self.s.as_bytes()[start..end];
Some((byte, end - start))
}
}
}
pub struct FlowyUtf16CodePointIterator<'a> { pub struct FlowyUtf16CodePointIterator<'a> {
s: &'a FlowyStr, s: &'a FlowyStr,
offset: usize, offset: usize,
@ -230,38 +206,76 @@ pub fn len_utf8_from_first_byte(b: u8) -> usize {
mod tests { mod tests {
use crate::core::{FlowyStr, Interval}; use crate::core::{FlowyStr, Interval};
/// `sub_str` addresses text by UTF-16 code units: the waving-hand emoji takes
/// two units, the space and the newline one each.
#[test]
fn flowy_str_code_unit() {
    let emoji_len = FlowyStr::from("👋").utf16_size();
    assert_eq!(emoji_len, 2);

    let text: FlowyStr = "👋 \n👋".into();
    let cases = vec![
        (Interval::new(0, emoji_len), "👋"),
        (Interval::new(2, 3), " "),
        (Interval::new(3, 4), "\n"),
        (Interval::new(4, 4 + emoji_len), "👋"),
    ];
    for (interval, expected) in cases {
        let output = text.sub_str(interval).unwrap();
        assert_eq!(output, expected);
    }
}
/// Chinese characters take one UTF-16 code unit each and the emoji takes two,
/// so the whole string measures five units and can be sliced on those bounds.
#[test]
fn flowy_str_sub_str_in_chinese() {
    let text = FlowyStr::from("你好\n😁");
    assert_eq!(text.utf16_size(), 5);

    let slice = |start: usize, end: usize| text.sub_str(Interval::new(start, end)).unwrap();
    let (chinese, newline, emoji) = (slice(0, 2), slice(2, 3), slice(3, 5));

    assert_eq!(chinese, "你好");
    assert_eq!(newline, "\n");
    assert_eq!(emoji, "😁");
}
/// Despite its name this case uses an emoji rather than Chinese text: the
/// emoji plus a space spans UTF-16 units 0..3 and the newline sits at 3..4.
#[test]
fn flowy_str_sub_str_in_chinese2() {
    let text = FlowyStr::from("😁 \n");
    assert_eq!(text.utf16_size(), 4);

    let emoji_and_space = text.sub_str(Interval::new(0, 3)).unwrap();
    let newline = text.sub_str(Interval::new(3, 4)).unwrap();

    assert_eq!(emoji_and_space, "😁 ");
    assert_eq!(newline, "\n");
}
/// Plain ASCII: two letters are two UTF-16 code units, and slicing the full
/// range gives the string back.
#[test]
fn flowy_str_sub_str_in_english() {
    let text = FlowyStr::from("ab");
    let unit_count = text.utf16_size();
    assert_eq!(unit_count, 2);

    let whole = text.sub_str(Interval::new(0, 2)).unwrap();
    assert_eq!(whole, "ab");
}
#[test] #[test]
fn flowy_str_utf16_code_point_iter_test1() { fn flowy_str_utf16_code_point_iter_test1() {
let s: FlowyStr = "👋😁👋😁".into(); let s: FlowyStr = "👋😁👋".into();
let mut iter = s.utf16_code_point_iter(); let mut iter = s.utf16_code_point_iter();
assert_eq!(iter.next().unwrap(), "👋".to_string()); assert_eq!(iter.next().unwrap(), "👋".to_string());
assert_eq!(iter.next().unwrap(), "😁".to_string()); assert_eq!(iter.next().unwrap(), "😁".to_string());
assert_eq!(iter.next().unwrap(), "👋".to_string()); assert_eq!(iter.next().unwrap(), "👋".to_string());
assert_eq!(iter.next().unwrap(), "😁".to_string());
assert_eq!(iter.next(), None); assert_eq!(iter.next(), None);
} }
#[test] #[test]
fn flowy_str_utf16_code_point_iter_test2() { fn flowy_str_utf16_code_point_iter_test2() {
let s: FlowyStr = "👋👋😁😁👋👋".into(); let s: FlowyStr = "👋😁👋".into();
let iter = s.utf16_code_point_iter(); let iter = s.utf16_code_point_iter();
let result = iter.skip(2).take(2).collect::<String>(); let result = iter.skip(1).take(1).collect::<String>();
assert_eq!(result, "😁😁".to_string()); assert_eq!(result, "😁".to_string());
}
#[test]
fn flowy_str_code_unit_test() {
let s: FlowyStr = "👋 \n👋".into();
let output = s.with_interval(Interval::new(0, 2)).unwrap().0;
assert_eq!(output, "👋");
let output = s.with_interval(Interval::new(2, 3)).unwrap().0;
assert_eq!(output, " ");
let output = s.with_interval(Interval::new(3, 4)).unwrap().0;
assert_eq!(output, "\n");
let output = s.with_interval(Interval::new(4, 6)).unwrap().0;
assert_eq!(output, "👋");
} }
} }

View File

@ -67,7 +67,7 @@ where
match self { match self {
Operation::Delete(n) => *n, Operation::Delete(n) => *n,
Operation::Retain(r) => r.n, Operation::Retain(r) => r.n,
Operation::Insert(i) => i.count_of_utf16_code_units(), Operation::Insert(i) => i.utf16_size(),
} }
} }
@ -95,7 +95,7 @@ where
.build(), .build(),
); );
right = Some( right = Some(
OpBuilder::<T>::insert(&insert.s[index..insert.count_of_utf16_code_units()]) OpBuilder::<T>::insert(&insert.s[index..insert.utf16_size()])
.attributes(attributes) .attributes(attributes)
.build(), .build(),
); );
@ -112,17 +112,10 @@ where
.attributes(retain.attributes.clone()) .attributes(retain.attributes.clone())
.build(), .build(),
Operation::Insert(insert) => { Operation::Insert(insert) => {
if interval.start > insert.count_of_utf16_code_units() { if interval.start > insert.utf16_size() {
OpBuilder::insert("").build() OpBuilder::insert("").build()
} else { } else {
// let s = &insert let s = insert.s.sub_str(interval).unwrap_or_else(|| "".to_owned());
// .s
// .chars()
// .skip(interval.start)
// .take(min(interval.size(), insert.count_of_code_units()))
// .collect::<String>();
let s = insert.s.sub_str(interval);
OpBuilder::insert(&s).attributes(insert.attributes.clone()).build() OpBuilder::insert(&s).attributes(insert.attributes.clone()).build()
} }
}, },
@ -291,7 +284,7 @@ impl<T> Insert<T>
where where
T: Attributes, T: Attributes,
{ {
pub fn count_of_utf16_code_units(&self) -> usize { self.s.utf16_size() } pub fn utf16_size(&self) -> usize { self.s.utf16_size() }
pub fn merge_or_new_op(&mut self, s: &str, attributes: T) -> Option<Operation<T>> { pub fn merge_or_new_op(&mut self, s: &str, attributes: T) -> Option<Operation<T>> {
if self.attributes == attributes { if self.attributes == attributes {