Initialize lookup tables based on options; reformat

bidi-plume
Marcus Klaas de Vries 4 years ago
parent 2c34494445
commit 2ecccb9b68

@ -672,211 +672,208 @@ impl<'a, 'b> FirstPass<'a, 'b> {
let mut last_pipe_ix = start;
let mut begin_text = start;
let (final_ix, brk) =
iterate_special_bytes(&self.lookup_table, bytes, start, |ix, byte| {
match byte {
b'\n' | b'\r' => {
if let TableParseMode::Active = mode {
return LoopInstruction::BreakAtWith(ix, None);
}
let (final_ix, brk) = iterate_special_bytes(&self.lookup_table, bytes, start, |ix, byte| {
match byte {
b'\n' | b'\r' => {
if let TableParseMode::Active = mode {
return LoopInstruction::BreakAtWith(ix, None);
}
let mut i = ix;
let eol_bytes = scan_eol(&bytes[ix..]).unwrap();
if mode == TableParseMode::Scan && pipes > 0 {
// check if we may be parsing a table
let next_line_ix = ix + eol_bytes;
let mut line_start = LineStart::new(&bytes[next_line_ix..]);
if scan_containers(&self.tree, &mut line_start) == self.tree.spine_len()
{
let table_head_ix = next_line_ix + line_start.bytes_scanned();
let (table_head_bytes, alignment) =
scan_table_head(&bytes[table_head_ix..]);
if table_head_bytes > 0 {
// computing header count from number of pipes
let header_count =
count_header_cols(bytes, pipes, start, last_pipe_ix);
// make sure they match the number of columns we find in separator line
if alignment.len() == header_count {
let alignment_ix =
self.allocs.allocate_alignment(alignment);
let end_ix = table_head_ix + table_head_bytes;
return LoopInstruction::BreakAtWith(
end_ix,
Some(Item {
start: i,
end: end_ix, // must update later
body: ItemBody::Table(alignment_ix),
}),
);
}
let mut i = ix;
let eol_bytes = scan_eol(&bytes[ix..]).unwrap();
if mode == TableParseMode::Scan && pipes > 0 {
// check if we may be parsing a table
let next_line_ix = ix + eol_bytes;
let mut line_start = LineStart::new(&bytes[next_line_ix..]);
if scan_containers(&self.tree, &mut line_start) == self.tree.spine_len() {
let table_head_ix = next_line_ix + line_start.bytes_scanned();
let (table_head_bytes, alignment) =
scan_table_head(&bytes[table_head_ix..]);
if table_head_bytes > 0 {
// computing header count from number of pipes
let header_count =
count_header_cols(bytes, pipes, start, last_pipe_ix);
// make sure they match the number of columns we find in separator line
if alignment.len() == header_count {
let alignment_ix = self.allocs.allocate_alignment(alignment);
let end_ix = table_head_ix + table_head_bytes;
return LoopInstruction::BreakAtWith(
end_ix,
Some(Item {
start: i,
end: end_ix, // must update later
body: ItemBody::Table(alignment_ix),
}),
);
}
}
}
}
let end_ix = ix + eol_bytes;
let trailing_backslashes = scan_rev_while(&bytes[..ix], |b| b == b'\\');
if trailing_backslashes % 2 == 1 && end_ix < self.text.len() {
i -= 1;
self.tree.append_text(begin_text, i);
return LoopInstruction::BreakAtWith(
end_ix,
Some(Item {
start: i,
end: end_ix,
body: ItemBody::HardBreak,
}),
);
}
let trailing_whitespace =
scan_rev_while(&bytes[..ix], is_ascii_whitespace_no_nl);
if trailing_whitespace >= 2 {
i -= trailing_whitespace;
self.tree.append_text(begin_text, i);
return LoopInstruction::BreakAtWith(
end_ix,
Some(Item {
start: i,
end: end_ix,
body: ItemBody::HardBreak,
}),
);
}
self.tree.append_text(begin_text, ix);
LoopInstruction::BreakAtWith(
let end_ix = ix + eol_bytes;
let trailing_backslashes = scan_rev_while(&bytes[..ix], |b| b == b'\\');
if trailing_backslashes % 2 == 1 && end_ix < self.text.len() {
i -= 1;
self.tree.append_text(begin_text, i);
return LoopInstruction::BreakAtWith(
end_ix,
Some(Item {
start: i,
end: end_ix,
body: ItemBody::SoftBreak,
body: ItemBody::HardBreak,
}),
)
}
b'\\' => {
if ix + 1 < self.text.len() && is_ascii_punctuation(bytes[ix + 1]) {
self.tree.append_text(begin_text, ix);
if bytes[ix + 1] == b'`' {
let count = 1 + scan_ch_repeat(&bytes[(ix + 2)..], b'`');
self.tree.append(Item {
start: ix + 1,
end: ix + count + 1,
body: ItemBody::MaybeCode(count, true),
});
begin_text = ix + 1 + count;
LoopInstruction::ContinueAndSkip(count)
} else {
begin_text = ix + 1;
LoopInstruction::ContinueAndSkip(1)
}
} else {
LoopInstruction::ContinueAndSkip(0)
}
);
}
c @ b'*' | c @ b'_' | c @ b'~' => {
let string_suffix = &self.text[ix..];
let count = 1 + scan_ch_repeat(&string_suffix.as_bytes()[1..], c);
let can_open = delim_run_can_open(self.text, string_suffix, count, ix);
let can_close = delim_run_can_close(self.text, string_suffix, count, ix);
let is_valid_seq = c != b'~'
|| count == 2 && self.options.contains(Options::ENABLE_STRIKETHROUGH);
if (can_open || can_close) && is_valid_seq {
self.tree.append_text(begin_text, ix);
for i in 0..count {
self.tree.append(Item {
start: ix + i,
end: ix + i + 1,
body: ItemBody::MaybeEmphasis(count - i, can_open, can_close),
});
}
begin_text = ix + count;
}
LoopInstruction::ContinueAndSkip(count - 1)
}
b'`' => {
self.tree.append_text(begin_text, ix);
let count = 1 + scan_ch_repeat(&bytes[(ix + 1)..], b'`');
self.tree.append(Item {
start: ix,
end: ix + count,
body: ItemBody::MaybeCode(count, false),
});
begin_text = ix + count;
LoopInstruction::ContinueAndSkip(count - 1)
let trailing_whitespace =
scan_rev_while(&bytes[..ix], is_ascii_whitespace_no_nl);
if trailing_whitespace >= 2 {
i -= trailing_whitespace;
self.tree.append_text(begin_text, i);
return LoopInstruction::BreakAtWith(
end_ix,
Some(Item {
start: i,
end: end_ix,
body: ItemBody::HardBreak,
}),
);
}
b'<' => {
// Note: could detect some non-HTML cases and early escape here, but not
// clear that's a win.
self.tree.append_text(begin_text, ix);
LoopInstruction::BreakAtWith(
end_ix,
Some(Item {
start: i,
end: end_ix,
body: ItemBody::SoftBreak,
}),
)
}
b'\\' => {
if ix + 1 < self.text.len() && is_ascii_punctuation(bytes[ix + 1]) {
self.tree.append_text(begin_text, ix);
self.tree.append(Item {
start: ix,
end: ix + 1,
body: ItemBody::MaybeHtml,
});
begin_text = ix + 1;
if bytes[ix + 1] == b'`' {
let count = 1 + scan_ch_repeat(&bytes[(ix + 2)..], b'`');
self.tree.append(Item {
start: ix + 1,
end: ix + count + 1,
body: ItemBody::MaybeCode(count, true),
});
begin_text = ix + 1 + count;
LoopInstruction::ContinueAndSkip(count)
} else {
begin_text = ix + 1;
LoopInstruction::ContinueAndSkip(1)
}
} else {
LoopInstruction::ContinueAndSkip(0)
}
b'!' => {
if ix + 1 < self.text.len() && bytes[ix + 1] == b'[' {
self.tree.append_text(begin_text, ix);
}
c @ b'*' | c @ b'_' | c @ b'~' => {
let string_suffix = &self.text[ix..];
let count = 1 + scan_ch_repeat(&string_suffix.as_bytes()[1..], c);
let can_open = delim_run_can_open(self.text, string_suffix, count, ix);
let can_close = delim_run_can_close(self.text, string_suffix, count, ix);
let is_valid_seq = c != b'~'
|| count == 2 && self.options.contains(Options::ENABLE_STRIKETHROUGH);
if (can_open || can_close) && is_valid_seq {
self.tree.append_text(begin_text, ix);
for i in 0..count {
self.tree.append(Item {
start: ix,
end: ix + 2,
body: ItemBody::MaybeImage,
start: ix + i,
end: ix + i + 1,
body: ItemBody::MaybeEmphasis(count - i, can_open, can_close),
});
begin_text = ix + 2;
LoopInstruction::ContinueAndSkip(1)
} else {
LoopInstruction::ContinueAndSkip(0)
}
begin_text = ix + count;
}
b'[' => {
LoopInstruction::ContinueAndSkip(count - 1)
}
b'`' => {
self.tree.append_text(begin_text, ix);
let count = 1 + scan_ch_repeat(&bytes[(ix + 1)..], b'`');
self.tree.append(Item {
start: ix,
end: ix + count,
body: ItemBody::MaybeCode(count, false),
});
begin_text = ix + count;
LoopInstruction::ContinueAndSkip(count - 1)
}
b'<' => {
// Note: could detect some non-HTML cases and early escape here, but not
// clear that's a win.
self.tree.append_text(begin_text, ix);
self.tree.append(Item {
start: ix,
end: ix + 1,
body: ItemBody::MaybeHtml,
});
begin_text = ix + 1;
LoopInstruction::ContinueAndSkip(0)
}
b'!' => {
if ix + 1 < self.text.len() && bytes[ix + 1] == b'[' {
self.tree.append_text(begin_text, ix);
self.tree.append(Item {
start: ix,
end: ix + 1,
body: ItemBody::MaybeLinkOpen,
end: ix + 2,
body: ItemBody::MaybeImage,
});
begin_text = ix + 1;
begin_text = ix + 2;
LoopInstruction::ContinueAndSkip(1)
} else {
LoopInstruction::ContinueAndSkip(0)
}
b']' => {
}
b'[' => {
self.tree.append_text(begin_text, ix);
self.tree.append(Item {
start: ix,
end: ix + 1,
body: ItemBody::MaybeLinkOpen,
});
begin_text = ix + 1;
LoopInstruction::ContinueAndSkip(0)
}
b']' => {
self.tree.append_text(begin_text, ix);
self.tree.append(Item {
start: ix,
end: ix + 1,
body: ItemBody::MaybeLinkClose,
});
begin_text = ix + 1;
LoopInstruction::ContinueAndSkip(0)
}
b'&' => match scan_entity(&bytes[ix..]) {
(n, Some(value)) => {
self.tree.append_text(begin_text, ix);
self.tree.append(Item {
start: ix,
end: ix + 1,
body: ItemBody::MaybeLinkClose,
end: ix + n,
body: ItemBody::SynthesizeText(self.allocs.allocate_cow(value)),
});
begin_text = ix + 1;
LoopInstruction::ContinueAndSkip(0)
}
b'&' => match scan_entity(&bytes[ix..]) {
(n, Some(value)) => {
self.tree.append_text(begin_text, ix);
self.tree.append(Item {
start: ix,
end: ix + n,
body: ItemBody::SynthesizeText(self.allocs.allocate_cow(value)),
});
begin_text = ix + n;
LoopInstruction::ContinueAndSkip(n - 1)
}
_ => LoopInstruction::ContinueAndSkip(0),
},
b'|' => {
if let TableParseMode::Active = mode {
LoopInstruction::BreakAtWith(ix, None)
} else {
last_pipe_ix = ix;
pipes += 1;
LoopInstruction::ContinueAndSkip(0)
}
begin_text = ix + n;
LoopInstruction::ContinueAndSkip(n - 1)
}
_ => LoopInstruction::ContinueAndSkip(0),
},
b'|' => {
if let TableParseMode::Active = mode {
LoopInstruction::BreakAtWith(ix, None)
} else {
last_pipe_ix = ix;
pipes += 1;
LoopInstruction::ContinueAndSkip(0)
}
}
});
_ => LoopInstruction::ContinueAndSkip(0),
}
});
if brk.is_none() {
// need to close text at eof
@ -1924,17 +1921,43 @@ pub(crate) struct HtmlScanGuard {
pub declaration: usize,
}
pub(crate) fn create_lut(_options: &Options) -> LookupTable {
/// Builds the 256-entry scalar table of special bytes for the given `options`.
///
/// The entry at index `b as usize` is `true` when byte `b` is flagged. The
/// bytes in `ALWAYS_SPECIAL` are flagged unconditionally; `|` is flagged only
/// when `options` contains `Options::ENABLE_TABLES`, and `~` only when it
/// contains `Options::ENABLE_STRIKETHROUGH`.
// TODO: this can become a `const fn` once `if` inside const fns is
// stabilized.
fn special_bytes(options: &Options) -> [bool; 256] {
    // Bytes that are flagged regardless of which extensions are enabled.
    const ALWAYS_SPECIAL: [u8; 11] = [
        b'<', b'!', b'[', b'`', b'\\', b'*', b'_', b'\r', b'\n', b']', b'&',
    ];

    let mut table = [false; 256];

    // An index-based `while` loop (rather than `for`) keeps the body within
    // what a const fn accepts, so the TODO above stays achievable.
    let mut next = 0;
    while next < ALWAYS_SPECIAL.len() {
        table[ALWAYS_SPECIAL[next] as usize] = true;
        next += 1;
    }

    // Extension markers: every entry starts out `false`, so storing the flag
    // check directly is the same as setting the entry only when enabled.
    table[b'|' as usize] = options.contains(Options::ENABLE_TABLES);
    table[b'~' as usize] = options.contains(Options::ENABLE_STRIKETHROUGH);

    table
}
/// Builds the [`LookupTable`] for the given parser `options`.
///
/// On x86_64 builds with the `simd` feature enabled, the table is a struct
/// holding both the result of `crate::simd::compute_lookup` and the scalar
/// table from `special_bytes`. On every other configuration the scalar table
/// from `special_bytes` is returned directly.
pub(crate) fn create_lut(options: &Options) -> LookupTable {
#[cfg(all(target_arch = "x86_64", feature = "simd"))]
{
LookupTable {
simd: crate::simd::compute_lookup(),
scalar: special_bytes(),
simd: crate::simd::compute_lookup(options),
scalar: special_bytes(options),
}
}
#[cfg(not(all(target_arch = "x86_64", feature = "simd")))]
{
special_bytes()
special_bytes(options)
}
}
@ -2613,12 +2636,7 @@ type LookupTable = [bool; 256];
/// called and the function returns immediately with the return value `(end_ix, opt_val)`.
/// If `BreakAtWith(..)` is never returned, this function will return the first
/// index that is outside the byteslice bound and a `None` value.
fn iterate_special_bytes<F, T>(
lut: &LookupTable,
bytes: &[u8],
ix: usize,
callback: F,
) -> (usize, Option<T>)
fn iterate_special_bytes<F, T>(lut: &LookupTable, bytes: &[u8], ix: usize, callback: F) -> (usize, Option<T>)
where
F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
{
@ -2632,24 +2650,6 @@ where
}
}
/// Returns a 256-entry table, indexed by byte value, in which the entry for
/// each byte in `MARKERS` is `true` and every other entry is `false`.
const fn special_bytes() -> [bool; 256] {
    // Every byte that gets flagged in the table.
    const MARKERS: [u8; 13] = [
        b'<', b'!', b'[', b'~', b'`', b'|', b'\\', b'*', b'_', b'\r', b'\n', b']', b'&',
    ];

    let mut table = [false; 256];

    // `for` loops are not permitted in a const fn, hence the manual index.
    let mut next = 0;
    while next < MARKERS.len() {
        table[MARKERS[next] as usize] = true;
        next += 1;
    }

    table
}
pub(crate) fn scalar_iterate_special_bytes<F, T>(
lut: &[bool; 256],
bytes: &[u8],

@ -16,7 +16,7 @@
//!
//! [great overview]: http://0x80.pl/articles/simd-byte-lookup.html
use crate::parse::{LookupTable, LoopInstruction};
use crate::parse::{LookupTable, LoopInstruction, Options};
use core::arch::x86_64::*;
pub(crate) const VECTOR_SIZE: usize = std::mem::size_of::<__m128i>();
@ -25,14 +25,12 @@ pub(crate) const VECTOR_SIZE: usize = std::mem::size_of::<__m128i>();
/// special marker bytes. This is effectively a 128 element 2d bitvector,
/// that can be indexed by a four bit row index (the lower nibble)
/// and a three bit column index (upper nibble).
pub(crate) const fn compute_lookup() -> [u8; 16] {
pub(crate) fn compute_lookup(options: &Options) -> [u8; 16] {
let mut lookup = [0u8; 16];
lookup[(b'\n' & 0x0f) as usize] |= 1 << (b'\n' >> 4);
lookup[(b'\r' & 0x0f) as usize] |= 1 << (b'\r' >> 4);
lookup[(b'*' & 0x0f) as usize] |= 1 << (b'*' >> 4);
lookup[(b'_' & 0x0f) as usize] |= 1 << (b'_' >> 4);
lookup[(b'~' & 0x0f) as usize] |= 1 << (b'~' >> 4);
lookup[(b'|' & 0x0f) as usize] |= 1 << (b'|' >> 4);
lookup[(b'&' & 0x0f) as usize] |= 1 << (b'&' >> 4);
lookup[(b'\\' & 0x0f) as usize] |= 1 << (b'\\' >> 4);
lookup[(b'[' & 0x0f) as usize] |= 1 << (b'[' >> 4);
@ -40,6 +38,14 @@ pub(crate) const fn compute_lookup() -> [u8; 16] {
lookup[(b'<' & 0x0f) as usize] |= 1 << (b'<' >> 4);
lookup[(b'!' & 0x0f) as usize] |= 1 << (b'!' >> 4);
lookup[(b'`' & 0x0f) as usize] |= 1 << (b'`' >> 4);
if options.contains(Options::ENABLE_TABLES) {
lookup[(b'|' & 0x0f) as usize] |= 1 << (b'|' >> 4);
}
if options.contains(Options::ENABLE_STRIKETHROUGH) {
lookup[(b'~' & 0x0f) as usize] |= 1 << (b'~' >> 4);
}
lookup
}
@ -54,7 +60,6 @@ pub(crate) const fn compute_lookup() -> [u8; 16] {
unsafe fn compute_mask(lut: &[u8; 16], bytes: &[u8], ix: usize) -> i32 {
debug_assert!(bytes.len() >= ix + VECTOR_SIZE);
//let lookup = compute_lookup();
let bitmap = _mm_loadu_si128(lut.as_ptr() as *const __m128i);
// Small lookup table to compute single bit bitshifts
// for 16 bytes at once.

Loading…
Cancel
Save