Initialize lookup tables based on options; reformat

4 years ago · 2ecccb9b68
parent 2c34494445
commit 2ecccb9b68
2 changed files with 211 additions and 206 deletions
--- a/src/parse.rs
+++ b/src/parse.rs
@ -672,211 +672,208 @@ impl<'a, 'b> FirstPass<'a, 'b> {
        let mut last_pipe_ix = start;
        let mut begin_text = start;

-        let (final_ix, brk) =
-            iterate_special_bytes(&self.lookup_table, bytes, start, |ix, byte| {
-                match byte {
-                    b'\n' | b'\r' => {
-                        if let TableParseMode::Active = mode {
-                            return LoopInstruction::BreakAtWith(ix, None);
-                        }
+        let (final_ix, brk) = iterate_special_bytes(&self.lookup_table, bytes, start, |ix, byte| {
+            match byte {
+                b'\n' | b'\r' => {
+                    if let TableParseMode::Active = mode {
+                        return LoopInstruction::BreakAtWith(ix, None);
+                    }

-                        let mut i = ix;
-                        let eol_bytes = scan_eol(&bytes[ix..]).unwrap();
-                        if mode == TableParseMode::Scan && pipes > 0 {
-                            // check if we may be parsing a table
-                            let next_line_ix = ix + eol_bytes;
-                            let mut line_start = LineStart::new(&bytes[next_line_ix..]);
-                            if scan_containers(&self.tree, &mut line_start) == self.tree.spine_len()
-                            {
-                                let table_head_ix = next_line_ix + line_start.bytes_scanned();
-                                let (table_head_bytes, alignment) =
-                                    scan_table_head(&bytes[table_head_ix..]);
-
-                                if table_head_bytes > 0 {
-                                    // computing header count from number of pipes
-                                    let header_count =
-                                        count_header_cols(bytes, pipes, start, last_pipe_ix);
-
-                                    // make sure they match the number of columns we find in separator line
-                                    if alignment.len() == header_count {
-                                        let alignment_ix =
-                                            self.allocs.allocate_alignment(alignment);
-                                        let end_ix = table_head_ix + table_head_bytes;
-                                        return LoopInstruction::BreakAtWith(
-                                            end_ix,
-                                            Some(Item {
-                                                start: i,
-                                                end: end_ix, // must update later
-                                                body: ItemBody::Table(alignment_ix),
-                                            }),
-                                        );
-                                    }
+                    let mut i = ix;
+                    let eol_bytes = scan_eol(&bytes[ix..]).unwrap();
+                    if mode == TableParseMode::Scan && pipes > 0 {
+                        // check if we may be parsing a table
+                        let next_line_ix = ix + eol_bytes;
+                        let mut line_start = LineStart::new(&bytes[next_line_ix..]);
+                        if scan_containers(&self.tree, &mut line_start) == self.tree.spine_len() {
+                            let table_head_ix = next_line_ix + line_start.bytes_scanned();
+                            let (table_head_bytes, alignment) =
+                                scan_table_head(&bytes[table_head_ix..]);
+
+                            if table_head_bytes > 0 {
+                                // computing header count from number of pipes
+                                let header_count =
+                                    count_header_cols(bytes, pipes, start, last_pipe_ix);
+
+                                // make sure they match the number of columns we find in separator line
+                                if alignment.len() == header_count {
+                                    let alignment_ix = self.allocs.allocate_alignment(alignment);
+                                    let end_ix = table_head_ix + table_head_bytes;
+                                    return LoopInstruction::BreakAtWith(
+                                        end_ix,
+                                        Some(Item {
+                                            start: i,
+                                            end: end_ix, // must update later
+                                            body: ItemBody::Table(alignment_ix),
+                                        }),
+                                    );
                                }
                            }
                        }
+                    }

-                        let end_ix = ix + eol_bytes;
-                        let trailing_backslashes = scan_rev_while(&bytes[..ix], |b| b == b'\\');
-                        if trailing_backslashes % 2 == 1 && end_ix < self.text.len() {
-                            i -= 1;
-                            self.tree.append_text(begin_text, i);
-                            return LoopInstruction::BreakAtWith(
-                                end_ix,
-                                Some(Item {
-                                    start: i,
-                                    end: end_ix,
-                                    body: ItemBody::HardBreak,
-                                }),
-                            );
-                        }
-                        let trailing_whitespace =
-                            scan_rev_while(&bytes[..ix], is_ascii_whitespace_no_nl);
-                        if trailing_whitespace >= 2 {
-                            i -= trailing_whitespace;
-                            self.tree.append_text(begin_text, i);
-                            return LoopInstruction::BreakAtWith(
-                                end_ix,
-                                Some(Item {
-                                    start: i,
-                                    end: end_ix,
-                                    body: ItemBody::HardBreak,
-                                }),
-                            );
-                        }
-
-                        self.tree.append_text(begin_text, ix);
-                        LoopInstruction::BreakAtWith(
+                    let end_ix = ix + eol_bytes;
+                    let trailing_backslashes = scan_rev_while(&bytes[..ix], |b| b == b'\\');
+                    if trailing_backslashes % 2 == 1 && end_ix < self.text.len() {
+                        i -= 1;
+                        self.tree.append_text(begin_text, i);
+                        return LoopInstruction::BreakAtWith(
                            end_ix,
                            Some(Item {
                                start: i,
                                end: end_ix,
-                                body: ItemBody::SoftBreak,
+                                body: ItemBody::HardBreak,
                            }),
-                        )
-                    }
-                    b'\\' => {
-                        if ix + 1 < self.text.len() && is_ascii_punctuation(bytes[ix + 1]) {
-                            self.tree.append_text(begin_text, ix);
-                            if bytes[ix + 1] == b'`' {
-                                let count = 1 + scan_ch_repeat(&bytes[(ix + 2)..], b'`');
-                                self.tree.append(Item {
-                                    start: ix + 1,
-                                    end: ix + count + 1,
-                                    body: ItemBody::MaybeCode(count, true),
-                                });
-                                begin_text = ix + 1 + count;
-                                LoopInstruction::ContinueAndSkip(count)
-                            } else {
-                                begin_text = ix + 1;
-                                LoopInstruction::ContinueAndSkip(1)
-                            }
-                        } else {
-                            LoopInstruction::ContinueAndSkip(0)
-                        }
+                        );
                    }
-                    c @ b'*' | c @ b'_' | c @ b'~' => {
-                        let string_suffix = &self.text[ix..];
-                        let count = 1 + scan_ch_repeat(&string_suffix.as_bytes()[1..], c);
-                        let can_open = delim_run_can_open(self.text, string_suffix, count, ix);
-                        let can_close = delim_run_can_close(self.text, string_suffix, count, ix);
-                        let is_valid_seq = c != b'~'
-                            || count == 2 && self.options.contains(Options::ENABLE_STRIKETHROUGH);
-
-                        if (can_open || can_close) && is_valid_seq {
-                            self.tree.append_text(begin_text, ix);
-                            for i in 0..count {
-                                self.tree.append(Item {
-                                    start: ix + i,
-                                    end: ix + i + 1,
-                                    body: ItemBody::MaybeEmphasis(count - i, can_open, can_close),
-                                });
-                            }
-                            begin_text = ix + count;
-                        }
-                        LoopInstruction::ContinueAndSkip(count - 1)
-                    }
-                    b'`' => {
-                        self.tree.append_text(begin_text, ix);
-                        let count = 1 + scan_ch_repeat(&bytes[(ix + 1)..], b'`');
-                        self.tree.append(Item {
-                            start: ix,
-                            end: ix + count,
-                            body: ItemBody::MaybeCode(count, false),
-                        });
-                        begin_text = ix + count;
-                        LoopInstruction::ContinueAndSkip(count - 1)
+                    let trailing_whitespace =
+                        scan_rev_while(&bytes[..ix], is_ascii_whitespace_no_nl);
+                    if trailing_whitespace >= 2 {
+                        i -= trailing_whitespace;
+                        self.tree.append_text(begin_text, i);
+                        return LoopInstruction::BreakAtWith(
+                            end_ix,
+                            Some(Item {
+                                start: i,
+                                end: end_ix,
+                                body: ItemBody::HardBreak,
+                            }),
+                        );
                    }
-                    b'<' => {
-                        // Note: could detect some non-HTML cases and early escape here, but not
-                        // clear that's a win.
+
+                    self.tree.append_text(begin_text, ix);
+                    LoopInstruction::BreakAtWith(
+                        end_ix,
+                        Some(Item {
+                            start: i,
+                            end: end_ix,
+                            body: ItemBody::SoftBreak,
+                        }),
+                    )
+                }
+                b'\\' => {
+                    if ix + 1 < self.text.len() && is_ascii_punctuation(bytes[ix + 1]) {
                        self.tree.append_text(begin_text, ix);
-                        self.tree.append(Item {
-                            start: ix,
-                            end: ix + 1,
-                            body: ItemBody::MaybeHtml,
-                        });
-                        begin_text = ix + 1;
+                        if bytes[ix + 1] == b'`' {
+                            let count = 1 + scan_ch_repeat(&bytes[(ix + 2)..], b'`');
+                            self.tree.append(Item {
+                                start: ix + 1,
+                                end: ix + count + 1,
+                                body: ItemBody::MaybeCode(count, true),
+                            });
+                            begin_text = ix + 1 + count;
+                            LoopInstruction::ContinueAndSkip(count)
+                        } else {
+                            begin_text = ix + 1;
+                            LoopInstruction::ContinueAndSkip(1)
+                        }
+                    } else {
                        LoopInstruction::ContinueAndSkip(0)
                    }
-                    b'!' => {
-                        if ix + 1 < self.text.len() && bytes[ix + 1] == b'[' {
-                            self.tree.append_text(begin_text, ix);
+                }
+                c @ b'*' | c @ b'_' | c @ b'~' => {
+                    let string_suffix = &self.text[ix..];
+                    let count = 1 + scan_ch_repeat(&string_suffix.as_bytes()[1..], c);
+                    let can_open = delim_run_can_open(self.text, string_suffix, count, ix);
+                    let can_close = delim_run_can_close(self.text, string_suffix, count, ix);
+                    let is_valid_seq = c != b'~'
+                        || count == 2 && self.options.contains(Options::ENABLE_STRIKETHROUGH);
+
+                    if (can_open || can_close) && is_valid_seq {
+                        self.tree.append_text(begin_text, ix);
+                        for i in 0..count {
                            self.tree.append(Item {
-                                start: ix,
-                                end: ix + 2,
-                                body: ItemBody::MaybeImage,
+                                start: ix + i,
+                                end: ix + i + 1,
+                                body: ItemBody::MaybeEmphasis(count - i, can_open, can_close),
                            });
-                            begin_text = ix + 2;
-                            LoopInstruction::ContinueAndSkip(1)
-                        } else {
-                            LoopInstruction::ContinueAndSkip(0)
                        }
+                        begin_text = ix + count;
                    }
-                    b'[' => {
+                    LoopInstruction::ContinueAndSkip(count - 1)
+                }
+                b'`' => {
+                    self.tree.append_text(begin_text, ix);
+                    let count = 1 + scan_ch_repeat(&bytes[(ix + 1)..], b'`');
+                    self.tree.append(Item {
+                        start: ix,
+                        end: ix + count,
+                        body: ItemBody::MaybeCode(count, false),
+                    });
+                    begin_text = ix + count;
+                    LoopInstruction::ContinueAndSkip(count - 1)
+                }
+                b'<' => {
+                    // Note: could detect some non-HTML cases and early escape here, but not
+                    // clear that's a win.
+                    self.tree.append_text(begin_text, ix);
+                    self.tree.append(Item {
+                        start: ix,
+                        end: ix + 1,
+                        body: ItemBody::MaybeHtml,
+                    });
+                    begin_text = ix + 1;
+                    LoopInstruction::ContinueAndSkip(0)
+                }
+                b'!' => {
+                    if ix + 1 < self.text.len() && bytes[ix + 1] == b'[' {
                        self.tree.append_text(begin_text, ix);
                        self.tree.append(Item {
                            start: ix,
-                            end: ix + 1,
-                            body: ItemBody::MaybeLinkOpen,
+                            end: ix + 2,
+                            body: ItemBody::MaybeImage,
                        });
-                        begin_text = ix + 1;
+                        begin_text = ix + 2;
+                        LoopInstruction::ContinueAndSkip(1)
+                    } else {
                        LoopInstruction::ContinueAndSkip(0)
                    }
-                    b']' => {
+                }
+                b'[' => {
+                    self.tree.append_text(begin_text, ix);
+                    self.tree.append(Item {
+                        start: ix,
+                        end: ix + 1,
+                        body: ItemBody::MaybeLinkOpen,
+                    });
+                    begin_text = ix + 1;
+                    LoopInstruction::ContinueAndSkip(0)
+                }
+                b']' => {
+                    self.tree.append_text(begin_text, ix);
+                    self.tree.append(Item {
+                        start: ix,
+                        end: ix + 1,
+                        body: ItemBody::MaybeLinkClose,
+                    });
+                    begin_text = ix + 1;
+                    LoopInstruction::ContinueAndSkip(0)
+                }
+                b'&' => match scan_entity(&bytes[ix..]) {
+                    (n, Some(value)) => {
                        self.tree.append_text(begin_text, ix);
                        self.tree.append(Item {
                            start: ix,
-                            end: ix + 1,
-                            body: ItemBody::MaybeLinkClose,
+                            end: ix + n,
+                            body: ItemBody::SynthesizeText(self.allocs.allocate_cow(value)),
                        });
-                        begin_text = ix + 1;
-                        LoopInstruction::ContinueAndSkip(0)
-                    }
-                    b'&' => match scan_entity(&bytes[ix..]) {
-                        (n, Some(value)) => {
-                            self.tree.append_text(begin_text, ix);
-                            self.tree.append(Item {
-                                start: ix,
-                                end: ix + n,
-                                body: ItemBody::SynthesizeText(self.allocs.allocate_cow(value)),
-                            });
-                            begin_text = ix + n;
-                            LoopInstruction::ContinueAndSkip(n - 1)
-                        }
-                        _ => LoopInstruction::ContinueAndSkip(0),
-                    },
-                    b'|' => {
-                        if let TableParseMode::Active = mode {
-                            LoopInstruction::BreakAtWith(ix, None)
-                        } else {
-                            last_pipe_ix = ix;
-                            pipes += 1;
-                            LoopInstruction::ContinueAndSkip(0)
-                        }
+                        begin_text = ix + n;
+                        LoopInstruction::ContinueAndSkip(n - 1)
                    }
                    _ => LoopInstruction::ContinueAndSkip(0),
+                },
+                b'|' => {
+                    if let TableParseMode::Active = mode {
+                        LoopInstruction::BreakAtWith(ix, None)
+                    } else {
+                        last_pipe_ix = ix;
+                        pipes += 1;
+                        LoopInstruction::ContinueAndSkip(0)
+                    }
                }
-            });
+                _ => LoopInstruction::ContinueAndSkip(0),
+            }
+        });

        if brk.is_none() {
            // need to close text at eof
@ -1924,17 +1921,43 @@ pub(crate) struct HtmlScanGuard {
    pub declaration: usize,
 }

-pub(crate) fn create_lut(_options: &Options) -> LookupTable {
+// TODO: we can make this a `const fn` once `if` inside const fns
+// is stabilized.
+fn special_bytes(options: &Options) -> [bool; 256] {
+    let mut bytes = [false; 256];
+    bytes[b'<' as usize] = true;
+    bytes[b'!' as usize] = true;
+    bytes[b'[' as usize] = true;
+    bytes[b'`' as usize] = true;
+    bytes[b'\\' as usize] = true;
+    bytes[b'*' as usize] = true;
+    bytes[b'_' as usize] = true;
+    bytes[b'\r' as usize] = true;
+    bytes[b'\n' as usize] = true;
+    bytes[b']' as usize] = true;
+    bytes[b'&' as usize] = true;
+
+    if options.contains(Options::ENABLE_TABLES) {
+        bytes[b'|' as usize] = true;
+    }
+    if options.contains(Options::ENABLE_STRIKETHROUGH) {
+        bytes[b'~' as usize] = true;
+    }
+
+    bytes
+}
+
+pub(crate) fn create_lut(options: &Options) -> LookupTable {
    #[cfg(all(target_arch = "x86_64", feature = "simd"))]
    {
        LookupTable {
-            simd: crate::simd::compute_lookup(),
-            scalar: special_bytes(),
+            simd: crate::simd::compute_lookup(options),
+            scalar: special_bytes(options),
        }
    }
    #[cfg(not(all(target_arch = "x86_64", feature = "simd")))]
    {
-        special_bytes()
+        special_bytes(options)
    }
 }

@ -2613,12 +2636,7 @@ type LookupTable = [bool; 256];
 /// called and the function returns immediately with the return value `(end_ix, opt_val)`.
 /// If `BreakAtWith(..)` is never returned, this function will return the first
 /// index that is outside the byteslice bound and a `None` value.
-fn iterate_special_bytes<F, T>(
-    lut: &LookupTable,
-    bytes: &[u8],
-    ix: usize,
-    callback: F,
-) -> (usize, Option<T>)
+fn iterate_special_bytes<F, T>(lut: &LookupTable, bytes: &[u8], ix: usize, callback: F) -> (usize, Option<T>)
 where
    F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
 {
@ -2632,24 +2650,6 @@ where
    }
 }

-const fn special_bytes() -> [bool; 256] {
-    let mut bytes = [false; 256];
-    bytes[b'<' as usize] = true;
-    bytes[b'!' as usize] = true;
-    bytes[b'[' as usize] = true;
-    bytes[b'~' as usize] = true;
-    bytes[b'`' as usize] = true;
-    bytes[b'|' as usize] = true;
-    bytes[b'\\' as usize] = true;
-    bytes[b'*' as usize] = true;
-    bytes[b'_' as usize] = true;
-    bytes[b'\r' as usize] = true;
-    bytes[b'\n' as usize] = true;
-    bytes[b']' as usize] = true;
-    bytes[b'&' as usize] = true;
-    bytes
-}
-
 pub(crate) fn scalar_iterate_special_bytes<F, T>(
    lut: &[bool; 256],
    bytes: &[u8],
--- a/src/simd.rs
+++ b/src/simd.rs
@ -16,7 +16,7 @@
 //!
 //! [great overview]: http://0x80.pl/articles/simd-byte-lookup.html

-use crate::parse::{LookupTable, LoopInstruction};
+use crate::parse::{LookupTable, LoopInstruction, Options};
 use core::arch::x86_64::*;

 pub(crate) const VECTOR_SIZE: usize = std::mem::size_of::<__m128i>();
@ -25,14 +25,12 @@ pub(crate) const VECTOR_SIZE: usize = std::mem::size_of::<__m128i>();
 /// special marker bytes. This is effectively a 128 element 2d bitvector,
 /// that can be indexed by a four bit row index (the lower nibble)
 /// and a three bit column index (upper nibble).
-pub(crate) const fn compute_lookup() -> [u8; 16] {
+pub(crate) fn compute_lookup(options: &Options) -> [u8; 16] {
    let mut lookup = [0u8; 16];
    lookup[(b'\n' & 0x0f) as usize] |= 1 << (b'\n' >> 4);
    lookup[(b'\r' & 0x0f) as usize] |= 1 << (b'\r' >> 4);
    lookup[(b'*' & 0x0f) as usize] |= 1 << (b'*' >> 4);
    lookup[(b'_' & 0x0f) as usize] |= 1 << (b'_' >> 4);
-    lookup[(b'~' & 0x0f) as usize] |= 1 << (b'~' >> 4);
-    lookup[(b'|' & 0x0f) as usize] |= 1 << (b'|' >> 4);
    lookup[(b'&' & 0x0f) as usize] |= 1 << (b'&' >> 4);
    lookup[(b'\\' & 0x0f) as usize] |= 1 << (b'\\' >> 4);
    lookup[(b'[' & 0x0f) as usize] |= 1 << (b'[' >> 4);
@ -40,6 +38,14 @@ pub(crate) const fn compute_lookup() -> [u8; 16] {
    lookup[(b'<' & 0x0f) as usize] |= 1 << (b'<' >> 4);
    lookup[(b'!' & 0x0f) as usize] |= 1 << (b'!' >> 4);
    lookup[(b'`' & 0x0f) as usize] |= 1 << (b'`' >> 4);
+
+    if options.contains(Options::ENABLE_TABLES) {
+        lookup[(b'|' & 0x0f) as usize] |= 1 << (b'|' >> 4);
+    }
+    if options.contains(Options::ENABLE_STRIKETHROUGH) {
+        lookup[(b'~' & 0x0f) as usize] |= 1 << (b'~' >> 4);
+    }
+
    lookup
 }

@ -54,7 +60,6 @@ pub(crate) const fn compute_lookup() -> [u8; 16] {
 unsafe fn compute_mask(lut: &[u8; 16], bytes: &[u8], ix: usize) -> i32 {
    debug_assert!(bytes.len() >= ix + VECTOR_SIZE);

-    //let lookup = compute_lookup();
    let bitmap = _mm_loadu_si128(lut.as_ptr() as *const __m128i);
    // Small lookup table to compute single bit bitshifts
    // for 16 bytes at once.