Clean up smart punctuation implementation

Address feedback from Raph, remove some stale comments and add
two more tests.
pull/1/head
Marcus Klaas de Vries 4 years ago
parent a59707720a
commit b91453dbb1

@ -784,8 +784,7 @@ impl<'a, 'b> FirstPass<'a, 'b> {
let count = 1 + scan_ch_repeat(&string_suffix.as_bytes()[1..], c);
let can_open = delim_run_can_open(self.text, string_suffix, count, ix);
let can_close = delim_run_can_close(self.text, string_suffix, count, ix);
let is_valid_seq = c != b'~'
|| count == 2 && self.options.contains(Options::ENABLE_STRIKETHROUGH);
let is_valid_seq = c != b'~' || count == 2;
if (can_open || can_close) && is_valid_seq {
self.tree.append_text(begin_text, ix);
@ -860,8 +859,6 @@ impl<'a, 'b> FirstPass<'a, 'b> {
b'&' => match scan_entity(&bytes[ix..]) {
(n, Some(value)) => {
self.tree.append_text(begin_text, ix);
// TODO: when the cow is a character, we should create
// a synthesizeChar instead
self.tree.append(Item {
start: ix,
end: ix + n,
@ -900,8 +897,6 @@ impl<'a, 'b> FirstPass<'a, 'b> {
if count == 1 {
LoopInstruction::ContinueAndSkip(0)
} else {
// TODO: if we make a special itembody for em/ en, we do not
// need to allocate a cow space for them.
let itembody = if count == 2 {
ItemBody::SynthesizeChar('')
} else if count == 3 {
@ -2000,22 +1995,15 @@ pub(crate) struct HtmlScanGuard {
pub declaration: usize,
}
// TODO: we can make this a `const fn` once `if` inside const fns
// is stabilized.
fn special_bytes(options: &Options) -> [bool; 256] {
let mut bytes = [false; 256];
bytes[b'<' as usize] = true;
bytes[b'!' as usize] = true;
bytes[b'[' as usize] = true;
bytes[b'`' as usize] = true;
bytes[b'\\' as usize] = true;
bytes[b'*' as usize] = true;
bytes[b'_' as usize] = true;
bytes[b'\r' as usize] = true;
bytes[b'\n' as usize] = true;
bytes[b']' as usize] = true;
bytes[b'&' as usize] = true;
let standard_bytes = [
b'\n', b'\r', b'*', b'_', b'&', b'\\', b'[', b']', b'<', b'!', b'`',
];
for &byte in &standard_bytes {
bytes[byte as usize] = true;
}
if options.contains(Options::ENABLE_TABLES) {
bytes[b'|' as usize] = true;
}
@ -2023,10 +2011,9 @@ fn special_bytes(options: &Options) -> [bool; 256] {
bytes[b'~' as usize] = true;
}
if options.contains(Options::ENABLE_SMART_PUNCTUATION) {
bytes[b'.' as usize] = true;
bytes[b'-' as usize] = true;
bytes[b'\'' as usize] = true;
bytes[b'"' as usize] = true;
for &byte in &[b'.', b'-', b'"', b'\''] {
bytes[byte as usize] = true;
}
}
bytes

@ -27,34 +27,32 @@ pub(crate) const VECTOR_SIZE: usize = std::mem::size_of::<__m128i>();
/// and a three bit column index (upper nibble).
pub(crate) fn compute_lookup(options: &Options) -> [u8; 16] {
    // Start from an empty table; every special byte is registered exactly
    // once through `add_lookup_byte`, which ORs a single bit into the entry
    // selected by the byte's low nibble.
    let mut lookup = [0u8; 16];

    // Bytes that are always special, independent of the enabled options.
    let standard_bytes = [
        b'\n', b'\r', b'*', b'_', b'&', b'\\', b'[', b']', b'<', b'!', b'`',
    ];
    for &byte in &standard_bytes {
        add_lookup_byte(&mut lookup, byte);
    }

    // Bytes that only become special when the matching option is enabled.
    if options.contains(Options::ENABLE_TABLES) {
        add_lookup_byte(&mut lookup, b'|');
    }
    if options.contains(Options::ENABLE_STRIKETHROUGH) {
        add_lookup_byte(&mut lookup, b'~');
    }
    if options.contains(Options::ENABLE_SMART_PUNCTUATION) {
        for &byte in &[b'.', b'-', b'"', b'\''] {
            add_lookup_byte(&mut lookup, byte);
        }
    }

    lookup
}
/// Marks `byte` in the nibble-indexed lookup table.
///
/// The low nibble of `byte` selects one of the 16 table entries and the high
/// nibble selects the bit that is set within that entry. Bits that are
/// already set are left untouched, so registering a byte twice is harmless.
///
/// NOTE(review): the callers visible here only pass ASCII bytes, whose high
/// nibble is below 8; a byte >= 0x80 would shift past the width of a `u8`.
fn add_lookup_byte(lookup: &mut [u8; 16], byte: u8) {
    let row = (byte & 0x0f) as usize;
    let column = byte >> 4;
    lookup[row] |= 1 << column;
}
/// Computes a bit mask for the given byteslice starting from the given index,
/// where the 16 least significant bits indicate (by value of 1) whether or not
/// there is a special character at that byte position. The least significant bit

@ -2,7 +2,7 @@
extern crate pulldown_cmark;
use pulldown_cmark::{html, Options, Parser};
use pulldown_cmark::{html, BrokenLink, Options, Parser};
#[test]
fn html_test_1() {
@ -209,8 +209,16 @@ fn html_test_10() {
assert_eq!(expected, s);
}
// TODO: add broken link callback feature
/*
#[test]
fn html_test_11() {
    // With the plain `Parser::new` constructor the `~~` markers are expected
    // to come through as literal text rather than as strikethrough markup.
    let original = "hi ~~no~~";
    let parser = Parser::new(&original);

    let mut rendered = String::new();
    html::push_html(&mut rendered, parser);

    let expected = "<p>hi ~~no~~</p>\n";
    assert_eq!(expected, rendered);
}
#[test]
fn html_test_broken_callback() {
let original = r##"[foo],
@ -225,21 +233,20 @@ fn html_test_broken_callback() {
<a href="https://example.org">baz</a>,</p>
"##;
use pulldown_cmark::{Options, Parser, html};
use pulldown_cmark::{html, Options, Parser};
let mut s = String::new();
let callback = |reference: &str, _normalized: &str| -> Option<(String, String)> {
if reference == "foo" || reference == "baz" {
let callback = |broken_link: BrokenLink| {
if broken_link.reference == "foo" || broken_link.reference == "baz" {
Some(("https://replaced.example.org".into(), "some title".into()))
} else {
None
}
};
let p = Parser::new_with_broken_link_callback(&original, Options::empty(), Some(&callback));
let p = Parser::new_with_broken_link_callback(&original, Options::empty(), Some(&mut callback));
html::push_html(&mut s, p);
assert_eq!(expected, s);
}
*/

Loading…
Cancel
Save