From f6d28b62f0e02b8a88a1832589e203c7a613f45b Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Fri, 25 Nov 2022 22:06:40 -0500 Subject: regex review, match speed & compile time, ctregex - improve match time - add interim fontface identifier marker - improve compile time - remove unused regexs - separate out some specialized output matches --- src/doc_reform/io_out/rgx.d | 38 +++++++++++--------------------------- 1 file changed, 11 insertions(+), 27 deletions(-) (limited to 'src/doc_reform/io_out/rgx.d') diff --git a/src/doc_reform/io_out/rgx.d b/src/doc_reform/io_out/rgx.d index 8369735..943643c 100644 --- a/src/doc_reform/io_out/rgx.d +++ b/src/doc_reform/io_out/rgx.d @@ -68,8 +68,7 @@ static template spineRgxOut() { static src_pth_contents = ctRegex!(`^(?P[/]?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9][a-zA-Z0-9._-]*)/pod[.]manifest$`); static src_pth_zip = ctRegex!(`^(?P[/]?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.]zip)$`); static src_pth_types = ctRegex!(`^(?P[/]?[a-zA-Z0-9._-]+/)*(?P(?P[a-zA-Z0-9._-]+[.]ss[tm])|(?P[a-zA-Z0-9._-]+/pod[.]manifest)|(?P[a-zA-Z0-9._-]+[.]zip))$`); - static src_fn = - ctRegex!(`^([/]?(?:[a-zA-Z0-9._-]+/)*)(?P(?P[a-zA-Z0-9._-]+)[.](?Pss[tm]))$`); + static src_fn = ctRegex!(`^([/]?(?:[a-zA-Z0-9._-]+/)*)(?P(?P[a-zA-Z0-9._-]+)[.](?Pss[tm]))$`); static src_fn_master = ctRegex!(`^(?P/?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.]ssm)$`); static src_fn_find_inserts = ctRegex!(`^(?P/?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.]ss[im])$`); static insert_src_fn_ssi_or_sst = ctRegex!(`^<<\s*(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[ti])$`); @@ -119,34 +118,19 @@ static template spineRgxOut() { static mark_internal_site_lnk = ctRegex!(`¤`, "mg"); static quotation_mark_sql_insert_delimiter = ctRegex!("[']", "mg"); /+ inline markup font face mod +/ - static inline_emphasis = ctRegex!(`[*]┨(?P.+?)┣[*]`, "mg"); - static inline_bold = ctRegex!(`[!]┨(?P.+?)┣[!]`, "mg"); - static inline_underscore = ctRegex!(`[_]┨(?P.+?)┣[_]`, "mg"); - static inline_italics = ctRegex!(`[/]┨(?P.+?)┣[/]`, "mg"); - static inline_superscript = ctRegex!(`\^┨(?P.+?)┣\^`, "mg"); - static inline_subscript = ctRegex!(`[,]┨(?P.+?)┣[,]`, "mg"); - static inline_strike = ctRegex!(`[-]┨(?P.+?)┣[-]`, "mg"); - static inline_insert = ctRegex!(`[+]┨(?P.+?)┣[+]`, "mg"); - static inline_mono = ctRegex!(`[■]┨(?P.+?)┣[■]`, "mg"); - static inline_cite = ctRegex!(`[‖]┨(?P.+?)┣[‖]`, "mg"); + static inline_emphasis = ctRegex!(`⑆[*]┨(?P.+?)┣[*]`, "mg"); + static inline_bold = ctRegex!(`⑆[!]┨(?P.+?)┣[!]`, "mg"); + static inline_underscore = ctRegex!(`⑆[_]┨(?P.+?)┣[_]`, "mg"); + static inline_italics = ctRegex!(`⑆[/]┨(?P.+?)┣[/]`, "mg"); + static inline_superscript = ctRegex!(`⑆\^┨(?P.+?)┣\^`, "mg"); + static inline_subscript = ctRegex!(`⑆[,]┨(?P.+?)┣[,]`, "mg"); + static inline_strike = ctRegex!(`⑆[-]┨(?P.+?)┣[-]`, "mg"); + static inline_insert = ctRegex!(`⑆[+]┨(?P.+?)┣[+]`, "mg"); + static inline_mono = ctRegex!(`⑆[■]┨(?P.+?)┣[■]`, "mg"); + static inline_cite = ctRegex!(`⑆[‖]┨(?P.+?)┣[‖]`, "mg"); /+ table delimiters +/ static table_delimiter_col = ctRegex!("[ ]*[┊][ ]*", "mg"); static table_delimiter_row = ctRegex!("[ ]*\n", "mg"); - static xhtml_ampersand = ctRegex!(`[&]`, "m"); // & - static xhtml_quotation = ctRegex!(`["]`, "m"); // " - static xhtml_less_than = ctRegex!(`[<]`, "m"); // < - static xhtml_greater_than = ctRegex!(`[>]`, "m"); // > - static xhtml_line_break = ctRegex!(` [\\]{2}`, "m"); //
- static latex_special_char = ctRegex!(`([%${}_#&\\])`); - static latex_special_char_for_escape = ctRegex!(`([%${}_#\\])`); - static latex_special_char_for_escape_and_braces = ctRegex!(`([&])`); - static latex_special_char_for_escape_url = ctRegex!(`([%])`); - static latex_special_char_escaped = ctRegex!(`\\([%${}_#\\])`); - static latex_special_char_escaped_braced = ctRegex!(`[{]\\([&])[}]`); - static latex_identify_inline_link = ctRegex!(`┥.+?┝┤\S+?├`, "mg"); - static latex_identify_inline_fontface = ctRegex!(`\\([_#$]┨.+?┣)\\([_#$])`, "mg"); - static latex_clean_internal_link = ctRegex!(`^(?:#|¤\S+?#)`, "m"); - static latex_clean_bookindex_linebreak = ctRegex!(`\s*\\\\\\\\\s*`, "m"); /+ paragraph operators +/ static grouped_para_indent_1 = ctRegex!(`^_1[ ]`, "m"); static grouped_para_indent_2 = ctRegex!(`^_2[ ]`, "m"); -- cgit v1.2.3