From 6f27312b0e61932d820b991a15c44845ff2cee75 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sun, 26 Feb 2017 18:27:08 -0500 Subject: 0.13.5 defaults regex reorganised; some work on (x)html output --- src/sdp/ao_rgx.d | 50 ++++++++++++++++++-------------------------------- 1 file changed, 18 insertions(+), 32 deletions(-) (limited to 'src/sdp/ao_rgx.d') diff --git a/src/sdp/ao_rgx.d b/src/sdp/ao_rgx.d index 6d91096..7d47608 100644 --- a/src/sdp/ao_rgx.d +++ b/src/sdp/ao_rgx.d @@ -27,14 +27,6 @@ template SiSUrgxInit() { static levels_markup = ctRegex!(`^[A-D1-4]$`); static levels_numbered = ctRegex!(`^[0-9]$`); static levels_numbered_headings = ctRegex!(`^[0-7]$`); - static src_pth = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[tm])$`); - static src_fn = - ctRegex!(`^([a-zA-Z0-9._-]+/)*(?P(?P[a-zA-Z0-9._-]+)[.](?Pss[tm]))$`); - static src_fn_master = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ssm)$`); - static src_fn_text = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]sst)$`); - static src_fn_insert = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ssi)$`); - static src_fn_find_inserts = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[im])$`); - static insert_src_fn_ssi_or_sst = ctRegex!(`^<<\s*(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[ti])$`); /+ comments +/ static comment = ctRegex!(`^%+ `); static comments = ctRegex!(`^%+ |^%+$`); @@ -86,7 +78,7 @@ template SiSUrgxInit() { static para_bullet_indent = ctRegex!(`^_([1-9])[*] `); static para_indent = ctRegex!(`^_([1-9]) `); static para_indent_hang = ctRegex!(`^_([0-9])_([0-9]) `); - static para_attribs = ctRegex!(`^_(([0-9])(_([0-9]))?|_([1-9])?[*]) `); + static para_attribs = ctRegex!(`^_(?:(?:[0-9])(?:_([0-9]))?|(?:[1-9])?[*]) `); /+ blocked markup +/ static block_open = ctRegex!("^((code([.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)[{].*?$)|^`{3} (code([.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)"); static block_poem_open = ctRegex!("^((poem[{].*?$)|`{3} poem)"); @@ -142,8 +134,9 @@ template SiSUrgxInit() { static inline_curly_delimiter_close_regular = ctRegex!(`\s*\}~`, "m"); static inline_text_and_note_curly = ctRegex!(`(?P.+?)(?:(?:[~])[{][*+ ]*)(?P.+?)(?:[}][~])`, "mg"); static note_ref = ctRegex!(`^\S+?noteref_([0-9]+)`, "mg"); // {^{73.}^}#noteref_73 - static inline_url = ctRegex!(`((?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+)`, "mg"); - static inline_link_naked_url = ctRegex!(`(?P^|[ ])(?P(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?P[.,;:?!]?(?:[ ]|$))`, "mg"); + static inline_url_generic = ctRegex!(`(?:^|[}(\[ ])(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_]`, "mg"); + static inline_url = ctRegex!(`((?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_]\S*)`, "mg"); + static inline_link_naked_url = ctRegex!(`(?P^|[ ])(?P(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?P[.,;:?!'"]?(?:[ ]|$))`, "mg"); static inline_link_markup_regular = ctRegex!(`(?P^|[ ])\{\s*(?P.+?)\s*\}(?P(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?P[.,;:?!]?(?:[ ]|$))`, "mg"); static inline_link_endnote_url_helper_punctuated = ctRegex!(`\{~\^\s+(?P.+?)\}(?P(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?P[.,;:?!]?(?:[ ]|$))`, "mg"); static inline_link_endnote_url_helper = ctRegex!(`\{~\^\s+(?P.+?)\}(?P(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+)`, "mg"); @@ -185,6 +178,14 @@ template SiSUrgxInit() { ctRegex!("(am|bg|bn|br|ca|cs|cy|da|de|el|en|eo|es|et|eu|fi|fr|ga|gl|he|hi|hr|hy|ia|is|it|ja|ko|la|lo|lt|lv|ml|mr|nl|no|nn|oc|pl|pt|pt_BR|ro|ru|sa|se|sk|sl|sq|sr|sv|ta|te|th|tk|tr|uk|ur|vi|zh)"); auto language_code_and_filename = ctRegex!("(am|bg|bn|br|ca|cs|cy|da|de|el|en|eo|es|et|eu|fi|fr|ga|gl|he|hi|hr|hy|ia|is|it|ja|ko|la|lo|lt|lv|ml|mr|nl|no|nn|oc|pl|pt|pt_BR|ro|ru|sa|se|sk|sl|sq|sr|sv|ta|te|th|tk|tr|uk|ur|vi|zh)/[A-Za-z0-9._-].+?[.](?:sst|ssm)$"); + static src_pth = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[tm])$`); + static src_fn = + ctRegex!(`^([a-zA-Z0-9._-]+/)*(?P(?P[a-zA-Z0-9._-]+)[.](?Pss[tm]))$`); + static src_fn_master = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ssm)$`); + static src_fn_text = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]sst)$`); + static src_fn_insert = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ssi)$`); + static src_fn_find_inserts = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[im])$`); + static insert_src_fn_ssi_or_sst = ctRegex!(`^<<\s*(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[ti])$`); /+ inline markup footnotes endnotes +/ static inline_notes_al = ctRegex!(`【(?:[*+]\s+|\s*)(.+?)】`, "mg"); static inline_notes_al_gen = ctRegex!(`【.+?】`, "m"); @@ -199,27 +200,12 @@ template SiSUrgxInit() { static inline_al_delimiter_open_plus = ctRegex!(`【\+`, "m"); static inline_text_and_note_al = ctRegex!(`(?P.+?)【(?:[*+ ]*)(?P.+?)】`, "mg"); static inline_text_and_note_al_ = ctRegex!(`(.+?(?:【[*+]*\s+.+?】|$))`, "mg"); - } -} -/++ - regex: regular expressions used in sisu document parser -+/ -template SiSUoutputRgxInit() { - private import output_defaults; - struct Rgx { /+ inline markup footnotes endnotes +/ - static inline_notes_al = ctRegex!(`【(?:[*+]\s+|\s*)(.+?)】`, "mg"); - static inline_notes_al_gen = ctRegex!(`【.+?】`, "m"); - static inline_al_delimiter_open_regular = ctRegex!(`【\s`, "m"); - static inline_al_delimiter_open_symbol_star = ctRegex!(`【[*]\s`, "m"); - static inline_al_delimiter_open_symbol_plus = ctRegex!(`【[+]\s`, "m"); - static inline_al_delimiter_close_regular = ctRegex!(`】`, "m"); - static inline_al_delimiter_open_and_close_regular = ctRegex!(`【|】`, "m"); - static inline_notes_delimiter_al_regular = ctRegex!(`【(.+?)】`, "mg"); - static inline_notes_delimiter_al_regular_number_note = ctRegex!(`【(\d+)\s+(.+?)】`, "mg"); - static inline_al_delimiter_open_asterisk = ctRegex!(`【\*`, "m"); - static inline_al_delimiter_open_plus = ctRegex!(`【\+`, "m"); - static inline_text_and_note_al = ctRegex!(`(?P.+?)【(?:[*+ ]*)(?P.+?)】`, "mg"); - static inline_text_and_note_al_ = ctRegex!(`(.+?(?:【[*+]*\s+.+?】|$))`, "mg"); + static inline_link = ctRegex!(`┥(.+?)┝┤(.+?)├`, "mg"); + static inline_a_url = ctRegex!(`(┤)(\S+?)(├)`, "mg"); + static fn_suffix = ctRegex!(`\.fnSuffix`, "mg"); + static inline_link_fn_suffix = ctRegex!(`¤(.+?)(\.fnSuffix)`, "mg"); + static inline_seg_link = ctRegex!(`(¤)(?:.+?)\.fnSuffix`, "mg"); + static mark_internal_site_lnk = ctRegex!(`¤`, "mg"); } } -- cgit v1.2.3