From e973365c4b74be2b2cff9be970ccba5928dbe368 Mon Sep 17 00:00:00 2001
From: Ralph Amissah
Date: Wed, 22 May 2019 10:50:33 -0400
Subject: 0.7.3 start to look at document harvest (initial stub)

---
 org/default_regex.org | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'org/default_regex.org')

diff --git a/org/default_regex.org b/org/default_regex.org
index 2958027..6d17f0c 100644
--- a/org/default_regex.org
+++ b/org/default_regex.org
@@ -51,6 +51,7 @@ static template DocReformRgxInit() {
 #+BEGIN_SRC d
 /+ misc +/
 static true_dollar = ctRegex!(`\$`, "gm");
+static sep = ctRegex!(`␣`, "gm");
 static flag_action = ctRegex!(`^(--[a-z][a-z0-9-]+)$`);
 static flag_action_str = ctRegex!(` (--[a-z][a-z0-9-]+)`);
 static within_quotes = ctRegex!(`"(.+?)"`, "m");
@@ -106,7 +107,7 @@ static make_simple_substitutions_d = ctRegex!(`(?P\S.+?),\s+(?P.+)`,"i");
 static toml_header_meta_title = ctRegex!(`^\s*(title\s*=\s*"|\[title\])`, "m");
 #+END_SRC
@@ -368,6 +369,16 @@ static bi_sub_terms_plus_object_number_offset_split = ctRegex!(`\s*\|\s*`);
 static bi_term_and_object_numbers_match = ctRegex!(`^(.+?)\+(\d+)`);
 #+END_SRC
 
+** topic register split (document classify)
+
+#+name: meta_rgx
+#+BEGIN_SRC d
+static topic_register_main_terms_split = ctRegex!(`\s*;\s*`);
+static topic_register_main_term_plus_rest_split = ctRegex!(`\s*:\s*`);
+static topic_register_sub_terms_split = ctRegex!(`\s*\|\s*`);
+static topic_register_multiple_sub_terms_split = ctRegex!(`␣([^|␣]+(?:\|[^|␣]+)+)`);
+#+END_SRC
+
 ** language codes :language:codes:
 
 #+name: meta_rgx
-- 
cgit v1.2.3