From c8f3ea1fe9389f720546534ca57b050f16e34a8c Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Mon, 20 Nov 2017 13:44:08 -0500 Subject: process pod dir with sisudoc.txt (or file) - process multiple files named in sisudoc.txt - works with multilingual doc with inserts - regex fixes were needed --- org/default_regex.org | 6 +++--- org/meta_read_source_files.org | 11 +++++++---- org/sdp.org | 31 ++++++++++++++++++++----------- src/sdp/meta/metadoc.d | 2 ++ src/sdp/meta/metadoc_summary.d | 2 +- src/sdp/meta/read_source_files.d | 11 +++++++---- src/sdp/meta/rgx.d | 6 +++--- src/sdp/output/rgx.d | 6 +++--- src/sdp/sdp.d | 29 +++++++++++++++++++---------- 9 files changed, 65 insertions(+), 39 deletions(-) diff --git a/org/default_regex.org b/org/default_regex.org index 2783663..9a17633 100644 --- a/org/default_regex.org +++ b/org/default_regex.org @@ -411,9 +411,9 @@ static nbsp_char_and_space = ctRegex!(`░[ ]`, "mg") #+name: prgmkup_rgx #+BEGIN_SRC d -static src_pth = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[tm])$`); -static src_pth_contents = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+)/sisudoc[.]txt$`); -static src_pth_zip = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]zip)$`); +static src_pth = ctRegex!(`^(?P(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.]ss[tm])$`); +static src_pth_contents = ctRegex!(`^(?P(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+)/sisudoc[.]txt$`); +static src_pth_zip = ctRegex!(`^(?P(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.]zip)$`); static src_pth_unzip_pod = ctRegex!(`^(?Pmedia/text/[a-z]{2}/)*(?P[a-zA-Z0-9._-]+[.]ss[im])$`); static src_pth_types = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P(?P[a-zA-Z0-9._-]+[.]ss[tm])|(?P[a-zA-Z0-9._-]+/sisudoc[.]txt)|(?P[a-zA-Z0-9._-]+[.]zip))$`); static src_fn = diff --git a/org/meta_read_source_files.org b/org/meta_read_source_files.org index 9569ee4..5e32b1f 100644 --- a/org/meta_read_source_files.org +++ b/org/meta_read_source_files.org @@ -261,7 +261,8 @@ static template SiSUrawMarkupContent() { final private string readInMarkupSource(in char[] fn_src) { enforce( exists(fn_src)!=0, - "file not found" + "file not found: «" ~ + fn_src ~ "»" ); string source_txt_str; try { @@ -333,7 +334,8 @@ auto markupSourceReadIn(in string fn_src) { static auto rgx = Rgx(); enforce( fn_src.match(rgx.src_pth), - "not a sisu markup filename" + "not a sisu markup filename: «" ~ + fn_src ~ "»" ); auto source_txt_str = readInMarkupSource(fn_src); return source_txt_str; @@ -364,12 +366,13 @@ auto markupSourceHeaderContentRawLineTupleArray(in string source_txt_str) { #+name: meta_markup_source_raw_get_insert_source_line_array #+BEGIN_SRC d final char[][] getInsertMarkupSourceContentRawLineArray( - in char[] fn_src_insert, + in char[] fn_src_insert, Regex!(char) rgx_file ) { enforce( fn_src_insert.match(rgx_file), - "not a sisu markup filename" + "not a sisu markup filename: «" ~ + fn_src_insert ~ "»" ); auto source_txt_str = readInMarkupSource(fn_src_insert); auto source_line_arr = markupSourceLineArray(source_txt_str); diff --git a/org/sdp.org b/org/sdp.org index 9cd57bc..d0d7eaf 100644 --- a/org/sdp.org +++ b/org/sdp.org @@ -112,6 +112,8 @@ unittest { #+BEGIN_SRC d import std.getopt, + std.file, + std.path, std.process; import sdp.meta, @@ -316,13 +318,20 @@ foreach(arg; args[1..$]) { flag_action ~= " " ~ arg; // flags not taken by getopt } else if (arg.match(rgx.src_pth)) { fns_src ~= arg; // gather input markup source file names for processing - } else if (arg.match(rgx.src_pth_contents)) { - import std.file, - std.path; + } else if (arg.match(rgx.src_pth_contents) + || ((arg.isDir) && ((arg.chainPath("sisudoc.txt").array).isFile)) + ) { string contents_location_; + string sisudoc_txt_; + if ((arg.chainPath("sisudoc.txt").array).isFile) { + sisudoc_txt_ = arg.chainPath("sisudoc.txt").array; + } else if (arg.match(rgx.src_pth_contents)) { + sisudoc_txt_ = arg; + } else { + } try { - if (exists(arg)) { - contents_location_ = arg.readText; + if (exists(sisudoc_txt_)) { + contents_location_ = sisudoc_txt_.readText; } } catch (ErrnoException ex) { @@ -332,11 +341,11 @@ foreach(arg; args[1..$]) { } auto contents_locations_arr = (cast(char[]) contents_location_).split; - auto tmp_dir_ = (arg).dirName.array; + auto tmp_dir_ = (sisudoc_txt_).dirName.array; foreach (contents_location; contents_locations_arr) { assert(contents_location.match(rgx.src_pth), - "not a recognised file: " ~ - contents_location + "not a recognised file: «" ~ + contents_location ~ "»" ); auto contents_location_pth_ = (contents_location).to!string; fns_src ~= (((tmp_dir_).chainPath(contents_location_pth_)).array).to!(char[]); @@ -401,8 +410,8 @@ scope(failure) { } enforce( fn_src.match(rgx.src_pth_types), - "not a sisu markup filename: <<" ~ - fn_src ~ ">>" + "not a sisu markup filename: «" ~ + fn_src ~ "»" ); #+END_SRC @@ -705,8 +714,8 @@ import import std.array, std.exception, - std.stdio, std.regex, + std.stdio, std.string, std.traits, std.typecons, diff --git a/src/sdp/meta/metadoc.d b/src/sdp/meta/metadoc.d index b2f6270..eca4df7 100644 --- a/src/sdp/meta/metadoc.d +++ b/src/sdp/meta/metadoc.d @@ -2,6 +2,8 @@ module sdp.meta.metadoc; template SiSUabstraction() { import std.getopt, + std.file, + std.path, std.process; import sdp.meta, diff --git a/src/sdp/meta/metadoc_summary.d b/src/sdp/meta/metadoc_summary.d index 526c492..45fd319 100644 --- a/src/sdp/meta/metadoc_summary.d +++ b/src/sdp/meta/metadoc_summary.d @@ -10,8 +10,8 @@ template SiSUabstractionSummary() { import std.array, std.exception, - std.stdio, std.regex, + std.stdio, std.string, std.traits, std.typecons, diff --git a/src/sdp/meta/read_source_files.d b/src/sdp/meta/read_source_files.d index 9700cb6..0443ded 100644 --- a/src/sdp/meta/read_source_files.d +++ b/src/sdp/meta/read_source_files.d @@ -55,7 +55,8 @@ static template SiSUrawMarkupContent() { final private string readInMarkupSource(in char[] fn_src) { enforce( exists(fn_src)!=0, - "file not found" + "file not found: «" ~ + fn_src ~ "»" ); string source_txt_str; try { @@ -96,7 +97,8 @@ static template SiSUrawMarkupContent() { static auto rgx = Rgx(); enforce( fn_src.match(rgx.src_pth), - "not a sisu markup filename" + "not a sisu markup filename: «" ~ + fn_src ~ "»" ); auto source_txt_str = readInMarkupSource(fn_src); return source_txt_str; @@ -115,12 +117,13 @@ static template SiSUrawMarkupContent() { return t; } final char[][] getInsertMarkupSourceContentRawLineArray( - in char[] fn_src_insert, + in char[] fn_src_insert, Regex!(char) rgx_file ) { enforce( fn_src_insert.match(rgx_file), - "not a sisu markup filename" + "not a sisu markup filename: «" ~ + fn_src_insert ~ "»" ); auto source_txt_str = readInMarkupSource(fn_src_insert); auto source_line_arr = markupSourceLineArray(source_txt_str); diff --git a/src/sdp/meta/rgx.d b/src/sdp/meta/rgx.d index 13b9e9f..bf1b175 100644 --- a/src/sdp/meta/rgx.d +++ b/src/sdp/meta/rgx.d @@ -196,9 +196,9 @@ static template SiSUrgxInit() { static nbsp_chars_line_start = ctRegex!(`^░+`, "mg"); static nbsp_and_space = ctRegex!(` [ ]`, "mg"); static nbsp_char_and_space = ctRegex!(`░[ ]`, "mg"); - static src_pth = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[tm])$`); - static src_pth_contents = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+)/sisudoc[.]txt$`); - static src_pth_zip = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]zip)$`); + static src_pth = ctRegex!(`^(?P(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.]ss[tm])$`); + static src_pth_contents = ctRegex!(`^(?P(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+)/sisudoc[.]txt$`); + static src_pth_zip = ctRegex!(`^(?P(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.]zip)$`); static src_pth_unzip_pod = ctRegex!(`^(?Pmedia/text/[a-z]{2}/)*(?P[a-zA-Z0-9._-]+[.]ss[im])$`); static src_pth_types = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P(?P[a-zA-Z0-9._-]+[.]ss[tm])|(?P[a-zA-Z0-9._-]+/sisudoc[.]txt)|(?P[a-zA-Z0-9._-]+[.]zip))$`); static src_fn = diff --git a/src/sdp/output/rgx.d b/src/sdp/output/rgx.d index 1c0f4d3..dbd1528 100644 --- a/src/sdp/output/rgx.d +++ b/src/sdp/output/rgx.d @@ -15,9 +15,9 @@ static template SiSUoutputRgxInit() { static nbsp_chars_line_start = ctRegex!(`^░+`, "mg"); static nbsp_and_space = ctRegex!(` [ ]`, "mg"); static nbsp_char_and_space = ctRegex!(`░[ ]`, "mg"); - static src_pth = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[tm])$`); - static src_pth_contents = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+)/sisudoc[.]txt$`); - static src_pth_zip = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]zip)$`); + static src_pth = ctRegex!(`^(?P(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.]ss[tm])$`); + static src_pth_contents = ctRegex!(`^(?P(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+)/sisudoc[.]txt$`); + static src_pth_zip = ctRegex!(`^(?P(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.]zip)$`); static src_pth_unzip_pod = ctRegex!(`^(?Pmedia/text/[a-z]{2}/)*(?P[a-zA-Z0-9._-]+[.]ss[im])$`); static src_pth_types = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P(?P[a-zA-Z0-9._-]+[.]ss[tm])|(?P[a-zA-Z0-9._-]+/sisudoc[.]txt)|(?P[a-zA-Z0-9._-]+[.]zip))$`); static src_fn = diff --git a/src/sdp/sdp.d b/src/sdp/sdp.d index c861d1d..375f91f 100755 --- a/src/sdp/sdp.d +++ b/src/sdp/sdp.d @@ -10,6 +10,8 @@ import sdp.meta.metadoc; import std.getopt, + std.file, + std.path, std.process; import sdp.meta, @@ -145,13 +147,20 @@ void main(string[] args) { flag_action ~= " " ~ arg; // flags not taken by getopt } else if (arg.match(rgx.src_pth)) { fns_src ~= arg; // gather input markup source file names for processing - } else if (arg.match(rgx.src_pth_contents)) { - import std.file, - std.path; + } else if (arg.match(rgx.src_pth_contents) + || ((arg.isDir) && ((arg.chainPath("sisudoc.txt").array).isFile)) + ) { string contents_location_; + string sisudoc_txt_; + if ((arg.chainPath("sisudoc.txt").array).isFile) { + sisudoc_txt_ = arg.chainPath("sisudoc.txt").array; + } else if (arg.match(rgx.src_pth_contents)) { + sisudoc_txt_ = arg; + } else { + } try { - if (exists(arg)) { - contents_location_ = arg.readText; + if (exists(sisudoc_txt_)) { + contents_location_ = sisudoc_txt_.readText; } } catch (ErrnoException ex) { @@ -161,11 +170,11 @@ void main(string[] args) { } auto contents_locations_arr = (cast(char[]) contents_location_).split; - auto tmp_dir_ = (arg).dirName.array; + auto tmp_dir_ = (sisudoc_txt_).dirName.array; foreach (contents_location; contents_locations_arr) { assert(contents_location.match(rgx.src_pth), - "not a recognised file: " ~ - contents_location + "not a recognised file: «" ~ + contents_location ~ "»" ); auto contents_location_pth_ = (contents_location).to!string; fns_src ~= (((tmp_dir_).chainPath(contents_location_pth_)).array).to!(char[]); @@ -206,8 +215,8 @@ void main(string[] args) { } enforce( fn_src.match(rgx.src_pth_types), - "not a sisu markup filename: <<" ~ - fn_src ~ ">>" + "not a sisu markup filename: «" ~ + fn_src ~ "»" ); auto t = SiSUabstraction!()(fn_src, opts, env); static assert(!isTypeTuple!(t)); -- cgit v1.2.3