From 18bdad0fd7ced5fecb39e9e73d7c4bd9a3956c6f Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Wed, 10 Jul 2024 08:51:11 -0400 Subject: pod zip fixes - serial processing (need to be built serially) - multilingual pods, copy all languages before zip --- org/out_metadata.org | 11 +-- org/out_src_pod.org | 195 +++++++++++++++++++++++++++------------------------ org/output_hub.org | 16 ++--- org/spine.org | 24 +++---- 4 files changed, 121 insertions(+), 125 deletions(-) (limited to 'org') diff --git a/org/out_metadata.org b/org/out_metadata.org index d43adf2..52a2481 100644 --- a/org/out_metadata.org +++ b/org/out_metadata.org @@ -28,7 +28,7 @@ module sisudoc.io_out.metadata; // @safe: template outputMetadata() { - void outputMetadata(T)(T doc_matters) { + void outputMetadata(T)(T doc_matters) { <> <> <> @@ -188,11 +188,6 @@ if (doc_matters.opt.action.html_link_markup_source) { } } } -metadata_ ~= "

" - ~ doc_matters.doc_digest.markup_doc.toHexString - ~ " - " - ~ doc_matters.src.filename - ~ "

"; if (doc_matters.conf_make_meta.meta.classify_topic_register_arr.length > 0) { metadata_ ~= "

Topics:

"; string[] _top = ["", "", "", "", ""]; @@ -324,9 +319,7 @@ void metadata_write_output(M)(M doc_matters, char[] metadata_) { } catch (ErrnoException ex) { // Handle error } - if (doc_matters.opt.action.vox_gt0) { - writeln(" ", pth_html.fn_scroll("metadata." ~ doc_matters.src.filename)); - } + if (doc_matters.opt.action.vox_gt0) { writeln(" ", pth_html.fn_scroll("metadata." ~ doc_matters.src.filename)); } } #+END_SRC diff --git a/org/out_src_pod.org b/org/out_src_pod.org index da2b050..c16280e 100644 --- a/org/out_src_pod.org +++ b/org/out_src_pod.org @@ -31,22 +31,29 @@ template spinePod() { <> void spinePod(T)(T doc_matters) { <> - try { - { - pod_archive_directory_tree(doc_matters, pths_pod); - } - auto t = pod_zip_make_ready(doc_matters, pths_pod); - static assert(t.length==3); - auto zip = t[0]; - auto fn_pod = t[1]; - auto _digests = t[2]; - { - zipArchive(doc_matters, fn_pod, zip); - } { - zipArchiveDigest(doc_matters, fn_pod, _digests); + if (doc_matters.opt.action.pod) { + try { + { + podArchive_directory_tree(doc_matters, pths_pod); + } + { + struct STsrcDigests { + std.zip.ZipArchive zip; + string fn_pod; + string[string][string] digests; + } + STsrcDigests _st; + _st = pod_zipMakeReady(doc_matters, pths_pod, _st); + { + zipArchive(doc_matters, _st.fn_pod, _st.zip); + if (doc_matters.src.language == doc_matters.pod.manifest_list_of_languages[$-1]) { + zipArchiveDigest(doc_matters, _st.fn_pod, _st.digests); + } + } + } + } catch (ErrnoException ex) { + // Handle error } - } catch (ErrnoException ex) { - // Handle error } } <> @@ -96,10 +103,9 @@ assert (doc_matters.src.filename.match(rgx_files.src_fn)); #+NAME: source_pod_archive_prepare_for_zip_and_get_digests #+BEGIN_SRC d -auto pod_zip_make_ready(M,P)(M doc_matters, P pths_pod) { +auto pod_zipMakeReady(M,P,S)(M doc_matters, P pths_pod, S _st) { auto pth_dr_doc_src = doc_matters.src_path_info; - if (doc_matters.opt.action.debug_do_pod - && doc_matters.opt.action.vox_gt1) { + if (doc_matters.opt.action.debug_do_pod && doc_matters.opt.action.vox_gt1) { writeln(__LINE__, ": ", doc_matters.src.filename, " -> ", pths_pod.fn_doc(doc_matters.src.filename, doc_matters.src.language).filesystem_open_zpod @@ -134,11 +140,10 @@ auto pod_zip_make_ready(M,P)(M doc_matters, P pths_pod) { fn_src_in.copy(fn_src_out_filesystem); } if (doc_matters.opt.action.pod) { - zip = pod_archive("file_path_bin", fn_src_in, fn_src_out_pod_zip_base, zip); + zip = podArchive("file_path_bin", fn_src_in, fn_src_out_pod_zip_base, zip); } } else { - if (doc_matters.opt.action.debug_do_pod - && doc_matters.opt.action.vox_gt1) { + if (doc_matters.opt.action.debug_do_pod && doc_matters.opt.action.vox_gt1) { writeln("WARNING (io) src out NOT found (image): ", fn_src_in); } } @@ -159,11 +164,10 @@ auto pod_zip_make_ready(M,P)(M doc_matters, P pths_pod) { fn_src_in.copy(fn_src_out_filesystem); } if (doc_matters.opt.action.pod) { - zip = pod_archive("file_path_text", fn_src_in, fn_src_out_pod_zip_base, zip); + zip = podArchive("file_path_text", fn_src_in, fn_src_out_pod_zip_base, zip); } } else { - if (doc_matters.opt.action.debug_do_pod - && doc_matters.opt.action.vox_gt1) { + if (doc_matters.opt.action.debug_do_pod && doc_matters.opt.action.vox_gt1) { writeln("WARNING (io) src out NOT found (document make): ", fn_src_in); } } @@ -187,8 +191,7 @@ auto pod_zip_make_ready(M,P)(M doc_matters, P pths_pod) { = File(pths_pod.fn_pod_filelist(doc_matters.src.filename).filesystem_open_zpod, "w"); Node _pmy; string _pm = "doc:\n filename: " ~ doc_matters.src.filename ~ "\n language: " ~ doc_matters.pod.manifest_list_of_languages.to!string ~ "\n"; - if (doc_matters.opt.action.debug_do_pod - && doc_matters.opt.action.vox_gt1) { + if (doc_matters.opt.action.debug_do_pod && doc_matters.opt.action.vox_gt1) { try { _pmy = Loader.fromString(_pm).load(); } catch (ErrnoException ex) { @@ -206,7 +209,7 @@ auto pod_zip_make_ready(M,P)(M doc_matters, P pths_pod) { pod_filelist_yaml_string.writeln(_pm); } if (doc_matters.opt.action.pod) { - zip = pod_archive("string", _pm, fn_src_out_pod_zip_base, zip); + zip = podArchive("string", _pm, fn_src_out_pod_zip_base, zip); } } } @@ -220,47 +223,55 @@ auto pod_zip_make_ready(M,P)(M doc_matters, P pths_pod) { = pths_pod.fn_doc(doc_matters.src.filename, doc_matters.src.language).zpod.to!string; // needed without root path: string[] filelist_src_out_pod_arr; string[] filelist_src_zpod_arr; - if (exists(fn_src_in)) { // what of language? - debug(io) { writeln("(io debug) src in found: ", fn_src_in); } - { // take DIGEST write to pod file digests.txt - auto data = (cast(byte[]) (fn_src_in).read); - _digests["en"]["sst"] ~= data.sha256Of.toHexString ~ "::" ~ data.length.to!string ~ " - " ~ doc_matters.src.filename; // FIX language issue - // writeln(data.sha256Of.toHexString, "::", data.length, " - ", doc_matters.src.filename); - } - filelist_src_out_pod_arr ~= fn_src_out_pod_zip_base; - filelist_src_zpod_arr ~= fn_src_out_inside_pod; - string _pod_to_markup_file = doc_matters.src.pod_name ~ "/" ~ "media/text/" ~ doc_matters.src.language ~ "/" ~ doc_matters.src.filename; - if (doc_matters.opt.action.source_or_pod) { - fn_src_in.copy(fn_src_out_filesystem); - } - if (doc_matters.opt.action.pod) { - auto _rgx = regex(r"(?P\S+?)(?P[a-z_-]+)/(?Pmedia/text/)(?P\S+?)/(?P\S+?\.ss[mt])"); - if (auto _x = fn_src_in.match(_rgx)){ - if (doc_matters.src.lng == doc_matters.pod.manifest_list_of_languages[$-1]) { - string _path_to_pod = _x.captures["path_to_pod"]; - string _podname = _x.captures["podname"]; - string _root_to_lang = _x.captures["from_root"]; - string _language = _x.captures["language"]; - string _filename = _x.captures["filename"]; - foreach (_lang; doc_matters.pod.manifest_list_of_languages) { - string _pth_mkup_src_in = _path_to_pod ~ _podname ~ "/" ~ _root_to_lang ~ _lang ~ "/" ~ _filename; - string _pth_mkup_src_out = "pod/" ~ _root_to_lang ~ _lang ~ "/" ~ _filename; - zip = pod_archive("file_path_text", _pth_mkup_src_in, _pth_mkup_src_out, zip); + if (doc_matters.src.language == doc_matters.pod.manifest_list_of_languages[$-1]) { // wait until all language versions of .ssm parsed + foreach (_lang; doc_matters.pod.manifest_list_of_languages) { // do for all language versions + string fn_src_out_filesystem_lng + = pths_pod.fn_doc(doc_matters.src.filename, _lang).filesystem_open_zpod.to!string; + string _sstm = (doc_matters.pod.manifest_path ~ "/media/text/" ~ _lang ~ "/" ~ doc_matters.src.filename); + // writeln(_sstm); + if (exists(_sstm)) { // what of language? + debug(io) { writeln("(io debug) src in found: ", _sstm); } + { // take DIGEST write to pod file digests.txt + auto data = (cast(byte[]) (_sstm).read); + _digests[_lang]["sstm"] ~= data.sha256Of.toHexString ~ "::" ~ data.length.to!string ~ " - " ~ doc_matters.src.filename ~ " - [" ~ _lang ~ "]"; + // writeln(data.sha256Of.toHexString, "::", data.length, " - ", doc_matters.src.filename); + } + filelist_src_out_pod_arr ~= fn_src_out_pod_zip_base; + filelist_src_zpod_arr ~= fn_src_out_inside_pod; + string _pod_to_markup_file = doc_matters.src.pod_name ~ "/" ~ "media/text/" ~ _lang ~ "/" ~ doc_matters.src.filename; + if (doc_matters.opt.action.source_or_pod) { + _sstm.copy(fn_src_out_filesystem_lng); + } + if (doc_matters.opt.action.pod) { + auto _rgx_sstm = regex(r"(?P\S+?)(?P[a-z_-]+)/(?Pmedia/text/)(?P\S+?)/(?P\S+?\.ss[mt])"); + if (auto _x = _sstm.match(_rgx_sstm)){ + if (doc_matters.src.lng == doc_matters.pod.manifest_list_of_languages[$-1]) { // again wait until all language versions of .ssm parsed + string _path_to_pod = _x.captures["path_to_pod"]; + string _podname = _x.captures["podname"]; + string _root_to_lang = _x.captures["from_root"]; + string _language = _x.captures["language"]; // .ssi inserts expected to have same name across languages + string _filename = _x.captures["filename"]; + foreach (_lang1; doc_matters.pod.manifest_list_of_languages) { // do for all language versions + string _pth_mkup_src_in = _path_to_pod ~ _podname ~ "/" ~ _root_to_lang ~ _lang1 ~ "/" ~ _filename; + string _pth_mkup_src_out = "pod/" ~ _root_to_lang ~ _lang1 ~ "/" ~ _filename; + // writeln("\nin: ", _pth_mkup_src_in, "\nout: ", _pth_mkup_src_out); // DEBUG, REMOVE + zip = podArchive("file_path_text", _pth_mkup_src_in, _pth_mkup_src_out, zip); + } + } + } else { + zip = podArchive("file_path_text", _sstm, fn_src_out_pod_zip_base, zip); } } } else { - zip = pod_archive("file_path_text", fn_src_in, fn_src_out_pod_zip_base, zip); + if (doc_matters.opt.action.debug_do_pod && doc_matters.opt.action.vox_gt1) { + writeln("WARNING (io) src in NOT found (markup source): ", _sstm); + } } } - } else { - if (doc_matters.opt.action.debug_do_pod - && doc_matters.opt.action.vox_gt1) { - writeln("WARNING (io) src in NOT found (markup source): ", fn_src_in); - } } } { // bundle insert files (.ssi) - get digest if (doc_matters.srcs.file_insert_list.length > 0) { - auto _rgx = regex(r"(?P\S+?)(?P[a-z_-]+)/(?Pmedia/text/)(?P\S+?)/(?P\S+?\.ss[i])"); + auto _rgx_ssi = regex(r"(?P\S+?)(?P[a-z_-]+)/(?Pmedia/text/)(?P\S+?)/(?P\S+?\.ss[i])"); foreach (insert_file; doc_matters.srcs.file_insert_list) { debug(pod) { writeln( @@ -272,7 +283,7 @@ auto pod_zip_make_ready(M,P)(M doc_matters, P pths_pod) { ).zpod ); } - if (auto _x = insert_file.match(_rgx)){ + if (auto _x = insert_file.match(_rgx_ssi)){ if (doc_matters.src.lng == doc_matters.pod.manifest_list_of_languages[$-1]) { string _path_to_pod = _x.captures["path_to_pod"]; string _podname = _x.captures["podname"]; @@ -298,11 +309,10 @@ auto pod_zip_make_ready(M,P)(M doc_matters, P pths_pod) { _pth_mkup_src_in.copy(fn_src_out_filesystem); // check why here, thought dealt with elsewhere } if (doc_matters.opt.action.pod) { - zip = pod_archive("file_path_text", _pth_mkup_src_in, _pth_mkup_src_out, zip); + zip = podArchive("file_path_text", _pth_mkup_src_in, _pth_mkup_src_out, zip); } } else { - if (doc_matters.opt.action.debug_do_pod - && doc_matters.opt.action.vox_gt1) { + if (doc_matters.opt.action.debug_do_pod && doc_matters.opt.action.vox_gt1) { writeln("WARNING (io) src out NOT found (insert file): ", _pth_mkup_src_in); } } @@ -333,11 +343,10 @@ auto pod_zip_make_ready(M,P)(M doc_matters, P pths_pod) { fn_src_in.copy(fn_src_out_filesystem); } if (doc_matters.opt.action.pod) { - zip = pod_archive("file_path_text", fn_src_in, fn_src_out_pod_zip_base, zip); + zip = podArchive("file_path_text", fn_src_in, fn_src_out_pod_zip_base, zip); } } else { - if (doc_matters.opt.action.debug_do_pod - && doc_matters.opt.action.vox_gt1) { + if (doc_matters.opt.action.debug_do_pod && doc_matters.opt.action.vox_gt1) { writeln("WARNING (io) src out NOT found (insert file): ", fn_src_in); } } @@ -345,24 +354,28 @@ auto pod_zip_make_ready(M,P)(M doc_matters, P pths_pod) { } } } - auto t = tuple(zip, fn_pod, _digests); - return t; + { + _st.zip = zip; + _st.fn_pod = fn_pod; + _st.digests = _digests; + } + return _st; } #+END_SRC ** mkdir :mkdir: +- create directory structure + #+NAME: source_pod_mkdirs #+BEGIN_SRC d -void pod_archive_directory_tree(M,P)(M doc_matters, P pths_pod) { // create directory structure +void podArchive_directory_tree(M,P)(M doc_matters, P pths_pod) { // create directory structure if (!exists(pths_pod.pod_dir_())) { // used both by pod zipped (& pod filesystem (unzipped) which makes its own recursive dirs) pths_pod.pod_dir_().mkdirRecurse; } if (doc_matters.opt.action.source_or_pod) { - if (doc_matters.opt.action.vox_gt0) { - writeln(" ", pths_pod.fn_pod_filelist(doc_matters.src.filename).filesystem_open_zpod); - } + // if (doc_matters.opt.action.vox_gt0) { writeln(" ", pths_pod.fn_pod_filelist(doc_matters.src.filename).filesystem_open_zpod); } if (!exists(pths_pod.text_root(doc_matters.src.filename).filesystem_open_zpod)) { pths_pod.text_root(doc_matters.src.filename).filesystem_open_zpod.mkdirRecurse; } @@ -401,7 +414,7 @@ void pod_archive_directory_tree(M,P)(M doc_matters, P pths_pod) { // create dire #+NAME: source_pod_archive_zip #+BEGIN_SRC d -@system auto pod_archive(Z)( +@system auto podArchive(Z)( string _source_type, string _data_in, string _pth_out, @@ -454,6 +467,7 @@ void zipArchive(M,F,Z)(M doc_matters, F fn_pod, Z zip) { void zipArchiveDigest(M,F,D)(M doc_matters, F fn_pod, D _digests) { import sisudoc.io_out.paths_output; auto pths_pod = spinePathsPods!()(doc_matters); + char[] _zip_digest; try { if (!exists(pths_pod.pod_dir_())) { // used both by pod zipped (& pod filesystem (unzipped) which makes its own recursive dirs) @@ -463,23 +477,20 @@ void zipArchiveDigest(M,F,D)(M doc_matters, F fn_pod, D _digests) { // Handle error } try { - writeln(pths_pod.pod_dir_(), "/", doc_matters.src.filename_base, ".digests.txt"); + // if (doc_matters.opt.action.vox_gt1) { writeln(" ", pths_pod.pod_dir_(), "/", doc_matters.src.filename_base, ".digests.txt"); } string _digest_fn = pths_pod.pod_dir_() ~ "/" ~ doc_matters.src.filename_base ~ ".digests.txt"; - writeln(_digest_fn); + // if (doc_matters.opt.action.vox_gt1) { writeln(_digest_fn); } auto f = File(_digest_fn, "w"); if (exists(fn_pod)) { try { - if (doc_matters.opt.action.vox_gt0 - && doc_matters.opt.action.pod) { - auto data = (cast(byte[]) (fn_pod).read); - if (doc_matters.opt.action.vox_gt1) { - writeln(doc_matters.src.filename, " > ", doc_matters.src.filename_base, ".zip"); - } - if (doc_matters.opt.action.pod) { - auto _zip_digest = (data.sha256Of.toHexString ~ "::" ~ data.length.to!string ~ " - " ~ doc_matters.src.filename_base ~ ".zip"); - writeln(_zip_digest); - f.writeln(_zip_digest); - } + auto data = (cast(byte[]) (fn_pod).read); + // if (doc_matters.opt.action.vox_gt1) { writeln(" ", doc_matters.src.filename, " > ", doc_matters.src.filename_base, ".zip"); } + if (doc_matters.opt.action.pod) { + _zip_digest = (data.sha256Of.toHexString ~ "::" ~ data.length.to!string ~ " - " ~ doc_matters.src.filename_base ~ ".zip"); + if (doc_matters.opt.action.vox_gt0) { writeln(" ", _zip_digest); } + if (doc_matters.opt.action.vox_gt0) { writeln(" ", pths_pod.pod_dir_(), "/", doc_matters.src.filename_base, "/"); } + if (doc_matters.opt.action.vox_gt0) { writeln(" ", _digest_fn); } + f.writeln(_zip_digest); } } catch (ErrnoException ex) { // Handle errors @@ -487,19 +498,19 @@ void zipArchiveDigest(M,F,D)(M doc_matters, F fn_pod, D _digests) { } foreach (_lang; doc_matters.pod.manifest_list_of_languages) { if (_lang in _digests) { - if (("sst" in _digests[_lang]) && (_digests[_lang]["sst"].length > 0)) { - writeln(_digests[_lang]["sst"]); - f.writeln(_digests[_lang]["sst"]); + if (("sstm" in _digests[_lang]) && (_digests[_lang]["sstm"].length > 0)) { + // if (doc_matters.opt.action.vox_gt1) { writeln(_digests[_lang]["sstm"]); } + f.writeln(_digests[_lang]["sstm"]); } if (("ssi" in _digests[_lang]) && (_digests[_lang]["ssi"].length > 0)) { - writeln(_digests[_lang]["ssi"]); + // if (doc_matters.opt.action.vox_gt1) { writeln(_digests[_lang]["ssi"]); } f.writeln(_digests[_lang]["ssi"]); } } } if ("shared" in _digests) { if (("images" in _digests["shared"]) && (_digests["shared"]["images"].length > 0)) { - writeln(_digests["shared"]["images"]); + // if (doc_matters.opt.action.vox_gt1) { writeln(_digests["shared"]["images"]); } f.writeln(_digests["shared"]["images"]); } } diff --git a/org/output_hub.org b/org/output_hub.org index b73c582..541cc44 100644 --- a/org/output_hub.org +++ b/org/output_hub.org @@ -51,9 +51,7 @@ template outputHub() { <> <> } - if (doc_matters.opt.action.vox_gt0) { - writeln(" ", doc_matters.src.filename_base); - } + if (doc_matters.opt.action.vox_gt0) { writeln(doc_matters.src.filename_base); } if (!(doc_matters.opt.action.parallelise_subprocesses)) { foreach(schedule; doc_matters.opt.action.output_task_scheduler) { Scheduled!()(schedule, doc_abstraction, doc_matters); @@ -136,17 +134,11 @@ import sisudoc.io_out, #+BEGIN_SRC d if (sched == outTask.source_or_pod) { msg.v("spine (doc reform) source processing... "); - if (doc_matters.opt.action.pod) { - msg.v("spine (doc reform) source pod processing... "); - } + if (doc_matters.opt.action.pod) { msg.v("spine (doc reform) source pod processing... "); } import sisudoc.io_out.source_pod; spinePod!()(doc_matters); - if (doc_matters.opt.action.source) { - msg.vv("spine (doc reform) source done"); - } - if (doc_matters.opt.action.pod) { - msg.vv("spine (doc reform) source pod done"); - } + if (doc_matters.opt.action.source) { msg.vv("spine (doc reform) source done"); } + if (doc_matters.opt.action.pod) { msg.vv("spine (doc reform) source pod done"); } } #+END_SRC diff --git a/org/spine.org b/org/spine.org index 8ccaa0f..d185adb 100644 --- a/org/spine.org +++ b/org/spine.org @@ -85,9 +85,7 @@ string program_name = "spine"; } } else { // note cannot parallelise sqlite shared db foreach(manifest; _manifests[1..$]) { - if (_opt_action.vox_gt2) { - writeln("parallelisation off: actions include sqlite shared db"); - } + if (_opt_action.vox_gt2) { writeln("parallelisation off: actions include sqlite shared db"); } if (!empty(manifest.src.filename)) { <> <> @@ -848,11 +846,19 @@ struct OptActions { bool _is; if (opts["serial"] == true) { _is = false; - } else if (sqlite_shared_db_action) { + } else if ( + sqlite_shared_db_action + || source_or_pod + ) { _is = false; } else if (opts["parallel"] == true) { _is = true; - if (sqlite_shared_db_action) { _is = false; } + if ( + sqlite_shared_db_action + || source_or_pod + ) { + _is = false; + } } else if ( opts["abstraction"] || concordance @@ -862,7 +868,6 @@ struct OptActions { || odt || latex || manifest - || source_or_pod || sqlite_discrete ) { _is = true; @@ -1255,12 +1260,7 @@ foreach(arg; args[1..$]) { #+NAME: spine_each_file_do_scope #+BEGIN_SRC d scope(success) { - if (_opt_action.vox_gt0) { - writefln( - "%s", - "-- ~ document complete, ok ~ ------------------------------------", - ); - } + if (_opt_action.vox_gt0) { writeln("-- ~ document complete, ok ~ ------------------------------------"); } } scope(failure) { debug(checkdoc) { -- cgit v1.2.3