From f3806668388a3d955261d9782c2cb5659b0f15e3 Mon Sep 17 00:00:00 2001
From: Ralph Amissah
Date: Wed, 9 Aug 2017 06:09:18 -0400
Subject: sqlite, introduced, far to go
---
dub.sdl | 2 +
dub.selections.json | 1 +
maker.org | 2 +
org/default_paths.org | 33 +
org/output_hub.org | 7 +-
org/output_sqlite.org | 1445 +++++++++++++++++++++++++++++++++++++++++
src/sdp/output/hub.d | 8 +-
src/sdp/output/paths_output.d | 26 +
src/sdp/output/sqlite.d | 1093 +++++++++++++++++++++++++++++++
9 files changed, 2611 insertions(+), 6 deletions(-)
create mode 100644 org/output_sqlite.org
create mode 100644 src/sdp/output/sqlite.d
diff --git a/dub.sdl b/dub.sdl
index 5bc5650..549fc31 100644
--- a/dub.sdl
+++ b/dub.sdl
@@ -9,6 +9,8 @@ targetPath "./bin"
stringImportPaths "./views"
buildRequirements "allowWarnings"
dependency "sdlang-d" version="~>0.10.1"
+dependency "d2sqlite3" version="~>0.13.1" # https://code.dlang.org/packages/d2sqlite3 http://biozic.github.io/d2sqlite3/d2sqlite3.html
+subconfiguration "d2sqlite3" "all-included"
dependency "archive" version="~>0.6.0"
configuration "sdp-release" {
name "release"
diff --git a/dub.selections.json b/dub.selections.json
index d0f8e44..728c834 100644
--- a/dub.selections.json
+++ b/dub.selections.json
@@ -2,6 +2,7 @@
"fileVersion": 1,
"versions": {
"archive": "0.6.0",
+ "d2sqlite3": "0.13.1",
"libinputvisitor": "1.2.2",
"sdlang-d": "0.10.1",
"taggedalgebraic": "0.10.4",
diff --git a/maker.org b/maker.org
index 3d294b5..842c67b 100644
--- a/maker.org
+++ b/maker.org
@@ -597,6 +597,8 @@ targetPath "./bin"
stringImportPaths "./views"
buildRequirements "allowWarnings"
dependency "sdlang-d" version="~>0.10.1"
+dependency "d2sqlite3" version="~>0.13.1" # https://code.dlang.org/packages/d2sqlite3 http://biozic.github.io/d2sqlite3/d2sqlite3.html
+subconfiguration "d2sqlite3" "all-included"
dependency "archive" version="~>0.6.0"
#+END_SRC
diff --git a/org/default_paths.org b/org/default_paths.org
index ddf9bfe..7dfef88 100644
--- a/org/default_paths.org
+++ b/org/default_paths.org
@@ -100,6 +100,7 @@ import sdp.meta.rgx;
<>
<>
<>
+<>
#+END_SRC
** _sisupod_ (output bundled source) :sisupod:
@@ -442,3 +443,35 @@ template SiSUpathsEPUB() {
}
}
#+END_SRC
+
+** _sqlite_ :sqlite:
+
+#+name: template_paths_sqlite
+#+BEGIN_SRC d
+template SiSUpathsSQLite() {
+ mixin SiSUrgxInit;
+ static auto rgx = Rgx();
+ auto SiSUpathsSQLite(Ps,Lng)(
+ Ps src_pth_info,
+ Lng lng,
+ ) {
+ auto out_pth = SiSUoutPaths!()(src_pth_info, lng);
+ string base_dir = "sqlite";
+ struct _PathsStruct {
+ string base_filename(string fn_src) {
+ return fn_src.baseName.stripExtension;
+ }
+ string base() {
+ return (out_pth.output_base).chainPath(base_dir).array;
+ }
+ string seg(string fn_src) {
+ return base.chainPath(base_filename(fn_src)).array;
+ }
+ string sqlite_file(string fn_src) {
+ return base.chainPath(base_filename(fn_src) ~ ".sqlite").array;
+ }
+ }
+ return _PathsStruct();
+ }
+}
+#+END_SRC
diff --git a/org/output_hub.org b/org/output_hub.org
index 8f06194..037a169 100644
--- a/org/output_hub.org
+++ b/org/output_hub.org
@@ -49,6 +49,7 @@ template outputHubOp() {
import sdp.output,
sdp.output.epub3,
sdp.output.html,
+ sdp.output.sqlite,
sdp.output.xmls,
sdp.output.source_sisupod,
sdp.output.create_zip_file,
@@ -166,7 +167,7 @@ if (doc_matters.opt_action["odt"]) {
#+BEGIN_SRC d
if (doc_matters.opt_action["sqlite"]) {
if ((doc_matters.opt_action["verbose"])) { writeln("sqlite processing... "); }
- // SQLtableLoad!()(doc_abstraction, doc_matters);
+ SQLiteBuildTablesAndPopulate!()(doc_abstraction, doc_matters);
}
#+END_SRC
@@ -176,11 +177,11 @@ if (doc_matters.opt_action["sqlite"]) {
#+BEGIN_SRC d
if ((config["sqlite-create"])) {
if ((config["verbose"])) { writeln("sqlite create table... "); }
- // SQLtableCreate!()();
+ SQLiteTablesCreate!()();
}
if ((config["sqlite-drop"])) {
if ((config["verbose"])) { writeln("sqlite drop table... "); }
- // SQLtableDrop!()();
+ SQLiteTablesDrop!()();
}
#+END_SRC
diff --git a/org/output_sqlite.org b/org/output_sqlite.org
new file mode 100644
index 0000000..d3f6745
--- /dev/null
+++ b/org/output_sqlite.org
@@ -0,0 +1,1445 @@
+#+TITLE: sdp output html
+#+AUTHOR: Ralph Amissah
+#+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]]
+#+DESCRIPTION for documents - structuring, publishing in multiple formats and search
+#+KEYWORDS
+#+LANGUAGE: en
+#+STARTUP: indent content
+#+OPTIONS: H:3 num:nil toc:t \n:nil @:t ::t |:t ^:nil _:nil -:t f:t *:t <:t
+#+OPTIONS: TeX:t LaTeX:t skip:nil d:nil todo:t pri:nil tags:not-in-toc
+#+OPTIONS: author:nil email:nil creator:nil timestamp:nil
+#+PROPERTY: header-args :padline no :exports code :noweb yes
+#+EXPORT_SELECT_TAGS: export
+#+EXPORT_EXCLUDE_TAGS: noexport
+#+FILETAGS: :sdp:niu:output:
+#+TAGS: assert(a) class(c) debug(d) mixin(m) sdp(s) tangle(T) template(t) WEB(W) noexport(n)
+
+* sql
+** 0. module, templates
+
+#+BEGIN_SRC d :tangle ../src/sdp/output/sqlite.d
+module sdp.output.sqlite;
+<>
+template SQLiteBuildTablesAndPopulate() {
+ void SQLiteBuildTablesAndPopulate(D,I)(
+ auto ref const D doc_abstraction,
+ auto ref I doc_matters,
+ ) {
+ import d2sqlite3;
+ import std.typecons : Nullable;
+ import sdp.output.sqlite;
+ mixin SiSUoutputRgxInit;
+ static auto rgx = Rgx();
+ template SQLiteFormatAndLoadObject() {
+ auto SQLiteFormatAndLoadObject(I)(
+ auto ref I doc_matters,
+ ) {
+ mixin SiSUoutputRgxInit;
+ struct sqlite_format_and_load_objects {
+ <>
+ <>
+ <>
+ <>
+ <>
+ }
+ return sqlite_format_and_load_objects();
+ }
+ }
+ template SQLiteInstruct() {
+ Statement SQLiteInstruct(I)(
+ auto ref I doc_matters,
+ ) {
+ <>
+ <>
+ <>
+ <>
+ <>
+ <>
+ <>
+ <>
+ <>
+ }
+ }
+ template SQLiteObjectsLoop() {
+ void SQLiteObjectsLoop(P)(
+ auto ref P doc_parts,
+ ) {
+ <>
+ }
+ }
+ SQLiteObjectsLoop!()(doc_matters.keys_seq.sql);
+ }
+}
+template SQLiteTablesCreate() {
+ void SQLiteTablesCreate()() {
+ writeln("table create");
+ }
+}
+template SQLiteTablesDrop() {
+ void SQLiteTablesDrop()() {
+ writeln("table drop");
+ }
+}
+#+END_SRC
+
+** 1. imports
+
+#+name: output_imports
+#+BEGIN_SRC d
+import sdp.output;
+import
+ std.file,
+ std.conv : to;
+import sdp.output.sqlite;
+#+END_SRC
+
+** 2. format and load template
+*** 1. prepare objects (munge, sanitize, markup)
+**** 1. _text_ generic munge (sanitize text for search)
+
+- [3/4] (search text, applies to all but code blocks)
+ - [ ] remove whitespace, paragraph on single line (formatting kept so far)
+ - [X] remove font face attributes
+ - [X] move embedded endnotes
+ - [X] place after text object
+ - [X] remove embedded endnote numbers (rely on html output to represent)
+ - [X] urls
+ - [X] clean url markers, leave plain link text
+ - [X] place urls after text object and its endnotes
+
+#+name: sanitize_text_for_search
+#+BEGIN_SRC d
+auto generic_munge_sanitize_text_for_search(
+ string _txt,
+) {
+ string _notes;
+ string _urls;
+ _txt = _txt.replaceAll(rgx.inline_fontface_clean, "");
+ if (_txt.matchFirst(rgx.inline_notes_al_gen)) {
+ foreach (m; _txt.matchAll(rgx.inline_notes_al_gen_text)) {
+ _notes ~= "\n" ~ m["text"];
+ }
+ _txt = _txt.replaceAll(rgx.inline_notes_al_gen, "");
+ }
+ if (_txt.matchFirst(rgx.inline_link)) {
+ foreach (m; _txt.matchAll(rgx.inline_link)) {
+ if (m["link"].match(rgx.url)) {
+ _urls ~= "\n" ~ m["link"];
+ }
+ }
+ _txt = _txt.replaceAll(rgx.inline_link_clean, "");
+ }
+ if (_notes.length > 0) {
+ _txt ~= _notes;
+ }
+ if (_urls.length > 0) {
+ _txt ~= _urls;
+ }
+ if (doc_matters.opt_action["debug"]) {
+ writeln(_txt, "\n");
+ }
+ debug(sql_text_clean) {
+ writeln(_txt);
+ }
+ return _txt;
+}
+#+END_SRC
+
+**** 2. _html_
+
+light html objects
+- place endnotes after text object
+- light inline html markup
+
+***** munge
+****** general munge (special characters, inline markup, move notes)
+
+#+name: sanitize_and_mmunge_inline_html
+#+BEGIN_SRC d
+auto munge_html(O)(
+ auto return ref const O obj,
+) {
+ string _html_special_characters(string _txt){
+ _txt = (_txt)
+ .replaceAll(rgx.xhtml_ampersand, "&")
+ .replaceAll(rgx.xhtml_quotation, """)
+ .replaceAll(rgx.xhtml_less_than, "<")
+ .replaceAll(rgx.xhtml_greater_than, ">")
+ .replaceAll(rgx.nbsp_char, " ")
+ .replaceAll(rgx.xhtml_line_break, "
");
+ return _txt;
+ }
+ string _html_font_face(string _txt){
+ _txt = (_txt)
+ .replaceAll(rgx.inline_emphasis, ("$1"))
+ .replaceAll(rgx.inline_bold, ("$1"))
+ .replaceAll(rgx.inline_underscore, ("$1"))
+ .replaceAll(rgx.inline_italics, ("$1"))
+ .replaceAll(rgx.inline_superscript, ("$1"))
+ .replaceAll(rgx.inline_subscript, ("$1"))
+ .replaceAll(rgx.inline_strike, ("$1"))
+ .replaceAll(rgx.inline_insert, ("$1"))
+ .replaceAll(rgx.inline_mono, ("$1"))
+ .replaceAll(rgx.inline_cite, ("$1"));
+ return _txt;
+ }
+ string _notes;
+ string _urls;
+ string _txt = _html_font_face(_html_special_characters(obj.text));
+ if (_txt.matchFirst(rgx.inline_notes_al_gen)) {
+ foreach (m; _txt.matchAll(rgx.inline_notes_al_gen_text)) {
+ _notes ~= "\n" ~ m["text"];
+ }
+ _txt = _txt.replaceAll(rgx.inline_notes_al_gen_ref, "$1 ");
+ }
+ if (_txt.matchFirst(rgx.inline_link)) {
+ foreach (m; _txt.matchAll(rgx.inline_link)) {
+ }
+ _txt = _txt.replaceAll(rgx.inline_link_clean, "");
+ }
+ if (_notes.length > 0) {
+ _txt ~= _notes;
+ }
+ if (doc_matters.opt_action["debug"]) {
+ writeln(_txt, "\n");
+ }
+ return _txt;
+}
+#+END_SRC
+
+****** special characters
+
+#+name: sanitize_and_mmunge_inline_html
+#+BEGIN_SRC d
+string html_special_characters(string _txt){
+ _txt = (_txt)
+ .replaceAll(rgx.xhtml_ampersand, "&")
+ .replaceAll(rgx.xhtml_quotation, """)
+ .replaceAll(rgx.xhtml_less_than, "<")
+ .replaceAll(rgx.xhtml_greater_than, ">")
+ .replaceAll(rgx.nbsp_char, " ")
+ .replaceAll(rgx.xhtml_line_break, "
");
+ return _txt;
+}
+#+END_SRC
+
+****** special characters for code
+
+#+name: sanitize_and_mmunge_inline_html
+#+BEGIN_SRC d
+string html_special_characters_code(string _txt){
+ _txt = (_txt)
+ .replaceAll(rgx.xhtml_ampersand, "&")
+ .replaceAll(rgx.xhtml_quotation, """)
+ .replaceAll(rgx.xhtml_less_than, "<")
+ .replaceAll(rgx.xhtml_greater_than, ">")
+ .replaceAll(rgx.nbsp_char, " ");
+ return _txt;
+}
+#+END_SRC
+
+****** font_face
+
+#+name: sanitize_and_mmunge_inline_html
+#+BEGIN_SRC d
+string html_font_face(string _txt){
+ _txt = (_txt)
+ .replaceAll(rgx.inline_emphasis, ("$1"))
+ .replaceAll(rgx.inline_bold, ("$1"))
+ .replaceAll(rgx.inline_underscore, ("$1"))
+ .replaceAll(rgx.inline_italics, ("$1"))
+ .replaceAll(rgx.inline_superscript, ("$1"))
+ .replaceAll(rgx.inline_subscript, ("$1"))
+ .replaceAll(rgx.inline_strike, ("$1"))
+ .replaceAll(rgx.inline_insert, ("$1"))
+ .replaceAll(rgx.inline_mono, ("$1"))
+ .replaceAll(rgx.inline_cite, ("$1"));
+ return _txt;
+}
+#+END_SRC
+
+***** objects
+****** heading
+
+#+name: html_objects
+#+BEGIN_SRC d
+auto html_heading(O)(
+ auto return ref const O obj,
+) {
+ string _txt = munge_html(obj);
+ string o = format(q"¶
+ %s
+
¶",
+ obj.is_a,
+ _txt,
+ );
+ return o;
+}
+#+END_SRC
+
+******* +fancy+
+
+##+name: prepare_objects_html
+#+BEGIN_SRC d
+auto html_heading(O)(
+ auto return ref const O obj,
+) {
+ string o;
+ string _txt = munge_html(obj);
+ o = format(q"¶
+ %s
+ ¶",
+ obj.heading_lev_markup,
+ obj.is_a,
+ _txt,
+ obj.heading_lev_markup,
+ );
+ return o;
+}
+#+END_SRC
+
+****** para
+
+#+name: html_objects
+#+BEGIN_SRC d
+auto html_para(O)(
+ auto return ref const O obj,
+) {
+ string _txt = munge_html(obj);
+ _txt = (obj.bullet) ? ("● " ~ _txt) : _txt;
+ string o = format(q"¶
+ %s
+
¶",
+ obj.is_a,
+ obj.indent_hang,
+ obj.indent_base,
+ _txt
+ );
+ return o;
+}
+#+END_SRC
+
+****** quote
+
+#+name: html_objects
+#+BEGIN_SRC d
+auto html_quote(O)(
+ auto return ref const O obj,
+) {
+ string _txt = munge_html(obj);
+ string o = format(q"¶
+ %s
+
¶",
+ obj.is_a,
+ _txt
+ );
+ return o;
+}
+#+END_SRC
+
+****** group
+
+#+name: html_objects
+#+BEGIN_SRC d
+auto html_group(O)(
+ auto return ref const O obj,
+) {
+ string _txt = munge_html(obj);
+ string o = format(q"¶
+ %s
+
¶",
+ obj.is_a,
+ _txt
+ );
+ return o;
+}
+#+END_SRC
+
+****** block
+
+#+name: html_objects
+#+BEGIN_SRC d
+auto html_block(O)(
+ auto return ref const O obj,
+) {
+ string _txt = munge_html(obj);
+ string o = format(q"¶
+ %s
¶",
+ obj.is_a,
+ _txt.stripRight
+ );
+ return o;
+}
+#+END_SRC
+
+****** verse
+
+#+name: html_objects
+#+BEGIN_SRC d
+auto html_verse(O)(
+ auto return ref const O obj,
+) {
+ string _txt = munge_html(obj);
+ string o = format(q"¶%s
¶",
+ obj.is_a,
+ _txt
+ );
+ return o;
+}
+#+END_SRC
+
+****** code
+
+#+name: html_objects
+#+BEGIN_SRC d
+auto html_code(O)(
+ auto return ref const O obj,
+) {
+ string _txt = html_special_characters_code(obj.text);
+ string o = format(q"¶%s
¶",
+ obj.is_a,
+ _txt
+ );
+ return o;
+}
+#+END_SRC
+
+****** table
+
+#+name: html_objects
+#+BEGIN_SRC d
+auto html_table(O)(
+ auto return ref const O obj,
+) {
+ auto _tablarize(O)(
+ auto return ref const O obj,
+ string _txt,
+ ) {
+ string[] _table_rows = (_txt).split(rgx.table_delimiter_row);
+ string[] _table_cols;
+ string _table;
+ string _tablenote;
+ foreach(row_idx, row; _table_rows) {
+ _table_cols = row.split(rgx.table_delimiter_col);
+ _table ~= "";
+ foreach(col_idx, cell; _table_cols) {
+ if ((_table_cols.length == 1)
+ && (_table_rows.length <= row_idx+2)) { // check row_idx+2 (rather than == ++row_idx)
+ _tablenote ~= cell;
+ } else {
+ string _col_is = (row_idx == 0 && obj.table_heading) ? "th" : "td";
+ string _align = ("style=\"text-align:"
+ ~ ((obj.table_column_aligns[col_idx] == "l")
+ ? "left\"" : "right\""));
+ _table ~= "<" ~ _col_is ~ " width=\"" ~ obj.table_column_widths[col_idx].to!string ~ "%\" " ~ _align ~ ">";
+ _table ~= cell;
+ _table ~= "" ~ _col_is ~ ">";
+ }
+ }
+ _table ~= "
";
+ }
+ auto t = tuple(
+ _table,
+ _tablenote,
+ );
+ return t;
+ }
+ string _txt = munge_html(obj);
+ auto t = _tablarize(obj, _txt);
+ _txt = t[0];
+ string _note = t[1];
+ string o = format(q"¶
+
+ %s
+
¶",
+ obj.is_a,
+ _txt,
+ _note
+ );
+ return o;
+}
+#+END_SRC
+
+*** 2. _sqlite_ (loop preparation & pre-loop action)
+***** initialize: prepare statements drop & create db & tables anew
+****** d sqlite statements (initialize database) {
+
+#+name: sqlite_db_initialize
+#+BEGIN_SRC d
+auto pth_sqlite = SiSUpathsSQLite!()(doc_matters.src_path_info, doc_matters.language);
+auto db = Database(pth_sqlite.sqlite_file(doc_matters.source_filename));
+// auto db = Database(":memory:"); // open database in memory
+db.run("
+#+END_SRC
+
+******* 0. drop table if exists
+
+#+name: sqlite_table_initialize
+#+BEGIN_SRC sql
+DROP TABLE IF EXISTS metadata_and_text;
+DROP TABLE IF EXISTS doc_objects;
+DROP TABLE IF EXISTS urls;
+#+END_SRC
+
+******* 1. create tables
+******** metadata & src text
+
+#+name: sqlite_create_table_metadata_and_src_txt
+#+BEGIN_SRC sql
+CREATE TABLE metadata_and_text (
+ tid BIGINT PRIMARY KEY,
+ title VARCHAR(800) NOT NULL,
+ title_main VARCHAR(400) NULL, /*NOT*/
+ title_sub VARCHAR(400) NULL,
+ title_short VARCHAR(400) NULL,
+ title_edition VARCHAR(10) NULL,
+ title_note VARCHAR(2500) NULL,
+ title_language VARCHAR(100) NULL,
+ title_language_char VARCHAR(6) NULL,
+ creator_author VARCHAR(600) NULL,
+ creator_author_hon VARCHAR(100) NULL,
+ creator_author_nationality VARCHAR(100) NULL,
+ creator_editor VARCHAR(600) NULL,
+ creator_contributor VARCHAR(600) NULL,
+ creator_illustrator VARCHAR(600) NULL,
+ creator_photographer VARCHAR(600) NULL,
+ creator_translator VARCHAR(600) NULL,
+ creator_prepared_by VARCHAR(600) NULL,
+ creator_digitized_by VARCHAR(600) NULL,
+ creator_audio VARCHAR(600) NULL,
+ creator_video VARCHAR(600) NULL,
+ language_document VARCHAR(100) NULL,
+ language_document_char VARCHAR(6) NULL, /*NOT*/
+ language_original VARCHAR(100) NULL,
+ language_original_char VARCHAR(6) NULL,
+ date_added_to_site VARCHAR(10) NULL,
+ date_available VARCHAR(10) NULL,
+ date_created VARCHAR(10) NULL,
+ date_issued VARCHAR(10) NULL,
+ date_modified VARCHAR(10) NULL,
+ date_published VARCHAR(10) NULL,
+ date_valid VARCHAR(10) NULL,
+ date_translated VARCHAR(10) NULL,
+ date_original_publication VARCHAR(10) NULL,
+ date_generated VARCHAR(10) NULL,
+ publisher VARCHAR(600) NULL,
+ original_publisher VARCHAR(600) NULL,
+ original_language VARCHAR(100) NULL,
+ original_language_char VARCHAR(6) NULL,
+ original_source VARCHAR(600) NULL,
+ original_institution VARCHAR(600) NULL,
+ original_nationality VARCHAR(100) NULL,
+ rights VARCHAR(2500) NULL,
+ rights_copyright_text VARCHAR(2500) NULL,
+ rights_copyright_translation VARCHAR(2500) NULL,
+ rights_copyright_illustrations VARCHAR(2500) NULL,
+ rights_copyright_photographs VARCHAR(2500) NULL,
+ rights_copyright_preparation VARCHAR(2500) NULL,
+ rights_copyright_digitization VARCHAR(2500) NULL,
+ rights_copyright_audio VARCHAR(2500) NULL,
+ rights_copyright_video VARCHAR(2500) NULL,
+ rights_license VARCHAR(2500) NULL,
+ identifier_oclc VARCHAR(30) NULL,
+ identifier_isbn VARCHAR(16) NULL,
+ classify_topic_register VARCHAR(2500) NULL,
+ classify_subject VARCHAR(600) NULL,
+ classify_loc VARCHAR(30) NULL,
+ classify_dewey VARCHAR(30) NULL,
+ classify_keywords VARCHAR(600) NULL,
+ notes_abstract TEXT NULL,
+ notes_description TEXT NULL,
+ notes_comment TEXT NULL,
+ notes_coverage VARCHAR(200) NULL,
+ notes_relation VARCHAR(200) NULL,
+ notes_history VARCHAR(600) NULL,
+ notes_type VARCHAR(600) NULL,
+ notes_format VARCHAR(600) NULL,
+ notes_prefix TEXT NULL,
+ notes_prefix_a TEXT NULL,
+ notes_prefix_b TEXT NULL,
+ notes_suffix TEXT NULL,
+ src_filename VARCHAR(256) NULL,
+ src_fingerprint VARCHAR(256) NULL,
+ src_filesize VARCHAR(10) NULL,
+ src_wordcount VARCHAR(10) NULL,
+ src_text TEXT NULL,
+ fulltext TEXT NULL,
+ links TEXT NULL
+);
+#+END_SRC
+
+******** doc_objects
+
+#+name: sqlite_create_table_objects
+#+BEGIN_SRC sql
+CREATE TABLE doc_objects (
+ lid BIGINT PRIMARY KEY,
+ metadata_tid BIGINT REFERENCES metadata_and_text,
+ ocn SMALLINT,
+ ocnd VARCHAR(6),
+ ocns VARCHAR(6),
+ clean TEXT NULL,
+ body TEXT NULL,
+ book_idx TEXT NULL,
+ seg VARCHAR(256) NULL,
+ lev_an VARCHAR(1),
+ lev SMALLINT NULL,
+ lev0 SMALLINT,
+ lev1 SMALLINT,
+ lev2 SMALLINT,
+ lev3 SMALLINT,
+ lev4 SMALLINT,
+ lev5 SMALLINT,
+ lev6 SMALLINT,
+ lev7 SMALLINT,
+ en_a SMALLINT NULL,
+ en_z SMALLINT NULL,
+ en_a_asterisk SMALLINT NULL,
+ en_z_asterisk SMALLINT NULL,
+ en_a_plus SMALLINT NULL,
+ en_z_plus SMALLINT NULL,
+ t_of VARCHAR(16),
+ t_is VARCHAR(16),
+ node VARCHAR(16) NULL,
+ parent VARCHAR(16) NULL,
+ digest_clean CHAR(256),
+ digest_all CHAR(256),
+ types CHAR(1) NULL
+)
+#+END_SRC
+
+****** d }
+
+#+name: sqlite_close_initialize
+#+BEGIN_SRC d
+");
+#+END_SRC
+
+***** prepare insert statements and do pre-loop inserts
+****** 1. _metadata & src_ text
+******* sql statement
+******** d { sql statement
+
+#+name: sqlite_insert_metadata_and_src_text
+#+BEGIN_SRC d
+Statement insert_metadata = db.prepare("
+#+END_SRC
+
+tid document number unique
+either:
+- increment by adding 1 for each document,
+- make hash of document filename or url and use?
+
+********* sql insert into
+
+#+name: sqlite_insert_metadata_and_src_text
+#+BEGIN_SRC sql
+ INSERT INTO metadata_and_text (
+ tid,
+ title,
+ title_main,
+ title_sub,
+ title_short,
+ title_edition,
+ title_note,
+ title_language,
+ title_language_char,
+ creator_author,
+ creator_author_hon,
+ creator_author_nationality,
+ creator_editor,
+ creator_contributor,
+ creator_illustrator,
+ creator_photographer,
+ creator_translator,
+ creator_prepared_by,
+ creator_digitized_by,
+ creator_audio,
+ creator_video,
+ language_document,
+ language_document_char,
+ language_original,
+ language_original_char,
+ date_added_to_site,
+ date_available,
+ date_created,
+ date_issued,
+ date_modified,
+ date_published,
+ date_valid,
+ date_translated,
+ date_original_publication,
+ date_generated,
+ publisher,
+ original_publisher,
+ original_language,
+ original_language_char,
+ original_source,
+ original_institution,
+ original_nationality,
+ rights,
+ rights_copyright_text,
+ rights_copyright_translation,
+ rights_copyright_illustrations,
+ rights_copyright_photographs,
+ rights_copyright_preparation,
+ rights_copyright_digitization,
+ rights_copyright_audio,
+ rights_copyright_video,
+ rights_license,
+ identifier_oclc,
+ identifier_isbn,
+ classify_topic_register,
+ classify_subject,
+ classify_loc,
+ classify_dewey,
+ classify_keywords,
+ notes_abstract,
+ notes_description,
+ notes_comment,
+ notes_coverage,
+ notes_relation,
+ notes_history,
+ notes_type,
+ notes_format,
+ notes_prefix,
+ notes_prefix_a,
+ notes_prefix_b,
+ notes_suffix,
+ src_filename,
+ src_fingerprint,
+ src_filesize,
+ src_wordcount,
+ src_text,
+ fulltext,
+ links
+ )
+#+END_SRC
+
+********* sql values
+
+#+name: sqlite_insert_metadata_and_src_text
+#+BEGIN_SRC sql
+ VALUES (
+ :tid,
+ :title,
+ :title_main,
+ :title_sub,
+ :title_short,
+ :title_edition,
+ :title_note,
+ :title_language,
+ :title_language_char,
+ :creator_author,
+ :creator_author_hon,
+ :creator_author_nationality,
+ :creator_editor,
+ :creator_contributor,
+ :creator_illustrator,
+ :creator_photographer,
+ :creator_translator,
+ :creator_prepared_by,
+ :creator_digitized_by,
+ :creator_audio,
+ :creator_video,
+ :language_document,
+ :language_document_char,
+ :language_original,
+ :language_original_char,
+ :date_added_to_site,
+ :date_available,
+ :date_created,
+ :date_issued,
+ :date_modified,
+ :date_published,
+ :date_valid,
+ :date_translated,
+ :date_original_publication,
+ :date_generated,
+ :publisher,
+ :original_publisher,
+ :original_language,
+ :original_language_char,
+ :original_source,
+ :original_institution,
+ :original_nationality,
+ :rights,
+ :rights_copyright_text,
+ :rights_copyright_translation,
+ :rights_copyright_illustrations,
+ :rights_copyright_photographs,
+ :rights_copyright_preparation,
+ :rights_copyright_digitization,
+ :rights_copyright_audio,
+ :rights_copyright_video,
+ :rights_license,
+ :identifier_oclc,
+ :identifier_isbn,
+ :classify_topic_register,
+ :classify_subject,
+ :classify_loc,
+ :classify_dewey,
+ :classify_keywords,
+ :notes_abstract,
+ :notes_description,
+ :notes_comment,
+ :notes_coverage,
+ :notes_relation,
+ :notes_history,
+ :notes_type,
+ :notes_format,
+ :notes_prefix,
+ :notes_prefix_a,
+ :notes_prefix_b,
+ :notes_suffix,
+ :src_filename,
+ :src_fingerprint,
+ :src_filesize,
+ :src_wordcount,
+ :src_text,
+ :fulltext,
+ :links
+ )
+#+END_SRC
+
+******** d }
+
+#+name: sqlite_insert_metadata_and_src_text
+#+BEGIN_SRC d
+");
+#+END_SRC
+
+******* TODO insert document_metadata
+
+#+name: sqlite_insert_metadata_and_src_text
+#+BEGIN_SRC d
+insert_metadata.bind(":title", doc_matters.dochead_meta["title"]["full"]);
+// insert_metadata.bind(":title_main", doc_matters.dochead_meta["title"]["full"]);
+// insert_metadata.bind(":title_sub", doc_matters.dochead_meta["title"][""]);
+// insert_metadata.bind(":title_short", doc_matters.dochead_meta["title"][""]);
+// insert_metadata.bind(":title_edition", doc_matters.dochead_meta["title"][""]);
+// insert_metadata.bind(":title_note", doc_matters.dochead_meta["title"][""]);
+// insert_metadata.bind(":title_language", doc_matters.dochead_meta["title"][""]);
+// insert_metadata.bind(":title_language_char", doc_matters.dochead_meta["title"][""]);
+insert_metadata.bind(":creator_author", doc_matters.dochead_meta["creator"]["author"]);
+// insert_metadata.bind(":creator_author_hon", doc_matters.dochead_meta["creator"][""]);
+// insert_metadata.bind(":creator_author_nationality", doc_matters.dochead_meta["creator"][""]);
+// insert_metadata.bind(":creator_editor", doc_matters.dochead_meta["creator"][""]);
+// insert_metadata.bind(":creator_contributor", doc_matters.dochead_meta["creator"][""]);
+// insert_metadata.bind(":creator_illustrator", doc_matters.dochead_meta["creator"][""]);
+// insert_metadata.bind(":creator_photographer", doc_matters.dochead_meta["creator"][""]);
+// insert_metadata.bind(":creator_translator", doc_matters.dochead_meta["creator"][""]);
+// insert_metadata.bind(":creator_prepared_by", doc_matters.dochead_meta["creator"][""]);
+// insert_metadata.bind(":creator_digitized_by", doc_matters.dochead_meta["creator"][""]);
+// insert_metadata.bind(":creator_audio", doc_matters.dochead_meta["creator"][""]);
+// insert_metadata.bind(":creator_video", doc_matters.dochead_meta["creator"][""]);
+// insert_metadata.bind(":language_document", doc_matters.dochead_meta["language"][""]);
+// insert_metadata.bind(":language_document_char", doc_matters.dochead_meta["language"][""]);
+// insert_metadata.bind(":language_original", doc_matters.dochead_meta["language"][""]);
+// insert_metadata.bind(":language_original_char", doc_matters.dochead_meta["language"][""]);
+// insert_metadata.bind(":date_added_to_site", doc_matters.dochead_meta["date"][""]);
+// insert_metadata.bind(":date_available", doc_matters.dochead_meta["date"][""]);
+// insert_metadata.bind(":date_created", doc_matters.dochead_meta["date"][""]);
+// insert_metadata.bind(":date_issued", doc_matters.dochead_meta["date"][""]);
+// insert_metadata.bind(":date_modified", doc_matters.dochead_meta["date"][""]);
+// insert_metadata.bind(":date_published", doc_matters.dochead_meta["date"][""]);
+// insert_metadata.bind(":date_valid", doc_matters.dochead_meta["date"][""]);
+// insert_metadata.bind(":date_translated", doc_matters.dochead_meta["date"][""]);
+// insert_metadata.bind(":date_original_publication", doc_matters.dochead_meta["date"][""]);
+// insert_metadata.bind(":date_generated", doc_matters.dochead_meta["date"][""]);
+// insert_metadata.bind(":publisher", doc_matters.dochead_meta["publisher"][""]);
+// insert_metadata.bind(":original_publisher", doc_matters.dochead_meta["original"][""]);
+// insert_metadata.bind(":original_language", doc_matters.dochead_meta["original"][""]);
+// insert_metadata.bind(":original_language_char", doc_matters.dochead_meta["original"][""]);
+// insert_metadata.bind(":original_source", doc_matters.dochead_meta["original"][""]);
+// insert_metadata.bind(":original_institution", doc_matters.dochead_meta["original"][""]);
+// insert_metadata.bind(":original_nationality", doc_matters.dochead_meta["original"][""]);
+// insert_metadata.bind(":rights", doc_matters.dochead_meta["rights"][""]);
+// insert_metadata.bind(":rights_copyright_text", doc_matters.dochead_meta["rights"][""]);
+// insert_metadata.bind(":rights_copyright_translation", doc_matters.dochead_meta["rights"][""]);
+// insert_metadata.bind(":rights_copyright_illustrations", doc_matters.dochead_meta["rights"][""]);
+// insert_metadata.bind(":rights_copyright_photographs", doc_matters.dochead_meta["rights"][""]);
+// insert_metadata.bind(":rights_copyright_preparation", doc_matters.dochead_meta["rights"][""]);
+// insert_metadata.bind(":rights_copyright_digitization", doc_matters.dochead_meta["rights"][""]);
+// insert_metadata.bind(":rights_copyright_audio", doc_matters.dochead_meta["rights"][""]);
+// insert_metadata.bind(":rights_copyright_video", doc_matters.dochead_meta["rights"][""]);
+// insert_metadata.bind(":rights_license", doc_matters.dochead_meta["rights"][""]);
+// insert_metadata.bind(":identifier_oclc", doc_matters.dochead_meta["identifier"][""]);
+// insert_metadata.bind(":identifier_isbn", doc_matters.dochead_meta["identifier"][""]);
+// insert_metadata.bind(":classify_topic_register", doc_matters.dochead_meta["classify"][""]);
+// insert_metadata.bind(":classify_subject", doc_matters.dochead_meta["classify"][""]);
+// insert_metadata.bind(":classify_loc", doc_matters.dochead_meta["classify"][""]);
+// insert_metadata.bind(":classify_loc", doc_matters.dochead_meta["classify"][""]);
+// insert_metadata.bind(":notes_abstract", doc_matters.dochead_meta["notes"][""]);
+// insert_metadata.bind(":notes_description", doc_matters.dochead_meta["notes"][""]);
+// insert_metadata.bind(":notes_comment", doc_matters.dochead_meta["notes"][""]);
+// insert_metadata.bind(":notes_coverage", doc_matters.dochead_meta["notes"][""]);
+// insert_metadata.bind(":notes_relation", doc_matters.dochead_meta["notes"][""]);
+// insert_metadata.bind(":notes_history", doc_matters.dochead_meta["notes"][""]);
+// insert_metadata.bind(":notes_type", doc_matters.dochead_meta["notes"][""]);
+// insert_metadata.bind(":notes_format", doc_matters.dochead_meta["notes"][""]);
+// insert_metadata.bind(":notes_prefix", doc_matters.dochead_meta["notes"][""]);
+// insert_metadata.bind(":notes_prefix_a", doc_matters.dochead_meta["notes"][""]);
+// insert_metadata.bind(":notes_prefix_b", doc_matters.dochead_meta["notes"][""]);
+// insert_metadata.bind(":notes_suffix", doc_matters.dochead_meta["notes"][""]);
+// insert_metadata.bind(":src_filename", doc_matters.dochead_meta["src"][""]);
+// insert_metadata.bind(":src_fingerprint", doc_matters.dochead_meta["src"][""]);
+// insert_metadata.bind(":src_filesize", doc_matters.dochead_meta["src"][""]);
+// insert_metadata.bind(":src_wordcount", doc_matters.dochead_meta["src"][""]);
+// insert_metadata.bind(":src_text", doc_matters.dochead_meta["src"][""]);
+// insert_metadata.bind(":fulltext", doc_matters.dochead_meta["fulltext"][""]);
+// insert_metadata.bind(":links", doc_matters.dochead_meta["links"][""]);
+insert_metadata.execute(); insert_metadata.reset();
+/+ watch +/
+writeln("sql statement executed");
+assert(db.totalChanges == 1);
+#+END_SRC
+
+###+name: sqlite_insert_metadata_and_src_text
+#+BEGIN_SRC d
+insert_metadata.inject(
+ 1,
+ doc_matters.dochead_meta["title"]["full"],
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ doc_matters.dochead_meta["creator"]["author"]
+);
+#+END_SRC
+
+****** 2. _doc objects_ (used with doc_objects in document loop)
+******* prepare sql statement
+******** d { sql statement
+
+#+name: sqlite_insert_doc_objects
+#+BEGIN_SRC d
+Statement insert_doc_objects = db.prepare("
+#+END_SRC
+
+********* sql insert into
+
+lid unique, increment by 1 per object, not ocn
+
+metadata tid document number unique
+either:
+- increment by adding 1 for each document,
+- make hash of document filename or url and use?
+
+#+name: sqlite_insert_doc_objects
+#+BEGIN_SRC sql
+ INSERT INTO doc_objects (
+ lid,
+ metadata_tid,
+ ocn,
+ ocnd,
+ ocns,
+ clean,
+ body,
+ book_idx,
+ seg,
+ lev_an,
+ lev,
+ lev0,
+ lev1,
+ lev2,
+ lev3,
+ lev4,
+ lev5,
+ lev6,
+ lev7,
+ en_a,
+ en_z,
+ en_a_asterisk,
+ en_z_asterisk,
+ en_a_plus,
+ en_z_plus,
+ t_of,
+ t_is,
+ node,
+ parent,
+ digest_clean,
+ digest_all,
+ types
+ )
+#+END_SRC
+
+********* sql values
+
+#+name: sqlite_insert_doc_objects
+#+BEGIN_SRC sql
+ VALUES (
+ :lid,
+ :metadata_tid,
+ :ocn,
+ :ocnd,
+ :ocns,
+ :clean,
+ :body,
+ :book_idx,
+ :seg,
+ :lev_an,
+ :lev,
+ :lev0,
+ :lev1,
+ :lev2,
+ :lev3,
+ :lev4,
+ :lev5,
+ :lev6,
+ :lev7,
+ :en_a,
+ :en_z,
+ :en_a_asterisk,
+ :en_z_asterisk,
+ :en_a_plus,
+ :en_z_plus,
+ :t_of,
+ :t_is,
+ :node,
+ :parent,
+ :digest_clean,
+ :digest_all,
+ :types
+ )
+#+END_SRC
+
+******** d }
+
+#+name: sqlite_insert_doc_objects
+#+BEGIN_SRC d
+");
+return insert_doc_objects;
+#+END_SRC
+
+******* TODO (within loop not here - insert doc objects
+
+work out
+
+*** 3. hub (sqlite_format_and_load_objects)
+***** sql related
+
+#+name: sqlite_load_object
+#+BEGIN_SRC d
+auto sqlite_load_string(O,Dm)(
+ auto return ref const O obj,
+ auto return ref Dm doc_matters,
+) {
+ string o;
+ return o;
+}
+#+END_SRC
+
+#+name: sqlite_load_object
+#+BEGIN_SRC d
+auto postgresql_load_string(O,Dm)(
+ auto return ref const O obj,
+ auto return ref Dm doc_matters,
+) {
+ string o;
+ return o;
+}
+#+END_SRC
+
+#+name: sqlite_load_object
+#+BEGIN_SRC d
+string sqlite_statement(O)(
+ auto return ref const O obj,
+ string _txt,
+ string _html,
+) {
+ void _sql_exe(O)(
+ string _sql,
+ ) {
+ writeln(_html);
+ writeln(_sql);
+ }
+ string _sql;
+ return _sql;
+}
+#+END_SRC
+
+***** heading
+
+#+name: hub_format_and_sqlite_load_objects
+#+BEGIN_SRC d
+auto heading(O)(
+ auto return ref const O obj,
+) {
+ string[string] obj_txt = [
+ "text": generic_munge_sanitize_text_for_search(obj.text),
+ "html": html_heading(obj)
+ ];
+ if (doc_matters.opt_action["debug"]) {
+ debug(sql_txt) {
+ writeln(obj_txt["text"]);
+ }
+ debug(sql_html) {
+ writeln(obj_txt["html"]);
+ }
+ } else {
+ // load sql
+ }
+ return obj_txt;
+}
+#+END_SRC
+
+***** para
+
+#+name: hub_format_and_sqlite_load_objects
+#+BEGIN_SRC d
+auto para(O)(
+ auto return ref const O obj,
+) {
+ string[string] obj_txt = [
+ "text": generic_munge_sanitize_text_for_search(obj.text),
+ "html": html_para(obj)
+ ];
+ if (doc_matters.opt_action["debug"]) {
+ debug(sql_txt) {
+ writeln(obj_txt["text"]);
+ }
+ debug(sql_html) {
+ writeln(obj_txt["html"]);
+ }
+ } else {
+ // load sql
+ }
+ return obj_txt;
+}
+#+END_SRC
+
+***** quote
+
+#+name: hub_format_and_sqlite_load_objects
+#+BEGIN_SRC d
+auto quote(O)(
+ auto return ref const O obj,
+) {
+ string[string] obj_txt = [
+ "text": generic_munge_sanitize_text_for_search(obj.text),
+ "html": html_quote(obj)
+ ];
+ if (doc_matters.opt_action["debug"]) {
+ debug(sql_txt) {
+ writeln(obj_txt["text"]);
+ }
+ debug(sql_html) {
+ writeln(obj_txt["html"]);
+ }
+ } else {
+ // load sql
+ }
+ return obj_txt;
+}
+#+END_SRC
+***** group
+
+#+name: hub_format_and_sqlite_load_objects
+#+BEGIN_SRC d
+auto group(O)(
+ auto return ref const O obj,
+) {
+ string[string] obj_txt = [
+ "text": generic_munge_sanitize_text_for_search(obj.text),
+ "html": html_group(obj)
+ ];
+ if (doc_matters.opt_action["debug"]) {
+ debug(sql_txt) {
+ writeln(obj_txt["text"]);
+ }
+ debug(sql_html) {
+ writeln(obj_txt["html"]);
+ }
+ } else {
+ // load sql
+ }
+ return obj_txt;
+}
+#+END_SRC
+
+***** block
+
+#+name: hub_format_and_sqlite_load_objects
+#+BEGIN_SRC d
+auto block(O)(
+ auto return ref const O obj,
+) {
+ string[string] obj_txt = [
+ "text": generic_munge_sanitize_text_for_search(obj.text),
+ "html": html_block(obj)
+ ];
+ if (doc_matters.opt_action["debug"]) {
+ debug(sql_txt) {
+ writeln(obj_txt["text"]);
+ }
+ debug(sql_html) {
+ writeln(obj_txt["html"]);
+ }
+ } else {
+ // load sql
+ }
+ return obj_txt;
+}
+#+END_SRC
+
+***** verse
+
+#+name: hub_format_and_sqlite_load_objects
+#+BEGIN_SRC d
+auto verse(O)(
+ auto return ref const O obj,
+) {
+ string[string] obj_txt = [
+ "text": generic_munge_sanitize_text_for_search(obj.text),
+ "html": html_verse(obj)
+ ];
+ if (doc_matters.opt_action["debug"]) {
+ debug(sql_txt) {
+ writeln(obj_txt["text"]);
+ }
+ debug(sql_html) {
+ writeln(obj_txt["html"]);
+ }
+ } else {
+ // load sql
+ }
+ return obj_txt;
+}
+#+END_SRC
+
+***** code
+
+#+name: hub_format_and_sqlite_load_objects
+#+BEGIN_SRC d
+auto code(O)(
+ auto return ref const O obj,
+) {
+ string[string] obj_txt = [
+ "text": generic_munge_sanitize_text_for_search(obj.text),
+ "html": html_code(obj)
+ ];
+ if (doc_matters.opt_action["debug"]) {
+ debug(sql_txt) {
+ writeln(obj_txt["text"]);
+ }
+ debug(sql_html) {
+ writeln(obj_txt["html"]);
+ }
+ } else {
+ // load sql
+ }
+ return obj_txt;
+}
+#+END_SRC
+
+***** table
+
+#+name: hub_format_and_sqlite_load_objects
+#+BEGIN_SRC d
+auto table(O)(
+ auto return ref const O obj,
+) {
+ string[string] obj_txt = [
+ "text": generic_munge_sanitize_text_for_search(obj.text),
+ "html": html_table(obj)
+ ];
+ if (doc_matters.opt_action["debug"]) {
+ debug(sql_txt) {
+ writeln(obj_txt["text"]);
+ }
+ debug(sql_html) {
+ writeln(obj_txt["html"]);
+ }
+ } else {
+ // load sql
+ }
+ return obj_txt;
+}
+#+END_SRC
+
+** 3. loop, identify, load - loop template
+
+#+name: sqlite_objects_loop
+#+BEGIN_SRC d
+Statement insert_doc_objects = SQLiteInstruct!()(doc_matters);
+auto format_and_sqlite_load = SQLiteFormatAndLoadObject!()(doc_matters);
+string[string] obj_txt;
+string doc_text;
+foreach (part; doc_parts) {
+ foreach (obj; doc_abstraction[part]) {
+ switch (obj.of_part) {
+ case "frontmatter": assert(part == "head");
+ switch (obj.is_of) {
+ case "para":
+ switch (obj.is_a) {
+ case "heading":
+ obj_txt = format_and_sqlite_load.heading(obj);
+ break;
+ default:
+ if ((doc_matters.opt_action["debug"])) {
+ writeln(__FILE__, ":", __LINE__, ": ", obj.is_a);
+ }
+ break;
+ }
+ break;
+ default:
+ if ((doc_matters.opt_action["debug"])) {
+ writeln(__FILE__, ":", __LINE__, ": ", obj.is_of);
+ }
+ break;
+ }
+ break;
+ case "body": // assert(part == "body"); // TODO broken
+ switch (obj.is_of) {
+ case "para":
+ switch (obj.is_a) {
+ case "heading":
+ debug (asserts) { // TODO consider and fix or remove
+ if (part != "body") {
+ writeln(__LINE__, ": ", obj.text);
+ }
+ }
+ obj_txt = format_and_sqlite_load.heading(obj);
+ break;
+ case "para":
+ obj_txt = format_and_sqlite_load.para(obj);
+ break;
+ default:
+ if ((doc_matters.opt_action["debug"])) {
+ writeln(__FILE__, ":", __LINE__, ": ", obj.is_a);
+ }
+ break;
+ }
+ break;
+ case "block":
+ switch (obj.is_a) {
+ case "quote":
+ obj_txt = format_and_sqlite_load.quote(obj);
+ break;
+ case "group":
+ obj_txt = format_and_sqlite_load.group(obj);
+ break;
+ case "block":
+ obj_txt = format_and_sqlite_load.block(obj);
+ break;
+ case "poem": // double check on keeping both poem & verse
+ break;
+ case "verse":
+ obj_txt = format_and_sqlite_load.verse(obj);
+ break;
+ case "code":
+ obj_txt = format_and_sqlite_load.code(obj);
+ break;
+ case "table":
+ obj_txt = format_and_sqlite_load.table(obj);
+ break;
+ default:
+ if ((doc_matters.opt_action["debug"])) {
+ writeln(__FILE__, ":", __LINE__, ": ", obj.is_a);
+ }
+ break;
+ }
+ break;
+ default:
+ if ((doc_matters.opt_action["debug"])) {
+ writeln(__FILE__, ":", __LINE__, ": ", obj.is_of);
+ }
+ break;
+ }
+ break;
+ case "backmatter":
+ assert(part == "endnotes" || "glossary" || "bibliography" || "bookindex_scroll" || "blurb" || "tail");
+ switch (obj.is_of) {
+ case "para":
+ switch (obj.is_a) {
+ case "heading":
+ obj_txt = format_and_sqlite_load.heading(obj);
+ break;
+ case "endnote": assert(part == "endnotes");
+ obj_txt = format_and_sqlite_load.para(obj);
+ break;
+ case "glossary": assert(part == "glossary");
+ obj_txt = format_and_sqlite_load.para(obj);
+ break;
+ case "bibliography": assert(part == "bibliography");
+ obj_txt = format_and_sqlite_load.para(obj);
+ break;
+ case "bookindex": assert(part == "bookindex_scroll");
+ obj_txt = format_and_sqlite_load.para(obj);
+ break;
+ case "blurb": assert(part == "blurb");
+ obj_txt = format_and_sqlite_load.para(obj);
+ break;
+ default:
+ if ((doc_matters.opt_action["debug"])) {
+ writeln(__FILE__, ":", __LINE__, ": ", obj.is_a);
+ }
+ break;
+ }
+ break;
+ default:
+ if ((doc_matters.opt_action["debug"])) {
+ writeln(__FILE__, ":", __LINE__, ": ", obj.is_of);
+ }
+ break;
+ }
+ break;
+ case "comment":
+ break;
+ default:
+ if ((doc_matters.opt_action["debug"])) {
+ writeln(__FILE__, ":", __LINE__, ": ", obj.of_part); // check where empty value could come from
+ writeln(__FILE__, ":", __LINE__, ": ", obj.is_a);
+ writeln(__FILE__, ":", __LINE__, ": ", obj.text); // check where empty value could come from
+ }
+ break;
+ }
+ if (obj.is_a == "heading") {
+ writeln(
+ "markup: ", obj.heading_lev_markup,
+ "> ", obj.dom_markedup,
+ "; collapsed: ", obj.heading_lev_collapsed,
+ "> ", obj.dom_collapsed,
+ "; ocn: ", obj.ocn,
+ " node: ", obj.node,
+ "; parent: ", obj.parent_lev_markup,
+ "; ocn: ", obj.parent_ocn,
+ "; ",
+ );
+ }
+ insert_doc_objects.bind(":t_of", obj.is_of);
+ insert_doc_objects.bind(":t_is", obj.is_a);
+ insert_doc_objects.bind(":ocn", obj.ocn);
+ insert_doc_objects.bind(":clean", obj_txt["text"]); // consider whether book index info should be made available within clear text for search
+ insert_doc_objects.bind(":body", obj_txt["html"]);
+ // insert_doc_objects.bind(":book_idx", ""); // not needed, but, consider whether should be made available within object for clear text search
+ insert_doc_objects.bind(":lev", obj.heading_lev_markup);
+ // // insert_doc_objects.bind(":dom_markedup", ""); // should make lev sequence below obsolete
+ // // insert_doc_objects.bind(":dom_collapsed", ""); // should add info
+ // insert_doc_objects.bind(":lev0", "");
+ // insert_doc_objects.bind(":lev1", "");
+ // insert_doc_objects.bind(":lev2", "");
+ // insert_doc_objects.bind(":lev3", "");
+ // insert_doc_objects.bind(":lev4", "");
+ // insert_doc_objects.bind(":lev5", "");
+ // insert_doc_objects.bind(":lev6", "");
+ // insert_doc_objects.bind(":lev7", "");
+ // insert_doc_objects.bind(":node", "");
+ // insert_doc_objects.bind(":type", "");
+ // insert_doc_objects.bind(":parent_ocn", "");
+ // insert_doc_objects.bind(":ancestors", "");
+ // insert_doc_objects.bind(":heading_lev_markup", "");
+ // insert_doc_objects.bind(":heading_lev_collapsed", "");
+ // insert_doc_objects.bind(":parent_lev_markup", "");
+ // insert_doc_objects.bind(":heading_ancestors", "");
+ // insert_doc_objects.bind(":node", "");
+ insert_doc_objects.execute(); insert_doc_objects.reset();
+ }
+}
+#+END_SRC
diff --git a/src/sdp/output/hub.d b/src/sdp/output/hub.d
index b7f274a..d25905f 100644
--- a/src/sdp/output/hub.d
+++ b/src/sdp/output/hub.d
@@ -7,6 +7,7 @@ template outputHub() {
import sdp.output,
sdp.output.epub3,
sdp.output.html,
+ sdp.output.sqlite,
sdp.output.xmls,
sdp.output.source_sisupod,
sdp.output.create_zip_file,
@@ -74,7 +75,7 @@ template outputHub() {
}
if (doc_matters.opt_action["sqlite"]) {
if ((doc_matters.opt_action["verbose"])) { writeln("sqlite processing... "); }
- // SQLtableLoad!()(doc_abstraction, doc_matters);
+ SQLiteBuildTablesAndPopulate!()(doc_abstraction, doc_matters);
}
if (doc_matters.opt_action["postgresql"]) {
/+ mixin outputPostgreSQL; +/
@@ -86,6 +87,7 @@ template outputHubOp() {
import sdp.output,
sdp.output.epub3,
sdp.output.html,
+ sdp.output.sqlite,
sdp.output.xmls,
sdp.output.source_sisupod,
sdp.output.create_zip_file,
@@ -95,11 +97,11 @@ template outputHubOp() {
static auto rgx = Rgx();
if ((config["sqlite-create"])) {
if ((config["verbose"])) { writeln("sqlite create table... "); }
- // SQLtableCreate!()();
+ SQLiteTablesCreate!()();
}
if ((config["sqlite-drop"])) {
if ((config["verbose"])) { writeln("sqlite drop table... "); }
- // SQLtableDrop!()();
+ SQLiteTablesDrop!()();
}
}
}
diff --git a/src/sdp/output/paths_output.d b/src/sdp/output/paths_output.d
index 63360a4..5902b6b 100644
--- a/src/sdp/output/paths_output.d
+++ b/src/sdp/output/paths_output.d
@@ -289,3 +289,29 @@ template SiSUpathsEPUB() {
return _PathsStruct();
}
}
+template SiSUpathsSQLite() {
+ mixin SiSUrgxInit;
+ static auto rgx = Rgx();
+ auto SiSUpathsSQLite(Ps,Lng)(
+ Ps src_pth_info,
+ Lng lng,
+ ) {
+ auto out_pth = SiSUoutPaths!()(src_pth_info, lng);
+ string base_dir = "sqlite";
+ struct _PathsStruct {
+ string base_filename(string fn_src) {
+ return fn_src.baseName.stripExtension;
+ }
+ string base() {
+ return (out_pth.output_base).chainPath(base_dir).array;
+ }
+ string seg(string fn_src) {
+ return base.chainPath(base_filename(fn_src)).array;
+ }
+ string sqlite_file(string fn_src) {
+ return base.chainPath(base_filename(fn_src) ~ ".sqlite").array;
+ }
+ }
+ return _PathsStruct();
+ }
+}
diff --git a/src/sdp/output/sqlite.d b/src/sdp/output/sqlite.d
new file mode 100644
index 0000000..55d2573
--- /dev/null
+++ b/src/sdp/output/sqlite.d
@@ -0,0 +1,1093 @@
+module sdp.output.sqlite;
+import sdp.output;
+import
+ std.file,
+ std.conv : to;
+import sdp.output.sqlite;
+template SQLiteBuildTablesAndPopulate() {
+ void SQLiteBuildTablesAndPopulate(D,I)(
+ auto ref const D doc_abstraction,
+ auto ref I doc_matters,
+ ) {
+ import d2sqlite3;
+ import std.typecons : Nullable;
+ import sdp.output.sqlite;
+ mixin SiSUoutputRgxInit;
+ static auto rgx = Rgx();
+ template SQLiteFormatAndLoadObject() {
+ auto SQLiteFormatAndLoadObject(I)(
+ auto ref I doc_matters,
+ ) {
+ mixin SiSUoutputRgxInit;
+ struct sqlite_format_and_load_objects {
+ auto generic_munge_sanitize_text_for_search(
+ string _txt,
+ ) {
+ string _notes;
+ string _urls;
+ _txt = _txt.replaceAll(rgx.inline_fontface_clean, "");
+ if (_txt.matchFirst(rgx.inline_notes_al_gen)) {
+ foreach (m; _txt.matchAll(rgx.inline_notes_al_gen_text)) {
+ _notes ~= "\n" ~ m["text"];
+ }
+ _txt = _txt.replaceAll(rgx.inline_notes_al_gen, "");
+ }
+ if (_txt.matchFirst(rgx.inline_link)) {
+ foreach (m; _txt.matchAll(rgx.inline_link)) {
+ if (m["link"].match(rgx.url)) {
+ _urls ~= "\n" ~ m["link"];
+ }
+ }
+ _txt = _txt.replaceAll(rgx.inline_link_clean, "");
+ }
+ if (_notes.length > 0) {
+ _txt ~= _notes;
+ }
+ if (_urls.length > 0) {
+ _txt ~= _urls;
+ }
+ if (doc_matters.opt_action["debug"]) {
+ writeln(_txt, "\n");
+ }
+ debug(sql_text_clean) {
+ writeln(_txt);
+ }
+ return _txt;
+ }
+ auto munge_html(O)(
+ auto return ref const O obj,
+ ) {
+ string _html_special_characters(string _txt){
+ _txt = (_txt)
+ .replaceAll(rgx.xhtml_ampersand, "&")
+ .replaceAll(rgx.xhtml_quotation, """)
+ .replaceAll(rgx.xhtml_less_than, "<")
+ .replaceAll(rgx.xhtml_greater_than, ">")
+ .replaceAll(rgx.nbsp_char, " ")
+ .replaceAll(rgx.xhtml_line_break, "
");
+ return _txt;
+ }
+ string _html_font_face(string _txt){
+ _txt = (_txt)
+ .replaceAll(rgx.inline_emphasis, ("$1"))
+ .replaceAll(rgx.inline_bold, ("$1"))
+ .replaceAll(rgx.inline_underscore, ("$1"))
+ .replaceAll(rgx.inline_italics, ("$1"))
+ .replaceAll(rgx.inline_superscript, ("$1"))
+ .replaceAll(rgx.inline_subscript, ("$1"))
+ .replaceAll(rgx.inline_strike, ("$1"))
+ .replaceAll(rgx.inline_insert, ("$1"))
+ .replaceAll(rgx.inline_mono, ("$1"))
+ .replaceAll(rgx.inline_cite, ("$1"));
+ return _txt;
+ }
+ string _notes;
+ string _urls;
+ string _txt = _html_font_face(_html_special_characters(obj.text));
+ if (_txt.matchFirst(rgx.inline_notes_al_gen)) {
+ foreach (m; _txt.matchAll(rgx.inline_notes_al_gen_text)) {
+ _notes ~= "\n" ~ m["text"];
+ }
+ _txt = _txt.replaceAll(rgx.inline_notes_al_gen_ref, "$1 ");
+ }
+ if (_txt.matchFirst(rgx.inline_link)) {
+ foreach (m; _txt.matchAll(rgx.inline_link)) {
+ }
+ _txt = _txt.replaceAll(rgx.inline_link_clean, "");
+ }
+ if (_notes.length > 0) {
+ _txt ~= _notes;
+ }
+ if (doc_matters.opt_action["debug"]) {
+ writeln(_txt, "\n");
+ }
+ return _txt;
+ }
+ string html_special_characters(string _txt){
+ _txt = (_txt)
+ .replaceAll(rgx.xhtml_ampersand, "&")
+ .replaceAll(rgx.xhtml_quotation, """)
+ .replaceAll(rgx.xhtml_less_than, "<")
+ .replaceAll(rgx.xhtml_greater_than, ">")
+ .replaceAll(rgx.nbsp_char, " ")
+ .replaceAll(rgx.xhtml_line_break, "
");
+ return _txt;
+ }
+ string html_special_characters_code(string _txt){
+ _txt = (_txt)
+ .replaceAll(rgx.xhtml_ampersand, "&")
+ .replaceAll(rgx.xhtml_quotation, """)
+ .replaceAll(rgx.xhtml_less_than, "<")
+ .replaceAll(rgx.xhtml_greater_than, ">")
+ .replaceAll(rgx.nbsp_char, " ");
+ return _txt;
+ }
+ string html_font_face(string _txt){
+ _txt = (_txt)
+ .replaceAll(rgx.inline_emphasis, ("$1"))
+ .replaceAll(rgx.inline_bold, ("$1"))
+ .replaceAll(rgx.inline_underscore, ("$1"))
+ .replaceAll(rgx.inline_italics, ("$1"))
+ .replaceAll(rgx.inline_superscript, ("$1"))
+ .replaceAll(rgx.inline_subscript, ("$1"))
+ .replaceAll(rgx.inline_strike, ("$1"))
+ .replaceAll(rgx.inline_insert, ("$1"))
+ .replaceAll(rgx.inline_mono, ("$1"))
+ .replaceAll(rgx.inline_cite, ("$1"));
+ return _txt;
+ }
+ auto html_heading(O)(
+ auto return ref const O obj,
+ ) {
+ string _txt = munge_html(obj);
+ string o = format(q"¶
+ %s
+
¶",
+ obj.is_a,
+ _txt,
+ );
+ return o;
+ }
+ auto html_para(O)(
+ auto return ref const O obj,
+ ) {
+ string _txt = munge_html(obj);
+ _txt = (obj.bullet) ? ("● " ~ _txt) : _txt;
+ string o = format(q"¶
+ %s
+
¶",
+ obj.is_a,
+ obj.indent_hang,
+ obj.indent_base,
+ _txt
+ );
+ return o;
+ }
+ auto html_quote(O)(
+ auto return ref const O obj,
+ ) {
+ string _txt = munge_html(obj);
+ string o = format(q"¶
+ %s
+
¶",
+ obj.is_a,
+ _txt
+ );
+ return o;
+ }
+ auto html_group(O)(
+ auto return ref const O obj,
+ ) {
+ string _txt = munge_html(obj);
+ string o = format(q"¶
+ %s
+
¶",
+ obj.is_a,
+ _txt
+ );
+ return o;
+ }
+ auto html_block(O)(
+ auto return ref const O obj,
+ ) {
+ string _txt = munge_html(obj);
+ string o = format(q"¶
+ %s
¶",
+ obj.is_a,
+ _txt.stripRight
+ );
+ return o;
+ }
+ auto html_verse(O)(
+ auto return ref const O obj,
+ ) {
+ string _txt = munge_html(obj);
+ string o = format(q"¶%s
¶",
+ obj.is_a,
+ _txt
+ );
+ return o;
+ }
+ auto html_code(O)(
+ auto return ref const O obj,
+ ) {
+ string _txt = html_special_characters_code(obj.text);
+ string o = format(q"¶%s
¶",
+ obj.is_a,
+ _txt
+ );
+ return o;
+ }
+ auto html_table(O)(
+ auto return ref const O obj,
+ ) {
+ auto _tablarize(O)(
+ auto return ref const O obj,
+ string _txt,
+ ) {
+ string[] _table_rows = (_txt).split(rgx.table_delimiter_row);
+ string[] _table_cols;
+ string _table;
+ string _tablenote;
+ foreach(row_idx, row; _table_rows) {
+ _table_cols = row.split(rgx.table_delimiter_col);
+ _table ~= "";
+ foreach(col_idx, cell; _table_cols) {
+ if ((_table_cols.length == 1)
+ && (_table_rows.length <= row_idx+2)) { // check row_idx+2 (rather than == ++row_idx)
+ _tablenote ~= cell;
+ } else {
+ string _col_is = (row_idx == 0 && obj.table_heading) ? "th" : "td";
+ string _align = ("style=\"text-align:"
+ ~ ((obj.table_column_aligns[col_idx] == "l")
+ ? "left\"" : "right\""));
+ _table ~= "<" ~ _col_is ~ " width=\"" ~ obj.table_column_widths[col_idx].to!string ~ "%\" " ~ _align ~ ">";
+ _table ~= cell;
+ _table ~= "" ~ _col_is ~ ">";
+ }
+ }
+ _table ~= "
";
+ }
+ auto t = tuple(
+ _table,
+ _tablenote,
+ );
+ return t;
+ }
+ string _txt = munge_html(obj);
+ auto t = _tablarize(obj, _txt);
+ _txt = t[0];
+ string _note = t[1];
+ string o = format(q"¶
+
+ %s
+ ¶",
+ obj.is_a,
+ _txt,
+ _note
+ );
+ return o;
+ }
+ auto sqlite_load_string(O,Dm)(
+ auto return ref const O obj,
+ auto return ref Dm doc_matters,
+ ) {
+ string o;
+ return o;
+ }
+ auto postgresql_load_string(O,Dm)(
+ auto return ref const O obj,
+ auto return ref Dm doc_matters,
+ ) {
+ string o;
+ return o;
+ }
+ string sqlite_statement(O)(
+ auto return ref const O obj,
+ string _txt,
+ string _html,
+ ) {
+ void _sql_exe(O)(
+ string _sql,
+ ) {
+ writeln(_html);
+ writeln(_sql);
+ }
+ string _sql;
+ return _sql;
+ }
+ auto heading(O)(
+ auto return ref const O obj,
+ ) {
+ string[string] obj_txt = [
+ "text": generic_munge_sanitize_text_for_search(obj.text),
+ "html": html_heading(obj)
+ ];
+ if (doc_matters.opt_action["debug"]) {
+ debug(sql_txt) {
+ writeln(obj_txt["text"]);
+ }
+ debug(sql_html) {
+ writeln(obj_txt["html"]);
+ }
+ } else {
+ // load sql
+ }
+ return obj_txt;
+ }
+ auto para(O)(
+ auto return ref const O obj,
+ ) {
+ string[string] obj_txt = [
+ "text": generic_munge_sanitize_text_for_search(obj.text),
+ "html": html_para(obj)
+ ];
+ if (doc_matters.opt_action["debug"]) {
+ debug(sql_txt) {
+ writeln(obj_txt["text"]);
+ }
+ debug(sql_html) {
+ writeln(obj_txt["html"]);
+ }
+ } else {
+ // load sql
+ }
+ return obj_txt;
+ }
+ auto quote(O)(
+ auto return ref const O obj,
+ ) {
+ string[string] obj_txt = [
+ "text": generic_munge_sanitize_text_for_search(obj.text),
+ "html": html_quote(obj)
+ ];
+ if (doc_matters.opt_action["debug"]) {
+ debug(sql_txt) {
+ writeln(obj_txt["text"]);
+ }
+ debug(sql_html) {
+ writeln(obj_txt["html"]);
+ }
+ } else {
+ // load sql
+ }
+ return obj_txt;
+ }
+ auto group(O)(
+ auto return ref const O obj,
+ ) {
+ string[string] obj_txt = [
+ "text": generic_munge_sanitize_text_for_search(obj.text),
+ "html": html_group(obj)
+ ];
+ if (doc_matters.opt_action["debug"]) {
+ debug(sql_txt) {
+ writeln(obj_txt["text"]);
+ }
+ debug(sql_html) {
+ writeln(obj_txt["html"]);
+ }
+ } else {
+ // load sql
+ }
+ return obj_txt;
+ }
+ auto block(O)(
+ auto return ref const O obj,
+ ) {
+ string[string] obj_txt = [
+ "text": generic_munge_sanitize_text_for_search(obj.text),
+ "html": html_block(obj)
+ ];
+ if (doc_matters.opt_action["debug"]) {
+ debug(sql_txt) {
+ writeln(obj_txt["text"]);
+ }
+ debug(sql_html) {
+ writeln(obj_txt["html"]);
+ }
+ } else {
+ // load sql
+ }
+ return obj_txt;
+ }
+ auto verse(O)(
+ auto return ref const O obj,
+ ) {
+ string[string] obj_txt = [
+ "text": generic_munge_sanitize_text_for_search(obj.text),
+ "html": html_verse(obj)
+ ];
+ if (doc_matters.opt_action["debug"]) {
+ debug(sql_txt) {
+ writeln(obj_txt["text"]);
+ }
+ debug(sql_html) {
+ writeln(obj_txt["html"]);
+ }
+ } else {
+ // load sql
+ }
+ return obj_txt;
+ }
+ auto code(O)(
+ auto return ref const O obj,
+ ) {
+ string[string] obj_txt = [
+ "text": generic_munge_sanitize_text_for_search(obj.text),
+ "html": html_code(obj)
+ ];
+ if (doc_matters.opt_action["debug"]) {
+ debug(sql_txt) {
+ writeln(obj_txt["text"]);
+ }
+ debug(sql_html) {
+ writeln(obj_txt["html"]);
+ }
+ } else {
+ // load sql
+ }
+ return obj_txt;
+ }
+ auto table(O)(
+ auto return ref const O obj,
+ ) {
+ string[string] obj_txt = [
+ "text": generic_munge_sanitize_text_for_search(obj.text),
+ "html": html_table(obj)
+ ];
+ if (doc_matters.opt_action["debug"]) {
+ debug(sql_txt) {
+ writeln(obj_txt["text"]);
+ }
+ debug(sql_html) {
+ writeln(obj_txt["html"]);
+ }
+ } else {
+ // load sql
+ }
+ return obj_txt;
+ }
+ }
+ return sqlite_format_and_load_objects();
+ }
+ }
+ template SQLiteInstruct() {
+ Statement SQLiteInstruct(I)(
+ auto ref I doc_matters,
+ ) {
+ auto pth_sqlite = SiSUpathsSQLite!()(doc_matters.src_path_info, doc_matters.language);
+ auto db = Database(pth_sqlite.sqlite_file(doc_matters.source_filename));
+ // auto db = Database(":memory:"); // open database in memory
+ db.run("
+ DROP TABLE IF EXISTS metadata_and_text;
+ DROP TABLE IF EXISTS doc_objects;
+ DROP TABLE IF EXISTS urls;
+ CREATE TABLE metadata_and_text (
+ tid BIGINT PRIMARY KEY,
+ title VARCHAR(800) NOT NULL,
+ title_main VARCHAR(400) NULL, /*NOT*/
+ title_sub VARCHAR(400) NULL,
+ title_short VARCHAR(400) NULL,
+ title_edition VARCHAR(10) NULL,
+ title_note VARCHAR(2500) NULL,
+ title_language VARCHAR(100) NULL,
+ title_language_char VARCHAR(6) NULL,
+ creator_author VARCHAR(600) NULL,
+ creator_author_hon VARCHAR(100) NULL,
+ creator_author_nationality VARCHAR(100) NULL,
+ creator_editor VARCHAR(600) NULL,
+ creator_contributor VARCHAR(600) NULL,
+ creator_illustrator VARCHAR(600) NULL,
+ creator_photographer VARCHAR(600) NULL,
+ creator_translator VARCHAR(600) NULL,
+ creator_prepared_by VARCHAR(600) NULL,
+ creator_digitized_by VARCHAR(600) NULL,
+ creator_audio VARCHAR(600) NULL,
+ creator_video VARCHAR(600) NULL,
+ language_document VARCHAR(100) NULL,
+ language_document_char VARCHAR(6) NULL, /*NOT*/
+ language_original VARCHAR(100) NULL,
+ language_original_char VARCHAR(6) NULL,
+ date_added_to_site VARCHAR(10) NULL,
+ date_available VARCHAR(10) NULL,
+ date_created VARCHAR(10) NULL,
+ date_issued VARCHAR(10) NULL,
+ date_modified VARCHAR(10) NULL,
+ date_published VARCHAR(10) NULL,
+ date_valid VARCHAR(10) NULL,
+ date_translated VARCHAR(10) NULL,
+ date_original_publication VARCHAR(10) NULL,
+ date_generated VARCHAR(10) NULL,
+ publisher VARCHAR(600) NULL,
+ original_publisher VARCHAR(600) NULL,
+ original_language VARCHAR(100) NULL,
+ original_language_char VARCHAR(6) NULL,
+ original_source VARCHAR(600) NULL,
+ original_institution VARCHAR(600) NULL,
+ original_nationality VARCHAR(100) NULL,
+ rights VARCHAR(2500) NULL,
+ rights_copyright_text VARCHAR(2500) NULL,
+ rights_copyright_translation VARCHAR(2500) NULL,
+ rights_copyright_illustrations VARCHAR(2500) NULL,
+ rights_copyright_photographs VARCHAR(2500) NULL,
+ rights_copyright_preparation VARCHAR(2500) NULL,
+ rights_copyright_digitization VARCHAR(2500) NULL,
+ rights_copyright_audio VARCHAR(2500) NULL,
+ rights_copyright_video VARCHAR(2500) NULL,
+ rights_license VARCHAR(2500) NULL,
+ identifier_oclc VARCHAR(30) NULL,
+ identifier_isbn VARCHAR(16) NULL,
+ classify_topic_register VARCHAR(2500) NULL,
+ classify_subject VARCHAR(600) NULL,
+ classify_loc VARCHAR(30) NULL,
+ classify_dewey VARCHAR(30) NULL,
+ classify_keywords VARCHAR(600) NULL,
+ notes_abstract TEXT NULL,
+ notes_description TEXT NULL,
+ notes_comment TEXT NULL,
+ notes_coverage VARCHAR(200) NULL,
+ notes_relation VARCHAR(200) NULL,
+ notes_history VARCHAR(600) NULL,
+ notes_type VARCHAR(600) NULL,
+ notes_format VARCHAR(600) NULL,
+ notes_prefix TEXT NULL,
+ notes_prefix_a TEXT NULL,
+ notes_prefix_b TEXT NULL,
+ notes_suffix TEXT NULL,
+ src_filename VARCHAR(256) NULL,
+ src_fingerprint VARCHAR(256) NULL,
+ src_filesize VARCHAR(10) NULL,
+ src_wordcount VARCHAR(10) NULL,
+ src_text TEXT NULL,
+ fulltext TEXT NULL,
+ links TEXT NULL
+ );
+
+ CREATE TABLE doc_objects (
+ lid BIGINT PRIMARY KEY,
+ metadata_tid BIGINT REFERENCES metadata_and_text,
+ ocn SMALLINT,
+ ocnd VARCHAR(6),
+ ocns VARCHAR(6),
+ clean TEXT NULL,
+ body TEXT NULL,
+ book_idx TEXT NULL,
+ seg VARCHAR(256) NULL,
+ lev_an VARCHAR(1),
+ lev SMALLINT NULL,
+ lev0 SMALLINT,
+ lev1 SMALLINT,
+ lev2 SMALLINT,
+ lev3 SMALLINT,
+ lev4 SMALLINT,
+ lev5 SMALLINT,
+ lev6 SMALLINT,
+ lev7 SMALLINT,
+ en_a SMALLINT NULL,
+ en_z SMALLINT NULL,
+ en_a_asterisk SMALLINT NULL,
+ en_z_asterisk SMALLINT NULL,
+ en_a_plus SMALLINT NULL,
+ en_z_plus SMALLINT NULL,
+ t_of VARCHAR(16),
+ t_is VARCHAR(16),
+ node VARCHAR(16) NULL,
+ parent VARCHAR(16) NULL,
+ digest_clean CHAR(256),
+ digest_all CHAR(256),
+ types CHAR(1) NULL
+ )
+ ");
+ Statement insert_metadata = db.prepare("
+ INSERT INTO metadata_and_text (
+ tid,
+ title,
+ title_main,
+ title_sub,
+ title_short,
+ title_edition,
+ title_note,
+ title_language,
+ title_language_char,
+ creator_author,
+ creator_author_hon,
+ creator_author_nationality,
+ creator_editor,
+ creator_contributor,
+ creator_illustrator,
+ creator_photographer,
+ creator_translator,
+ creator_prepared_by,
+ creator_digitized_by,
+ creator_audio,
+ creator_video,
+ language_document,
+ language_document_char,
+ language_original,
+ language_original_char,
+ date_added_to_site,
+ date_available,
+ date_created,
+ date_issued,
+ date_modified,
+ date_published,
+ date_valid,
+ date_translated,
+ date_original_publication,
+ date_generated,
+ publisher,
+ original_publisher,
+ original_language,
+ original_language_char,
+ original_source,
+ original_institution,
+ original_nationality,
+ rights,
+ rights_copyright_text,
+ rights_copyright_translation,
+ rights_copyright_illustrations,
+ rights_copyright_photographs,
+ rights_copyright_preparation,
+ rights_copyright_digitization,
+ rights_copyright_audio,
+ rights_copyright_video,
+ rights_license,
+ identifier_oclc,
+ identifier_isbn,
+ classify_topic_register,
+ classify_subject,
+ classify_loc,
+ classify_dewey,
+ classify_keywords,
+ notes_abstract,
+ notes_description,
+ notes_comment,
+ notes_coverage,
+ notes_relation,
+ notes_history,
+ notes_type,
+ notes_format,
+ notes_prefix,
+ notes_prefix_a,
+ notes_prefix_b,
+ notes_suffix,
+ src_filename,
+ src_fingerprint,
+ src_filesize,
+ src_wordcount,
+ src_text,
+ fulltext,
+ links
+ )
+ VALUES (
+ :tid,
+ :title,
+ :title_main,
+ :title_sub,
+ :title_short,
+ :title_edition,
+ :title_note,
+ :title_language,
+ :title_language_char,
+ :creator_author,
+ :creator_author_hon,
+ :creator_author_nationality,
+ :creator_editor,
+ :creator_contributor,
+ :creator_illustrator,
+ :creator_photographer,
+ :creator_translator,
+ :creator_prepared_by,
+ :creator_digitized_by,
+ :creator_audio,
+ :creator_video,
+ :language_document,
+ :language_document_char,
+ :language_original,
+ :language_original_char,
+ :date_added_to_site,
+ :date_available,
+ :date_created,
+ :date_issued,
+ :date_modified,
+ :date_published,
+ :date_valid,
+ :date_translated,
+ :date_original_publication,
+ :date_generated,
+ :publisher,
+ :original_publisher,
+ :original_language,
+ :original_language_char,
+ :original_source,
+ :original_institution,
+ :original_nationality,
+ :rights,
+ :rights_copyright_text,
+ :rights_copyright_translation,
+ :rights_copyright_illustrations,
+ :rights_copyright_photographs,
+ :rights_copyright_preparation,
+ :rights_copyright_digitization,
+ :rights_copyright_audio,
+ :rights_copyright_video,
+ :rights_license,
+ :identifier_oclc,
+ :identifier_isbn,
+ :classify_topic_register,
+ :classify_subject,
+ :classify_loc,
+ :classify_dewey,
+ :classify_keywords,
+ :notes_abstract,
+ :notes_description,
+ :notes_comment,
+ :notes_coverage,
+ :notes_relation,
+ :notes_history,
+ :notes_type,
+ :notes_format,
+ :notes_prefix,
+ :notes_prefix_a,
+ :notes_prefix_b,
+ :notes_suffix,
+ :src_filename,
+ :src_fingerprint,
+ :src_filesize,
+ :src_wordcount,
+ :src_text,
+ :fulltext,
+ :links
+ )
+ ");
+ insert_metadata.bind(":title", doc_matters.dochead_meta["title"]["full"]);
+ // insert_metadata.bind(":title_main", doc_matters.dochead_meta["title"]["full"]);
+ // insert_metadata.bind(":title_sub", doc_matters.dochead_meta["title"][""]);
+ // insert_metadata.bind(":title_short", doc_matters.dochead_meta["title"][""]);
+ // insert_metadata.bind(":title_edition", doc_matters.dochead_meta["title"][""]);
+ // insert_metadata.bind(":title_note", doc_matters.dochead_meta["title"][""]);
+ // insert_metadata.bind(":title_language", doc_matters.dochead_meta["title"][""]);
+ // insert_metadata.bind(":title_language_char", doc_matters.dochead_meta["title"][""]);
+ insert_metadata.bind(":creator_author", doc_matters.dochead_meta["creator"]["author"]);
+ // insert_metadata.bind(":creator_author_hon", doc_matters.dochead_meta["creator"][""]);
+ // insert_metadata.bind(":creator_author_nationality", doc_matters.dochead_meta["creator"][""]);
+ // insert_metadata.bind(":creator_editor", doc_matters.dochead_meta["creator"][""]);
+ // insert_metadata.bind(":creator_contributor", doc_matters.dochead_meta["creator"][""]);
+ // insert_metadata.bind(":creator_illustrator", doc_matters.dochead_meta["creator"][""]);
+ // insert_metadata.bind(":creator_photographer", doc_matters.dochead_meta["creator"][""]);
+ // insert_metadata.bind(":creator_translator", doc_matters.dochead_meta["creator"][""]);
+ // insert_metadata.bind(":creator_prepared_by", doc_matters.dochead_meta["creator"][""]);
+ // insert_metadata.bind(":creator_digitized_by", doc_matters.dochead_meta["creator"][""]);
+ // insert_metadata.bind(":creator_audio", doc_matters.dochead_meta["creator"][""]);
+ // insert_metadata.bind(":creator_video", doc_matters.dochead_meta["creator"][""]);
+ // insert_metadata.bind(":language_document", doc_matters.dochead_meta["language"][""]);
+ // insert_metadata.bind(":language_document_char", doc_matters.dochead_meta["language"][""]);
+ // insert_metadata.bind(":language_original", doc_matters.dochead_meta["language"][""]);
+ // insert_metadata.bind(":language_original_char", doc_matters.dochead_meta["language"][""]);
+ // insert_metadata.bind(":date_added_to_site", doc_matters.dochead_meta["date"][""]);
+ // insert_metadata.bind(":date_available", doc_matters.dochead_meta["date"][""]);
+ // insert_metadata.bind(":date_created", doc_matters.dochead_meta["date"][""]);
+ // insert_metadata.bind(":date_issued", doc_matters.dochead_meta["date"][""]);
+ // insert_metadata.bind(":date_modified", doc_matters.dochead_meta["date"][""]);
+ // insert_metadata.bind(":date_published", doc_matters.dochead_meta["date"][""]);
+ // insert_metadata.bind(":date_valid", doc_matters.dochead_meta["date"][""]);
+ // insert_metadata.bind(":date_translated", doc_matters.dochead_meta["date"][""]);
+ // insert_metadata.bind(":date_original_publication", doc_matters.dochead_meta["date"][""]);
+ // insert_metadata.bind(":date_generated", doc_matters.dochead_meta["date"][""]);
+ // insert_metadata.bind(":publisher", doc_matters.dochead_meta["publisher"][""]);
+ // insert_metadata.bind(":original_publisher", doc_matters.dochead_meta["original"][""]);
+ // insert_metadata.bind(":original_language", doc_matters.dochead_meta["original"][""]);
+ // insert_metadata.bind(":original_language_char", doc_matters.dochead_meta["original"][""]);
+ // insert_metadata.bind(":original_source", doc_matters.dochead_meta["original"][""]);
+ // insert_metadata.bind(":original_institution", doc_matters.dochead_meta["original"][""]);
+ // insert_metadata.bind(":original_nationality", doc_matters.dochead_meta["original"][""]);
+ // insert_metadata.bind(":rights", doc_matters.dochead_meta["rights"][""]);
+ // insert_metadata.bind(":rights_copyright_text", doc_matters.dochead_meta["rights"][""]);
+ // insert_metadata.bind(":rights_copyright_translation", doc_matters.dochead_meta["rights"][""]);
+ // insert_metadata.bind(":rights_copyright_illustrations", doc_matters.dochead_meta["rights"][""]);
+ // insert_metadata.bind(":rights_copyright_photographs", doc_matters.dochead_meta["rights"][""]);
+ // insert_metadata.bind(":rights_copyright_preparation", doc_matters.dochead_meta["rights"][""]);
+ // insert_metadata.bind(":rights_copyright_digitization", doc_matters.dochead_meta["rights"][""]);
+ // insert_metadata.bind(":rights_copyright_audio", doc_matters.dochead_meta["rights"][""]);
+ // insert_metadata.bind(":rights_copyright_video", doc_matters.dochead_meta["rights"][""]);
+ // insert_metadata.bind(":rights_license", doc_matters.dochead_meta["rights"][""]);
+ // insert_metadata.bind(":identifier_oclc", doc_matters.dochead_meta["identifier"][""]);
+ // insert_metadata.bind(":identifier_isbn", doc_matters.dochead_meta["identifier"][""]);
+ // insert_metadata.bind(":classify_topic_register", doc_matters.dochead_meta["classify"][""]);
+ // insert_metadata.bind(":classify_subject", doc_matters.dochead_meta["classify"][""]);
+ // insert_metadata.bind(":classify_loc", doc_matters.dochead_meta["classify"][""]);
+ // insert_metadata.bind(":classify_loc", doc_matters.dochead_meta["classify"][""]);
+ // insert_metadata.bind(":notes_abstract", doc_matters.dochead_meta["notes"][""]);
+ // insert_metadata.bind(":notes_description", doc_matters.dochead_meta["notes"][""]);
+ // insert_metadata.bind(":notes_comment", doc_matters.dochead_meta["notes"][""]);
+ // insert_metadata.bind(":notes_coverage", doc_matters.dochead_meta["notes"][""]);
+ // insert_metadata.bind(":notes_relation", doc_matters.dochead_meta["notes"][""]);
+ // insert_metadata.bind(":notes_history", doc_matters.dochead_meta["notes"][""]);
+ // insert_metadata.bind(":notes_type", doc_matters.dochead_meta["notes"][""]);
+ // insert_metadata.bind(":notes_format", doc_matters.dochead_meta["notes"][""]);
+ // insert_metadata.bind(":notes_prefix", doc_matters.dochead_meta["notes"][""]);
+ // insert_metadata.bind(":notes_prefix_a", doc_matters.dochead_meta["notes"][""]);
+ // insert_metadata.bind(":notes_prefix_b", doc_matters.dochead_meta["notes"][""]);
+ // insert_metadata.bind(":notes_suffix", doc_matters.dochead_meta["notes"][""]);
+ // insert_metadata.bind(":src_filename", doc_matters.dochead_meta["src"][""]);
+ // insert_metadata.bind(":src_fingerprint", doc_matters.dochead_meta["src"][""]);
+ // insert_metadata.bind(":src_filesize", doc_matters.dochead_meta["src"][""]);
+ // insert_metadata.bind(":src_wordcount", doc_matters.dochead_meta["src"][""]);
+ // insert_metadata.bind(":src_text", doc_matters.dochead_meta["src"][""]);
+ // insert_metadata.bind(":fulltext", doc_matters.dochead_meta["fulltext"][""]);
+ // insert_metadata.bind(":links", doc_matters.dochead_meta["links"][""]);
+ insert_metadata.execute(); insert_metadata.reset();
+ /+ watch +/
+ writeln("sql statement executed");
+ assert(db.totalChanges == 1);
+
+ Statement insert_doc_objects = db.prepare("
+ INSERT INTO doc_objects (
+ lid,
+ metadata_tid,
+ ocn,
+ ocnd,
+ ocns,
+ clean,
+ body,
+ book_idx,
+ seg,
+ lev_an,
+ lev,
+ lev0,
+ lev1,
+ lev2,
+ lev3,
+ lev4,
+ lev5,
+ lev6,
+ lev7,
+ en_a,
+ en_z,
+ en_a_asterisk,
+ en_z_asterisk,
+ en_a_plus,
+ en_z_plus,
+ t_of,
+ t_is,
+ node,
+ parent,
+ digest_clean,
+ digest_all,
+ types
+ )
+ VALUES (
+ :lid,
+ :metadata_tid,
+ :ocn,
+ :ocnd,
+ :ocns,
+ :clean,
+ :body,
+ :book_idx,
+ :seg,
+ :lev_an,
+ :lev,
+ :lev0,
+ :lev1,
+ :lev2,
+ :lev3,
+ :lev4,
+ :lev5,
+ :lev6,
+ :lev7,
+ :en_a,
+ :en_z,
+ :en_a_asterisk,
+ :en_z_asterisk,
+ :en_a_plus,
+ :en_z_plus,
+ :t_of,
+ :t_is,
+ :node,
+ :parent,
+ :digest_clean,
+ :digest_all,
+ :types
+ )
+ ");
+ return insert_doc_objects;
+ }
+ }
+ template SQLiteObjectsLoop() {
+ void SQLiteObjectsLoop(P)(
+ auto ref P doc_parts,
+ ) {
+ Statement insert_doc_objects = SQLiteInstruct!()(doc_matters);
+ auto format_and_sqlite_load = SQLiteFormatAndLoadObject!()(doc_matters);
+ string[string] obj_txt;
+ string doc_text;
+ foreach (part; doc_parts) {
+ foreach (obj; doc_abstraction[part]) {
+ switch (obj.of_part) {
+ case "frontmatter": assert(part == "head");
+ switch (obj.is_of) {
+ case "para":
+ switch (obj.is_a) {
+ case "heading":
+ obj_txt = format_and_sqlite_load.heading(obj);
+ break;
+ default:
+ if ((doc_matters.opt_action["debug"])) {
+ writeln(__FILE__, ":", __LINE__, ": ", obj.is_a);
+ }
+ break;
+ }
+ break;
+ default:
+ if ((doc_matters.opt_action["debug"])) {
+ writeln(__FILE__, ":", __LINE__, ": ", obj.is_of);
+ }
+ break;
+ }
+ break;
+ case "body": // assert(part == "body"); // TODO broken
+ switch (obj.is_of) {
+ case "para":
+ switch (obj.is_a) {
+ case "heading":
+ debug (asserts) { // TODO consider and fix or remove
+ if (part != "body") {
+ writeln(__LINE__, ": ", obj.text);
+ }
+ }
+ obj_txt = format_and_sqlite_load.heading(obj);
+ break;
+ case "para":
+ obj_txt = format_and_sqlite_load.para(obj);
+ break;
+ default:
+ if ((doc_matters.opt_action["debug"])) {
+ writeln(__FILE__, ":", __LINE__, ": ", obj.is_a);
+ }
+ break;
+ }
+ break;
+ case "block":
+ switch (obj.is_a) {
+ case "quote":
+ obj_txt = format_and_sqlite_load.quote(obj);
+ break;
+ case "group":
+ obj_txt = format_and_sqlite_load.group(obj);
+ break;
+ case "block":
+ obj_txt = format_and_sqlite_load.block(obj);
+ break;
+ case "poem": // double check on keeping both poem & verse
+ break;
+ case "verse":
+ obj_txt = format_and_sqlite_load.verse(obj);
+ break;
+ case "code":
+ obj_txt = format_and_sqlite_load.code(obj);
+ break;
+ case "table":
+ obj_txt = format_and_sqlite_load.table(obj);
+ break;
+ default:
+ if ((doc_matters.opt_action["debug"])) {
+ writeln(__FILE__, ":", __LINE__, ": ", obj.is_a);
+ }
+ break;
+ }
+ break;
+ default:
+ if ((doc_matters.opt_action["debug"])) {
+ writeln(__FILE__, ":", __LINE__, ": ", obj.is_of);
+ }
+ break;
+ }
+ break;
+ case "backmatter":
+ assert(part == "endnotes" || "glossary" || "bibliography" || "bookindex_scroll" || "blurb" || "tail");
+ switch (obj.is_of) {
+ case "para":
+ switch (obj.is_a) {
+ case "heading":
+ obj_txt = format_and_sqlite_load.heading(obj);
+ break;
+ case "endnote": assert(part == "endnotes");
+ obj_txt = format_and_sqlite_load.para(obj);
+ break;
+ case "glossary": assert(part == "glossary");
+ obj_txt = format_and_sqlite_load.para(obj);
+ break;
+ case "bibliography": assert(part == "bibliography");
+ obj_txt = format_and_sqlite_load.para(obj);
+ break;
+ case "bookindex": assert(part == "bookindex_scroll");
+ obj_txt = format_and_sqlite_load.para(obj);
+ break;
+ case "blurb": assert(part == "blurb");
+ obj_txt = format_and_sqlite_load.para(obj);
+ break;
+ default:
+ if ((doc_matters.opt_action["debug"])) {
+ writeln(__FILE__, ":", __LINE__, ": ", obj.is_a);
+ }
+ break;
+ }
+ break;
+ default:
+ if ((doc_matters.opt_action["debug"])) {
+ writeln(__FILE__, ":", __LINE__, ": ", obj.is_of);
+ }
+ break;
+ }
+ break;
+ case "comment":
+ break;
+ default:
+ if ((doc_matters.opt_action["debug"])) {
+ writeln(__FILE__, ":", __LINE__, ": ", obj.of_part); // check where empty value could come from
+ writeln(__FILE__, ":", __LINE__, ": ", obj.is_a);
+ writeln(__FILE__, ":", __LINE__, ": ", obj.text); // check where empty value could come from
+ }
+ break;
+ }
+ if (obj.is_a == "heading") {
+ writeln(
+ "markup: ", obj.heading_lev_markup,
+ "> ", obj.dom_markedup,
+ "; collapsed: ", obj.heading_lev_collapsed,
+ "> ", obj.dom_collapsed,
+ "; ocn: ", obj.ocn,
+ " node: ", obj.node,
+ "; parent: ", obj.parent_lev_markup,
+ "; ocn: ", obj.parent_ocn,
+ "; ",
+ );
+ }
+ insert_doc_objects.bind(":t_of", obj.is_of);
+ insert_doc_objects.bind(":t_is", obj.is_a);
+ insert_doc_objects.bind(":ocn", obj.ocn);
+ insert_doc_objects.bind(":clean", obj_txt["text"]); // consider whether book index info should be made available within clear text for search
+ insert_doc_objects.bind(":body", obj_txt["html"]);
+ // insert_doc_objects.bind(":book_idx", ""); // not needed, but, consider whether should be made available within object for clear text search
+ insert_doc_objects.bind(":lev", obj.heading_lev_markup);
+ // // insert_doc_objects.bind(":dom_markedup", ""); // should make lev sequence below obsolete
+ // // insert_doc_objects.bind(":dom_collapsed", ""); // should add info
+ // insert_doc_objects.bind(":lev0", "");
+ // insert_doc_objects.bind(":lev1", "");
+ // insert_doc_objects.bind(":lev2", "");
+ // insert_doc_objects.bind(":lev3", "");
+ // insert_doc_objects.bind(":lev4", "");
+ // insert_doc_objects.bind(":lev5", "");
+ // insert_doc_objects.bind(":lev6", "");
+ // insert_doc_objects.bind(":lev7", "");
+ // insert_doc_objects.bind(":node", "");
+ // insert_doc_objects.bind(":type", "");
+ // insert_doc_objects.bind(":parent_ocn", "");
+ // insert_doc_objects.bind(":ancestors", "");
+ // insert_doc_objects.bind(":heading_lev_markup", "");
+ // insert_doc_objects.bind(":heading_lev_collapsed", "");
+ // insert_doc_objects.bind(":parent_lev_markup", "");
+ // insert_doc_objects.bind(":heading_ancestors", "");
+ // insert_doc_objects.bind(":node", "");
+ insert_doc_objects.execute(); insert_doc_objects.reset();
+ }
+ }
+ }
+ }
+ SQLiteObjectsLoop!()(doc_matters.keys_seq.sql);
+ }
+}
+template SQLiteTablesCreate() {
+ void SQLiteTablesCreate()() {
+ writeln("table create");
+ }
+}
+template SQLiteTablesDrop() {
+ void SQLiteTablesDrop()() {
+ writeln("table drop");
+ }
+}
--
cgit v1.2.3