-*- mode: org -*- #+TITLE: spine (doc_reform) output sqlite #+DESCRIPTION: documents - structuring, publishing in multiple formats & search #+FILETAGS: :spine:output:db:sql:sqlite: #+AUTHOR: Ralph Amissah #+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]] #+COPYRIGHT: Copyright (C) 2015 - 2020 Ralph Amissah #+LANGUAGE: en #+STARTUP: content hideblocks hidestars noindent entitiespretty #+PROPERTY: header-args :exports code #+PROPERTY: header-args+ :noweb yes #+PROPERTY: header-args+ :eval no #+PROPERTY: header-args+ :results no #+PROPERTY: header-args+ :cache no #+PROPERTY: header-args+ :padline no - [[./spine.org][spine]] [[./][org/]] - [[./output_hub.org][output_hub]] sudo chown ralph:ralph /var/www - create db ~dr/bin/spine-ldc -v \ --sqlite-db-create --sqlite-db-filename="spine.search.db" \ --output=/var/www/html \ ~grotto/repo/git.repo/code/project-spine/doc-reform-markup/markup_samples/markup/pod/* - update db ~dr/bin/spine-ldc -v \ --sqlite-update --sqlite-db-filename="spine.search.db" \ --output=/var/www/html \ ~grotto/repo/git.repo/code/project-spine/doc-reform-markup/markup_samples/markup/pod/* - produce html (and some other) output files ~dr/bin/spine-ldc -v --html --epub --latex --odt \ --output=/var/www \ ~grotto/repo/git.repo/code/project-spine/doc-reform-markup/markup_samples/markup/pod/* * sql ** _module, templates_ :module: *** template #+BEGIN_SRC d :tangle "../src/doc_reform/io_out/sqlite.d" module doc_reform.io_out.sqlite; <<sqlite_imports>> <<sqlite_common>> <<sqlite_collection>> <<sqlite_discrete>> <<sqlite_run>> <<sqlite_munge>> <<sqlite_create>> <<sqlite_delete>> <<sqlite_insert>> <<sqlite_insert_doc_objects_loop>> <<sqlite_tables_create>> <<sqlite_tables_drop>> #+END_SRC *** hub **** common #+NAME: sqlite_common #+BEGIN_SRC d mixin spineRgxOut; mixin InternalMarkup; static auto rgx = RgxO(); static auto mkup = InlineMarkup(); long _metadata_tid_lastrowid; #+END_SRC **** collection #+NAME: sqlite_collection #+BEGIN_SRC d template SQLiteHubBuildTablesAndPopulate() { void SQLiteHubBuildTablesAndPopulate(D,M)( const D doc_abstraction, M doc_matters, ) { auto pth_sqlite = spinePathsSQLite!()(doc_matters.sqlite.filename, doc_matters.output_path); pth_sqlite.base.mkdirRecurse; auto db = Database(pth_sqlite.sqlite_file); template SQLiteDbStatementComposite() { void SQLiteDbStatementComposite(Db,D,M)( Db db, const D doc_abstraction, M doc_matters, ) { <<sqlite_db_statement_composite_collection>> if (!(doc_matters.opt.action.quiet)) { writeln(" ", pth_sqlite.sqlite_file); } } } SQLiteDbStatementComposite!()(db, doc_abstraction, doc_matters); } } #+END_SRC **** discrete #+NAME: sqlite_discrete #+BEGIN_SRC d template SQLiteHubDiscreteBuildTablesAndPopulate() { void SQLiteHubDiscreteBuildTablesAndPopulate(D,M)( const D doc_abstraction, M doc_matters, ) { auto url_html = spineUrlsHTML!()(doc_matters.conf_make_meta.conf.w_srv_data_root_url_html, doc_matters.src.language); auto pth_sqlite = spinePathsSQLiteDiscrete!()(doc_matters.output_path, doc_matters.src.language); pth_sqlite.base.mkdirRecurse; auto db = Database(pth_sqlite.sqlite_file(doc_matters.src.filename)); template SQLiteDiscreteDbStatementComposite() { void SQLiteDiscreteDbStatementComposite(Db,D,M)( Db db, const D doc_abstraction, M doc_matters, ) { <<sqlite_db_statement_composite_discrete>> if (!(doc_matters.opt.action.quiet)) { writeln(" ", pth_sqlite.sqlite_file(doc_matters.src.filename)); } } } SQLiteDiscreteDbStatementComposite!()(db, doc_abstraction, doc_matters); } } #+END_SRC *** db run #+NAME: sqlite_run #+BEGIN_SRC d template SQLiteDbRun() { void SQLiteDbRun(Db,St,O)( Db db, St db_statement, O opt_action, string note, ) { debug(sql_statement) { writeln(db_statement); } try { db.run( "\nBEGIN;\n" ~ db_statement ~ "\nCOMMIT;\n" ); } catch (ErrnoException ex) { writeln("ERROR SQLite : ", ex); } catch (Exception ex) { writeln("ERROR SQLite : ", ex); } { /+ debug +/ if (opt_action.debug_do && opt_action.verbose) { writeln(note); if (opt_action.very_verbose) { writeln(db_statement); } } } } } #+END_SRC *** munge #+NAME: sqlite_munge #+BEGIN_SRC d template SQLinsertDelimiter() { string SQLinsertDelimiter(string _txt) { _txt = _txt .replaceAll(rgx.quotation_mark_sql_insert_delimiter, "$0$0"); return _txt; } } template SQLiteFormatAndLoadObject() { auto SQLiteFormatAndLoadObject(M)( M doc_matters, ) { mixin spineRgxOut; struct sqlite_format_and_load_objects { <<sanitize_text_for_search>> <<sanitize_and_munge_inline_html>> <<html_objects>> <<sqlite_load_object>> <<hub_format_and_sqlite_load_objects>> } return sqlite_format_and_load_objects(); } } #+END_SRC *** sqlite instructions **** create #+NAME: sqlite_create #+BEGIN_SRC d template SQLiteTablesReCreate() { string SQLiteTablesReCreate()() { string _sql_instruct; _sql_instruct = format(q"┃ <<sqlite_statement_drop_existing_index>> <<sqlite_statement_drop_existing_tables>> <<sqlite_statement_create_table_metadata_and_src_txt>> <<sqlite_statement_create_table_topic_register>> <<sqlite_statement_create_table_objects>> <<sqlite_statement_create_index>> ┃",); return _sql_instruct; } } #+END_SRC **** delete #+NAME: sqlite_delete #+BEGIN_SRC d template SQLiteDeleteDocument() { string SQLiteDeleteDocument(M)( M doc_matters, ) { <<sqlite_formatted_delete>> return _delete_uid; } } #+END_SRC **** insert #+NAME: sqlite_insert #+BEGIN_SRC d template SQLiteInsertMetadata() { string SQLiteInsertMetadata(M)( M doc_matters, ) { <<sqlite_formatted_insertions_doc_matters_metadata>> <<sqlite_formatted_insertions_topic_register>> return _insert_metadata; } } #+END_SRC **** insert doc objects loop #+NAME: sqlite_insert_doc_objects_loop #+BEGIN_SRC d template SQLiteInsertDocObjectsLoop() { string SQLiteInsertDocObjectsLoop(D,M)( const D doc_abstraction, M doc_matters, ) { string _uid = SQLinsertDelimiter!()(doc_matters.src.doc_uid); auto url_html = spineUrlsHTML!()(doc_matters.conf_make_meta.conf.w_srv_data_root_url_html, doc_matters.src.language); string insertDocObjectsRow(O)(O obj) { <<sqlite_formatted_insertions_doc_objects>> return _insert_doc_objects_row; } <<sqlite_objects_loop>> } } #+END_SRC **** tables create #+NAME: sqlite_tables_create #+BEGIN_SRC d template SQLiteTablesCreate() { void SQLiteTablesCreate(E,O,C)(E env, O opt_action, C config) { import d2sqlite3; template SQLiteTablesReCreate() { string SQLiteTablesReCreate()() { string _sql_instruct; _sql_instruct = format(q"┃ <<sqlite_statement_drop_existing_index>> <<sqlite_statement_drop_existing_tables>> <<sqlite_statement_create_table_metadata_and_src_txt>> <<sqlite_statement_create_table_topic_register>> <<sqlite_statement_create_table_objects>> <<sqlite_statement_create_index>> ┃",); return _sql_instruct; } } if (opt_action.sqlite_db_create) { string _db_statement; string db_filename = (opt_action.sqlite_filename.length > 0) ? opt_action.sqlite_filename : (config.conf.w_srv_db_sqlite.length > 0) ? config.conf.w_srv_db_sqlite : ""; string output_path = (opt_action.output_dir_set.length > 0) ? opt_action.output_dir_set : (config.conf.output_path.length > 0) ? config.conf.output_path : ""; if (db_filename.length > 0 && output_path.length > 0) { if ((opt_action.verbose)) { writeln("db name & path: ", config.conf.output_path, "/sqlite/", db_filename); } auto pth_sqlite = spinePathsSQLite!()(db_filename, output_path); pth_sqlite.base.mkdirRecurse; auto db = Database(pth_sqlite.sqlite_file); { _db_statement ~= SQLiteTablesReCreate!()(); } SQLiteDbRun!()(db, _db_statement, opt_action, "TABLE RE-CREATE"); } else { writeln("must provide db name & output root path either on the command line or in configuration file"); writeln("db name: ", db_filename); writeln("output root path (path less /sqlite which is added: ", config.conf.output_path); } } } } #+END_SRC **** tables drop #+NAME: sqlite_tables_drop #+BEGIN_SRC d template SQLiteDbDrop() { void SQLiteDbDrop(O,C)(O opt_action, C config) { writeln("db drop"); if ((opt_action.sqlite_db_drop)) { string db_filename = (opt_action.sqlite_filename.length > 0) ? opt_action.sqlite_filename : (config.conf.w_srv_db_sqlite.length > 0) ? config.conf.w_srv_db_sqlite : ""; string output_path = (opt_action.output_dir_set.length > 0) ? opt_action.output_dir_set : (config.conf.output_path.length > 0) ? config.conf.output_path : ""; if (db_filename.length > 0 && output_path.length > 0) { auto pth_sqlite = spinePathsSQLite!()(db_filename, output_path); writeln("remove(", pth_sqlite.sqlite_file, ")"); try { remove(pth_sqlite.sqlite_file); } catch (FileException ex) { // handle error } } else { writeln("must provide db name & output root path either on the command line or in configuration file"); writeln("db name: ", db_filename); writeln("output root path (path less /sqlite which is added: ", config.conf.output_path); } } } } #+END_SRC ** 1. [#A] sqlite_db_statement :statement: *** collection #+NAME: sqlite_db_statement_composite_collection #+BEGIN_SRC d { { string _db_statement; if ((doc_matters.opt.action.sqlite_db_create)) { auto pth_sqlite = spinePathsSQLite!()(doc_matters.sqlite.filename, doc_matters.output_path); pth_sqlite.base.mkdirRecurse; _db_statement ~= SQLiteTablesReCreate!()(); SQLiteDbRun!()(db, _db_statement, doc_matters.opt.action, "TABLE RE-CREATE"); } if (doc_matters.opt.action.sqlite_delete) { _db_statement ~= SQLiteDeleteDocument!()(doc_matters); SQLiteDbRun!()(db, _db_statement, doc_matters.opt.action, "DELETE Document"); } if (doc_matters.opt.action.sqlite_update) { _db_statement ~= SQLiteDeleteDocument!()(doc_matters); SQLiteDbRun!()(db, _db_statement, doc_matters.opt.action, "DELETE Document"); _db_statement ~= SQLiteInsertMetadata!()(doc_matters); SQLiteDbRun!()(db, _db_statement, doc_matters.opt.action, "INSERT MetaData"); /+ get tid (lastrowid or max) for use in doc_objects table +/ _db_statement ~= doc_abstraction.SQLiteInsertDocObjectsLoop!()(doc_matters); SQLiteDbRun!()(db, _db_statement, doc_matters.opt.action, "INSERT DocObjects"); } } db.close; } #+END_SRC *** discrete #+NAME: sqlite_db_statement_composite_discrete #+BEGIN_SRC d { { string _db_statement; _db_statement ~= SQLiteTablesReCreate!()(); _db_statement ~= SQLiteInsertMetadata!()(doc_matters); _db_statement ~= doc_abstraction.SQLiteInsertDocObjectsLoop!()(doc_matters); SQLiteDbRun!()(db, _db_statement, doc_matters.opt.action, "table CREATE Tables, INSERT DocObjects"); } db.close; } #+END_SRC ** 2. imports #+NAME: sqlite_imports #+BEGIN_SRC d import doc_reform.io_out; import std.file, std.uri; import std.conv : to; import std.typecons : Nullable; import d2sqlite3; #+END_SRC ** 3. format and load template *** 1. prepare objects (munge, sanitize, markup) **** 1. _text_ generic munge (sanitize text for search) - [3/4] (search text, applies to all but code blocks) - [ ] remove whitespace, paragraph on single line (formatting kept so far) - [X] remove font face attributes - [X] move embedded endnotes - [X] place after text object - [X] remove embedded endnote numbers (rely on html output to represent) - [X] urls - [X] clean url markers, leave plain link text - [X] place urls after text object and its endnotes #+NAME: sanitize_text_for_search #+BEGIN_SRC d string generic_munge_sanitize_text_for_search( string _txt, ) { string _notes; string _urls; if (_txt.matchFirst(rgx.inline_notes_al_gen)) { foreach (m; _txt.matchAll(rgx.inline_notes_al_gen_text)) { _notes ~= "\n" ~ m["text"]; } _txt = _txt.replaceAll(rgx.inline_notes_al_gen, ""); } if (_txt.matchFirst(rgx.inline_link)) { foreach (m; _txt.matchAll(rgx.inline_link)) { if (m["link"].match(rgx.url)) { _urls ~= "\n" ~ m["link"]; } } _txt = _txt.replaceAll(rgx.inline_link_clean, ""); } if (_notes.length > 0) { _txt ~= _notes; } if (_urls.length > 0) { _txt ~= _urls; } { /+ debug +/ if (doc_matters.opt.action.debug_do && doc_matters.opt.action.verbose) { writeln(_txt, "\n"); } } debug(sql_text_clean) { writeln(_txt); } return _txt; } #+END_SRC **** 2. _html_ light html objects - place endnotes after text object - light inline html markup ***** munge ****** general munge (special characters, inline markup, move notes) #+NAME: sanitize_and_munge_inline_html #+BEGIN_SRC d string munge_html(M,O)( M doc_matters, const O obj, ) { string _html_special_characters(string _txt){ _txt = _txt .replaceAll(rgx.xhtml_ampersand, "&") .replaceAll(rgx.xhtml_quotation, """) .replaceAll(rgx.xhtml_less_than, "<") .replaceAll(rgx.xhtml_greater_than, ">") .replaceAll(rgx.nbsp_char, " ") .replaceAll(rgx.xhtml_line_break, "<br />"); return _txt; } string _html_font_face(string _txt){ _txt = _txt .replaceAll(rgx.inline_emphasis, "<em>$1</em>") .replaceAll(rgx.inline_bold, "<b>$1</b>") .replaceAll(rgx.inline_underscore, "<u>$1</u>") .replaceAll(rgx.inline_italics, "<i>$1</i>") .replaceAll(rgx.inline_superscript, "<sup>$1</sup>") .replaceAll(rgx.inline_subscript, "<sub>$1</sub>") .replaceAll(rgx.inline_strike, "<del>$1</del>") .replaceAll(rgx.inline_insert, "<ins>$1</ins>") .replaceAll(rgx.inline_mono, "<tt>$1</tt>") .replaceAll(rgx.inline_cite, "<cite>$1</cite>"); return _txt; } string _notes; string _urls; string _txt = _html_font_face(_html_special_characters(obj.text)); if (_txt.matchFirst(rgx.inline_notes_al_gen)) { foreach (m; _txt.matchAll(rgx.inline_notes_al_gen_text)) { _notes ~= "\n" ~ m["text"]; } _txt = _txt.replaceAll(rgx.inline_notes_al_gen_ref, "<sup>$1</sup> "); } if (_notes.length > 0) { _txt ~= _notes; } { /+ debug +/ if (doc_matters.opt.action.debug_do && doc_matters.opt.action.verbose) { writeln(_txt, "\n"); } } return _txt; } #+END_SRC ****** special characters #+NAME: sanitize_and_munge_inline_html #+BEGIN_SRC d string html_special_characters(string _txt){ _txt = _txt .replaceAll(rgx.xhtml_ampersand, "&") .replaceAll(rgx.xhtml_quotation, """) .replaceAll(rgx.xhtml_less_than, "<") .replaceAll(rgx.xhtml_greater_than, ">") .replaceAll(rgx.nbsp_char, " ") .replaceAll(rgx.xhtml_line_break, "<br />"); return _txt; } #+END_SRC ****** special characters for code #+NAME: sanitize_and_munge_inline_html #+BEGIN_SRC d string html_special_characters_code(string _txt){ _txt = _txt .replaceAll(rgx.xhtml_ampersand, "&") .replaceAll(rgx.xhtml_quotation, """) .replaceAll(rgx.xhtml_less_than, "<") .replaceAll(rgx.xhtml_greater_than, ">") .replaceAll(rgx.nbsp_char, " "); return _txt; } #+END_SRC ****** font_face #+NAME: sanitize_and_munge_inline_html #+BEGIN_SRC d string html_font_face(string _txt){ _txt = _txt .replaceAll(rgx.inline_emphasis, "<em>$1</em>") .replaceAll(rgx.inline_bold, "<b>$1</b>") .replaceAll(rgx.inline_underscore, "<u>$1</u>") .replaceAll(rgx.inline_italics, "<i>$1</i>") .replaceAll(rgx.inline_superscript, "<sup>$1</sup>") .replaceAll(rgx.inline_subscript, "<sub>$1</sub>") .replaceAll(rgx.inline_strike, "<del>$1</del>") .replaceAll(rgx.inline_insert, "<ins>$1</ins>") .replaceAll(rgx.inline_mono, "<tt>$1</tt>") .replaceAll(rgx.inline_cite, "<cite>$1</cite>"); return _txt; } #+END_SRC ****** inline markup ******* images #+NAME: sanitize_and_munge_inline_html #+BEGIN_SRC d string inline_images(M,O)( M doc_matters, const O obj, string _txt, string _suffix = ".html", string _xml_type = "seg", ) { string _img_pth; if (_xml_type == "epub") { _img_pth = "image/"; } else if (_xml_type == "scroll") { _img_pth = "../../image/"; } else if (_xml_type == "seg") { _img_pth = "../../../image/"; } if (_txt.match(rgx.inline_image)) { _txt = _txt.replaceAll( // TODO bug where image dimensions (w or h) not given & consequently set to 0; should not be used (calculate earlier, abstraction) rgx.inline_image, ("$1<img src=\"" ~ _img_pth ~ "$3\" width=\"$4\" height=\"$5\" naturalsizeflag=\"0\" align=\"bottom\" border=\"0\" /> $6")); } return _txt; } #+END_SRC ******* links ******** scroll, seg, epub #+NAME: sanitize_and_munge_inline_html #+BEGIN_SRC d string inline_links(M,O)( M doc_matters, const O obj, string _txt, string _xml_type = "seg", ) { if (obj.has.inline_links) { if (obj.metainfo.is_a != "code") { _txt = replaceAll!(m => m["linked_text"] ~ "┤" ~ to!string((obj.stow.link[m["num"].to!ulong])).encode ~ "├" )(_txt, rgx.inline_link_number_only); } if ((_txt.match(rgx.mark_internal_site_lnk)) && (_xml_type == "scroll")) { // conditions reversed to avoid: gdc compiled program run segfault _txt = _txt.replaceAll( rgx.inline_seg_link, "$1"); } auto pth_html = spinePathsHTML!()(doc_matters.output_path, doc_matters.src.language); if (_xml_type == "seg") { foreach (m; _txt.matchAll(rgx.inline_link_seg_and_hash)) { if (m.captures["hash"] in doc_matters.has.tag_associations) { if (m.captures["hash"] == doc_matters.has.tag_associations[(m.captures["hash"])]["seg_lv4"]) { _txt = _txt.replaceFirst( rgx.inline_link_seg_and_hash, "┥$1┝┤" ~ doc_matters.conf_make_meta.conf.w_srv_data_root_url_html ~ "/" ~ pth_html.tail_fn_seg(doc_matters.src.filename, "$2.html") ~ "├" ); } else { _txt = _txt.replaceFirst( rgx.inline_link_seg_and_hash, "┥$1┝┤" ~ doc_matters.conf_make_meta.conf.w_srv_data_root_url_html ~ "/" ~ doc_matters.src.filename_base ~ "/" ~ doc_matters.has.tag_associations[(m.captures["hash"])]["seg_lv4"] ~ ".html" ~ "#" ~ m.captures["hash"] ~ "├" ); } } else { if (!(doc_matters.opt.action.quiet)) { writeln( "WARNING on internal document links, anchor to link <<" ~ m.captures["hash"] ~ ">> not found in document, " ~ "anchor: " ~ m.captures["hash"] ~ " document: " ~ doc_matters.src.filename ); } } } } else { if (auto m = _txt.match(rgx.inline_link_seg_and_hash)) { _txt = _txt.replaceFirst( rgx.inline_link_seg_and_hash, "┥$1┝┤" ~ doc_matters.conf_make_meta.conf.w_srv_data_root_url_html ~ "/" ~ pth_html.tail_fn_scroll(doc_matters.src.filename) ~ "#" ~ m.captures["hash"] ~ "├" ); } } _txt = _txt .replaceAll( rgx.inline_link_fn_suffix, ("$1.html")) .replaceAll( rgx.inline_link, ("<a href=\"$2\">$1</a>")) .replaceAll( rgx.mark_internal_site_lnk, ""); } debug(markup_links) { if (_txt.match(rgx.inline_link)) { writeln(__LINE__, " (missed) markup link identified (", obj.has.inline_links, "): ", obj.metainfo.is_a, ": ", obj.text ); } } debug(markup) { if (_txt.match(rgx.inline_link)) { writeln(__LINE__, " (missed) markup link identified (", obj.has.inline_links, "): ", obj.metainfo.is_a, ": ", obj.text ); } } return _txt; } #+END_SRC ******* notes ******** scroll #+NAME: sanitize_and_munge_inline_html #+BEGIN_SRC d string inline_notes_scroll(M,O)( M doc_matters, const O obj, string _txt, ) { if (obj.has.inline_notes_reg) { // _txt = font_face(_txt); _txt = _txt.replaceAll( rgx.inline_notes_al_regular_number_note, ("<a href=\"#note_$1\"><note id=\"noteref_$1\"> <sup>$1</sup> </note></a>") ); } debug(markup_endnotes) { if (_txt.match(rgx.inline_notes_al_regular_number_note)) { writeln(__LINE__, " (missed) markup endnote: ", obj.metainfo.is_a, ": ", obj.text); } } debug(markup) { if (_txt.match(rgx.inline_notes_al_regular_number_note)) { writeln(__LINE__, " (missed) markup endnote: ", obj.metainfo.is_a, ": ", obj.text); } } return _txt; } #+END_SRC ******** seg #+NAME: sanitize_and_munge_inline_html #+BEGIN_SRC d Tuple!(string, string[]) inline_notes_seg(M,O)( M doc_matters, const O obj, string _txt, ) { string[] _endnotes; if (obj.has.inline_notes_reg) { /+ need markup for text, and separated footnote +/ foreach(m; _txt.matchAll(rgx.inline_notes_al_regular_number_note)) { _endnotes ~= format( "%s%s%s%s\n %s%s%s%s%s\n %s\n%s", "<p class=\"endnote\">", "<a href=\"#noteref_", m.captures[1], "\">", "<note id=\"note_", m.captures[1], "\"> <sup>", m.captures[1], ".</sup></note></a>", m.captures[2], "</p>" ); } _txt = _txt.replaceAll( rgx.inline_notes_al_regular_number_note, ("<a href=\"#note_$1\"><note id=\"noteref_$1\"> <sup>$1</sup> </note></a>") ); } else if (_txt.match(rgx.inline_notes_al_regular_number_note)) { debug(markup) { writeln(__LINE__, " endnote: ", obj.metainfo.is_a, ": ", obj.text); } } Tuple!(string, string[]) t = tuple( _txt, _endnotes, ); return t; } #+END_SRC ******* inline markup #+NAME: sanitize_and_munge_inline_html #+BEGIN_SRC d string xml_type="seg"; /+ set html document type to be linked to here (seg|scroll) +/ string inline_markup(M,O)( M doc_matters, const O obj, string _txt, ) { _txt = inline_images(doc_matters, obj, _txt, xml_type); _txt = inline_links(doc_matters, obj, _txt, xml_type); _txt = inline_notes_scroll(doc_matters, obj, _txt); return _txt; } #+END_SRC ***** objects ****** heading #+NAME: html_objects #+BEGIN_SRC d string html_heading(M,O)( M doc_matters, const O obj, ) { assert(obj.metainfo.is_of_part == "body" || "frontmatter" || "backmatter"); assert(obj.metainfo.is_of_section == "body" || "toc" || "endnotes" || "glossary" || "bibliography" || "bookindex" || "blurb"); assert(obj.metainfo.is_of_type == "para"); assert(obj.metainfo.is_a == "heading"); string _txt = munge_html(doc_matters, obj); _txt = inline_markup(doc_matters, obj, _txt); string o = format(q"┃<p class="%s"><b> %s </b></p>┃", obj.metainfo.is_a, _txt, ); return o; } #+END_SRC ******* +fancy+ ##+NAME: prepare_objects_html #+BEGIN_SRC d string html_heading(M,O)( M doc_matters, const O obj, ) { string o; string _txt = munge_html(doc_matters, obj); o = format(q"┃<h%s class="%s"> %s </h%s>┃", obj.metainfo.heading_lev_markup, obj.metainfo.is_a, _txt, obj.metainfo.heading_lev_markup, ); return o; } #+END_SRC ****** para #+NAME: html_objects #+BEGIN_SRC d string html_para(M,O)( M doc_matters, const O obj, ) { assert(obj.metainfo.is_of_part == "body" || "frontmatter" || "backmatter"); assert(obj.metainfo.is_of_section == "body" || "toc" || "endnotes" || "glossary" || "bibliography" || "bookindex" || "blurb"); assert(obj.metainfo.is_of_type == "para"); assert(obj.metainfo.is_a == "para" || "toc" || "endnote" || "glossary" || "bibliography" || "bookindex" || "blurb"); string _txt = munge_html(doc_matters, obj); _txt = (obj.attrib.bullet) ? ("●  " ~ _txt) : _txt; _txt = inline_markup(doc_matters, obj, _txt); string o = format(q"┃<p class="%s" indent="h%si%s"> %s </p>┃", obj.metainfo.is_a, obj.attrib.indent_hang, obj.attrib.indent_base, _txt ); return o; } #+END_SRC ****** quote #+NAME: html_objects #+BEGIN_SRC d string html_quote(M,O)( M doc_matters, const O obj, ) { assert(obj.metainfo.is_of_part == "body"); assert(obj.metainfo.is_of_section == "body" || "glossary" || "bibliography" || "bookindex" || "blurb"); assert(obj.metainfo.is_of_type == "block"); assert(obj.metainfo.is_a == "quote"); string _txt = munge_html(doc_matters, obj); string o = format(q"┃<p class="%s"> %s </p>┃", obj.metainfo.is_a, _txt ); return o; } #+END_SRC ****** group #+NAME: html_objects #+BEGIN_SRC d string html_group(M,O)( M doc_matters, const O obj, ) { assert(obj.metainfo.is_of_part == "body"); assert(obj.metainfo.is_of_section == "body" || "glossary" || "bibliography" || "bookindex" || "blurb"); assert(obj.metainfo.is_of_type == "block"); assert(obj.metainfo.is_a == "group"); string _txt = munge_html(doc_matters, obj); string o = format(q"┃<p class="%s"> %s </p>┃", obj.metainfo.is_a, _txt ); return o; } #+END_SRC ****** block #+NAME: html_objects #+BEGIN_SRC d string html_block(M,O)( M doc_matters, const O obj, ) { assert(obj.metainfo.is_of_part == "body"); assert(obj.metainfo.is_of_section == "body" || "glossary" || "bibliography" || "bookindex" || "blurb"); assert(obj.metainfo.is_of_type == "block"); assert(obj.metainfo.is_a == "block"); string _txt = munge_html(doc_matters, obj); string o = format(q"┃ <p class="%s">%s</p>┃", obj.metainfo.is_a, _txt.stripRight ); return o; } #+END_SRC ****** verse #+NAME: html_objects #+BEGIN_SRC d string html_verse(M,O)( M doc_matters, const O obj, ) { assert(obj.metainfo.is_of_part == "body"); assert(obj.metainfo.is_of_section == "body" || "glossary" || "bibliography" || "bookindex" || "blurb"); assert(obj.metainfo.is_of_type == "block"); assert(obj.metainfo.is_a == "verse"); string _txt = munge_html(doc_matters, obj); string o = format(q"┃<p class="%s">%s</p>┃", obj.metainfo.is_a, _txt ); return o; } #+END_SRC ****** code #+NAME: html_objects #+BEGIN_SRC d string html_code(O)( const O obj, ) { assert(obj.metainfo.is_of_part == "body"); assert(obj.metainfo.is_of_section == "body"); assert(obj.metainfo.is_of_type == "block"); assert(obj.metainfo.is_a == "code"); string _txt = html_special_characters_code(obj.text); string o = format(q"┃<p class="%s">%s</p>┃", obj.metainfo.is_a, _txt ); return o; } #+END_SRC ****** table #+NAME: html_objects #+BEGIN_SRC d string html_table(M,O)( M doc_matters, const O obj, ) { assert(obj.metainfo.is_of_part == "body"); assert(obj.metainfo.is_of_section == "body"); assert(obj.metainfo.is_of_type == "block"); assert(obj.metainfo.is_a == "table"); Tuple!(string, string) _tablarize(O)( const O obj, string _txt, ) { string[] _table_rows = _txt.split(rgx.table_delimiter_row); string[] _table_cols; string _table; string _tablenote; foreach(row_idx, row; _table_rows) { _table_cols = row.split(rgx.table_delimiter_col); _table ~= "<tr>"; foreach(col_idx, cell; _table_cols) { if ((_table_cols.length == 1) && (_table_rows.length <= row_idx+2)) { // check row_idx+2 (rather than == ++row_idx) _tablenote ~= cell; } else { string _col_is = (row_idx == 0 && obj.table.heading) ? "th" : "td"; string _align = ("style=\"text-align:" ~ ((obj.table.column_aligns[col_idx] == "l") ? "left\"" : "right\"")); _table ~= "<" ~ _col_is ~ " width=\"" ~ obj.table.column_widths[col_idx].to!string ~ "%\" " ~ _align ~ ">"; _table ~= cell; _table ~= "</" ~ _col_is ~ ">"; } } _table ~= "</tr>"; } Tuple!(string, string) t = tuple( _table, _tablenote, ); return t; } string _txt = munge_html(doc_matters, obj); Tuple!(string, string) t = _tablarize(obj, _txt); _txt = t[0]; string _note = t[1]; string o = format(q"┃<p class="%s"> <table summary="normal text css" width="95%%" border="0" cellpadding="2" align="center"> %s </table> %s </p>┃", obj.metainfo.is_a, _txt, _note ); return o; } #+END_SRC *** 2. hub (sqlite_format_and_load_objects) **** sql related #+NAME: sqlite_load_object #+BEGIN_SRC d string sqlite_load_string(M,O)( M doc_matters, const O obj, ) { string o; return o; } #+END_SRC #+NAME: sqlite_load_object #+BEGIN_SRC d string postgresql_load_string(M,O)( M doc_matters, const O obj, ) { string o; return o; } #+END_SRC #+NAME: sqlite_load_object #+BEGIN_SRC d string sqlite_statement(O)( const O obj, string _txt, string _html, ) { void _sql_exe(O)( string _sql, ) { writeln(_html); writeln(_sql); } string _sql; return _sql; } #+END_SRC **** heading #+NAME: hub_format_and_sqlite_load_objects #+BEGIN_SRC d string[string] heading(M,O)( M doc_matters, const O obj, ) { string[string] obj_txt = [ "text": generic_munge_sanitize_text_for_search(obj.text), "html": html_heading(doc_matters, obj) ]; { /+ debug +/ if (doc_matters.opt.action.debug_do && doc_matters.opt.action.verbose) { debug(sql_txt) { writeln(obj_txt["text"]); } debug(sql_html) { writeln(obj_txt["html"]); } } else { // load sql } } return obj_txt; } #+END_SRC **** para #+NAME: hub_format_and_sqlite_load_objects #+BEGIN_SRC d string[string] para(M,O)( M doc_matters, const O obj, ) { string[string] obj_txt = [ "text": generic_munge_sanitize_text_for_search(obj.text), "html": html_para(doc_matters, obj) ]; { /+ debug +/ if (doc_matters.opt.action.debug_do && doc_matters.opt.action.verbose) { debug(sql_txt) { writeln(obj_txt["text"]); } debug(sql_html) { writeln(obj_txt["html"]); } } else { // load sql } } return obj_txt; } #+END_SRC **** quote #+NAME: hub_format_and_sqlite_load_objects #+BEGIN_SRC d string[string] quote(M,O)( M doc_matters, const O obj, ) { string[string] obj_txt = [ "text": generic_munge_sanitize_text_for_search(obj.text), "html": html_quote(doc_matters, obj) ]; { /+ debug +/ if (doc_matters.opt.action.debug_do && doc_matters.opt.action.verbose) { debug(sql_txt) { writeln(obj_txt["text"]); } debug(sql_html) { writeln(obj_txt["html"]); } } else { // load sql } } return obj_txt; } #+END_SRC **** group #+NAME: hub_format_and_sqlite_load_objects #+BEGIN_SRC d string[string] group(M,O)( M doc_matters, const O obj, ) { string[string] obj_txt = [ "text": generic_munge_sanitize_text_for_search(obj.text), "html": html_group(doc_matters, obj) ]; { /+ debug +/ if (doc_matters.opt.action.debug_do && doc_matters.opt.action.verbose) { debug(sql_txt) { writeln(obj_txt["text"]); } debug(sql_html) { writeln(obj_txt["html"]); } } else { // load sql } } return obj_txt; } #+END_SRC **** block #+NAME: hub_format_and_sqlite_load_objects #+BEGIN_SRC d string[string] block(M,O)( M doc_matters, const O obj, ) { string[string] obj_txt = [ "text": generic_munge_sanitize_text_for_search(obj.text), "html": html_block(doc_matters, obj) ]; { /+ debug +/ if (doc_matters.opt.action.debug_do && doc_matters.opt.action.verbose) { debug(sql_txt) { writeln(obj_txt["text"]); } debug(sql_html) { writeln(obj_txt["html"]); } } else { // load sql } } return obj_txt; } #+END_SRC **** verse #+NAME: hub_format_and_sqlite_load_objects #+BEGIN_SRC d string[string] verse(M,O)( M doc_matters, const O obj, ) { string[string] obj_txt = [ "text": generic_munge_sanitize_text_for_search(obj.text), "html": html_verse(doc_matters, obj) ]; { /+ debug +/ if (doc_matters.opt.action.debug_do && doc_matters.opt.action.verbose) { debug(sql_txt) { writeln(obj_txt["text"]); } debug(sql_html) { writeln(obj_txt["html"]); } } else { // load sql } } return obj_txt; } #+END_SRC **** code #+NAME: hub_format_and_sqlite_load_objects #+BEGIN_SRC d string[string] code(M,O)( M doc_matters, const O obj, ) { string[string] obj_txt = [ "text": generic_munge_sanitize_text_for_search(obj.text), "html": html_code(obj) ]; { /+ debug +/ if (doc_matters.opt.action.debug_do && doc_matters.opt.action.verbose) { debug(sql_txt) { writeln(obj_txt["text"]); } debug(sql_html) { writeln(obj_txt["html"]); } } else { // load sql } } return obj_txt; } #+END_SRC **** table #+NAME: hub_format_and_sqlite_load_objects #+BEGIN_SRC d string[string] table(M,O)( M doc_matters, const O obj, ) { string[string] obj_txt = [ "text": generic_munge_sanitize_text_for_search(obj.text), "html": html_table(doc_matters, obj) ]; { /+ debug +/ if (doc_matters.opt.action.debug_do && doc_matters.opt.action.verbose) { debug(sql_txt) { writeln(obj_txt["text"]); } debug(sql_html) { writeln(obj_txt["html"]); } } else { // load sql } } return obj_txt; } #+END_SRC ** 4. ↻ loop, identify, load - loop template #+NAME: sqlite_objects_loop #+BEGIN_SRC d auto format_and_sqlite_load = SQLiteFormatAndLoadObject!()(doc_matters); string[string] obj_txt; string doc_text; string[] _insert_doc_objects; foreach (part; doc_matters.has.keys_seq.sql) { foreach (obj; doc_abstraction[part]) { switch (obj.metainfo.is_of_part) { case "frontmatter": assert(part == "head", part); switch (obj.metainfo.is_of_type) { case "para": switch (obj.metainfo.is_a) { case "heading": obj_txt = format_and_sqlite_load.heading(doc_matters, obj); break; default: { /+ debug +/ if (doc_matters.opt.action.debug_do && doc_matters.opt.action.verbose) { writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_a); } } break; } break; default: { /+ debug +/ if (doc_matters.opt.action.debug_do && doc_matters.opt.action.verbose) { writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_of_type); } } break; } break; case "body": // assert(part == "body", part); switch (obj.metainfo.is_of_type) { case "para": switch (obj.metainfo.is_a) { case "heading": debug (asserts) { if (part != "body") { writeln(__LINE__, ": ", obj.text); } } obj_txt = format_and_sqlite_load.heading(doc_matters, obj); break; case "para": obj_txt = format_and_sqlite_load.para(doc_matters, obj); break; default: { /+ debug +/ if (doc_matters.opt.action.debug_do && doc_matters.opt.action.verbose) { writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_a); } } break; } break; case "block": switch (obj.metainfo.is_a) { case "quote": obj_txt = format_and_sqlite_load.quote(doc_matters, obj); break; case "group": obj_txt = format_and_sqlite_load.group(doc_matters, obj); break; case "block": obj_txt = format_and_sqlite_load.block(doc_matters, obj); break; case "poem": // double check on keeping both poem & verse break; case "verse": obj_txt = format_and_sqlite_load.verse(doc_matters, obj); break; case "code": obj_txt = format_and_sqlite_load.code(doc_matters, obj); break; case "table": obj_txt = format_and_sqlite_load.table(doc_matters, obj); break; default: { /+ debug +/ if (doc_matters.opt.action.debug_do && doc_matters.opt.action.verbose) { writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_a); } } break; } break; default: { /+ debug +/ if (doc_matters.opt.action.debug_do && doc_matters.opt.action.verbose) { writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_of_type); } } break; } break; case "backmatter": assert(part == "glossary" || "bibliography" || "bookindex" || "blurb" || "tail", part); switch (obj.metainfo.is_of_type) { case "para": switch (obj.metainfo.is_a) { case "heading": obj_txt = format_and_sqlite_load.heading(doc_matters, obj); break; case "glossary": assert(part == "glossary", part); obj_txt = format_and_sqlite_load.para(doc_matters, obj); break; case "bibliography": assert(part == "bibliography", part); obj_txt = format_and_sqlite_load.para(doc_matters, obj); break; case "bookindex": assert(part == "bookindex", part); obj_txt = format_and_sqlite_load.para(doc_matters, obj); break; case "blurb": assert(part == "blurb", part); obj_txt = format_and_sqlite_load.para(doc_matters, obj); break; default: { /+ debug +/ if (doc_matters.opt.action.debug_do && doc_matters.opt.action.verbose) { writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_a); } } break; } break; default: { /+ debug +/ if (doc_matters.opt.action.debug_do && doc_matters.opt.action.verbose) { writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_of_type); } } break; } break; case "comment": break; default: { /+ debug +/ if (doc_matters.opt.action.debug_do && doc_matters.opt.action.verbose) { writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_of_part); // check where empty value could come from writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_a); writeln(__FILE__, ":", __LINE__, ": ", obj.text); // check where empty value could come from } } break; } if (obj.metainfo.is_a == "heading") { if ((doc_matters.opt.action.very_verbose)) { writeln( "markup: ", obj.metainfo.heading_lev_markup, "> ", obj.metainfo.dom_structure_markedup_tags_status, "; collapsed: ", obj.metainfo.heading_lev_collapsed, "> ", obj.metainfo.dom_structure_collapsed_tags_status, "; ocn: ", obj.metainfo.ocn, " node: ", obj.metainfo.node, "; parent: ", obj.metainfo.parent_lev_markup, "; ocn: ", obj.metainfo.parent_ocn, "; ", ); } } if (!(obj.metainfo.is_a == "comment")) { _insert_doc_objects ~= insertDocObjectsRow(obj); } } // loop closes } return _insert_doc_objects.join.to!(char[]).toUTF8; #+END_SRC ** 5. SQL statements :statement: *** drop index and tables **** DROP INDEX IF EXISTS #+NAME: sqlite_statement_drop_existing_index #+BEGIN_SRC sql DROP INDEX IF EXISTS idx_ocn; DROP INDEX IF EXISTS idx_uid; DROP INDEX IF EXISTS idx_digest_clean; DROP INDEX IF EXISTS idx_digest_all; DROP INDEX IF EXISTS idx_clean; DROP INDEX IF EXISTS idx_title; DROP INDEX IF EXISTS idx_author; DROP INDEX IF EXISTS src_filename_base; DROP INDEX IF EXISTS idx_language_document_char; DROP INDEX IF EXISTS idx_classify_topic_register; #+END_SRC **** DROP TABLE IF EXISTS #+NAME: sqlite_statement_drop_existing_tables #+BEGIN_SRC sql DROP TABLE IF EXISTS metadata_and_text; DROP TABLE IF EXISTS topic_register; DROP TABLE IF EXISTS doc_objects; DROP TABLE IF EXISTS urls; #+END_SRC *** create tables and index **** CREATE TABLE metadata_and_text #+NAME: sqlite_statement_create_table_metadata_and_src_txt #+BEGIN_SRC sql CREATE TABLE metadata_and_text ( uid VARCHAR(256) UNIQUE, -- filename, language char, pod/txt (decide on delimiter [,;:/]) src_composite_id_per_txt VARCHAR(256) NOT NULL, -- UNIQUE, z pod name if any + src filename + language code src_composite_id_per_pod VARCHAR(256) NOT NULL, -- z pod name if any + src filename title VARCHAR(800) NOT NULL, title_main VARCHAR(400) NOT NULL, title_sub VARCHAR(400) NULL, title_short VARCHAR(400) NULL, title_edition VARCHAR(10) NULL, title_language VARCHAR(100) NULL, title_language_char VARCHAR(6) NULL, creator_author VARCHAR(600) NOT NULL, creator_author_last_first VARCHAR(600) NOT NULL, creator_author_email VARCHAR(100) NULL, creator_author_hon VARCHAR(100) NULL, creator_author_nationality VARCHAR(100) NULL, creator_editor VARCHAR(600) NULL, creator_contributor VARCHAR(600) NULL, creator_illustrator VARCHAR(600) NULL, creator_photographer VARCHAR(600) NULL, creator_translator VARCHAR(600) NULL, creator_prepared_by VARCHAR(600) NULL, creator_digitized_by VARCHAR(600) NULL, creator_audio VARCHAR(600) NULL, creator_video VARCHAR(600) NULL, language_document VARCHAR(100) NULL, language_document_char VARCHAR(6) NOT NULL, language_original VARCHAR(100) NULL, language_original_char VARCHAR(6) NULL, date_added_to_site VARCHAR(10) NULL, date_available VARCHAR(10) NULL, date_created VARCHAR(10) NULL, date_issued VARCHAR(10) NULL, date_modified VARCHAR(10) NULL, date_published VARCHAR(10) NULL, date_valid VARCHAR(10) NULL, date_translated VARCHAR(10) NULL, date_original_publication VARCHAR(10) NULL, date_generated VARCHAR(10) NULL, original_title VARCHAR(800) NULL, original_publisher VARCHAR(600) NULL, original_language VARCHAR(100) NULL, original_language_char VARCHAR(6) NULL, original_source VARCHAR(600) NULL, original_institution VARCHAR(600) NULL, original_nationality VARCHAR(100) NULL, rights_copyright VARCHAR(2500) NULL, rights_copyright_audio VARCHAR(2500) NULL, rights_copyright_cover VARCHAR(2500) NULL, rights_copyright_illustrations VARCHAR(2500) NULL, rights_copyright_photographs VARCHAR(2500) NULL, rights_copyright_text VARCHAR(2500) NULL, rights_copyright_translation VARCHAR(2500) NULL, rights_copyright_video VARCHAR(2500) NULL, rights_license VARCHAR(2500) NULL, identifier_oclc VARCHAR(30) NULL, identifier_isbn VARCHAR(16) NULL, classify_topic_register VARCHAR(2500) NULL, classify_subject VARCHAR(600) NULL, classify_loc VARCHAR(30) NULL, classify_dewey VARCHAR(30) NULL, classify_keywords VARCHAR(600) NULL, notes_abstract TEXT NULL, notes_description TEXT NULL, notes_comment TEXT NULL, notes_coverage VARCHAR(200) NULL, notes_relation VARCHAR(200) NULL, notes_history VARCHAR(600) NULL, notes_type VARCHAR(600) NULL, notes_format VARCHAR(600) NULL, notes_prefix TEXT NULL, notes_prefix_a TEXT NULL, notes_prefix_b TEXT NULL, notes_suffix TEXT NULL, publisher VARCHAR(600) NULL, src_filename_base VARCHAR(256) NOT NULL, src_filename_suffix VARCHAR(6) NOT NULL, src_fingerprint VARCHAR(256) NULL, src_filesize VARCHAR(10) NULL, src_wordcount VARCHAR(10) NULL, pod_name VARCHAR(256) NULL, -- zipped pod, work to be done here pod_fingerprint VARCHAR(256) NULL, -- zipped pod, work to be done here pod_size VARCHAR(10) NULL, -- zipped pod, work to be done here site_url_doc_root VARCHAR(256) NULL, -- url path to doc root site_url_html_toc VARCHAR(256) NULL, site_url_html_scroll VARCHAR(256) NULL, site_url_epub VARCHAR(256) NULL, links TEXT NULL ); #+END_SRC **** CREATE TABLE topic_register #+NAME: sqlite_statement_create_table_topic_register #+BEGIN_SRC sql CREATE TABLE topic_register ( -- tid BIGINT PRIMARY KEY, uid_metadata_and_text VARCHAR(256) REFERENCES metadata_and_text(uid) ON DELETE CASCADE, -- src_composite_id_per_txt VARCHAR(256) NOT NULL, - UNIQUE, - z pod name if any + src filename + language code -- src_composite_id_per_pod VARCHAR(256) NOT NULL, - z pod name if any + src filename topic_register_lv0 VARCHAR(250) NOT NULL, topic_register_lv1 VARCHAR(250) NULL, topic_register_lv2 VARCHAR(250) NULL, topic_register_lv3 VARCHAR(250) NULL, topic_register_lv4 VARCHAR(250) NULL, site_url_doc_root VARCHAR(256) NULL, -- url path to doc root site_url_html_toc VARCHAR(256) NULL, site_url_html_scroll VARCHAR(256) NULL ); #+END_SRC **** CREATE TABLE site_urls ? #+NAME: sqlite_statement_create_table_site_urls #+BEGIN_SRC sql CREATE TABLE site_urls ( -- tid BIGINT PRIMARY KEY, uid_metadata_and_text VARCHAR(256) REFERENCES metadata_and_text(uid) ON DELETE CASCADE, src_composite_id_per_txt VARCHAR(256) NOT NULL, -- UNIQUE, - z pod name if any + src filename + language code src_composite_id_per_pod VARCHAR(256) NOT NULL, -- z pod name if any + src filename site_url_doc_root VARCHAR(256) NULL, -- url path to doc root site_url_html_toc VARCHAR(256) NULL, site_url_html_scroll VARCHAR(256) NULL ); #+END_SRC **** CREATE TABLE doc_objects #+NAME: sqlite_statement_create_table_objects #+BEGIN_SRC sql CREATE TABLE doc_objects ( lid BIGINT PRIMARY KEY, uid_metadata_and_text VARCHAR(256) REFERENCES metadata_and_text(uid) ON DELETE CASCADE, ocn SMALLINT, obj_id VARCHAR(6) NULL, clean TEXT NULL, body TEXT NULL, seg VARCHAR(256) NULL, lev_an VARCHAR(1), is_of_type VARCHAR(16), is_a VARCHAR(16), lev SMALLINT NULL, node VARCHAR(16) NULL, parent VARCHAR(16) NULL, last_decendant VARCHAR(16) NULL, -- headings only digest_clean CHAR(256), digest_all CHAR(256), seg_name CHAR(256), types CHAR(1) NULL ); #+END_SRC **** CREATE INDEX #+NAME: sqlite_statement_create_index #+BEGIN_SRC sql CREATE INDEX idx_ocn ON doc_objects(ocn); CREATE INDEX idx_digest_clean ON doc_objects(digest_clean); CREATE INDEX idx_digest_all ON doc_objects(digest_all); CREATE INDEX idx_clean ON doc_objects(clean); CREATE INDEX idx_title ON metadata_and_text(title); CREATE INDEX idx_author ON metadata_and_text(creator_author_last_first); CREATE INDEX idx_uid ON metadata_and_text(uid); CREATE INDEX idx_filename ON metadata_and_text(src_filename_base); CREATE INDEX idx_language ON metadata_and_text(language_document_char); CREATE INDEX idx_topics ON metadata_and_text(classify_topic_register); #+END_SRC *** TODO local site link & info *** delete rows (delete document) **** DELETE uid rows doc matters & metadata ***** sql statement: dlang format #+NAME: sqlite_formatted_delete #+BEGIN_SRC d string _uid = doc_matters.src.doc_uid; string _delete_uid = format(q"┃ #+END_SRC ***** DELETE FROM ... WHERE #+NAME: sqlite_formatted_delete #+BEGIN_SRC sql DELETE FROM metadata_and_text WHERE uid = '%s'; DELETE FROM doc_objects WHERE uid_metadata_and_text = '%s'; #+END_SRC ***** VALUES #+NAME: sqlite_formatted_delete #+BEGIN_SRC d ┃", _uid, _uid, ); #+END_SRC *** inserts **** INSERT doc matters & metadata ***** sql statement: dlang format #+NAME: sqlite_formatted_insertions_doc_matters_metadata #+BEGIN_SRC d string _uid = SQLinsertDelimiter!()(doc_matters.src.doc_uid); string _insert_metadata = format(q"┃ #+END_SRC ***** INSERT INTO #+NAME: sqlite_formatted_insertions_doc_matters_metadata #+BEGIN_SRC sql INSERT INTO metadata_and_text ( uid, src_filename_base, src_filename_suffix, src_composite_id_per_txt, src_composite_id_per_pod, title, title_main, title_sub, title_short, title_edition, title_language, creator_author, creator_author_last_first, creator_author_email, creator_illustrator, creator_translator, language_document, language_document_char, date_added_to_site, date_available, date_created, date_issued, date_modified, date_published, date_valid, rights_copyright, rights_copyright_audio, rights_copyright_cover, rights_copyright_illustrations, rights_copyright_photographs, rights_copyright_text, rights_copyright_translation, rights_copyright_video, rights_license, identifier_oclc, identifier_isbn, classify_dewey, classify_keywords, classify_loc, classify_subject, classify_topic_register, original_title, original_publisher, original_language, original_language_char, original_source, notes_abstract, notes_description, publisher, site_url_doc_root ) #+END_SRC ***** VALUES #+NAME: sqlite_formatted_insertions_doc_matters_metadata #+BEGIN_SRC sql VALUES ( '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s' ); #+END_SRC ***** dlang values for formatting #+NAME: sqlite_formatted_insertions_doc_matters_metadata #+BEGIN_SRC d ┃", _uid, SQLinsertDelimiter!()(doc_matters.src.filename_base), SQLinsertDelimiter!()(doc_matters.src.filename_extension), SQLinsertDelimiter!()(doc_matters.src.docname_composite_unique_per_src_doc), SQLinsertDelimiter!()(doc_matters.src.docname_composite_unique_per_src_pod), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_full), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_main), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_subtitle), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_short), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_edition), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_language), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author_surname_fn), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author_email), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_illustrator), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_translator), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document_char), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_added_to_site), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_available), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_created), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_issued), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_modified), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_published), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_valid), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_audio), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_cover), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_illustrations), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_photographs), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_text), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_translation), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_video), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_license), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_oclc), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_isbn), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_dewey), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_keywords), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_loc), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_subject), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_topic_register), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_abstract), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_description), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_title), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_publisher), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language_char), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_source), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.publisher), SQLinsertDelimiter!()(doc_matters.conf_make_meta.conf.w_srv_data_root_url_html) ); #+END_SRC **** INSERT topic register writeln(doc_matters.conf_make_meta.meta.classify_topic_register_arr); ***** { if topic register then loop topic register array #+NAME: sqlite_formatted_insertions_topic_register #+BEGIN_SRC d if (doc_matters.conf_make_meta.meta.classify_topic_register_arr.length > 0) { string _insert_topics; foreach (topic; doc_matters.conf_make_meta.meta.classify_topic_register_arr) { string[] subject_tree = topic.split(mkup.sep); #+END_SRC ***** sql statement: dlang format #+NAME: sqlite_formatted_insertions_topic_register #+BEGIN_SRC d _insert_topics ~= format(q"┃ #+END_SRC ***** INSERT INTO #+NAME: sqlite_formatted_insertions_topic_register #+BEGIN_SRC sql INSERT INTO topic_register ( uid_metadata_and_text, topic_register_lv0, topic_register_lv1, topic_register_lv2, topic_register_lv3, topic_register_lv4 ) #+END_SRC ***** VALUES #+NAME: sqlite_formatted_insertions_topic_register #+BEGIN_SRC sql VALUES ( '%s', '%s', '%s', '%s', '%s', '%s' ); #+END_SRC ***** dlang values for formatting #+NAME: sqlite_formatted_insertions_topic_register #+BEGIN_SRC d ┃", _uid, (subject_tree.length > 0) ? subject_tree[0] : "", (subject_tree.length > 1) ? subject_tree[1] : "", (subject_tree.length > 2) ? subject_tree[2] : "", (subject_tree.length > 3) ? subject_tree[3] : "", (subject_tree.length > 4) ? subject_tree[4] : "" ); #+END_SRC ***** } close topic register & loop topic register array #+NAME: sqlite_formatted_insertions_topic_register #+BEGIN_SRC d } } #+END_SRC **** INSERT doc objects lid unique, increment by 1 per object, not ocn metadata tid document number unique either: - increment by adding 1 for each document, - make hash of document filename or url and use? ***** sql statement: dlang format #+NAME: sqlite_formatted_insertions_doc_objects #+BEGIN_SRC d string _insert_doc_objects_row = format(q"┃ #+END_SRC ***** INSERT INTO #+NAME: sqlite_formatted_insertions_doc_objects #+BEGIN_SRC sql INSERT INTO doc_objects ( uid_metadata_and_text, ocn, obj_id, clean, body, lev, is_of_type, is_a, seg_name ) #+END_SRC ***** VALUES #+NAME: sqlite_formatted_insertions_doc_objects #+BEGIN_SRC sql VALUES ( '%s', %s, '%s', '%s', '%s', %s, '%s', '%s', '%s' ); #+END_SRC ***** dlang values for formatting #+NAME: sqlite_formatted_insertions_doc_objects #+BEGIN_SRC d ┃", _uid, obj.metainfo.ocn, obj.metainfo.identifier, SQLinsertDelimiter!()(obj_txt["text"]), SQLinsertDelimiter!()(obj_txt["html"]), obj.metainfo.heading_lev_markup, obj.metainfo.is_of_type, obj.metainfo.is_a, obj.tags.html_segment_anchor_tag_is ); #+END_SRC * __END__