From 90051a7ea55acb043434b1c2483b878d602246ba Mon Sep 17 00:00:00 2001
From: Ralph Amissah <ralph.amissah@gmail.com>
Date: Fri, 2 Apr 2021 19:37:00 -0400
Subject: org mode (ruby code within)

---
 org/db.org | 4808 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 4808 insertions(+)
 create mode 100644 org/db.org

diff --git a/org/db.org b/org/db.org
new file mode 100644
index 00000000..4a2f216b
--- /dev/null
+++ b/org/db.org
@@ -0,0 +1,4808 @@
+-*- mode: org -*-
+#+TITLE: sisu db sql
+#+DESCRIPTION: documents - structuring, various output representations & search
+#+FILETAGS: :sisu:db:sql:
+#+AUTHOR: Ralph Amissah
+#+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]]
+#+COPYRIGHT: Copyright (C) 2015 - 2021 Ralph Amissah
+#+LANGUAGE: en
+#+STARTUP: content hideblocks hidestars noindent entitiespretty
+#+OPTIONS: H:3 num:nil toc:t \n:nil @:t ::t |:t ^:nil _:nil -:t f:t *:t <:t
+#+PROPERTY: header-args :exports code
+#+PROPERTY: header-args+ :noweb yes
+#+PROPERTY: header-args+ :eval no
+#+PROPERTY: header-args+ :results no
+#+PROPERTY: header-args+ :cache no
+#+PROPERTY: header-args+ :padline no
+
+* dbi.rb
+
+#+BEGIN_SRC ruby :tangle "../lib/sisu/dbi.rb"
+# <>
+module SiSU_DBI #% database building
+  require_relative 'se'                # se.rb
+  include SiSU_Env; include SiSU_Screen
+  require_relative 'dp'                # dp.rb
+  include SiSU_Param
+  require_relative 'db_dbi'            # db_dbi.rb
+  include SiSU_DbDBI
+  require_relative 'html_lite_shared'  # html_lite_shared.rb
+  include SiSU_FormatShared
+  class SQL
+    def initialize(opt)
+      @opt=opt
+      @db=SiSU_Env::InfoDb.new
+      if @opt.act[:psql][:set]==:on \
+      or @opt.act[:sqlite][:set]==:on
+        @sql_type=if @opt.act[:psql][:set]==:on
+          maintenance_check(@opt,__FILE__,__LINE__) if @opt.act[:maintenance][:set]==:on
+          :pg
+        elsif @opt.act[:psql][:set]==:on
+          maintenance_check(@opt,__FILE__,__LINE__) if @opt.act[:maintenance][:set]==:on
+          :pg
+        elsif @opt.act[:sqlite][:set]==:on
+          maintenance_check(@opt,__FILE__,__LINE__) if @opt.act[:maintenance][:set]==:on
+          :sqlite
+        elsif @opt.act[:sqlite][:set]==:on
+          maintenance_check(@opt,__FILE__,__LINE__) if @opt.act[:maintenance][:set]==:on
+          :sqlite
+        else
+          maintenance_check(@opt,__FILE__,__LINE__) if @opt.act[:maintenance][:set]==:on
+          :sqlite
+        end
+        if @sql_type==:pg then SiSU_Env::Load.new('pg',true).prog
+        elsif @sql_type==:sqlite then SiSU_Env::Load.new('sqlite3',true).prog
+        end
+      end
+    end
+    def maintenance_check(opt,file,line)
+      p opt.selections.str
+      p "at #{file} #{line}"
+    end
+    def read_psql
+      begin
+        begin
+          require 'pg'
+        rescue LoadError
+          SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia).
+            error('pg NOT FOUND (LoadError)')
+        end
+        @conn=@db.psql.conn_pg
+      rescue
+        if @opt.act[:psql_create][:set]==:on
+          cX=SiSU_Screen::Ansi.new(@opt.act[:color_state][:set]).cX
+          puts <<-WOK
+manually create the database: "#{cX.green}#{@db.db}#{cX.off}" if it does not yet exist
+  #{cX.yellow}createdb #{@db.db}#{cX.off}
+          WOK
+          #sudo su -p postgres; createdb #{@db.db}; #[createuser?]
+        end
+      ensure
+      end
+    end
+    def read_sqlite
+      begin
+        begin
+          begin
+            require 'sqlite3'
+          rescue LoadError
+            SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia).
+              error('sqlite3 NOT FOUND (LoadError)')
+          end
+          @conn=@db.sqlite.conn_sqlite3
+        rescue LoadError
+          errmsg='sqlite3 NOT FOUND (LoadError)'
+          if @opt.act[:no_stop][:set]==:on
+            SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia).
+ error(errmsg + ', ' + 'attempt to proceed without sqlite output (as requested)') + else + SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia). + error(errmsg + ', ' + 'STOPPING') + exit + end + end + end + end + def connect + case @sql_type + when :pg then read_psql #read_pg + when :sqlite then read_sqlite + end + SiSU_Screen::Ansi.new( + @opt.act[:color_state][:set], + "SQL DB #{@sql_type.upcase}", + @opt.fno + ).dbi_title unless @opt.act[:quiet][:set]==:on + begin + SiSU_DbDBI::Case.new(@opt,@conn,@sql_type).cases + rescue + SiSU_Errors::Rescued.new($!,$@,@cf,@opt.fns).location do + __LINE__.to_s + ':' + __FILE__ + end + ensure + end + begin + rescue + connect + end + end + end +end +__END__ +#+END_SRC + +* dbi_discrete.rb + +#+BEGIN_SRC ruby :tangle "../lib/sisu/dbi_discrete.rb" +# <> +module SiSU_DBI_Discrete #% database building + require_relative 'se' # se.rb + include SiSU_Env; include SiSU_Screen + require_relative 'dp' # dp.rb + include SiSU_Param + require_relative 'db_dbi' # db_dbi.rb + include SiSU_DbDBI + require_relative 'html_lite_shared' # html_lite_shared.rb + include SiSU_FormatShared + begin + require 'fileutils' + include FileUtils::Verbose + rescue LoadError + SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia). + error('fileutils NOT FOUND (LoadError)') + end + class SQL + def initialize(opt) + SiSU_Env::Load.new('sqlite3',true).prog + @opt=opt + @particulars=SiSU_Particulars::CombinedSingleton.instance.get_all(opt) + @md=@particulars.md + if @opt.act[:sqlite][:set]==:on + @sql_type=:sqlite + if @opt.act[:maintenance][:set]==:on + maintenance_check(@opt,__FILE__,__LINE__) + end + end + @output_path=@md.file.output_path.sqlite_discrete.dir + @filename=@md.file.base_filename.sqlite_discrete + @file_maint=sql_maintenance_file + @file="#{@output_path}/#{@filename}" + end + def build + prepare + create_and_populate + end + def maintenance_check(opt,file,line) + #p opt.selections.str + p "at #{file} #{line}" + end + def prepare + if not FileTest.directory?(@output_path) + FileUtils::mkdir_p(@output_path) + elsif @file + FileUtils::rm_rf(@file) + end + end + def db_exist?(db,conn) + msg=%{no connection with sqlite database established, createdb "#{db.sqlite.db}"?} + if (not (FileTest.file?(db.sqlite.db)) \ + or FileTest.zero?(db.sqlite.db)) + puts msg + exit + end + if conn.is_a?(NilClass) + puts msg + exit + end + end + def create_and_populate + db=SiSU_Env::DbOp.new(@md) + conn=db.sqlite_discrete.conn_sqlite3 + sdb=SiSU_DbDBI::Create.new(@opt,conn,@file,:sqlite) + sdb_index=SiSU_DbDBI::Index.new(@opt,conn,@file,:sqlite) + sdb.output_dir? + begin + SiSU_Screen::Ansi.new( + @opt.act[:color_state][:set], + 'SQLite (discrete)', + "[#{@opt.f_pth[:lng_is]}] #{@opt.fno}" + ).green_title_hi unless @opt.act[:quiet][:set]==:on + sdb.create_db + sdb.create_table.metadata_and_text + sdb.create_table.doc_objects + sdb.create_table.endnotes + sdb.create_table.endnotes_asterisk + sdb.create_table.endnotes_plus + sdb.create_table.urls + sdb_index.create_indexes + db_exist?(db,conn) + sdb_import=SiSU_DbDBI::Import.new(@opt,conn,@file_maint,:sqlite) + sdb_import.marshal_load + tell=SiSU_Screen::Ansi.new( + @opt.act[:color_state][:set], + "sqlite3 #{db.sqlite.db} database?" + ) + tell.puts_grey if @opt.act[:verbose][:set]==:on + rescue + SiSU_Errors::Rescued.new($!,$@,'-d').location do + __LINE__.to_s + ':' + __FILE__ + end + sdb.output_dir? 
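      # create_and_populate above builds one self-contained SQLite file per
      # document at "#{@output_path}/#{@filename}": create_db, the
      # create_table.* calls, create_indexes, then Import#marshal_load fills
      # the tables. A minimal sketch of inspecting such a file afterwards with
      # the sqlite3 gem; the filename is a placeholder and the column names
      # are taken on the assumption that db_columns.rb names them as in the
      # CREATE TABLE statements in db_create.rb:
      #
      #   require 'sqlite3'
      #   db = SQLite3::Database.new('document.sqlite.db')
      #   db.execute('SELECT ocn, clean FROM doc_objects LIMIT 3') { |row| p row }
      #   db.execute('SELECT title, creator_author FROM metadata_and_text') { |r| p r }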
+ end + end + def read_sqlite + begin + begin + require 'sqlite3' + @conn=@db.sqlite.conn_sqlite3 + rescue LoadError + SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia).error('sqlite3 not available') + ensure + Dir.chdir(@opt.f_pth[:pth]) + end + end + end + def connect + begin + SiSU_Screen::Ansi.new( + @opt.act[:color_state][:set], + "DBI (#{@sql_type}) #{@opt.selections.str}", + @opt.fns + ).dbi_title unless @opt.act[:quiet][:set]==:on + @db.sqlite_discrete.conn_sqlite3 + rescue + SiSU_Errors::Rescued.new($!,$@,@cf,@opt.fns).location do + __LINE__.to_s + ':' + __FILE__ + end + ensure + end + end + def populate + end + def sql_maintenance_file + file=if @opt.act[:maintenance][:set]==:on + if @opt.fns and not @opt.fns.empty? + @env=SiSU_Env::InfoEnv.new(@opt.fns) if @opt.fns + if @sql_type ==:sqlite + puts "\n#{@env.processing_path.sqlite}/#{@opt.fns}.sql" + end + @db=SiSU_Env::InfoDb.new + @job="sqlite3 #{@db.sqlite.db} < #{@env.processing_path.sqlite}/#{@opt.fns}.sql" + if @sql_type ==:sqlite + File.new("#{@env.processing_path.sqlite}/#{@opt.fns}.sql",'w+') + else + File.new("#{@env.processing_path.postgresql}/#{@opt.fns}.sql",'w+') + end + elsif @opt.fns \ + and @opt.fns.inspect =~/create/ + nil #sort variations later + else nil + end + else nil + end + file + end + end +end +__END__ +#+END_SRC + +* db_dbi.rb + +#+BEGIN_SRC ruby :tangle "../lib/sisu/db_dbi.rb" +# <> +module SiSU_DbDBI + require_relative 'db_columns' # db_columns.rb + require_relative 'db_tests' # db_tests.rb + require_relative 'db_create' # db_create.rb + require_relative 'db_select' # db_select.rb + require_relative 'db_indexes' # db_indexes.rb + require_relative 'db_drop' # db_drop.rb + require_relative 'db_remove' # db_remove.rb + require_relative 'db_load_tuple' # db_load_tuple.rb + require_relative 'db_import' # db_import.rb + class ColumnSize < SiSU_DbColumns::ColumnSize # db_columns.rb + end + class Test < SiSU_DbTests::Test # db_tests.rb + end + class Create > +module SiSU_DbText + class Prepare + def special_character_escape(str) + str=str.to_s.gsub(/'/m,"''"). #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'") + gsub(/(\\)/m,'\1\1'). #ok but with warnings, double backslash on sqlite #str.gsub!(/[\\]/m,'\\x5C') #ok but with warnings, but not for sqlite #str.gsub!(/(\\)/m,'\1') #ok for sqlite not for pgsql + gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/m,"
\n"). + gsub(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/m,''). #check + gsub(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/m,'[image: \1] \2'). + gsub(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/m,'\1\2'). + gsub(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/m,'\1') + end + def clean_searchable_text_from_document_objects(arr) + en=[] + arr=(arr.is_a?(String)) ? [ arr ] : arr + txt_arr=arr.each.map do |s| + s=s.gsub(/#{Mx[:fa_o]}[a-z]{1,4}#{Mx[:fa_o_c]}/m,''). + gsub(/#{Mx[:fa_c_o]}[a-z]{1,4}#{Mx[:fa_c]}/m,''). + gsub(/
/m,' ') + en << s.scan(/#{Mx[:en_a_o]}\s*(.+?)\s*#{Mx[:en_a_c]}/m) + s=s.gsub(/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/m,''). + gsub(/#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m,''). + gsub(/ \s+/m,' ') + #p s if s =~/[^ \nA-Za-z0-9'"`?!#@$%^&*=+,.;:\[\]()<>{}‹›|\\\/~_-]/ + s + end + txt_arr=txt_arr << en + txt=txt_arr.flatten.join("\n") + special_character_escape(txt) + end + def clean_document_objects_body(arr) + en=[] + arr=(arr.is_a?(String)) ? [ arr ] : arr + txt_arr=arr.each.map do |s| + en << s.scan(/#{Mx[:en_a_o]}\s*(.+?)\s*#{Mx[:en_a_c]}/m) + s=s. + gsub(/#{Mx[:en_a_o]}\s*(\d+).+?#{Mx[:en_a_c]}/m, + '\1'). + gsub(/#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m,''). + gsub(/ \s+/m,' ') + s + end + en_arr=en.flatten.each.map do |e| + e.sub(/^(\d+)\s*/,'\1 ') + end + txt_arr=txt_arr << en_arr + txt=txt_arr.flatten.join("\n
") + special_character_escape(txt) + end + def clean_searchable_text_from_document_source(arr) + txt_arr,en=[],[] + arr=(arr.is_a?(String)) ? arr.split(/\n+/m) : arr + arr.each do |s| + s=s.gsub(/([*\/_-])\{(.+?)\}\1/m,'\2'). + gsub(/^(?:block|group|poem|code)\{/m,''). + gsub(/^\}(?:block|group|poem|code)/m,''). + gsub(/\A(?:@\S+:\s+.+)\Z/m,'') + if s =~/^:A~/ + if defined? @md.creator \ + and defined? @md.creator.author \ + and not @md.creator.author.empty? + s=s.gsub(/@author/,@md.creator.author) + else + SiSU_Screen::Ansi.new( + 'v', + 'WARNING Document Author information missing; provide @creator: :author:', + @md.fnb + ).warn unless @md.opt.act[:quiet][:set]==:on + end + if defined? @md.title \ + and defined? @md.title.full \ + and not @md.title.full.empty? + s=s.gsub(/@title/,@md.title.full) + else + SiSU_Screen::Ansi.new( + 'v', + 'WARNING Document Title missing; provide @title:', + @md.fnb + ).warn unless @md.opt.act[:quiet][:set]==:on + end + end + s=s.gsub(/^(?:_[1-9]\*?|_\*)\s+/m,''). + gsub(/^(?:[1-9]\~(\S+)?)\s+/m,''). + gsub(/^(?::?[A-C]\~(\S+)?)\s+/m,''). + gsub(/^%{1,3} .+/m,''). #removed even if contained in code block + gsub(/
/m,' ') + #en << s.scan(/~\{\s*(.+?)\s*\}~/m) + s=s.gsub(/~\{.+?\}~/m,''). + gsub(/ \s+/m,' ') + ##special_character_escape(s) + #p s if s =~/[^ \nA-Za-z0-9'"`?!#@$%^&*=+,.;:\[\]()<>{}‹›|\\\/~_-]/ + s + end + txt_arr << arr << en + txt=txt_arr.flatten.join("\n") + txt=special_character_escape(txt) + txt + end + def strip_markup(str) #define rules, make same as in dal clean + str=str.gsub(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]'). + gsub(/(?: \\;|#{Mx[:nbsp]})+/,' '). + gsub(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1'). #tables + gsub(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' '). #tables + gsub(/#{Mx[:tc_p]}/u,' '). #tables tidy later + gsub(/<.+?>/,''). + gsub(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:file|ftp)\/\/:\S+ /,' [image] '). # else image names found in search + gsub(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [image]'). # else image names found in search + gsub(/\s\s+/,' '). + strip + end + def unique_words(str) + a=str.scan(/[a-zA-Z0-9\\\/_-]{2,}/) #a=str.scan(/\S+{2,}/) + str=a.uniq.sort.join(' ') + str + end + end +end +__END__ +#+END_SRC + +* create drop import remove +** db_create.rb + +#+BEGIN_SRC ruby :tangle "../lib/sisu/db_create.rb" +# <> +module SiSU_DbCreate + require_relative 'db_columns' # db_columns.rb + class Create < SiSU_DbColumns::Columns + require_relative 'se' # se.rb + @@dl=nil + def initialize(opt,conn,file,sql_type=:pg) + @opt,@conn,@file,@sql_type=opt,conn,file,sql_type + @cX=SiSU_Screen::Ansi.new(@opt.act[:color_state][:set]).cX + @comment=(@sql_type==:pg) \ + ? (SiSU_DbCreate::Comment.new(@conn,@sql_type)) + : nil + @@dl ||=SiSU_Env::InfoEnv.new.digest.length + end + def available + DBI.available_drivers.each do |driver| + puts "Driver: #{driver}" + DBI.data_sources(driver).each do |dsn| + puts "\tDatasource: #{dsn}" + end + end + end + def create_db + @env=SiSU_Env::InfoEnv.new(@opt.fns) + tell=(@sql_type==:sqlite) \ + ? SiSU_Screen::Ansi.new( + @opt.act[:color_state][:set], + 'Create SQLite db tables in:', + %{"#{@file}"} + ) + : SiSU_Screen::Ansi.new( + @opt.act[:color_state][:set], + 'Create pgSQL db tables in:', + %{"#{Db[:name_prefix]}#{@env.path.base_markup_dir_stub}"} + ) + if (@opt.act[:verbose][:set]==:on \ + || @opt.act[:verbose_plus][:set]==:on \ + || @opt.act[:maintenance][:set]==:on) + tell.dark_grey_title_hi + end + SiSU_Env::SystemCall.new.create_pg_db(@env.path.base_markup_dir_stub) if @sql_type==:pg #watch use of path.base_markup_dir_stub instead of stub + end + def output_dir? + dir=SiSU_Env::InfoEnv.new('') + if @opt.act[:sqlite][:set]==:on + dir.path.webserv_stub_ensure + end + end + def create_table + def conn_exec(sql) + if @sql_type==:pg + conn_exec_pg(sql) + elsif @sql_type==:sqlite + conn_exec_sqlite(sql) + end + end + def conn_exec_pg(sql) + begin + @conn.exec_params(sql) + rescue + if @conn.is_a?(NilClass) + errmsg="No pg connection (check pg dependencies)" + if @opt.act[:no_stop][:set]==:on + SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia). + error("#{errmsg}, proceeding without pg output (as requested)") + else + SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia). + error("#{errmsg}, STOPPING") + exit + end + end + end + end + def conn_exec_sqlite(sql) + begin + @conn.execute(sql) + rescue + if @conn.is_a?(NilClass) + errmsg="No sqlite3 connection (check sqlite3 dependencies)" + if @opt.act[:no_stop][:set]==:on + SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia). 
+ error("#{errmsg}, proceeding without sqlite output (as requested)") + else + SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia). + error("#{errmsg}, STOPPING") + exit + end + end + end + end + def metadata_and_text + if (@opt.act[:verbose_plus][:set]==:on \ + or @opt.act[:maintenance][:set]==:on) + print %{ + currently using sisu_dbi module + to be populated from document files + create tables metadata_and_text + data import through ruby transfer + } + end + create_metadata_and_text=%{ + CREATE TABLE metadata_and_text ( + tid BIGINT PRIMARY KEY, + /* title */ + #{column.title.create_column} + #{column.title_main.create_column} + #{column.title_sub.create_column} + #{column.title_short.create_column} + #{column.title_edition.create_column} + #{column.title_note.create_column} + #{column.title_language.create_column} + #{column.title_language_char.create_column} + /* creator */ + #{column.creator_author.create_column} + #{column.creator_author_honorific.create_column} + #{column.creator_author_nationality.create_column} + #{column.creator_editor.create_column} + #{column.creator_contributor.create_column} + #{column.creator_illustrator.create_column} + #{column.creator_photographer.create_column} + #{column.creator_translator.create_column} + #{column.creator_prepared_by.create_column} + #{column.creator_digitized_by.create_column} + #{column.creator_audio.create_column} + #{column.creator_video.create_column} + /* language */ + #{column.language_document.create_column} + #{column.language_document_char.create_column} + #{column.language_original.create_column} + #{column.language_original_char.create_column} + /* date */ + #{column.date_added_to_site.create_column} + #{column.date_available.create_column} + #{column.date_created.create_column} + #{column.date_issued.create_column} + #{column.date_modified.create_column} + #{column.date_published.create_column} + #{column.date_valid.create_column} + #{column.date_translated.create_column} + #{column.date_original_publication.create_column} + #{column.date_generated.create_column} + /* publisher */ + #{column.publisher.create_column} + /* original */ + #{column.original_publisher.create_column} + #{column.original_language.create_column} + #{column.original_language_char.create_column} + #{column.original_source.create_column} + #{column.original_institution.create_column} + #{column.original_nationality.create_column} + /* rights */ + #{column.rights_all.create_column} + #{column.rights_copyright_text.create_column} + #{column.rights_copyright_translation.create_column} + #{column.rights_copyright_illustrations.create_column} + #{column.rights_copyright_photographs.create_column} + #{column.rights_copyright_preparation.create_column} + #{column.rights_copyright_digitization.create_column} + #{column.rights_copyright_audio.create_column} + #{column.rights_copyright_video.create_column} + #{column.rights_license.create_column} + /* classify */ + #{column.classify_topic_register.create_column} + #{column.classify_subject.create_column} + #{column.classify_loc.create_column} + #{column.classify_dewey.create_column} + #{column.classify_keywords.create_column} + /* identifier */ + #{column.identifier_oclc.create_column} + #{column.identifier_isbn.create_column} + /* notes */ + #{column.notes_abstract.create_column} + #{column.notes_description.create_column} + #{column.notes_comment.create_column} + #{column.notes_history.create_column} + #{column.notes_coverage.create_column} + #{column.notes_relation.create_column} + /* 
column.notes_source.create_column */ + #{column.notes_type.create_column} + #{column.notes_format.create_column} + #{column.notes_prefix.create_column} + #{column.notes_prefix_a.create_column} + #{column.notes_prefix_b.create_column} + #{column.notes_suffix.create_column} + /* src */ + #{column.src_filename.create_column} + #{column.src_fingerprint.create_column} + #{column.src_filesize.create_column} + #{column.src_word_count.create_column} + #{column.src_txt.create_column} + /* misc */ + #{column.fulltext.create_column} + #{column.links.create_column.gsub(/,$/,'')} +/* subj VARCHAR(64) NULL, */ +/* contact VARCHAR(100) NULL, */ +/* information VARCHAR(100) NULL, */ +/* types CHAR(1) NULL, */ +/* writing_focus_nationality VARCHAR(100) NULL, */ + ); + } + conn_exec(create_metadata_and_text) + @comment.psql.metadata_and_text if @comment + end + def doc_objects # create doc_objects base + if (@opt.act[:verbose_plus][:set]==:on \ + or @opt.act[:maintenance][:set]==:on) + print %{ + to be populated from documents files + create tables doc_objects + data import through ruby transfer + } + end + create_doc_objects=%{ + CREATE TABLE doc_objects ( + lid BIGINT PRIMARY KEY, + metadata_tid BIGINT REFERENCES metadata_and_text, + ocn SMALLINT, + ocnd VARCHAR(6), + ocns VARCHAR(6), + clean TEXT NULL, + body TEXT NULL, + book_idx TEXT NULL, + seg VARCHAR(256) NULL, + lev_an VARCHAR(1), + lev SMALLINT NULL, + lev0 SMALLINT, + lev1 SMALLINT, + lev2 SMALLINT, + lev3 SMALLINT, + lev4 SMALLINT, + lev5 SMALLINT, + lev6 SMALLINT, + lev7 SMALLINT, + en_a SMALLINT NULL, + en_z SMALLINT NULL, + en_a_asterisk SMALLINT NULL, + en_z_asterisk SMALLINT NULL, + en_a_plus SMALLINT NULL, + en_z_plus SMALLINT NULL, + t_of VARCHAR(16), + t_is VARCHAR(16), + node VARCHAR(16) NULL, + parent VARCHAR(16) NULL, + digest_clean CHAR(#{@@dl}), + digest_all CHAR(#{@@dl}), + types CHAR(1) NULL + ); + } + conn_exec(create_doc_objects) + @comment.psql.doc_objects if @comment + end + def endnotes + if (@opt.act[:verbose_plus][:set]==:on \ + or @opt.act[:maintenance][:set]==:on) + print %{ + to be populated from document files + create tables endnotes + data import through ruby transfer + } + end + create_endnotes=%{ + CREATE TABLE endnotes ( + nid BIGINT PRIMARY KEY, + document_lid BIGINT REFERENCES doc_objects, + nr SMALLINT, + clean TEXT NULL, + body TEXT NULL, + ocn SMALLINT, + ocnd VARCHAR(6), + ocns VARCHAR(6), + digest_clean CHAR(#{@@dl}), + metadata_tid BIGINT REFERENCES metadata_and_text + ); + } + conn_exec(create_endnotes) + @comment.psql.endnotes if @comment + end + def endnotes_asterisk + if (@opt.act[:verbose_plus][:set]==:on \ + or @opt.act[:maintenance][:set]==:on) + print %{ + to be populated from document files + create tables endnotes_asterisk + data import through ruby transfer + } + end + create_endnotes_asterisk=%{ + CREATE TABLE endnotes_asterisk ( + nid BIGINT PRIMARY KEY, + document_lid BIGINT REFERENCES doc_objects, + nr SMALLINT, + clean TEXT NULL, + body TEXT NULL, + ocn SMALLINT, + ocnd VARCHAR(6), + ocns VARCHAR(6), + digest_clean CHAR(#{@@dl}), + metadata_tid BIGINT REFERENCES metadata_and_text + ); + } + conn_exec(create_endnotes_asterisk) + @comment.psql.endnotes_asterisk if @comment + end + def endnotes_plus + if (@opt.act[:verbose_plus][:set]==:on \ + or @opt.act[:maintenance][:set]==:on) + print %{ + to be populated from document files + create tables endnotes_plus + data import through ruby transfer + } + end + create_endnotes_plus=%{ + CREATE TABLE endnotes_plus ( + nid BIGINT PRIMARY KEY, + 
document_lid BIGINT REFERENCES doc_objects, + nr SMALLINT, + clean TEXT NULL, + body TEXT NULL, + ocn SMALLINT, + ocnd VARCHAR(6), + ocns VARCHAR(6), + digest_clean CHAR(#{@@dl}), + metadata_tid BIGINT REFERENCES metadata_and_text + ); + } + conn_exec(create_endnotes_plus) + @comment.psql.endnotes_plus if @comment + end + def urls # create doc_objects file links mapping + if (@opt.act[:verbose_plus][:set]==:on \ + or @opt.act[:maintenance][:set]==:on) + print %{ + currently using sisu_dbi module + to be populated from doc_objects files + create tables urls + data import through ruby transfer + } + end + create_urls=%{ + CREATE TABLE urls ( + metadata_tid BIGINT REFERENCES metadata_and_text, + plaintext varchar(512), + html_toc varchar(512), + html_doc varchar(512), + xhtml varchar(512), + xml_sax varchar(512), + xml_dom varchar(512), + odf varchar(512), + pdf_p varchar(512), + pdf_l varchar(512), + concordance varchar(512), + latex_p varchar(512), + latex_l varchar(512), + digest varchar(512), + manifest varchar(512), + markup varchar(512), + sisupod varchar(512) + ); + } + conn_exec(create_urls) + @comment.psql.urls if @comment + end + self + end + end + class Comment < SiSU_DbColumns::Columns + def initialize(conn,sql_type=:pg) + @conn=conn + if sql_type == :pg then psql + end + end + def psql + def conn_execute_array(sql_arr) + @conn.transaction do |conn| + sql_arr.each do |sql| + conn.exec_params(sql) + end + end + end + def metadata_and_text + sql_arr=[ + %{COMMENT ON Table metadata_and_text + IS 'contains SiSU metadata and fulltext for search (including source .sst if shared)';}, + %{COMMENT ON COLUMN metadata_and_text.tid + IS 'unique';}, + %{#{column.title.column_comment}}, + %{#{column.title_main.column_comment}}, + %{#{column.title_sub.column_comment}}, + %{#{column.title_short.column_comment}}, + %{#{column.title_edition.column_comment}}, + %{#{column.title_note.column_comment}}, + %{#{column.title_language.column_comment}}, + %{#{column.title_language_char.column_comment}}, + %{#{column.creator_author.column_comment}}, + %{#{column.creator_author_honorific.column_comment}}, + %{#{column.creator_author_nationality.column_comment}}, + %{#{column.creator_editor.column_comment}}, + %{#{column.creator_contributor.column_comment}}, + %{#{column.creator_illustrator.column_comment}}, + %{#{column.creator_photographer.column_comment}}, + %{#{column.creator_translator.column_comment}}, + %{#{column.creator_prepared_by.column_comment}}, + %{#{column.creator_digitized_by.column_comment}}, + %{#{column.creator_audio.column_comment}}, + %{#{column.creator_video.column_comment}}, + %{#{column.language_document.column_comment}}, + %{#{column.language_document_char.column_comment}}, + %{#{column.language_original.column_comment}}, + %{#{column.language_original_char.column_comment}}, + %{#{column.date_added_to_site.column_comment}}, + %{#{column.date_available.column_comment}}, + %{#{column.date_created.column_comment}}, + %{#{column.date_issued.column_comment}}, + %{#{column.date_modified.column_comment}}, + %{#{column.date_published.column_comment}}, + %{#{column.date_valid.column_comment}}, + %{#{column.date_translated.column_comment}}, + %{#{column.date_original_publication.column_comment}}, + %{#{column.date_generated.column_comment}}, + %{#{column.publisher.column_comment}}, + %{#{column.original_publisher.column_comment}}, + %{#{column.original_language.column_comment}}, + %{#{column.original_language_char.column_comment}}, + %{#{column.original_source.column_comment}}, + 
%{#{column.original_institution.column_comment}}, + %{#{column.original_nationality.column_comment}}, + %{#{column.rights_all.column_comment}}, + %{#{column.rights_copyright_text.column_comment}}, + %{#{column.rights_copyright_translation.column_comment}}, + %{#{column.rights_copyright_illustrations.column_comment}}, + %{#{column.rights_copyright_photographs.column_comment}}, + %{#{column.rights_copyright_preparation.column_comment}}, + %{#{column.rights_copyright_digitization.column_comment}}, + %{#{column.rights_copyright_audio.column_comment}}, + %{#{column.rights_copyright_video.column_comment}}, + %{#{column.rights_license.column_comment}}, + %{#{column.classify_topic_register.column_comment}}, + %{#{column.classify_subject.column_comment}}, + %{#{column.classify_loc.column_comment}}, + %{#{column.classify_dewey.column_comment}}, + %{#{column.classify_keywords.column_comment}}, + %{#{column.identifier_oclc.column_comment}}, + %{#{column.identifier_isbn.column_comment}}, + %{#{column.notes_abstract.column_comment}}, + %{#{column.notes_comment.column_comment}}, + %{#{column.notes_description.column_comment}}, + %{#{column.notes_history.column_comment}}, + %{#{column.notes_coverage.column_comment}}, + %{#{column.notes_relation.column_comment}}, + %{#{column.notes_type.column_comment}}, + %{#{column.notes_format.column_comment}}, + %{#{column.notes_prefix.column_comment}}, + %{#{column.notes_prefix_a.column_comment}}, + %{#{column.notes_prefix_b.column_comment}}, + %{#{column.notes_suffix.column_comment}}, + %{#{column.src_filename.column_comment}}, + %{#{column.src_fingerprint.column_comment}}, + %{#{column.src_filesize.column_comment}}, + %{#{column.src_word_count.column_comment}}, + %{#{column.src_txt.column_comment}}, + %{#{column.fulltext.column_comment}}, + %{#{column.links.column_comment}}, + ] + conn_execute_array(sql_arr) + end + def doc_objects + sql_arr=[ + %{COMMENT ON Table doc_objects + IS 'contains searchable text of SiSU document objects';}, + %{COMMENT ON COLUMN doc_objects.lid + IS 'unique';}, + %{COMMENT ON COLUMN doc_objects.metadata_tid + IS 'tie to title in metadata_and_text';}, + %{COMMENT ON COLUMN doc_objects.lev_an + IS 'doc level A-D 1-4';}, + %{COMMENT ON COLUMN doc_objects.lev + IS 'doc level 0-7 \d\~';}, + %{COMMENT ON COLUMN doc_objects.seg + IS 'segment name from level number 4 (lv 1)';}, + %{COMMENT ON COLUMN doc_objects.ocn + IS 'object citation number';}, + %{COMMENT ON COLUMN doc_objects.en_a + IS 'first endnote number in text object (eg. NULL or 34) (used with en_z to create range)';}, + %{COMMENT ON COLUMN doc_objects.en_z + IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a to create range)';}, + %{COMMENT ON COLUMN doc_objects.en_a_asterisk + IS 'first endnote number in text object (eg. NULL or 34) (used with en_z_asterisk to create range)';}, + %{COMMENT ON COLUMN doc_objects.en_z_asterisk + IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_asterisk to create range)';}, + %{COMMENT ON COLUMN doc_objects.en_a_plus + IS 'first endnote number in text object (eg. NULL or 34) (used with en_z_plus to create range)';}, + %{COMMENT ON COLUMN doc_objects.en_z_plus + IS 'last endnote number within text object (eg. 
NULL, 34 or say 47) (used with en_a_plus to create range)';}, + %{COMMENT ON COLUMN doc_objects.types + IS 'document types seg scroll';}, + %{COMMENT ON COLUMN doc_objects.clean + IS 'text object - substantive text: clean, stripped of markup';}, + %{COMMENT ON COLUMN doc_objects.body + IS 'text object - substantive text: light html markup';}, + %{COMMENT ON COLUMN doc_objects.book_idx + IS 'book index creation information for paragraph, if provided';}, + %{COMMENT ON COLUMN doc_objects.lev0 + IS 'document structure, level number 0';}, + %{COMMENT ON COLUMN doc_objects.lev1 + IS 'document structure, level number 1';}, + %{COMMENT ON COLUMN doc_objects.lev2 + IS 'document structure, level number 2';}, + %{COMMENT ON COLUMN doc_objects.lev3 + IS 'document structure, level number 3';}, + %{COMMENT ON COLUMN doc_objects.lev4 + IS 'document structure, level number 4';}, + %{COMMENT ON COLUMN doc_objects.lev5 + IS 'document structure, level number 5';}, + %{COMMENT ON COLUMN doc_objects.lev6 + IS 'document structure, level number 6';}, + %{COMMENT ON COLUMN doc_objects.lev7 + IS 'document structure, level number 7';}, + %{COMMENT ON COLUMN doc_objects.t_of + IS 'document structure, type of object (object is of)';}, + %{COMMENT ON COLUMN doc_objects.t_is + IS 'document structure, object is';}, + %{COMMENT ON COLUMN doc_objects.node + IS 'document structure, object node if heading';}, + %{COMMENT ON COLUMN doc_objects.parent + IS 'document structure, object parent (is a heading)';} + ] + conn_execute_array(sql_arr) + end + def endnotes + sql_arr=[ + %{COMMENT ON Table endnotes + IS 'contains searchable text of SiSU documents endnotes';}, + %{COMMENT ON COLUMN endnotes.nid + IS 'unique';}, + %{COMMENT ON COLUMN endnotes.document_lid + IS 'ties to text block from which referenced';}, + %{COMMENT ON COLUMN endnotes.nr + IS 'endnote number ';}, + %{COMMENT ON COLUMN endnotes.clean + IS 'endnote substantive content, stripped of markup';}, + %{COMMENT ON COLUMN endnotes.body + IS 'endnote substantive content';}, + %{COMMENT ON COLUMN endnotes.ocn + IS 'object citation no# <\~(\d+)> from which endnote is referenced';}, + %{COMMENT ON COLUMN doc_objects.metadata_tid + IS 'tie to title in metadata_and_text - unique for each document';} + ] + conn_execute_array(sql_arr) + end + def endnotes_asterisk + sql_arr=[ + %{COMMENT ON Table endnotes_asterisk + IS 'contains searchable text of SiSU documents endnotes marked with asterisk';}, + %{COMMENT ON COLUMN endnotes_asterisk.nid + IS 'unique';}, + %{COMMENT ON COLUMN endnotes_asterisk.document_lid + IS 'ties to text block from which referenced';}, + %{COMMENT ON COLUMN endnotes_asterisk.nr + IS 'endnote number ';}, + %{COMMENT ON COLUMN endnotes_asterisk.clean + IS 'endnote substantive content, stripped of markup';}, + %{COMMENT ON COLUMN endnotes_asterisk.body + IS 'endnote substantive content';}, + %{COMMENT ON COLUMN endnotes_asterisk.ocn + IS 'object citation no# <\~(\d+)> from which endnote is referenced';}, + %{COMMENT ON COLUMN doc_objects.metadata_tid + IS 'tie to title in metadata_and_text - unique for each document';} + ] + conn_execute_array(sql_arr) + end + def endnotes_plus + sql_arr=[ + %{COMMENT ON Table endnotes_plus + IS 'contains searchable text of SiSU documents endnotes marked with plus';}, + %{COMMENT ON COLUMN endnotes_plus.nid + IS 'unique';}, + %{COMMENT ON COLUMN endnotes_plus.document_lid + IS 'ties to text block from which referenced';}, + %{COMMENT ON COLUMN endnotes_plus.nr + IS 'endnote number ';}, + %{COMMENT ON COLUMN 
endnotes_plus.clean + IS 'endnote substantive content, stripped of markup';}, + %{COMMENT ON COLUMN endnotes_plus.body + IS 'endnote substantive content';}, + %{COMMENT ON COLUMN endnotes_plus.ocn + IS 'object citation no# <\~(\d+)> from which endnote is referenced';}, + %{COMMENT ON COLUMN doc_objects.metadata_tid + IS 'tie to title in metadata_and_text - unique for each document';}, + ] + conn_execute_array(sql_arr) + end + def urls + sql_arr=[ + %{COMMENT ON Table urls + IS 'contains base url links to different SiSU output';}, + %{COMMENT ON COLUMN doc_objects.metadata_tid + IS 'tie to title in metadata_and_text - unique for each document, the mapping of rows is one to one';}, + %{COMMENT ON COLUMN urls.plaintext + IS 'plaintext utf-8';}, + %{COMMENT ON COLUMN urls.html_toc + IS 'table of contents for segmented html document';}, + %{COMMENT ON COLUMN urls.html_doc + IS 'html document (scroll)';}, + %{COMMENT ON COLUMN urls.xhtml + IS 'xhtml document (scroll)';}, + %{COMMENT ON COLUMN urls.xml_sax + IS 'xml sax oriented document (scroll)';}, + %{COMMENT ON COLUMN urls.xml_dom + IS 'xml dom oriented document (scroll)';}, + %{COMMENT ON COLUMN urls.odf + IS 'opendocument format text';}, + %{COMMENT ON COLUMN urls.pdf_p + IS 'pdf portrait';}, + %{COMMENT ON COLUMN urls.pdf_l + IS 'pdf landscape';}, + %{COMMENT ON COLUMN urls.concordance + IS 'rudimentary document index linked to html';}, + %{COMMENT ON COLUMN urls.latex_p + IS 'latex portrait';}, + %{COMMENT ON COLUMN urls.latex_l + IS 'latex_landscape';}, + %{COMMENT ON COLUMN urls.markup + IS 'markup';}, + %{COMMENT ON COLUMN urls.sisupod + IS 'SiSU document format .tgz (all SiSU information on document)';}, + ] + conn_execute_array(sql_arr) + end + self + end + end +end +__END__ +#+END_SRC + +** db_drop.rb + +#+BEGIN_SRC ruby :tangle "../lib/sisu/db_drop.rb" +# <> +module SiSU_DbDrop + require_relative 'utils_response' # utils_response.rb + class Drop + include SiSU_Response + def initialize(opt,conn,db_info,sql_type) + @opt,@conn,@db_info,@sql_type=opt,conn,db_info,sql_type + case @sql_type + when :sqlite + cascade='' + else + cascade='CASCADE' + end + @drop_table=[ + "DROP TABLE metadata_and_text #{cascade};", + "DROP TABLE doc_objects #{cascade};", + "DROP TABLE urls #{cascade};", + "DROP TABLE endnotes #{cascade};", + "DROP TABLE endnotes_asterisk #{cascade};", + "DROP TABLE endnotes_plus #{cascade};", + ] + end + def drop + def tables #% drop all tables + begin + msg_sqlite="as not all disk space is recovered after dropping the database << #{@db_info.sqlite.db} >>, you may be better off deleting the file, and recreating it as necessary" + case @sql_type + when :sqlite + puts msg_sqlite + ans=response?('remove sql database?') + if ans \ + and File.exist?(@db_info.sqlite.db) + @conn.close + File.unlink(@db_info.sqlite.db) + db=SiSU_Env::InfoDb.new + conn=db.sqlite.conn_sqlite3 + sdb=SiSU_DbDBI::Create.new(@opt,conn,@db_info,@sql_type) + sdb_index=SiSU_DbDBI::Index.new(@opt,conn,@db_info,@sql_type) + sdb.output_dir? + begin + sdb.create_db + sdb.create_table.metadata_and_text + sdb.create_table.doc_objects + sdb.create_table.endnotes + sdb.create_table.endnotes_asterisk + sdb.create_table.endnotes_plus + sdb.create_table.urls + sdb_index.create_indexes + rescue + SiSU_Errors::Rescued.new($!,$@,'-D').location do + __LINE__.to_s + ':' + __FILE__ + end + sdb.output_dir? 
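          # Note on the drop strategy above: for PostgreSQL each DROP TABLE in
          # @drop_table is issued with CASCADE, so dependent objects (indexes,
          # the foreign-key references from doc_objects, endnotes* and urls)
          # go with the table; SQLite's DROP TABLE takes no CASCADE, and since
          # dropping tables does not shrink the database file, the file itself
          # is unlinked and an empty schema recreated instead. Roughly what
          # the pg branch sends:
          #
          #   DROP TABLE metadata_and_text CASCADE;
          #   DROP TABLE doc_objects CASCADE;
          #   DROP TABLE urls CASCADE;
          #   DROP TABLE endnotes CASCADE;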
+ end + exit + else + @conn.transaction + @drop_table.each do |d| + begin + @conn.exec_params(d) + rescue + next + end + end + @conn.commit + end + when :pg + @conn.transaction + @drop_table.each do |d| + begin + @conn.exec_params(d) + rescue + next + end + end + @conn.commit + end + rescue + case @sql_type + when :sqlite + ans=response?('remove sql database?') + if ans and File.exist?(@db_info.sqlite.db); File.unlink(@db_info.sqlite.db) + end + else + @drop_table.each do |d| + begin + @conn.exec_params(d) + rescue + next + end + end + end + ensure + end + end + def indexes + def conn_execute_array(sql_arr) + @conn.transaction do |conn| + sql_arr.each do |sql| + begin + conn.exec_params(sql) + rescue + next + end + end + end + end + def base #% drop base indexes + print "\n drop documents common indexes\n" unless @opt.act[:quiet][:set]==:on + sql_arr=[ + %{DROP INDEX idx_title;}, + %{DROP INDEX idx_author;}, + %{DROP INDEX idx_filename;}, + %{DROP INDEX idx_topics;}, + %{DROP INDEX idx_ocn;}, + %{DROP INDEX idx_digest_clean;}, + %{DROP INDEX idx_digest_all;}, + %{DROP INDEX idx_lev0;}, + %{DROP INDEX idx_lev1;}, + %{DROP INDEX idx_lev2;}, + %{DROP INDEX idx_lev3;}, + %{DROP INDEX idx_lev4;}, + %{DROP INDEX idx_lev5;}, + %{DROP INDEX idx_lev6;}, + %{DROP INDEX idx_endnote_nr;}, + %{DROP INDEX idx_digest_en;}, + %{DROP INDEX idx_endnote_nr_asterisk;}, + %{DROP INDEX idx_endnote_asterisk;}, + %{DROP INDEX idx_digest_en_asterisk;}, + %{DROP INDEX idx_endnote_nr_plus;}, + %{DROP INDEX idx_endnote_plus;}, + %{DROP INDEX idx_digest_en_plus}, + ] + conn_execute_array(sql_arr) + end + def text #% drop TEXT indexes, sqlite + print "\n drop documents TEXT indexes\n" unless @opt.act[:quiet][:set]==:on + sql_arr=[ + %{DROP INDEX idx_clean;}, + %{DROP INDEX idx_endnote}, + ] + conn_execute_array(sql_arr) + end + self + end + indexes.base + @opt.act[:psql][:set]==:on ? '' : indexes.text + self + end + end +end +__END__ +#+END_SRC + +** db_import.rb + +#+BEGIN_SRC ruby :tangle "../lib/sisu/db_import.rb" +# <> +module SiSU_DbImport + require_relative 'db_columns' # db_columns.rb + require_relative 'db_load_tuple' # db_load_tuple.rb + require_relative 'db_sqltxt' # db_sqltxt.rb + require_relative 'html_lite_shared' # html_lite_shared.rb + class Import < SiSU_DbText::Prepare + include SiSU_Param + include SiSU_Screen + include SiSU_DbAction + @@dl=nil + @@hname=nil + attr_accessor :tp + def initialize(opt,conn,file_maint,sql_type=:pg) + @opt,@conn,@file_maint,@sql_type=opt,conn,file_maint,sql_type + @cX=SiSU_Screen::Ansi.new(@opt.act[:color_state][:set]).cX + @env=SiSU_Env::InfoEnv.new(@opt.fns) + @dal="#{@env.processing_path.ao}" + @fnb=if @opt.fns.empty? \ + or @opt.selections.str.empty? + '' + else + @md=SiSU_Param::Parameters.new(@opt).get + @md.fnb + end + @fnc="#{@dal}/#{@opt.fns}.content.rbm" + @@seg,@@seg_full='','' #create? consider placing field just before clean text as opposed to seg which contains seg(.html) name info seg_full would contain seg info for levels 5 & 6 where available eg seg_full may be 7.3 (level 5) and 7.3.1 (level 6) where seg is 7 + @col=Hash.new('') + @col[:ocn]='' + @counter={} + @db=SiSU_Env::InfoDb.new + if @sql_type==:sqlite + @driver_sqlite3= + (@conn.inspect.match(/^(.{10})/)[1] \ + == @db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]) \ + ? true + : false + end + sql='SELECT MAX(lid) FROM doc_objects' + begin + @col[:lid] ||=0 + @col[:lid]=@driver_sqlite3 \ + ? 
@conn.execute( sql ).join.to_i + : @conn.exec( sql ).getvalue(0,0).to_i + rescue + if @opt.act[:maintenance][:set]==:on + puts "#{__FILE__}:#{__LINE__}" + end + end + @col[:lid]=0 if @col[:lid].nil? or @col[:lid].to_s.empty? + sql='SELECT MAX(nid) FROM endnotes' + begin + @id_n=@driver_sqlite3 \ + ? @conn.execute( sql ).join.to_i + : @conn.exec( sql ).getvalue(0,0).to_i + @id_n ||=0 + rescue + if @opt.act[:maintenance][:set]==:on + puts "#{__FILE__}:#{__LINE__}" + end + end + @id_n =0 if @col[:lid].nil? or @col[:lid].to_s.empty? + @col[:lv0]=@col[:lv1]=@col[:lv2]=@col[:lv3]=@col[:lv4]=@col[:lv5]=@col[:lv6]=@col[:lv7]=0 + @db=SiSU_Env::InfoDb.new + @pdf_fn=SiSU_Env::FileOp.new(@md).base_filename + @@dl ||=SiSU_Env::InfoEnv.new.digest.length + end + def marshal_load + require_relative 'ao' # ao.rb + @ao_array=SiSU_AO::Source.new(@opt).get # ao file drawn here + if (@opt.act[:verbose][:set]==:on \ + || @opt.act[:verbose_plus][:set]==:on \ + || @opt.act[:maintenance][:set]==:on) + SiSU_Screen::Ansi.new( + @opt.act[:color_state][:set], + "#{@db.psql.db}::#{@opt.fns}" + ).puts_blue + end + SiSU_Screen::Ansi.new( + @opt.act[:color_state][:set], + 'Marshal Load', + @fnc + ).puts_grey if @opt.act[:verbose][:set]==:on + select_first_match=%{ + SELECT metadata_and_text.tid + FROM metadata_and_text + WHERE metadata_and_text.src_filename = '#{@md.fns}' + AND metadata_and_text.language_document_char = '#{@opt.lng}' + ;} # note, for .ssm: @md.fns (is set during runtime & is) != @opt.fns @md.opt.fns + file_exist=if @sql_type==:sqlite + begin + @conn.get_first_value(select_first_match) + rescue SQLite3::Exception => e + # not tested + puts "Exception occurred" + puts e + SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:yellow).mark( + "\n" \ + + 'Attempting to initialize db' + "\n" \ + + 'Creating db tables' + ) + sdb={ + create: SiSU_DbDBI::Create.new(@opt,@conn,@file_maint,@sql_type), + index: SiSU_DbDBI::Index.new(@opt,@conn,@file_maint,@sql_type), + } + db_action(sdb).create + end + else + begin + @conn.exec(select_first_match).field_values("tid")[0] + rescue PG::Error => e + err=[ + e.result.error_field( PG::Result::PG_DIAG_SEVERITY ), + e.result.error_field( PG::Result::PG_DIAG_SQLSTATE ), + e.result.error_field( PG::Result::PG_DIAG_MESSAGE_PRIMARY ), + e.result.error_field( PG::Result::PG_DIAG_MESSAGE_DETAIL ), + e.result.error_field( PG::Result::PG_DIAG_MESSAGE_HINT ), + e.result.error_field( PG::Result::PG_DIAG_STATEMENT_POSITION ), + e.result.error_field( PG::Result::PG_DIAG_INTERNAL_POSITION ), + e.result.error_field( PG::Result::PG_DIAG_INTERNAL_QUERY ), + e.result.error_field( PG::Result::PG_DIAG_CONTEXT ), + e.result.error_field( PG::Result::PG_DIAG_SOURCE_FILE ), + e.result.error_field( PG::Result::PG_DIAG_SOURCE_LINE ), + e.result.error_field( PG::Result::PG_DIAG_SOURCE_FUNCTION ), + ] + p err + if err[2] =~/relation "\S+?" does not exist/ \ + or err.inspect =~/relation "\S+?" 
does not exist/ + SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:yellow).mark( + "\n" \ + + err[2] + "\n" \ + + 'Attempting to initialize db' + "\n" \ + + 'Creating db tables' + ) + sdb={ + create: SiSU_DbDBI::Create.new(@opt,@conn,@file_maint,@sql_type), + index: SiSU_DbDBI::Index.new(@opt,@conn,@file_maint,@sql_type), + } + db_action(sdb).create + retry + end + end + end + if not file_exist + t_d=[] # transaction_data + t_d << db_import_metadata + t_d << db_import_documents(@ao_array) + t_d << db_import_urls(@ao_array,@fnc) #import OID on/off + t_d=t_d.flatten + if (@opt.act[:verbose_plus][:set]==:on \ + || @opt.act[:maintenance][:set]==:on) + puts @conn.class if defined? @conn.class + puts @conn.driver_name if defined? @conn.driver_name + puts @conn.driver if defined? @conn.driver + end + begin #% sql + if @sql_type==:sqlite + @conn.transaction do |conn| + t_d.each do |sql| + conn.execute(sql) + end + end + #also 'execute' works for sqlite + #@conn.execute("BEGIN") + # t_d.each do |sql| + # @conn.execute(sql) + # end + #@conn.execute("COMMIT") + else + #'do' works for postgresql + @conn.exec("BEGIN") + t_d.each do |sql| + @conn.exec(sql) + end + @conn.exec("COMMIT") + end + rescue + SiSU_Errors::Rescued.new($!,$@,@opt.selections.str,@opt.fns).location do + __LINE__.to_s + ':' + __FILE__ + end + sqlfn="#{@env.processing_path.sql}/#{@md.fnb}.sql" + sql=File.new(sqlfn,'w') + t_d.each {|i| sql.puts i} + p sqlfn + if @opt.act[:maintenance][:set]==:on + puts sql + p @conn.methods.sort + puts "#{__FILE__}:#{__LINE__}" + end + ensure + end + else + if file_exist + @db=SiSU_Env::InfoDb.new + puts "\nfile #{@opt.fns} in language code #{@opt.lng} already exists in database #{@db.psql.db}, use --update instead?" + end + end + end + def pf_db_import_transaction_open + end + def pf_db_import_transaction_close + end + def book_idx_hash_to_str(book_idx) + book_idx=book_idx ? book_idx : '' + book_idx_str,book_subidx_part='','' + if not book_idx.empty? + book_idx_str='' + book_idx.each_pair do |k0,v0| + book_idx_str << %{#{k0}+#{v0[:plus]}} + book_subidx_part='' + if v0[:sub].length > 0 + v0[:sub].each do |subterms| + subterms.each_pair do |k1,v1| + book_subidx_part << %{\n #{k1}+#{v1[:plus]} | } + end + end + book_idx_str=book_idx_str + ':' + book_subidx_part + end + end + end + book_idx_str + end + def db_import_metadata #% import documents - populate database + if (@opt.act[:verbose][:set]==:on \ + || @opt.act[:verbose_plus][:set]==:on \ + || @opt.act[:maintenance][:set]==:on) + print %{ #{@cX.grey}import documents dbi_unit #{@cX.off} } + end + @tp={} + @md=SiSU_Param::Parameters.new(@opt).get +#% sisutxt & fulltxt + if FileTest.exist?(@md.fns) + txt_arr=IO.readlines(@md.fns,'') + src=txt_arr.join("\n") + src=special_character_escape(src) + @tp[:sisutxt_f],@tp[:sisutxt_i]='sisutxt, ',"'#{src}', " + txt=clean_searchable_text_from_document_source(txt_arr) + #txt=special_character_escape(txt) + @tp[:fulltxt_f],@tp[:fulltxt_i]='fulltxt, ',"'#{txt}', " + end +#% title + if defined? @md.title.full \ + and @md.title.full=~/\S+/ # DublinCore 1 - title + #@tp[:title]=@md.title.full + #special_character_escape(@tp[:title]) + #@tp[:title_f],@tp[:title_i]='title, ',"'#{@tp[:title]}', " + sql='SELECT MAX(tid) FROM metadata_and_text;' + begin + @@id_t ||=0 + id_t=@driver_sqlite3 \ + ? 
@conn.execute( sql ).join.to_i # { |x| id_t=x.join.to_i } + : @conn.exec( sql ).getvalue(0,0).to_i + @@id_t=id_t if id_t + rescue + if @opt.act[:maintenance][:set]==:on + puts "#{__FILE__} #{__LINE__}" + end + end + @@id_t+=1 #bug related, needs to be performed once at start of file, but consider moving, as, placed here it means program will fail if document header lacks @title: + if (@opt.act[:verbose][:set]==:on \ + || @opt.act[:verbose_plus][:set]==:on \ + || @opt.act[:maintenance][:set]==:on) + puts %{\n#{@cX.grey}Processing file number#{@cX.off}: #{@cX.green}#{@@id_t}#{@@cX.off}} + end + end + ################ CLEAR ############## + SiSU_DbDBI::Test.new(self,@opt).verify #% import title names, filenames (tuple) + t=SiSU_DbTuple::LoadMetadata.new(@conn,@@id_t,@md,@file_maint) + tuple=t.tuple + tuple + end + def db_import_documents(ao_array) #% import documents - populate main database table, import into substantive database tables (tuple) + begin + @col[:tid]=@@id_t + @en,@en_ast,@en_pls,@tuple_array=[],[],[],[] + @col[:en_a],@col[:en_z]=nil,nil + ao_array.each do |data| + data.obj=data.obj.gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1'). + gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1'). + gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1'). + gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1'). + gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1'). + gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1'). + gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1'). + gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1'). + gsub(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\1'). + gsub(/#{Mx[:gl_o]}(●)#{Mx[:gl_c]}\s*/,'\1 '). + gsub(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check + @col[:seg]=@@seg + if data.of ==:para \ + || data.of ==:heading \ + || data.of ==:heading_insert \ + || data.of ==:block \ + || data.of ==:group # regular text what of code-blocks grouped text etc. 
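        # For each object of these types the block below keeps a copy of the
        # raw object (notedata) so endnotes can be harvested from it, strips
        # inline font/marker codes, then fills @col and hands it to
        # SiSU_DbTuple::LoadDocuments, which assembles the INSERT for one
        # doc_objects row. A rough sketch of the intended mapping (doc_objects
        # column names as created in db_create.rb; the exact tuple assembly
        # lives in db_load_tuple.rb):
        #
        #   @col[:lid]        #=> doc_objects.lid    (running object id)
        #   @col[:ocn]        #=> doc_objects.ocn    (object citation number)
        #   @col[:plaintext]  #=> doc_objects.clean  (searchable, markup stripped)
        #   @col[:body]       #=> doc_objects.body   (light html markup)
        #   @col[:seg]        #=> doc_objects.seg    (level-4 segment name)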
+ notedata=data.obj.dup + #% :headings + if data.is==:heading \ + && (data.ln.inspect=~/[0-3]/) + ( + @col[:lev], + txt,@col[:ocn], + @col[:lev_an], + @col[:ocnd],@col[:ocns], + @col[:t_of],@col[:t_is], + @col[:node],@col[:parent], + @col[:digest_clean],@col[:digest_all]= + data.ln, + data.obj,data.ocn, + data.lv, + data.odv,data.osp, + data.of,data.is, + data.node,data.parent, + '','' + ) + @col[:lid]+=1 + txt=endnotes(txt).extract_any + body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_minus + @col[:body]=clean_document_objects_body(body) + plaintext=@col[:body].dup + plaintext=strip_markup(plaintext) + @col[:plaintext]=clean_searchable_text_from_document_objects(plaintext) + book_idx=book_idx_hash_to_str(data.idx) + @col[:book_idx]=clean_searchable_text_from_document_objects(book_idx) + if @en[0] then @en_a,@en_z=@en[0].first,@en[0].last + end + if @en_ast[0] then @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last + end + if @en_pls[0] then @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last + end + t=SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint) + @tuple_array << t.tuple + case @col[:lev] + when /0/ then @col[:lv0]+=1 + when /1/ then @col[:lv1]+=1 + when /2/ then @col[:lv2]+=1 + when /3/ then @col[:lv3]+=1 + when /4/ then @col[:lv4]+=1 + end + @col[:lev]=@col[:plaintext]=@col[:body]='' + elsif data.is==:heading \ + && data.ln==4 + ( + @@seg,txt, + @col[:ocn],@col[:lev_an], + @col[:ocnd],@col[:ocns], + @col[:t_of],@col[:t_is], + @col[:node],@col[:parent], + @col[:digest_clean],@col[:digest_all]= + data.name,data.obj, + data.ocn,data.lv, + data.odv,data.osp, + data.of,data.is, + data.node,data.parent, + '','' + ) + @col[:seg]=@@seg + @col[:lv4]+=1 + @col[:lid]+=1 + @col[:lev]=4 + @hname=if @col[:seg] \ + and not @col[:seg].to_s.empty? + @@hname=@col[:seg].to_s + else @@hname + end + @env=SiSU_Env::InfoEnv.new(@md.fns) + @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" + txt=endnotes(txt).extract_any + body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus + @col[:body]=clean_document_objects_body(body) + plaintext=@col[:body].dup + plaintext=strip_markup(plaintext) + @col[:plaintext]=clean_searchable_text_from_document_objects(plaintext) + book_idx=book_idx_hash_to_str(data.idx) + @col[:book_idx]=clean_searchable_text_from_document_objects(book_idx) + @en_a,@en_z=@en[0].first,@en[0].last if @en[0] + @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] + @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] + t=SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint) + @tuple_array << t.tuple + @col[:lev]=@col[:plaintext]=@col[:body]='' + elsif data.is==:heading \ + && data.ln==5 + ( + txt, + @col[:ocn],@col[:lev_an], + @col[:ocnd],@col[:ocns], + @col[:t_of],@col[:t_is], + @col[:node],@col[:parent], + @col[:digest_clean],@col[:digest_all]= + data.obj, + data.ocn,data.lv, + data.odv,data.osp, + data.of,data.is, + data.node,data.parent, + '','' + ) + @@seg_full=data.name if data.is==:heading \ + && data.ln==5 \ + && data.name #check data.name + @@seg ||='' #nil # watch + @col[:seg]=@@seg + @col[:lv5]+=1 + @col[:lid]+=1 + @col[:lev]=5 + @hname=if @col[:seg] \ + and not @col[:seg].to_s.empty? 
+ @@hname=@col[:seg].to_s + else @@hname + end + @env=SiSU_Env::InfoEnv.new(@md.fns) + @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" + txt=endnotes(txt).extract_any + body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus + @col[:body]=clean_document_objects_body(body) + plaintext=@col[:body].dup + plaintext=strip_markup(plaintext) + @col[:plaintext]=clean_searchable_text_from_document_objects(plaintext) + book_idx=book_idx_hash_to_str(data.idx) + @col[:book_idx]=clean_searchable_text_from_document_objects(book_idx) + @en_a,@en_z=@en[0].first,@en[0].last if @en[0] + @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] + @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] + t=SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint) + @tuple_array << t.tuple + @col[:lev]=@col[:plaintext]=@col[:body]='' + elsif data.is==:heading \ + && data.ln==6 + txt, @col[:ocn],@col[:lev_an],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]= + data.obj,data.ocn, data.lv, data.odv, data.osp, data.of, data.is, data.node, data.parent, '', '' + @@seg_full=data.name if data.is==:heading && data.ln==6 && data.name #check data.name + @@seg ||='' #nil # watch + @col[:seg]=@@seg + @col[:lv6]+=1 + @col[:lid]+=1 + @col[:lev]=6 + @hname=if @col[:seg] \ + and not @col[:seg].to_s.empty? + @@hname=@col[:seg].to_s + else @@hname + end + @env=SiSU_Env::InfoEnv.new(@md.fns) + @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" + txt=endnotes(txt).extract_any + body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus + @col[:body]=clean_document_objects_body(body) + plaintext=@col[:body].dup + plaintext=strip_markup(plaintext) + @col[:plaintext]=clean_searchable_text_from_document_objects(plaintext) + book_idx=book_idx_hash_to_str(data.idx) + @col[:book_idx]=clean_searchable_text_from_document_objects(book_idx) + @en_a,@en_z=@en[0].first,@en[0].last if @en[0] + @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] + @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] + t=SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint) + @tuple_array << t.tuple + @col[:lev]=@col[:plaintext]=@col[:body]='' + elsif data.is==:heading \ + && data.ln==7 + txt, @col[:ocn],@col[:lev_an],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]= + data.obj,data.ocn, data.lv, data.odv, data.osp, data.of, data.is, data.node, data.parent, '', '' + @@seg_full=data.name if data.is==:heading && data.ln==7 && data.name #check data.name + @@seg ||='' #nil # watch + @col[:seg]=@@seg + @col[:lv7]+=1 + @col[:lid]+=1 + @col[:lev]=7 + @hname=if @col[:seg] \ + and not @col[:seg].to_s.empty? 
+ @@hname=@col[:seg].to_s + else @@hname + end + @env=SiSU_Env::InfoEnv.new(@md.fns) + @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" + txt=endnotes(txt).extract_any + body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus + @col[:body]=clean_document_objects_body(body) + plaintext=@col[:body].dup + plaintext=strip_markup(plaintext) + @col[:plaintext]=clean_searchable_text_from_document_objects(plaintext) + book_idx=book_idx_hash_to_str(data.idx) + @col[:book_idx]=clean_searchable_text_from_document_objects(book_idx) + @en_a,@en_z=@en[0].first,@en[0].last if @en[0] + @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] + @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] + t=SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint) + @tuple_array << t.tuple + @col[:lev]=@col[:plaintext]=@col[:body]='' + #% :structure :layout :comment + elsif data.of==:structure \ + || data.of==:layout \ + || data.of==:comment + #added watch + #% : + else #% regular text + @col[:lid]+=1 + ( + txt='' + txt,@col[:ocn], + @col[:ocnd],@col[:ocns], + @col[:t_of],@col[:t_is], + @col[:node],@col[:parent], + @col[:digest_clean],@col[:digest_all], + @col[:lev]= + data.obj,data.ocn, + data.odv,data.osp, + data.of,data.is, + '',data.parent, + '','', + 9 + ) + @hname=if @col[:seg] \ + and not @col[:seg].to_s.empty? + @@hname=@col[:seg].to_s + else @@hname + end + @env=SiSU_Env::InfoEnv.new(@md.fns) + @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" + txt=endnotes(txt).extract_any + if @sql_type==:pg \ + and txt.size > (SiSU_DbColumns::ColumnSize.new.document_clean - 1) # examine pg build & remove limitation + puts "\n\nTOO LARGE (TXT - see error log)\n\n" + open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| + error.puts("\n#{@opt.fns}\nTEXT BODY\n#{@col[:body].size} object #{@col[:ocn]} -> #{@col[:body].slice(0..500)}") + end + txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n} + end + @en_a,@en_z=@en[0].first,@en[0].last if @en[0] + @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] + @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] + body=if data.is==:table + SiSU_FormatShared::CSS_Format.new(@md,data).html_table + elsif data.is==:code + SiSU_FormatShared::CSS_Format.new(@md,data).code + elsif defined? data.indent \ + and defined? data.hang \ + and data.indent =~/[1-9]/ \ + and data.indent == data.hang + SiSU_FormatShared::CSS_Format.new(@md,data).indent(data.indent) + elsif defined? data.indent \ + and defined? 
data.hang \ + and data.hang =~/[0-9]/ \ + and data.indent != data.hang + SiSU_FormatShared::CSS_Format.new(@md,data).hang_indent(data.hang,data.indent) + else + SiSU_FormatShared::CSS_Format.new(@md,data).norm + end + @col[:body]=clean_document_objects_body(body) + plaintext=@col[:body].dup + plaintext=strip_markup(plaintext) + @col[:plaintext]=clean_searchable_text_from_document_objects(plaintext) + book_idx=book_idx_hash_to_str(data.idx) + @col[:book_idx]=clean_searchable_text_from_document_objects(book_idx) + t=SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint) + @tuple_array << t.tuple + @en,@en_ast,@en_pls=[],[],[] + @col[:en_a]=@col[:en_z]=nil + @col[:lev]=@col[:plaintext]=@col[:body]=@col[:words]='' + end + if notedata =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ #% import into database endnotes tables + endnote_array=notedata.scan(/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/) + endnote_array.each do |inf| + if inf[/#{Mx[:en_a_o]}\d+.+?#{Mx[:en_a_c]}/] + if inf[/#{Mx[:en_a_o]}(\d+)(.+?)#{Mx[:en_a_c]}/] + nr,txt,digest_clean=$1,$2.strip,0 + end + @id_n ||=0 + @id_n+=1 + txt=special_character_escape(txt) + body=SiSU_FormatShared::CSS_Format.new(@md,data).endnote(nr,txt) + txt=strip_markup(txt) + if txt.size > (SiSU_DbColumns::ColumnSize.new.endnote_clean - 1) + puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" + open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| + error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}") + end + txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n} + end + if txt + en={ + type: 'endnotes', + id: @id_n, + lid: @col[:lid], + nr: nr, + txt: txt, + body: body, + ocn: @col[:ocn], + ocnd: @col[:ocnd], + ocns: @col[:ocns], + id_t: @@id_t, + hash: digest_clean + } + t=SiSU_DbTuple::LoadEndnotes.new(@conn,en,@opt,@file_maint) + @tuple_array << t.tuple + end + end + end + word_mode=notedata.scan(/\S+/) + end + if notedata =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ #% import into database endnotes tables + endnote_array=notedata.scan(/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/) + endnote_array.each do |inf| + if inf[/#{Mx[:en_b_o]}\*\d+.+?#{Mx[:en_b_c]}/] # dal new endnotes 2003w31/1 + if inf[/#{Mx[:en_b_o]}[*](\d+)(.+?)#{Mx[:en_b_c]}/] # dal new endnotes 2003w31/1 + nr,txt,digest_clean=$1,$2.strip,0 + end + @id_n+=1 + txt=special_character_escape(txt) + body=SiSU_FormatShared::CSS_Format.new(@md,data).endnote(nr,txt) + txt=strip_markup(txt) + if txt.size > (SiSU_DbColumns::ColumnSize.new.endnote_clean - 1) + puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" + open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| + error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}") + end + txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n} + end + if txt + en={ + type: 'endnotes_asterisk', + id: @id_n, + lid: @col[:lid], + nr: nr, + txt: txt, + body: body, + ocn: @col[:ocn], + ocnd: @col[:ocnd], + ocns: @col[:ocns], + id_t: @@id_t, + hash: digest_clean + } + t=SiSU_DbTuple::LoadEndnotes.new(@conn,en,@opt,@file_maint) + @tuple_array << t.tuple + end + end + end + word_mode=notedata.scan(/\S+/) + end + if notedata =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ #% import into database endnotes tables + endnote_array=notedata.scan(/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/) + endnote_array.each do |inf| + if inf[/#{Mx[:en_b_o]}\+\d+.+?#{Mx[:en_b_c]}/] # dal new endnotes 2003w31/1 + if inf[/#{Mx[:en_b_o]}[+](\d+)(.+?)#{Mx[:en_b_c]}/] # dal new endnotes 2003w31/1 + nr,txt,digest_clean=$1,$2.strip,0 
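              # The capture above splits a plus-marked endnote into its number
              # and text: with the Mx[:en_b_o]/Mx[:en_b_c] delimiters shown
              # schematically as braces, a marker like
              #
              #   {+12 See the earlier discussion.}
              #
              # yields nr="12" and txt="See the earlier discussion."; the two
              # branches above do the same for plain and asterisk-marked
              # endnotes before each is loaded into its own endnotes table.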
+ end + @id_n+=1 + txt=special_character_escape(txt) + body=SiSU_FormatShared::CSS_Format.new(@md,data).endnote(nr,txt) + txt=strip_markup(txt) + if txt.size > (SiSU_DbColumns::ColumnSize.new.endnote_clean - 1) + puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" + open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| + error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}") + end + txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n} + end + if txt + en={ + type: 'endnotes_plus', + id: @id_n, + lid: @col[:lid], + nr: nr, + txt: txt, + body: body, + ocn: @col[:ocn], + ocnd: @col[:ocnd], + ocns: @col[:ocns], + id_t: @@id_t, + hash: digest_clean + } + t=SiSU_DbTuple::LoadEndnotes.new(@conn,en,@opt,@file_maint) + @tuple_array << t.tuple + end + end + end + word_mode=notedata.scan(/\S+/) + end + end + end + rescue + SiSU_Errors::Rescued.new($!,$@,@opt.selections.str,@opt.fns).location do + __LINE__.to_s + ':' + __FILE__ + end + ensure + end + @tuple_array + end + def endnotes(txt) + @txt=txt + def extract_any + if @txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ + endnotes(@txt).range + @en << endnotes(@txt).standard if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ + @en_ast << endnotes(@txt).asterisk if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ + @en_pls << endnotes(@txt).plus if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ + @txt=endnotes(@txt).clean_text + end + @txt + end + def standard + (@txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/) \ + ? @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/) + : nil + end + def asterisk + (@txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/) \ + ? @txt.scan(/#{Mx[:en_b_o]}[*](\d+).+?#{Mx[:en_b_c]}/) + : nil + end + def plus + (@txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/) \ + ? @txt.scan(/#{Mx[:en_b_o]}[+](\d+).+?#{Mx[:en_b_c]}/) + : nil + end + def clean_text(base_url=nil) + @txt=if base_url + @txt.gsub(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/,%{\\1}). + gsub(/#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}/,%{\\1}). + gsub(/#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/,%{\\1}) + else + @txt.gsub(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/,'\1'). + gsub(/#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}/,'\1'). + gsub(/#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/,'\1') + end + @txt + end + def range + @col[:en_a]=@col[:en_z]=nil + if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}|#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/ + word_array=@txt.scan(/\S+/) + word_array.each do |w| + if w[/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/] # not tested since change 2003w31 + @col[:en_a]=$1 unless @col[:en_a] + @col[:en_z]=@col[:en_a].dup unless @col[:en_a] + @col[:en_z]=$1 if @col[:en_a] + end + end + end + @col + end + self + end + def db_import_urls(dbi_unit,content) #% import documents OID - populate database + begin + @fnc=content + @env=SiSU_Env::InfoEnv.new(@opt.fns) + f,u={},{} + if @fnb.empty? \ + or @fnb.nil? 
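+ # no document filename base (@fnb): output paths for the urls table cannot be derived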
+ p 'file output path error' #remove + end + if FileTest.file?("#{@md.file.output_path.txt.dir}/#{@md.file.base_filename.txt}")==true + f[:txt],u[:txt]='plaintext,', "'#{@md.file.output_path.txt.url}/#{@md.file.base_filename.txt}'," + end + if FileTest.file?("#{@md.file.output_path.html_seg.dir}/#{@md.file.base_filename.html_segtoc}")==true + f[:html_toc],u[:html_toc]='html_toc,', "'#{@md.file.output_path.html_seg.url}/#{@md.file.base_filename.html_segtoc}'," + end + if FileTest.file?("#{@md.file.output_path.html_scroll.dir}/#{@md.file.base_filename.html_scroll}")==true + f[:html_doc],u[:html_doc]='html_doc,', "'#{@md.file.output_path.html_scroll.url}/#{@md.file.base_filename.html_scroll}'," + end + if FileTest.file?("#{@md.file.output_path.xhtml.dir}/#{@md.file.base_filename.xhtml}")==true + f[:xhtml],u[:xhtml]='xhtml,', "'#{@md.file.output_path.xhtml.url}/#{@md.file.base_filename.xhtml}'," + end + if FileTest.file?("#{@md.file.output_path.xml_sax.dir}/#{@md.file.base_filename.xml_sax}")==true + f[:xml_sax],u[:xml_sax]='xml_sax,', "'#{@md.file.output_path.xml_sax.url}/#{@md.file.base_filename.xml_sax}'," + end + if FileTest.file?("#{@md.file.output_path.xml_dom.dir}/#{@md.file.base_filename.xml_dom}")==true + f[:xml_dom],u[:xml_dom]='xml_dom,', "'#{@md.file.output_path.xml_dom.url}/#{@md.file.base_filename.xml_dom}'," + end + if FileTest.file?("#{@md.file.output_path.epub.dir}/#{@md.file.base_filename.epub}")==true + f[:epub],u[:epub]='epub,', "'#{@md.file.output_path.epub.url}/#{@md.file.base_filename.epub}'," + end + if FileTest.file?("#{@md.file.output_path.odt.dir}/#{@md.file.base_filename.odt}")==true + f[:odf],u[:odf]='odf,', "'#{@md.file.output_path.odt.url}/#{@md.file.base_filename.odt}'," + end + if FileTest.file?("#{@md.file.output_path.pdf.dir}/#{@pdf_fn.pdf_p_a4}")==true #\ + #or FileTest.file?("#{@md.file.output_path.pdf.dir}/#{@pdf_fn.pdf_p_letter}")==true + f[:pdf_p],u[:pdf_p]='pdf_p,', "'#{@md.file.output_path.pdf.url}/#{@pdf_fn.pdf_p_a4}'," + end + if FileTest.file?("#{@md.file.output_path.pdf.dir}/#{@pdf_fn.pdf_l_a4}")==true #\ + #or FileTest.file?("#{@md.file.output_path.pdf.dir}/#{@pdf_fn.pdf_l_letter}")==true + f[:pdf_l],u[:pdf_l]='pdf_l,', "'#{@md.file.output_path.pdf.url}/#{@pdf_fn.pdf_l_a4}'," + end + if FileTest.file?("#{@md.file.output_path.html_concordance.dir}/#{@md.file.base_filename.html_concordance}")==true + f[:concordance],u[:concordance]='concordance,', "'#{@md.file.output_path.html_concordance.url}/#{@md.file.base_filename.html_concordance}'," + end + #if FileTest.file?("#{@md.file.output_path.x.dir}/#{@md.file.base_filename.x}")==true + # f[:latex_p],u[:latex_p]='latex_p,', "'#{@md.file.output_path.x.url}/#{@md.file.base_filename.x}'," + #end + ##if FileTest.file?("#{out}/#{@fnb}/#{@opt.fns}.tex")==true + ## f[:latex_p],u[:latex_p]='latex_p,', "'#{base}/#{@fnb}/#{@opt.fns}.tex'," + ##end + #if FileTest.file?("#{@md.file.output_path.x.dir}/#{@md.file.base_filename.x}")==true + # f[:latex_l],u[:latex_l]='latex_l,', "'#{@md.file.output_path.x.url}/#{@md.file.base_filename.x}'," + #end + ##if FileTest.file?("#{out}/#{@fnb}/#{@opt.fns}.landscape.tex")==true + ## f[:latex_l],u[:latex_l]='latex_l,', "'#{base}/#{@fnb}/#{@opt}.fns}.landscape.tex'," + ##end + if FileTest.file?("#{@md.file.output_path.digest.dir}/#{@md.file.base_filename.digest}")==true + f[:digest],u[:digest]='digest,', "'#{@md.file.output_path.digest.url}/#{@md.file.base_filename.digest}'," + end + if FileTest.file?("#{@md.file.output_path.manifest.dir}/#{@md.file.base_filename.manifest}")==true 
#revisit, was to be text, this is html + f[:manifest],u[:manifest]='manifest,', "'#{@md.file.output_path.manifest.url}/#{@md.file.base_filename.manifest}'," + end + if FileTest.file?("#{@md.file.output_path.src.dir}/#{@md.file.base_filename.src}")==true + f[:markup],u[:markup]='markup,', "'#{@md.file.output_path.src.url}/#{@md.file.base_filename.src}'," + end + if FileTest.file?("#{@md.file.output_path.sisupod.dir}/#{@md.file.base_filename.sisupod}")==true + f[:sisupod],u[:sisupod]='sisupod,', "'#{@md.file.output_path.sisupod.url}/#{@md.file.base_filename.sisupod}'," + end + t=SiSU_DbTuple::LoadUrls.new(@conn,f,u,@@id_t,@opt,@file_maint) + tuple=t.tuple + rescue + SiSU_Errors::Rescued.new($!,$@,@opt.selections.str,@opt.fns).location do + __LINE__.to_s + ':' + __FILE__ + end + ensure + end + tuple + end + end +end +__END__ +#+END_SRC + +** db_remove.rb + +#+BEGIN_SRC ruby :tangle "../lib/sisu/db_remove.rb" +# <> +module SiSU_DbRemove + class Remove + include SiSU_DbAction + def initialize(opt,conn,file,sql_type) + @opt,@conn,@file,@sql_type=opt,conn,file,sql_type + @md=SiSU_Param::Parameters.new(@opt).get + @fnb=@md.fnb + @db=SiSU_Env::InfoDb.new + end + def remove + driver_sqlite3=if @sql_type==:sqlite + (@conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]) \ + ? true + : false + end + del_id=if driver_sqlite3 + begin + remove_selected=%{ + SELECT tid + FROM metadata_and_text + WHERE src_filename = '#{@md.fns}' + AND metadata_and_text.language_document_char = '#{@opt.lng}' + ;} # note, for .ssm: @md.fns (is set during runtime & is) != @opt.fns @md.opt.fns + @conn.get_first_value(remove_selected).to_i + rescue SQLite3::Exception => e + #not tested + puts "Exception occurred" + SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia).mark(e.inspect) + SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:yellow).mark( + "\n" \ + + 'Attempting to initialize db' + "\n" \ + + 'Creating db tables' + ) + sdb={ + create: SiSU_DbDBI::Create.new(@opt,@conn,@file_maint,@sql_type), + index: SiSU_DbDBI::Index.new(@opt,@conn,@file_maint,@sql_type), + } + db_action(sdb).create + end + else + begin + remove_selected=%{ + SELECT metadata_and_text.tid + FROM metadata_and_text + WHERE metadata_and_text.src_filename = '#{@md.fns}' + AND metadata_and_text.language_document_char = '#{@opt.lng}' + ;} # note, for .ssm: @md.fns (is set during runtime & is) != @opt.fns @md.opt.fns + x=@conn.exec(remove_selected) + x.field_values("tid")[0] + rescue PG::Error => e + err=[ + e.result.error_field( PG::Result::PG_DIAG_SEVERITY ), + e.result.error_field( PG::Result::PG_DIAG_SQLSTATE ), + e.result.error_field( PG::Result::PG_DIAG_MESSAGE_PRIMARY ), + e.result.error_field( PG::Result::PG_DIAG_MESSAGE_DETAIL ), + e.result.error_field( PG::Result::PG_DIAG_MESSAGE_HINT ), + e.result.error_field( PG::Result::PG_DIAG_STATEMENT_POSITION ), + e.result.error_field( PG::Result::PG_DIAG_INTERNAL_POSITION ), + e.result.error_field( PG::Result::PG_DIAG_INTERNAL_QUERY ), + e.result.error_field( PG::Result::PG_DIAG_CONTEXT ), + e.result.error_field( PG::Result::PG_DIAG_SOURCE_FILE ), + e.result.error_field( PG::Result::PG_DIAG_SOURCE_LINE ), + e.result.error_field( PG::Result::PG_DIAG_SOURCE_FUNCTION ), + ] + SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia).mark("\n" + err.inspect) + if err[2] =~/relation "\S+?" does not exist/ \ + or err.inspect =~/relation "\S+?" 
does not exist/ + SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:yellow).mark( + "\n" \ + + err[2] + "\n" \ + + 'Attempting to initialize db' + "\n" \ + + 'Creating db tables' + ) + sdb={ + create: SiSU_DbDBI::Create.new(@opt,@conn,@file_maint,@sql_type), + index: SiSU_DbDBI::Index.new(@opt,@conn,@file_maint,@sql_type), + } + db_action(sdb).create + end + end + end + if del_id + sql_entry=[ + "DELETE FROM endnotes WHERE metadata_tid = '#{del_id}';", + "DELETE FROM endnotes_asterisk WHERE metadata_tid = '#{del_id}';", + "DELETE FROM endnotes_plus WHERE metadata_tid = '#{del_id}';", + "DELETE FROM doc_objects WHERE metadata_tid = '#{del_id}';", + "DELETE FROM urls WHERE metadata_tid = '#{del_id}';", + "DELETE FROM metadata_and_text WHERE metadata_and_text.tid = '#{del_id}';", + ] + if driver_sqlite3 + @conn.transaction + sql_entry.each do |s| + begin + @conn.execute(s) + rescue + next + end + end + @conn.commit if driver_sqlite3 + else + sql_entry.each do |s| + begin + @conn.exec_params(s) + rescue + next + end + end + end + if @opt.act[:maintenance][:set]==:on + @file.puts sql_entry + end + else + if (@opt.act[:verbose][:set]==:on \ + || @opt.act[:verbose_plus][:set]==:on \ + || @opt.act[:maintenance][:set]==:on) + SiSU_Screen::Ansi.new( + @opt.selections.str, + "no such file in database #{@db.psql.db}::#{@opt.fns}" + ).puts_grey + end + end + end + end +end +__END__ +#+END_SRC + +* db_load_tuple.rb + +#+BEGIN_SRC ruby :tangle "../lib/sisu/db_load_tuple.rb" +# <> +module SiSU_DbTuple + require_relative 'db_columns' # db_columns.rb + class LoadDocuments + require_relative 'dp' # dp.rb + include SiSU_Param + def initialize(conn,col,opt,file_maint) + @conn,@col,@opt,@file_maint=conn,col,opt,file_maint + @col[:lev]=@col[:lev].to_i + unless @col[:lev].inspect=~/^[0-7]/ \ + or @col[:lev]==0..7 + @col[:lev]=9 + end + @col[:ocn]=0 unless @col[:ocn].inspect=~/\d+/ + @cX=SiSU_Screen::Ansi.new(@opt.act[:color_state][:set]).cX + end + def tuple #% import line + sql_entry=if @col[:en_a] + "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, book_idx, ocn, ocnd, ocns, seg, lev0, lev1, lev2, lev3, lev4, lev5, lev6, lev7, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " + + "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:book_idx]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv0]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:lv7]}', '#{@col[:en_a]}', '#{@col[:en_z]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" + else + "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, book_idx, ocn, ocnd, ocns, seg, lev0, lev1, lev2, lev3, lev4, lev5, lev6, lev7, t_of, t_is, node, parent, digest_clean, digest_all) " + + "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:book_idx]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv0]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:lv7]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" + end + if @opt.act[:maintenance][:set]==:on + puts @file_maint.inspect + puts sql_entry + @file_maint.puts sql_entry + elsif 
@opt.act[:verbose_plus][:set]==:on + puts sql_entry + end + if @opt.act[:verbose][:set]==:on + if @col[:lev].inspect =~/[0-35-7]/ + lev=case @col[:lev].inspect + when /0/ then ':A' + when /1/ then ':B' + when /2/ then ':C' + when /3/ then ':D' + when /5/ then ' 2' + when /6/ then ' 3' + when /7/ then ' 4' + end + puts %{#{lev}>\t#{@col[:lv0]}\t#{@col[:lv1]}\t#{@col[:lv2]}\t#{@col[:lv3]}\t#{@col[:lv4]}\t#{@col[:lv5]}\t#{@col[:lv6]}\t#{@col[:lv7]}\t#{@col[:ocn]}\t#{@col[:node]}\t#{@col[:ocns]}} + elsif @col[:lev].inspect =~/[4]/ + puts %{ #{@cX.green}1>#{@cX.off}\t#{@col[:lv0]}\t#{@col[:lv1]}\t#{@col[:lv2]}\t#{@col[:lv3]}\t#{@col[:lv4]}\t#{@col[:lv5]}\t#{@col[:lv6]}\t#{@col[:lv7]}\t#{@col[:ocn]}\t#{@col[:node]}\t#{@col[:ocns]}\t#{@col[:seg]}} + end + end + sql_entry + end + end + class LoadMetadata #< SiSU_DbColumns::Columns + def initialize(conn,id,md,file_maint) + @conn,@id,@md,@file_maint=conn,id,md,file_maint + @tp=SiSU_DbColumns::Columns.new(md) + end + def tuple + sql_entry="INSERT INTO metadata_and_text ( +#{@tp.column.title.tuple[0]} +#{@tp.column.title_main.tuple[0]} +#{@tp.column.title_sub.tuple[0]} +#{@tp.column.title_short.tuple[0]} +#{@tp.column.title_edition.tuple[0]} +#{@tp.column.title_note.tuple[0]} +#{@tp.column.title_language.tuple[0]} +#{@tp.column.title_language_char.tuple[0]} +#{@tp.column.creator_author.tuple[0]} +#{@tp.column.creator_author_honorific.tuple[0]} +#{@tp.column.creator_author_nationality.tuple[0]} +#{@tp.column.creator_editor.tuple[0]} +#{@tp.column.creator_contributor.tuple[0]} +#{@tp.column.creator_illustrator.tuple[0]} +#{@tp.column.creator_photographer.tuple[0]} +#{@tp.column.creator_translator.tuple[0]} +#{@tp.column.creator_prepared_by.tuple[0]} +#{@tp.column.creator_digitized_by.tuple[0]} +#{@tp.column.creator_audio.tuple[0]} +#{@tp.column.creator_video.tuple[0]} +#{@tp.column.language_document.tuple[0]} +#{@tp.column.language_document_char.tuple[0]} +#{@tp.column.language_original.tuple[0]} +#{@tp.column.language_original_char.tuple[0]} +#{@tp.column.date_added_to_site.tuple[0]} +#{@tp.column.date_available.tuple[0]} +#{@tp.column.date_created.tuple[0]} +#{@tp.column.date_issued.tuple[0]} +#{@tp.column.date_modified.tuple[0]} +#{@tp.column.date_published.tuple[0]} +#{@tp.column.date_valid.tuple[0]} +#{@tp.column.date_translated.tuple[0]} +#{@tp.column.date_original_publication.tuple[0]} +#{@tp.column.date_generated.tuple[0]} +#{@tp.column.publisher.tuple[0]} +#{@tp.column.original_publisher.tuple[0]} +#{@tp.column.original_language.tuple[0]} +#{@tp.column.original_language_char.tuple[0]} +#{@tp.column.original_source.tuple[0]} +#{@tp.column.original_institution.tuple[0]} +#{@tp.column.original_nationality.tuple[0]} +#{@tp.column.rights_all.tuple[0]} +#{@tp.column.rights_copyright_text.tuple[0]} +#{@tp.column.rights_copyright_translation.tuple[0]} +#{@tp.column.rights_copyright_illustrations.tuple[0]} +#{@tp.column.rights_copyright_photographs.tuple[0]} +#{@tp.column.rights_copyright_preparation.tuple[0]} +#{@tp.column.rights_copyright_digitization.tuple[0]} +#{@tp.column.rights_copyright_audio.tuple[0]} +#{@tp.column.rights_copyright_video.tuple[0]} +#{@tp.column.rights_license.tuple[0]} +#{@tp.column.classify_topic_register.tuple[0]} +#{@tp.column.classify_subject.tuple[0]} +#{@tp.column.classify_loc.tuple[0]} +#{@tp.column.classify_dewey.tuple[0]} +#{@tp.column.classify_keywords.tuple[0]} +#{@tp.column.identifier_oclc.tuple[0]} +#{@tp.column.identifier_isbn.tuple[0]} +#{@tp.column.notes_abstract.tuple[0]} 
+#{@tp.column.notes_description.tuple[0]} +#{@tp.column.notes_comment.tuple[0]} +#{@tp.column.notes_history.tuple[0]} +#{@tp.column.notes_format.tuple[0]} +#{@tp.column.notes_relation.tuple[0]} +#{@tp.column.notes_coverage.tuple[0]} +#{@tp.column.notes_type.tuple[0]} +#{@tp.column.notes_prefix.tuple[0]} +#{@tp.column.notes_prefix_a.tuple[0]} +#{@tp.column.notes_prefix_b.tuple[0]} +#{@tp.column.notes_suffix.tuple[0]} +#{@tp.column.src_filename.tuple[0]} +#{@tp.column.src_fingerprint.tuple[0]} +#{@tp.column.src_filesize.tuple[0]} +#{@tp.column.src_word_count.tuple[0]} +#{@tp.column.src_txt.tuple[0]} +#{@tp.column.fulltext.tuple[0]} +tid) +" + + "VALUES ( +#{@tp.column.title.tuple[1]} +#{@tp.column.title_main.tuple[1]} +#{@tp.column.title_sub.tuple[1]} +#{@tp.column.title_short.tuple[1]} +#{@tp.column.title_edition.tuple[1]} +#{@tp.column.title_note.tuple[1]} +#{@tp.column.title_language.tuple[1]} +#{@tp.column.title_language_char.tuple[1]} +#{@tp.column.creator_author.tuple[1]} +#{@tp.column.creator_author_honorific.tuple[1]} +#{@tp.column.creator_author_nationality.tuple[1]} +#{@tp.column.creator_editor.tuple[1]} +#{@tp.column.creator_contributor.tuple[1]} +#{@tp.column.creator_illustrator.tuple[1]} +#{@tp.column.creator_photographer.tuple[1]} +#{@tp.column.creator_translator.tuple[1]} +#{@tp.column.creator_prepared_by.tuple[1]} +#{@tp.column.creator_digitized_by.tuple[1]} +#{@tp.column.creator_audio.tuple[1]} +#{@tp.column.creator_video.tuple[1]} +#{@tp.column.language_document.tuple[1]} +#{@tp.column.language_document_char.tuple[1]} +#{@tp.column.language_original.tuple[1]} +#{@tp.column.language_original_char.tuple[1]} +#{@tp.column.date_added_to_site.tuple[1]} +#{@tp.column.date_available.tuple[1]} +#{@tp.column.date_created.tuple[1]} +#{@tp.column.date_issued.tuple[1]} +#{@tp.column.date_modified.tuple[1]} +#{@tp.column.date_published.tuple[1]} +#{@tp.column.date_valid.tuple[1]} +#{@tp.column.date_translated.tuple[1]} +#{@tp.column.date_original_publication.tuple[1]} +#{@tp.column.date_generated.tuple[1]} +#{@tp.column.publisher.tuple[1]} +#{@tp.column.original_publisher.tuple[1]} +#{@tp.column.original_language.tuple[1]} +#{@tp.column.original_language_char.tuple[1]} +#{@tp.column.original_source.tuple[1]} +#{@tp.column.original_institution.tuple[1]} +#{@tp.column.original_nationality.tuple[1]} +#{@tp.column.rights_all.tuple[1]} +#{@tp.column.rights_copyright_text.tuple[1]} +#{@tp.column.rights_copyright_translation.tuple[1]} +#{@tp.column.rights_copyright_illustrations.tuple[1]} +#{@tp.column.rights_copyright_photographs.tuple[1]} +#{@tp.column.rights_copyright_preparation.tuple[1]} +#{@tp.column.rights_copyright_digitization.tuple[1]} +#{@tp.column.rights_copyright_audio.tuple[1]} +#{@tp.column.rights_copyright_video.tuple[1]} +#{@tp.column.rights_license.tuple[1]} +#{@tp.column.classify_topic_register.tuple[1]} +#{@tp.column.classify_subject.tuple[1]} +#{@tp.column.classify_loc.tuple[1]} +#{@tp.column.classify_dewey.tuple[1]} +#{@tp.column.classify_keywords.tuple[1]} +#{@tp.column.identifier_oclc.tuple[1]} +#{@tp.column.identifier_isbn.tuple[1]} +#{@tp.column.notes_abstract.tuple[1]} +#{@tp.column.notes_comment.tuple[1]} +#{@tp.column.notes_description.tuple[1]} +#{@tp.column.notes_history.tuple[1]} +#{@tp.column.notes_format.tuple[1]} +#{@tp.column.notes_relation.tuple[1]} +#{@tp.column.notes_coverage.tuple[1]} +#{@tp.column.notes_type.tuple[1]} +#{@tp.column.notes_prefix.tuple[1]} +#{@tp.column.notes_prefix_a.tuple[1]} +#{@tp.column.notes_prefix_b.tuple[1]} 
+#{@tp.column.notes_suffix.tuple[1]} +#{@tp.column.src_filename.tuple[1]} +#{@tp.column.src_fingerprint.tuple[1]} +#{@tp.column.src_filesize.tuple[1]} +#{@tp.column.src_word_count.tuple[1]} +#{@tp.column.src_txt.tuple[1]} +#{@tp.column.fulltext.tuple[1]} +#{@id} +);" + if @md.opt.act[:maintenance][:set]==:on + puts "maintenance mode on: creating sql transaction file (for last transaction set (document) only):\n\t#{@file_maint.inspect}" + @file_maint.puts sql_entry + end + sql_entry + end + end + class LoadUrls + def initialize(conn,f,u,id,opt,file_maint) + @conn,@f,@u,@id,@opt,@file_maint=conn,f,u,id,opt,file_maint + end + def tuple + sql_entry="INSERT INTO urls (#{@f[:txt]} #{@f[:html_toc]} #{@f[:html_doc]} #{@f[:xhtml]} #{@f[:xml_sax]} #{@f[:xml_dom]} #{@f[:odf]} #{@f[:pdf_p]} #{@f[:pdf_l]} #{@f[:concordance]} #{@f[:latex_p]} #{@f[:latex_l]} #{@f[:manifest]} #{@f[:digest]} #{@f[:markup]} #{@f[:sisupod]} metadata_tid) " + + "VALUES (#{@u[:txt]} #{@u[:html_toc]} #{@u[:html_doc]} #{@u[:xhtml]} #{@u[:xml_sax]} #{@u[:xml_dom]} #{@u[:odf]} #{@u[:pdf_p]} #{@u[:pdf_l]} #{@u[:concordance]} #{@u[:latex_p]} #{@u[:latex_l]} #{@u[:manifest]} #{@u[:digest]} #{@u[:markup]} #{@u[:sisupod]} #{@id});" + if @opt.act[:maintenance][:set]==:on + @file_maint.puts sql_entry + end + sql_entry + end + end + class LoadEndnotes + def initialize(conn,en,opt,file_maint) + @conn,@en,@opt,@file_maint=conn,en,opt,file_maint + end + def tuple + sql_entry="INSERT INTO #{@en[:type]} (nid, document_lid, nr, clean, body, ocn, ocnd, ocns, metadata_tid, digest_clean) " + + "VALUES ('#{@en[:id]}', '#{@en[:lid]}', '#{@en[:nr]}', '#{@en[:txt]}', '#{@en[:body]}', '#{@en[:ocn]}', '#{@en[:ocnd]}', '#{@en[:ocns]}', '#{@en[:id_t]}', '#{@en[:hash]}');" + if @opt.act[:maintenance][:set]==:on + @file_maint.puts sql_entry + end + sql_entry + end + end +end +__END__ +#+END_SRC + +* db_select.rb + +#+BEGIN_SRC ruby :tangle "../lib/sisu/db_select.rb" +# <> +module SiSU_DbAction + def db_action(sdb) + @sdb=sdb + def createdb + @sdb[:create].output_dir? + begin + @sdb[:create].create_db + rescue + @sdb[:create].output_dir? + end + end + def drop + @sdb[:drop].drop.tables + end + def create + @sdb[:create].output_dir? + begin + @sdb[:create].create_table.metadata_and_text + @sdb[:create].create_table.doc_objects + @sdb[:create].create_table.endnotes + @sdb[:create].create_table.endnotes_asterisk + @sdb[:create].create_table.endnotes_plus + @sdb[:create].create_table.urls + @sdb[:index].create_indexes + rescue + SiSU_Errors::Rescued.new($!,$@,'--sqlite').location + @sdb[:create].output_dir? do + __LINE__.to_s + ':' + __FILE__ + end + end + end + def import + db_exist? + @sdb[:import].marshal_load + tell=case @sql_type + when :sqlite + SiSU_Screen::Ansi.new( + @opt.act[:color_state][:set], + "sqlite3 #{@db.sqlite.db} database?" + ) + when :pg + SiSU_Screen::Ansi.new( + @opt.act[:color_state][:set], + "pgaccess or psql #{@db.psql.db} database?" + ) + else '???' + end + tell.puts_grey if @opt.act[:verbose][:set]==:on + end + def remove + db_exist? 
+ @sdb[:remove_doc].remove + end + def update + remove + import + end + self + end +end +module SiSU_DbSelect + class Case + include SiSU_DbAction + def initialize(opt,conn='',sql_type=:pg) + @opt,@conn,@sql_type=opt,conn,sql_type + @db=SiSU_Env::InfoDb.new + @file_maint=sql_maintenance_file + @sdb={ + create: SiSU_DbDBI::Create.new(@opt,@conn,@file_maint,@sql_type), + index: SiSU_DbDBI::Index.new(@opt,@conn,@file_maint,@sql_type), + drop: SiSU_DbDBI::Drop.new(@opt,@conn,@db,@sql_type), + } + if (@opt.act[:psql_import][:set]==:on \ + || @opt.act[:psql_update][:set]==:on) \ + or (@opt.act[:sqlite_import][:set]==:on \ + || @opt.act[:sqlite_update][:set]==:on) + @sdb[:import]=SiSU_DbDBI::Import.new(@opt,@conn,@file_maint,@sql_type) + @sdb[:remove_doc]=SiSU_DbDBI::Remove.new(@opt,@conn,@file_maint,@sql_type) + elsif (@opt.act[:psql_remove][:set]==:on \ + or @opt.act[:sqlite_remove][:set]==:on) + @sdb[:remove_doc]=SiSU_DbDBI::Remove.new(@opt,@conn,@file_maint,@sql_type) + end + end + def db_exist? + if @sql_type==:sqlite \ + and (not (FileTest.file?(@db.sqlite.db)) \ + or FileTest.zero?(@db.sqlite.db)) + puts %{no connection with sqlite database established, you may need to run:\n} \ + + %{ sisu --sqlite --createall\n} \ + + %{ before attempting to populate the database} + SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:yellow).mark( + "\n" \ + + 'Attempting to initialize db' + "\n" \ + + 'Creating db tables' + ) + db_action(@sdb).create + end + if @conn.is_a?(NilClass) + if @sql_type==:sqlite + puts %{no connection with sqlite database established, you may need to run:\n} \ + + %{ sisu --sqlite --createall\n} \ + + %{ before attempting to populate the database} + SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:yellow).mark( + "\n" \ + + 'Attempting to initialize db' + "\n" \ + + 'Creating db tables' + ) + db_action(@sdb).create + @db.sqlite.db + else + puts %{no connection with pg database established, you may need to run:\n} \ + + %{ createdb "#{@db.psql.db}"\n} \ + + %{ after that don't forget to run:\n} \ + + %{ sisu --pg --createall\n} \ + + %{ before attempting to populate the database} + @db.psql.db + end + exit + end + end + def sql_maintenance_file + file=if @opt.act[:maintenance][:set]==:on + if @opt.fns and not @opt.fns.empty? 
+ @env=SiSU_Env::InfoEnv.new(@opt.fns) if @opt.fns + if @sql_type ==:sqlite + puts "\n#{@env.processing_path.sqlite}/#{@opt.fns}.sql" + end + @db=SiSU_Env::InfoDb.new + @job="sqlite3 #{@db.sqlite.db} < #{@env.processing_path.sqlite}/#{@opt.fns}.sql" + if @sql_type ==:sqlite + File.new("#{@env.processing_path.sqlite}/#{@opt.fns}.sql",'w+') + else + File.new("#{@env.processing_path.postgresql}/#{@opt.fns}.sql",'w+') + end + elsif @opt.fns \ + and (@opt.act[:sqlite_create][:set] ==:on \ + || @opt.act[:psql_create][:set] ==:on) + nil #sort variations later + else nil + end + else nil + end + file + end + def cases + if @opt.act[:psql_drop][:set] ==:on \ + or @opt.act[:sqlite_drop][:set] ==:on + db_action(@sdb).drop + end + if @opt.act[:psql_createdb][:set] ==:on \ + or @opt.act[:sqlite_createdb][:set] ==:on + db_action(@sdb).createdb + end + if @opt.act[:psql_create][:set] ==:on \ + or @opt.act[:sqlite_create][:set] ==:on + db_action(@sdb).create + end + if @opt.act[:psql_update][:set] ==:on \ + or @opt.act[:sqlite_update][:set] ==:on + db_action(@sdb).update + else + if @opt.act[:psql_remove][:set] ==:on \ + or @opt.act[:sqlite_remove][:set] ==:on + db_action(@sdb).remove + end + if @opt.act[:psql_import][:set] ==:on \ + or @opt.act[:sqlite_import][:set] ==:on + db_action(@sdb).import + end + end + end + end +end +__END__ +#+END_SRC + +* structure +** db_columns.rb + +#+BEGIN_SRC ruby :tangle "../lib/sisu/db_columns.rb" +# <> +module SiSU_DbColumns + require_relative 'se' # se.rb + require_relative 'db_sqltxt' # db_sqltxt.rb + class Columns < SiSU_DbText::Prepare + def initialize(md=nil) + @md=md + @db=SiSU_Env::InfoDb.new #watch + @lang ||=SiSU_i18n::Languages.new + if defined? md.opt.act \ + and ((md.opt.act[:psql_import][:set]==:on \ + || md.opt.act[:psql_update][:set]==:on) \ + or (md.opt.act[:sqlite_import][:set]==:on \ + || md.opt.act[:sqlite_update][:set]==:on)) \ + and FileTest.exist?(md.fns) + txt_arr=IO.readlines(md.fns,'') + src=txt_arr.join("\n") + if @db.share_source? + @sisutxt=special_character_escape(src) + else @sisutxt='' + end + @fulltext=clean_searchable_text_from_document_objects(txt_arr) + else @sisutxt,@fulltext='','' + end + end +#% structures + #def column_define + # def varchar(name,size) + # "#{name} VARCHAR(#{size}) NULL," + # end + #end +=begin +#% title +@title: + :subtitle: + :short: + :edition: + :language: + :note: +=end + def column + def title # DublinCore 1 - title + def name + 'title' + end + def create_column + "#{name} VARCHAR(#{Db[:col_title]}) NOT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata full document title [DC1]';} + end + def tuple + if defined? @md.title.full \ + and @md.title.full=~/\S+/ + txt=@md.title.full + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_main + def name + 'title_main' + end + def create_column + "#{name} VARCHAR(#{Db[:col_title_part]}) NOT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata main document title';} + end + def tuple + if defined? 
@md.title.main \ + and @md.title.main=~/\S+/ + txt=@md.title.main + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_sub + def name + 'title_sub' + end + def create_column + "#{name} VARCHAR(#{Db[:col_title_part]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document subtitle';} + end + def tuple + if defined? @md.title.sub \ + and @md.title.sub=~/\S+/ + txt=@md.title.sub + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_short + def name + 'title_short' + end + def create_column + "#{name} VARCHAR(#{Db[:col_title_part]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document short title if any';} + end + def tuple + if defined? @md.title.short \ + and @md.title.short=~/\S+/ + txt=@md.title.short + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_edition + def name + 'title_edition' + end + def create_column + "#{name} VARCHAR(#{Db[:col_title_edition]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document edition (version)';} + end + def tuple + if defined? @md.title.edition \ + and @md.title.edition=~/\S+/ + txt=@md.title.edition + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_note + def name + 'title_note' + end + def create_column + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes associated with title';} + end + def tuple + if defined? @md.title.note \ + and @md.title.note=~/\S+/ + txt=@md.title.note + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_language + def name + 'title_language' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document language [DC12]';} + end + def tuple + if @lang.list[@md.opt.lng][:n] + txt=@lang.list[@md.opt.lng][:n] + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_language_char # consider + def name + 'title_language_char' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language_char]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document language iso code';} + end + def tuple + if defined? @md.opt.lng \ + and @md.opt.lng=~/\S+/ + txt=@md.opt.lng + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% creator +@creator: + :author: + :editor: + :contributor: + :illustrator: + :photographer: + :translator: + :prepared_by: + :digitized_by: + :audio: + :video: +=end + def creator_author # DublinCore 2 - creator/author (author) + def name + 'creator_author' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document author (creator) [DC2]';} + end + def tuple + if defined? 
@md.creator.author_detail \ + and @md.creator.author_detail.is_a?(Array) \ + and @md.creator.author_detail.length > 0 + txt='' + @md.creator.author_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt=txt.gsub(/[;, ]+\s*$/,'') + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_author_honorific # consider + def name + 'creator_author_hon' + end + def create_column + "#{name} VARCHAR(#{Db[:col_creator_misc_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document author honorific (title e.g, Ms. Dr. Prof.)';} + end + def tuple + if defined? @md.creator.author_hon \ + and @md.creator.author_hon=~/\S+/ + txt=@md.creator.author_hon + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_author_nationality # consider + def name + 'creator_author_nationality' + end + def create_column + "#{name} VARCHAR(#{Db[:col_creator_misc_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata nationality of document author (creator)';} + end + def tuple + if defined? @md.creator.author_nationality_detail \ + and @md.creator.author_nationality=~/\S+/ + txt=@md.creator.author_nationality_detail + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_editor + def name + 'creator_editor' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document editor name(s)';} + end + def tuple + if defined? @md.creator.editor_detail \ + and @md.creator.editor_detail.is_a?(Array) \ + and @md.creator.editor_detail.length > 0 + txt='' + @md.creator.editor_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt=txt.gsub(/[;, ]+\s*$/,'') + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_contributor # DublinCore 6 - contributor + def name + 'creator_contributor' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document contributor name(s) [DC6]';} + end + def tuple + if defined? @md.creator.contributor_detail \ + and @md.creator.contributor_detail.is_a?(Array) \ + and @md.creator.contributor_detail.length > 0 + txt='' + @md.creator.contributor_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt=txt.gsub(/[;, ]+\s*$/,'') + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_illustrator + def name + 'creator_illustrator' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document illustrator name(s)';} + end + def tuple + if defined? 
@md.creator.illustrator_detail \ + and @md.creator.illustrator_detail.is_a?(Array) \ + and @md.creator.illustrator_detail.length > 0 + txt='' + @md.creator.illustrator_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt=txt.gsub(/[;, ]+\s*$/,'') + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_photographer + def name + 'creator_photographer' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document photographer name(s)';} + end + def tuple + if defined? @md.creator.photographer_detail \ + and @md.creator.photographer_detail.is_a?(Array) \ + and @md.creator.photographer_detail.length > 0 + txt='' + @md.creator.photographer_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt=txt.gsub(/[;, ]+\s*$/,'') + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_translator + def name + 'creator_translator' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document translator name(s)';} + end + def tuple + if defined? @md.creator.translator_detail \ + and @md.creator.translator_detail.is_a?(Array) \ + and @md.creator.translator_detail.length > 0 + txt='' + @md.creator.translator_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt=txt.gsub(/[;, ]+\s*$/,'') + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_prepared_by + def name + 'creator_prepared_by' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document prepared by name(s)';} + end + def tuple + if defined? @md.creator.prepared_by_detail \ + and @md.creator.prepared_by_detail.is_a?(Array) \ + and @md.creator.prepared_by_detail.length > 0 + txt='' + @md.creator.prepared_by_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt=txt.gsub(/[;, ]+\s*$/,'') + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_digitized_by + def name + 'creator_digitized_by' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document digitized by name(s)';} + end + def tuple + if defined? @md.creator.digitized_by_detail \ + and @md.creator.digitized_by_detail.is_a?(Array) \ + and @md.creator.digitized_by_detail.length > 0 + txt='' + @md.creator.digitized_by_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt=txt.gsub(/[;, ]+\s*$/,'') + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_audio + def name + 'creator_audio' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document audio by name(s)';} + end + def tuple + if defined? 
@md.creator.audio_detail \ + and @md.creator.audio_detail.is_a?(Array) \ + and @md.creator.audio_detail.length > 0 + txt='' + @md.creator.audio_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt=txt.gsub(/[;, ]+\s*$/,'') + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_video + def name + 'creator_video' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document video by name(s)';} + end + def tuple + if defined? @md.creator.video_detail \ + and @md.creator.video_detail.is_a?(Array) \ + and @md.creator.video_detail.length > 0 + txt='' + @md.creator.video_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt=txt.gsub(/[;, ]+\s*$/,'') + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% language +#taken from other fields +@title: + :language: +@original: + :language: +#not available --> +#@language: +# :document: +# :original: +=end + def language_document + def name + 'language_document' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document language';} + end + def tuple + if @lang.list[@md.opt.lng][:n] + txt=@lang.list[@md.opt.lng][:n] + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def language_document_char + def name + 'language_document_char' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language_char]}) NOT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document language';} + end + def tuple + #modify check, is now required, SiSUv3d_ + if defined? @md.opt.lng \ + and @md.opt.lng=~/\S+/ + txt=@md.opt.lng + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def language_original + def name + 'language_original' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata original document/text language';} + end + def tuple + if defined? @md.language.original \ + and @md.language.original=~/\S+/ + txt=@md.language.original + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def language_original_char + def name + 'language_original_char' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language_char]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document language';} + end + def tuple + if defined? @md.language.original_char \ + and @md.language.original_char=~/\S+/ + txt=@md.language.original_char + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% date +@date: + :added_to_site: + :available: + :created: + :issued: + :modified: + :published: + :valid: + :translated: + :original_publication: +=end + def date_added_to_site + def name + 'date_added_to_site' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + #"#{name} DATE," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date added to site';} + end + def tuple + if defined? 
@md.date.added_to_site \ + and @md.date.added_to_site=~/\S+/ + txt=@md.date.added_to_site + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_available + def name + 'date_available' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date added to site [DC]';} + end + def tuple + if defined? @md.date.available \ + and @md.date.available=~/\S+/ + txt=@md.date.available + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_created + def name + 'date_created' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date created [DC]';} + end + def tuple + if defined? @md.date.created \ + and @md.date.created=~/\S+/ + txt=@md.date.created + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_issued + def name + 'date_issued' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date issued [DC]';} + end + def tuple + if defined? @md.date.issued \ + and @md.date.issued=~/\S+/ + txt=@md.date.issued + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_modified + def name + 'date_modified' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date modified [DC]';} + end + def tuple + if defined? @md.date.modified \ + and @md.date.modified=~/\S+/ + txt=@md.date.modified + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_published + def name + 'date_published' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date published [DC7]';} + end + def tuple + if defined? @md.date.published \ + and @md.date.published=~/\S+/ + txt=@md.date.published + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_valid + def name + 'date_valid' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date valid [DC]';} + end + def tuple + if defined? @md.date.valid \ + and @md.date.valid=~/\S+/ + txt=@md.date.valid + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_translated + def name + 'date_translated' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date translated';} + end + def tuple + if defined? 
@md.date.translated \ + and @md.date.translated=~/\S+/ + txt=@md.date.translated + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_original_publication + def name + 'date_original_publication' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date of original publication';} + end + def tuple + if defined? @md.date.original_publication \ + and @md.date.original_publication=~/\S+/ + txt=@md.date.original_publication + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_generated + def name + 'date_generated' + end + def create_column #choose other representation of time + "#{name} VARCHAR(30) NULL," + #"#{name} VARCHAR(10) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date of sisu generation of document, automatically populated';} + end + def tuple #choose other representation of time + if defined? @md.generated \ + and @md.generated.to_s=~/\S+/ + txt=@md.generated.to_s + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% publisher +@publisher: +=end + def publisher + def name + 'publisher' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document publisher [DC5]';} + end + def tuple + if defined? @md.publisher \ + and @md.publisher=~/\S+/ + txt=@md.publisher + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +##% current +# def current_publisher +# def name +# 'current_publisher' +# end +# def size +# 10 +# end +# def create_column +# "#{name} VARCHAR(#{current_publisher.size}) NULL," +# end +# def tuple +# t=if defined? @md.current.publisher \ +# and @md.current.publisher=~/\S+/ +# txt=@md.current.publisher +# txt=special_character_escape(txt) +# "'#{txt}', " +# end +# end +# self +# end +=begin +#% original +@original: + :publisher: + #:date: #repeated under date + :language: + :institution: + :nationality: + :source: +=end + def original_publisher + def name + 'original_publisher' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document original publisher [DC5]';} + end + def tuple + if defined? @md.original.publisher \ + and @md.original.publisher=~/\S+/ + txt=@md.original.publisher + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def original_language + def name + 'original_language' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document original language';} + end + def tuple + if defined? 
@md.original.language \ + and @md.original.language=~/\S+/ + txt=@md.original.language + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def original_language_char # consider + def name + 'original_language_char' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language_char]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document original language iso character';} + end + def tuple + if defined? @md.original.language_char \ + and @md.original.language_char=~/\S+/ + txt=@md.original.language_char + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def original_source + def name + 'original_source' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document original source [DC11]';} + end + def tuple + if defined? @md.original.source \ + and @md.original.source=~/\S+/ + txt=@md.original.source + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def original_institution + def name + 'original_institution' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document original institution';} + end + def tuple + if defined? @md.original.institution \ + and @md.original.institution=~/\S+/ + txt=@md.original.institution + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def original_nationality + def name + 'original_nationality' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document original nationality';} + end + def tuple + if defined? @md.original.nationality \ + and @md.original.nationality=~/\S+/ + txt=@md.original.nationality + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% rights +@rights: + #:copyright: #mapped to :text: used where no other copyrights and included in :all: + :text: + :translation: + :illustrations: + :photographs: + :preparation: + :digitization: + :audio: + :video: + :license: + :all: +=end + def rights_all + def name + 'rights' + end + def create_column + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata rights associated with document (composite) [DC15]';} + end + def tuple + if defined? @md.rights.all \ + and @md.rights.all=~/\S+/ + txt=@md.rights.all + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_text + def name + 'rights_copyright_text' + end + def create_column + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text';} + end + def tuple + if defined? 
@md.rights.copyright_text \ + and @md.rights.copyright_text=~/\S+/ + txt=@md.rights.copyright_text + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_translation + def name + 'rights_copyright_translation' + end + def create_column + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text translation (if any)';} + end + def tuple + if defined? @md.rights.copyright_translation \ + and @md.rights.copyright_translation=~/\S+/ + txt=@md.rights.copyright_translation + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_illustrations + def name + 'rights_copyright_illustrations' + end + def create_column + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text illustrations (if any)';} + end + def tuple + if defined? @md.rights.copyright_illustrations \ + and @md.rights.copyright_illustrations=~/\S+/ + txt=@md.rights.copyright_illustrations + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_photographs + def name + 'rights_copyright_photographs' + end + def create_column + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text photographs (if any)';} + end + def tuple + if defined? @md.rights.copyright_photographs \ + and @md.rights.copyright_photographs=~/\S+/ + txt=@md.rights.copyright_photographs + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_preparation + def name + 'rights_copyright_preparation' + end + def create_column + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text preparation (if any)';} + end + def tuple + if defined? @md.rights.copyright_preparation \ + and @md.rights.copyright_preparation=~/\S+/ + txt=@md.rights.copyright_preparation + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_digitization + def name + 'rights_copyright_digitization' + end + def create_column + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text digitization (if any)';} + end + def tuple + if defined? @md.rights.copyright_digitization \ + and @md.rights.copyright_digitization=~/\S+/ + txt=@md.rights.copyright_digitization + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_audio + def name + 'rights_copyright_audio' + end + def create_column + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text audio (if any)';} + end + def tuple + if defined? 
@md.rights.copyright_audio \ + and @md.rights.copyright_audio=~/\S+/ + txt=@md.rights.copyright_audio + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_video + def name + 'rights_copyright_video' + end + def create_column + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text video (if any)';} + end + def tuple + if defined? @md.rights.copyright_video \ + and @md.rights.copyright_video=~/\S+/ + txt=@md.rights.copyright_video + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_license + def name + 'rights_license' + end + def create_column + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata license granted for use of document if any)';} + end + def tuple + if defined? @md.rights.license \ + and @md.rights.license=~/\S+/ + txt=@md.rights.license + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% identifier +@identifier: + :oclc: + :isbn: +=end + def identifier_oclc + def name + 'identifier_oclc' + end + def create_column + "#{name} VARCHAR(#{Db[:col_library]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata identifier document Online Computer Library Center number';} + end + def tuple + if defined? @md.identifier.oclc \ + and @md.identifier.oclc=~/\S+/ + txt=@md.identifier.oclc + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def identifier_isbn + def name + 'identifier_isbn' + end + def create_column + "#{name} VARCHAR(#{Db[:col_small]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata identifier document isbn (if any)';} + end + def tuple + if defined? @md.identifier.isbn \ + and @md.identifier.isbn=~/\S+/ + txt=@md.identifier.isbn + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% classify +@classify: + :topic_register: + :subject: + :keywords: + :type: + :loc: + :dewey: +=end + def classify_topic_register + def name + 'classify_topic_register' + end + def create_column + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document topic register (semi-structured document subject information)';} + end + def tuple + if defined? @md.classify.topic_register \ + and @md.classify.topic_register=~/\S+/ + txt=@md.classify.topic_register + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_subject + def name + 'classify_subject' + end + def create_column + "#{name} VARCHAR(#{Db[:col_txt_long]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document subject matter [DC3]';} + end + def tuple + if defined? 
@md.classify.subject \ + and @md.classify.subject=~/\S+/ + txt=@md.classify.subject + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_loc + def name + 'classify_loc' + end + def create_column + "#{name} VARCHAR(#{Db[:col_library]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document Library of Congress';} + end + def tuple + if defined? @md.classify.loc \ + and @md.classify.loc=~/\S+/ + txt=@md.classify.loc + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_dewey + def name + 'classify_dewey' + end + def create_column + "#{name} VARCHAR(#{Db[:col_library]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document Dewey';} + end + def tuple + if defined? @md.classify.dewey \ + and @md.classify.dewey=~/\S+/ + txt=@md.classify.dewey + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_keywords + def name + 'classify_keywords' + end + def create_column + "#{name} VARCHAR(#{Db[:col_txt_long]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document keywords';} + end + def tuple + if defined? @md.classify.keywords \ + and @md.classify.keywords=~/\S+/ + txt=@md.classify.keywords + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% notes +@notes: + :abstract: + :description: + :comment: + :coverage: + :relation: + :format: + :history: + :prefix: + :prefix_a: + :prefix_b: + :suffix: +=end + def notes_abstract + def name + 'notes_abstract' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes abstract';} + end + def tuple + if defined? @md.notes.abstract \ + and @md.notes.abstract=~/\S+/ + txt=@md.notes.abstract + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_description + def name + 'notes_description' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes description [DC4]';} + end + def tuple + if defined? @md.notes.description \ + and @md.notes.description=~/\S+/ + txt=@md.notes.description + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_comment + def name + 'notes_comment' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes comment';} + end + def tuple + if defined? @md.notes.comment \ + and @md.notes.comment=~/\S+/ + txt=@md.notes.comment + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_coverage + def name + 'notes_coverage' + end + def create_column + "#{name} VARCHAR(#{Db[:col_txt_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document coverage [DC14]';} + end + def tuple + if defined? 
@md.classify.coverage \ + and @md.classify.coverage=~/\S+/ + txt=@md.classify.coverage + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_relation + def name + 'notes_relation' + end + def create_column + "#{name} VARCHAR(#{Db[:col_txt_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document relation [DC13]';} + end + def tuple + if defined? @md.classify.relation \ + and @md.classify.relation=~/\S+/ + txt=@md.classify.relation + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_history #check, consider removal + def name + 'notes_history' + end + def create_column + "#{name} VARCHAR(#{Db[:col_txt_long]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes history';} + end + def tuple + if defined? @md.notes.history \ + and @md.notes.history=~/\S+/ + txt=@md.notes.history + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_type #check + def name + 'notes_type' + end + def create_column + "#{name} VARCHAR(#{Db[:col_txt_long]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata notes document type [DC8]';} + end + def tuple + if defined? @md.notes.type \ + and @md.notes.type=~/\S+/ + txt=@md.notes.type + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_format + def name + 'notes_format' + end + def create_column + "#{name} VARCHAR(#{Db[:col_txt_long]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document format [DC9]';} + end + def tuple + if defined? @md.classify.format \ + and @md.classify.format=~/\S+/ + txt=@md.classify.format + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_prefix + def name + 'notes_prefix' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes prefix';} + end + def tuple + if defined? @md.notes.prefix \ + and @md.notes.prefix=~/\S+/ + txt=@md.notes.prefix + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_prefix_a + def name + 'notes_prefix_a' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes prefix_a';} + end + def tuple + if defined? @md.notes.prefix_a \ + and @md.notes.prefix_a=~/\S+/ + txt=@md.notes.prefix_a + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_prefix_b + def name + 'notes_prefix_b' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes prefix_b';} + end + def tuple + if defined? 
@md.notes.prefix_b \ + and @md.notes.prefix_b=~/\S+/ + txt=@md.notes.prefix_b + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_suffix + def name + 'notes_suffix' + end + def create_column # keep text + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes suffix';} + end + def tuple + if defined? @md.notes.suffix \ + and @md.notes.suffix=~/\S+/ + txt=@md.notes.suffix + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% src +=end + def src_filename + def name + 'src_filename' + end + def create_column + "#{name} VARCHAR(#{Db[:col_filename]}) NOT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'sisu markup source text filename';} + end + def tuple + if defined? @md.fns \ + and @md.fns=~/\S+/ + txt=@md.fns + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def src_fingerprint + def name + 'src_fingerprint' #hash/digest, sha512, sha256 or md5 + end + def create_column + "#{name} VARCHAR(#{Db[:col_digest]}) NULL," + #"#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'sisu markup source text fingerprint, hash digest sha512, sha256 or md5';} + end + def tuple + if defined? @md.dgst \ + and @md.dgst.is_a?(Array) \ + and @md.dgst[1]=~/\S+/ + txt=@md.dgst[1] + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def src_filesize + def name + 'src_filesize' + end + def create_column + "#{name} VARCHAR(#{Db[:col_filesize]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'sisu markup source text file size';} + end + def tuple + if defined? @md.filesize \ + and @md.filesize=~/\S+/ + txt=@md.filesize + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def src_word_count + def name + 'src_word_count' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'sisu markup source text word count';} + end + def tuple + if defined? 
@md.wc_words \ + and @md.wc_words.to_s=~/\S+/ + txt=@md.wc_words + txt=special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def src_txt # consider naming sisusrc + def name + 'src_text' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'sisu markup source text (if shared)';} + end + def tuple + if ((@md.opt.act[:psql_import][:set]==:on \ + || @md.opt.act[:psql_update][:set]==:on) \ + or (@md.opt.act[:sqlite_import][:set]==:on \ + || @md.opt.act[:sqlite_update][:set]==:on)) \ + and FileTest.exist?(@md.fns) + ["#{name}, ","'#{@sisutxt}', "] + else ['',''] + end + end + self + end +=begin +#% misc +@links: +=end + def fulltext + def name + 'fulltext' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'document full text clean, searchable';} + end + def tuple + if ((@md.opt.act[:psql_import][:set]==:on \ + || @md.opt.act[:psql_update][:set]==:on) \ + or (@md.opt.act[:sqlite_import][:set]==:on \ + || @md.opt.act[:sqlite_update][:set]==:on)) \ + and FileTest.exist?(@md.fns) + ["#{name}, ","'#{@fulltext}', "] + else ['',''] + end + end + self + end + def links + def name + 'links' + end + def create_column + "#{name} TEXT NULL," + #"#{name} VARCHAR(#{links.size}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document links';} + end + #def tuple + # #BUG HERE - links is an array of paired values :say :url + # if defined? @md.links \ + # and @md.links=~/\S+/ + # txt=@md.links + # txt=special_character_escape(txt) + # ["#{name}, ","'#{txt}', "] + # else ['',''] + # end + #end + self + end + self + end + end + class ColumnSize + def document_clean # restriction not necessary + 60000 + end + def document_body + 16000 + end + def document_seg + 120 + end + def document_seg_full + 120 + end + def endnote_clean # restriction not necessary + 60000 + end + def endnote_body + 16000 + end + end +end +__END__ +#+END_SRC + +** db_indexes.rb + +#+BEGIN_SRC ruby :tangle "../lib/sisu/db_indexes.rb" +# <> +module SiSU_DbIndex + class Index # create documents Indexes def initialize(opt,conn='',sql_type) + def initialize(opt,conn,file,sql_type) + @opt,@conn,@file,@sql_type=opt,conn,file,sql_type + end + def create_indexes # check added from pg not tested + def conn_execute_sql_pg(conn,sql) + conn.exec_params(sql) + end + def conn_execute_sql_sqlite(conn,sql) + conn.execute(sql) + end + def conn_execute_sql(conn,sql) + if @sql_type==:pg + conn_execute_sql_pg(conn,sql) + elsif @sql_type==:sqlite + conn_execute_sql_sqlite(conn,sql) + else + end + end + def conn_execute_array(sql_arr) + begin + @conn.transaction do |conn| + sql_arr.each do |sql| + conn_execute_sql(conn,sql) + end + end + rescue + if @conn.is_a?(NilClass) + errmsg="No sqlite3 connection (check sqlite3 dependencies)" + if @opt.act[:no_stop][:set]==:on + SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia). + error("#{errmsg}, proceeding without sqlite output (as requested)") + else + SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia). 
+ error("#{errmsg}, STOPPING") + exit + end + end + end + end + def base + if (@opt.act[:verbose_plus][:set]==:on \ + || @opt.act[:maintenance][:set]==:on) + print "\n create documents common indexes\n" + end + sql_arr=[ + %{CREATE INDEX idx_ocn ON doc_objects(ocn);}, + %{CREATE INDEX idx_digest_clean ON doc_objects(digest_clean);}, + %{CREATE INDEX idx_digest_all ON doc_objects(digest_all);}, + %{CREATE INDEX idx_lev0 ON doc_objects(lev0);}, + %{CREATE INDEX idx_lev1 ON doc_objects(lev1);}, + %{CREATE INDEX idx_lev2 ON doc_objects(lev2);}, + %{CREATE INDEX idx_lev3 ON doc_objects(lev3);}, + %{CREATE INDEX idx_lev4 ON doc_objects(lev4);}, + %{CREATE INDEX idx_lev5 ON doc_objects(lev5);}, + %{CREATE INDEX idx_lev6 ON doc_objects(lev6);}, + %{CREATE INDEX idx_endnote_nr ON endnotes(nr);}, + %{CREATE INDEX idx_digest_en ON endnotes(digest_clean);}, + %{CREATE INDEX idx_endnote_nr_asterisk ON endnotes_asterisk(nr);}, + %{CREATE INDEX idx_endnote_asterisk ON endnotes_asterisk(clean);}, + %{CREATE INDEX idx_digest_en_asterisk ON endnotes_asterisk(digest_clean);}, + %{CREATE INDEX idx_endnote_nr_plus ON endnotes_plus(nr);}, + %{CREATE INDEX idx_endnote_plus ON endnotes_plus(clean);}, + %{CREATE INDEX idx_digest_en_plus ON endnotes_plus(digest_clean);}, + %{CREATE INDEX idx_title ON metadata_and_text(title);}, + %{CREATE INDEX idx_author ON metadata_and_text(creator_author);}, + %{CREATE INDEX idx_filename ON metadata_and_text(src_filename);}, + %{CREATE INDEX idx_language ON metadata_and_text(language_document_char);}, + %{CREATE INDEX idx_topics ON metadata_and_text(classify_topic_register)}, + ] + conn_execute_array(sql_arr) + end + def text + if (@opt.act[:verbose_plus][:set]==:on \ + || @opt.act[:maintenance][:set]==:on) + print "\n create documents TEXT indexes\n" + end + sql_arr=[ + %{CREATE INDEX idx_clean ON doc_objects(clean);}, + %{CREATE INDEX idx_endnote ON endnotes(clean);} + ] + conn_execute_array(sql_arr) + end + base + @opt.act[:psql][:set]==:on ? '' : text + end + end +end +__END__ +#+END_SRC + +* db_tests.rb + +#+BEGIN_SRC ruby :tangle "../lib/sisu/db_tests.rb" +# <> +module SiSU_DbTests + class Test + def initialize(info,opt) + @ck,@opt=info,opt + unless @opt.act[:quiet][:set]==:on + puts @ck.tp[:fns] if @ck.tp[:fns] and not @ck.tp[:fns].empty? + puts @ck.tp[:title] if @ck.tp[:title] and not @ck.tp[:title].empty? + puts @ck.tp[:creator] if @ck.tp[:creator] and not @ck.tp[:creator].empty? 
+ end + end + def verify + unless @opt.act[:quiet][:set]==:on + puts @ck.tp[:fns].length.to_s + ' checklength ' + @ck.tp[:fns] if @ck.tp[:fns] and @ck.tp[:fns].length >@ck.lt_filename + puts @ck.tp[:title].length.to_s + ' checklength ' + @ck.tp[:title] if @ck.tp[:title] and @ck.tp[:title].length >@ck.lt_title + puts @ck.tp[:subtitle].length.to_s + ' checklength ' + @ck.tp[:subtitle] if @ck.tp[:subtitle] and @ck.tp[:subtitle].length >@ck.lt_subtitle + puts @ck.tp[:creator].length.to_s + ' checklength ' + @ck.tp[:creator] if @ck.tp[:creator] and @ck.tp[:creator].length >@ck.lt_creator + puts @ck.tp[:author_title].length.to_s + ' checklength ' + @ck.tp[:author_title] if @ck.tp[:author_title] and @ck.tp[:author_title].length >@ck.lt_author_title + puts @ck.tp[:illustrator].length.to_s + ' checklength ' + @ck.tp[:illustrator] if @ck.tp[:illustrator] and @ck.tp[:illustrator].length >@ck.lt_illustrator + puts @ck.tp[:translator].length.to_s + ' checklength ' + @ck.tp[:translator] if @ck.tp[:translator] and @ck.tp[:translator].length >@ck.lt_translator + puts @ck.tp[:prepared_by].length.to_s + ' checklength ' + @ck.tp[:prepared_by] if @ck.tp[:prepared_by] and @ck.tp[:prepared_by].length >@ck.lt_prepared_by + puts @ck.tp[:digitized_by].length.to_s + ' checklength ' + @ck.tp[:digitized_by] if @ck.tp[:digitized_by] and @ck.tp[:digitized_by].length >@ck.lt_digitized_by + puts @ck.tp[:subject].length.to_s + ' checklength ' + @ck.tp[:subject] if @ck.tp[:subject] and @ck.tp[:subject].length >@ck.lt_subject + puts @ck.tp[:description].length.to_s + ' checklength ' + @ck.tp[:description] if @ck.tp[:description] and @ck.tp[:description].length >@ck.lt_description + puts @ck.tp[:publisher].length.to_s + ' checklength ' + @ck.tp[:publisher] if @ck.tp[:publisher] and @ck.tp[:publisher].length >@ck.lt_publisher + puts @ck.tp[:contributor].length.to_s + ' checklength ' + @ck.tp[:contributor] if @ck.tp[:contributor] and @ck.tp[:contributor].length >@ck.lt_contributor + puts @ck.tp[:date].length.to_s + ' checklength ' + @ck.tp[:date] if @ck.tp[:date] and @ck.tp[:date].length >@ck.lt_date + puts @ck.tp[:date_created].length.to_s + ' checklength ' + @ck.tp[:date_created] if @ck.tp[:date_created] and @ck.tp[:date_created].length >@ck.lt_date + puts @ck.tp[:date_issued].length.to_s + ' checklength ' + @ck.tp[:date_issued] if @ck.tp[:date_issued] and @ck.tp[:date_issued].length >@ck.lt_date + puts @ck.tp[:date_valid].length.to_s + ' checklength ' + @ck.tp[:date_valid] if @ck.tp[:date_valid] and @ck.tp[:date_valid].length >@ck.lt_date + puts @ck.tp[:date_available].length.to_s + ' checklength ' + @ck.tp[:date_available] if @ck.tp[:date_available] and @ck.tp[:date_available].length >@ck.lt_date + puts @ck.tp[:date_modified].length.to_s + ' checklength ' + @ck.tp[:date_modified] if @ck.tp[:date_modified] and @ck.tp[:date_modified].length >@ck.lt_date + puts @ck.tp[:date_translated].length.to_s + ' checklength ' + @ck.tp[:date_translated] if @ck.tp[:date_translated] and @ck.tp[:date_translated].length >@ck.lt_date + puts @ck.tp[:date_added_to_site].length.to_s + ' checklength ' + @ck.tp[:date_added_to_site] if @ck.tp[:date_added_to_site] and @ck.tp[:date_added_to_site].length >@ck.lt_date + puts @ck.tp[:type].length.to_s + ' checklength ' + @ck.tp[:type] if @ck.tp[:type] and @ck.tp[:type].length >@ck.lt_type + puts @ck.tp[:format].length.to_s + ' checklength ' + @ck.tp[:format] if @ck.tp[:format] and @ck.tp[:format].length >@ck.lt_format + puts @ck.tp[:identifier].length.to_s + ' checklength ' + @ck.tp[:identifier] if 
@ck.tp[:identifier] and @ck.tp[:identifier].length >@ck.lt_identifier + puts @ck.tp[:source].length.to_s + ' checklength ' + @ck.tp[:source] if @ck.tp[:source] and @ck.tp[:source].length >@ck.lt_source + puts @ck.tp[:language].length.to_s + ' checklength ' + @ck.tp[:language] if @ck.tp[:language] and @ck.tp[:language].length >@ck.lt_language + puts @ck.tp[:language_original].length.to_s + ' checklength ' + @ck.tp[:language_original] if @ck.tp[:language_original] and @ck.tp[:language_original].length >@ck.lt_language_original + puts @ck.tp[:relation].length.to_s + ' checklength ' + @ck.tp[:relation] if @ck.tp[:relation] and @ck.tp[:relation].length >@ck.lt_relation + puts @ck.tp[:coverage].length.to_s + ' checklength ' + @ck.tp[:coverage] if @ck.tp[:coverage] and @ck.tp[:coverage].length >@ck.lt_coverage + puts @ck.tp[:rights].length.to_s + ' checklength ' + @ck.tp[:rights] if @ck.tp[:rights] and @ck.tp[:rights].length >@ck.lt_rights + puts @ck.tp[:copyright].length.to_s + ' checklength ' + @ck.tp[:copyright] if @ck.tp[:copyright] and @ck.tp[:copyright].length >@ck.lt_copyright + puts @ck.tp[:owner].length.to_s + ' checklength ' + @ck.tp[:owner] if @ck.tp[:owner] and @ck.tp[:owner].length >@ck.lt_owner + puts @ck.tp[:keywords].length.to_s + ' checklength ' + @ck.tp[:keywords] if @ck.tp[:keywords] and @ck.tp[:keywords].length >@ck.lt_keywords + puts @ck.tp[:abstract].length.to_s + ' checklength ' + @ck.tp[:abstract] if @ck.tp[:abstract] and @ck.tp[:abstract].length >@ck.lt_abstract + puts @ck.tp[:comment].length.to_s + ' checklength ' + @ck.tp[:comment] if @ck.tp[:comment] and @ck.tp[:comment].length >@ck.lt_comment + puts @ck.tp[:loc].length.to_s + ' checklength ' + @ck.tp[:loc] if @ck.tp[:loc] and @ck.tp[:loc].length >@ck.lt_loc + puts @ck.tp[:dewey].length.to_s + ' checklength ' + @ck.tp[:dewey] if @ck.tp[:dewey] and @ck.tp[:dewey].length >@ck.lt_dewey + puts @ck.tp[:isbn].length.to_s + ' checklength ' + @ck.tp[:isbn] if @ck.tp[:isbn] and @ck.tp[:isbn].length >@ck.lt_isbn + puts @ck.tp[:pg].length.to_s + ' checklength ' + @ck.tp[:pg] if @ck.tp[:pg] and @ck.tp[:pg].length >@ck.lt_pg + puts @ck.tp[:topic_register].length.to_s + ' checklength ' + @ck.tp[:topic_register] if @ck.tp[:topic_register] and @ck.tp[:topic_register].length >@ck.lt_topic_register + puts @ck.tp[:date] if @ck.tp[:date] and not @ck.tp[:date].empty? and @ck.tp[:date] !~/\d\d-\d\d-\d\d/ + end + end + end +end +__END__ +#+END_SRC + +* document header + +#+NAME: sisu_document_header +#+BEGIN_SRC text +encoding: utf-8 +- Name: SiSU + + - Description: documents, structuring, processing, publishing, search + db sql + + - Author: Ralph Amissah + + + - Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2019, + 2020, 2021, Ralph Amissah, + All Rights Reserved. + + - License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. 
+ + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + - SiSU uses: + - Standard SiSU markup syntax, + - Standard SiSU meta-markup syntax, and the + - Standard SiSU object citation numbering and system + + - Homepages: + + + - Git + + + +#+END_SRC -- cgit v1.2.3
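The metadata column helpers in the tangled code above all follow one pattern: each returns an object whose create_column yields a column-definition fragment for the metadata_and_text table and whose tuple yields a pair of comma-terminated name and quoted-value fragments (or two empty strings when the field is absent), fragments shaped so they can be concatenated into a CREATE TABLE statement and into the column and VALUES lists of an INSERT. The sketch below illustrates that assembly in miniature; the Column struct, the sample column names and sizes, and the simple quote doubling are illustrative stand-ins, not SiSU's Db size constants or its special_character_escape.

#+BEGIN_SRC ruby :tangle no
# Minimal sketch (not part of the patch): fold column helpers that expose
# #create_column and #tuple into CREATE TABLE and INSERT statements.
Column = Struct.new(:name, :type, :value) do
  def create_column
    "#{name} #{type} NULL,"                       # definition fragment, trailing comma
  end
  def tuple
    # present and non-blank -> ["name, ", "'value', "], else two empty strings;
    # quote doubling here is a simplified stand-in for special_character_escape
    (value && value =~ /\S+/) ? ["#{name}, ", "'#{value.gsub("'", "''")}', "] : ['', '']
  end
end

# hypothetical sample columns; sizes do not reflect SiSU's Db[:col_*] constants
columns = [
  Column.new('title',             'VARCHAR(800)',  'Example Title'),
  Column.new('classify_keywords', 'VARCHAR(3000)', nil),             # blank, skipped
  Column.new('rights_license',    'VARCHAR(2500)', 'GPL 3 or later'),
]

# CREATE TABLE: join the definition fragments, dropping the final comma
create_sql = "CREATE TABLE metadata_and_text (\n  " +
  columns.map(&:create_column).join("\n  ").sub(/,\z/, '') +
  "\n);"

# INSERT: concatenate the name and value fragments, dropping the final ", "
names  = columns.map { |c| c.tuple[0] }.join.sub(/, \z/, '')
values = columns.map { |c| c.tuple[1] }.join.sub(/, \z/, '')
insert_sql = "INSERT INTO metadata_and_text (#{names}) VALUES (#{values});"

puts create_sql
puts insert_sql
# => INSERT INTO metadata_and_text (title, rights_license)
#    VALUES ('Example Title', 'GPL 3 or later');
#+END_SRC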