-*- mode: org -*- #+TITLE: sisu db sql #+DESCRIPTION: documents - structuring, various output representations & search #+FILETAGS: :sisu:db:sql: #+AUTHOR: Ralph Amissah #+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]] #+COPYRIGHT: Copyright (C) 2015 - 2021 Ralph Amissah #+LANGUAGE: en #+STARTUP: content hideblocks hidestars noindent entitiespretty #+OPTIONS: H:3 num:nil toc:t \n:nil @:t ::t |:t ^:nil _:nil -:t f:t *:t <:t #+PROPERTY: header-args :exports code #+PROPERTY: header-args+ :noweb yes #+PROPERTY: header-args+ :eval no #+PROPERTY: header-args+ :results no #+PROPERTY: header-args+ :cache no #+PROPERTY: header-args+ :padline no * dbi.rb #+BEGIN_SRC ruby :tangle "../lib/sisu/dbi.rb" # <> module SiSU_DBI #% database building require_relative 'se' # se.rb include SiSU_Env; include SiSU_Screen require_relative 'dp' # dp.rb include SiSU_Param require_relative 'db_dbi' # db_dbi.rb include SiSU_DbDBI require_relative 'html_lite_shared' # html_lite_shared.rb include SiSU_FormatShared class SQL def initialize(opt) @opt=opt @db=SiSU_Env::InfoDb.new if @opt.act[:psql][:set]==:on \ or @opt.act[:sqlite][:set]==:on @sql_type=if @opt.act[:psql][:set]==:on maintenance_check(@opt,__FILE__,__LINE__) if @opt.act[:maintenance][:set]==:on :pg elsif @opt.act[:psql][:set]==:on maintenance_check(@opt,__FILE__,__LINE__) if @opt.act[:maintenance][:set]==:on :pg elsif @opt.act[:sqlite][:set]==:on maintenance_check(@opt,__FILE__,__LINE__) if @opt.act[:maintenance][:set]==:on :sqlite elsif @opt.act[:sqlite][:set]==:on maintenance_check(@opt,__FILE__,__LINE__) if @opt.act[:maintenance][:set]==:on :sqlite else maintenance_check(@opt,__FILE__,__LINE__) if @opt.act[:maintenance][:set]==:on :sqlite end if @sql_type==:pg then SiSU_Env::Load.new('pg',true).prog elsif @sql_type==:sqlite then SiSU_Env::Load.new('sqlite3',true).prog end end end def maintenance_check(opt,file,line) p opt.selections.str p "at #{file} #{line}" end def read_psql begin begin require 'pg' rescue LoadError SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia). error('pg NOT FOUND (LoadError)') end @conn=@db.psql.conn_pg rescue if @opt.act[:psql_create][:set]==:on cX=SiSU_Screen::Ansi.new(@opt.act[:color_state][:set]).cX puts <<-WOK manually create the database: "#{cX.green}#{@db.db}#{cX.off}" if it does not yet exist #{cX.yellow}createdb #{@db.db}#{cX.off} WOK #sudo su -p postgres; createdb #{@db.db}; #[createuser?] end ensure end end def read_sqlite begin begin begin require 'sqlite3' rescue LoadError SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia). error('sqlite3 NOT FOUND (LoadError)') end @conn=@db.sqlite.conn_sqlite3 rescue LoadError errmsg='sqlite3 NOT FOUND (LoadError)' if @opt.act[:no_stop][:set]==:on SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia). error(errmsg + ', ' + 'attempt to proceed without sqlite output (as requested)') else SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia). error(errmsg + ', ' + 'STOPPING') exit end end end end def connect case @sql_type when :pg then read_psql #read_pg when :sqlite then read_sqlite end SiSU_Screen::Ansi.new( @opt.act[:color_state][:set], "SQL DB #{@sql_type.upcase}", @opt.fno ).dbi_title unless @opt.act[:quiet][:set]==:on begin SiSU_DbDBI::Case.new(@opt,@conn,@sql_type).cases rescue SiSU_Errors::Rescued.new($!,$@,@cf,@opt.fns).location do __LINE__.to_s + ':' + __FILE__ end ensure end begin rescue connect end end end end __END__ #+END_SRC * dbi_discrete.rb #+BEGIN_SRC ruby :tangle "../lib/sisu/dbi_discrete.rb" # <> module SiSU_DBI_Discrete #% database building require_relative 'se' # se.rb include SiSU_Env; include SiSU_Screen require_relative 'dp' # dp.rb include SiSU_Param require_relative 'db_dbi' # db_dbi.rb include SiSU_DbDBI require_relative 'html_lite_shared' # html_lite_shared.rb include SiSU_FormatShared begin require 'fileutils' include FileUtils::Verbose rescue LoadError SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia). error('fileutils NOT FOUND (LoadError)') end class SQL def initialize(opt) SiSU_Env::Load.new('sqlite3',true).prog @opt=opt @particulars=SiSU_Particulars::CombinedSingleton.instance.get_all(opt) @md=@particulars.md if @opt.act[:sqlite][:set]==:on @sql_type=:sqlite if @opt.act[:maintenance][:set]==:on maintenance_check(@opt,__FILE__,__LINE__) end end @output_path=@md.file.output_path.sqlite_discrete.dir @filename=@md.file.base_filename.sqlite_discrete @file_maint=sql_maintenance_file @file="#{@output_path}/#{@filename}" end def build prepare create_and_populate end def maintenance_check(opt,file,line) #p opt.selections.str p "at #{file} #{line}" end def prepare if not FileTest.directory?(@output_path) FileUtils::mkdir_p(@output_path) elsif @file FileUtils::rm_rf(@file) end end def db_exist?(db,conn) msg=%{no connection with sqlite database established, createdb "#{db.sqlite.db}"?} if (not (FileTest.file?(db.sqlite.db)) \ or FileTest.zero?(db.sqlite.db)) puts msg exit end if conn.is_a?(NilClass) puts msg exit end end def create_and_populate db=SiSU_Env::DbOp.new(@md) conn=db.sqlite_discrete.conn_sqlite3 sdb=SiSU_DbDBI::Create.new(@opt,conn,@file,:sqlite) sdb_index=SiSU_DbDBI::Index.new(@opt,conn,@file,:sqlite) sdb.output_dir? begin SiSU_Screen::Ansi.new( @opt.act[:color_state][:set], 'SQLite (discrete)', "[#{@opt.f_pth[:lng_is]}] #{@opt.fno}" ).green_title_hi unless @opt.act[:quiet][:set]==:on sdb.create_db sdb.create_table.metadata_and_text sdb.create_table.doc_objects sdb.create_table.endnotes sdb.create_table.endnotes_asterisk sdb.create_table.endnotes_plus sdb.create_table.urls sdb_index.create_indexes db_exist?(db,conn) sdb_import=SiSU_DbDBI::Import.new(@opt,conn,@file_maint,:sqlite) sdb_import.marshal_load tell=SiSU_Screen::Ansi.new( @opt.act[:color_state][:set], "sqlite3 #{db.sqlite.db} database?" ) tell.puts_grey if @opt.act[:verbose][:set]==:on rescue SiSU_Errors::Rescued.new($!,$@,'-d').location do __LINE__.to_s + ':' + __FILE__ end sdb.output_dir? end end def read_sqlite begin begin require 'sqlite3' @conn=@db.sqlite.conn_sqlite3 rescue LoadError SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia).error('sqlite3 not available') ensure Dir.chdir(@opt.f_pth[:pth]) end end end def connect begin SiSU_Screen::Ansi.new( @opt.act[:color_state][:set], "DBI (#{@sql_type}) #{@opt.selections.str}", @opt.fns ).dbi_title unless @opt.act[:quiet][:set]==:on @db.sqlite_discrete.conn_sqlite3 rescue SiSU_Errors::Rescued.new($!,$@,@cf,@opt.fns).location do __LINE__.to_s + ':' + __FILE__ end ensure end end def populate end def sql_maintenance_file file=if @opt.act[:maintenance][:set]==:on if @opt.fns and not @opt.fns.empty? @env=SiSU_Env::InfoEnv.new(@opt.fns) if @opt.fns if @sql_type ==:sqlite puts "\n#{@env.processing_path.sqlite}/#{@opt.fns}.sql" end @db=SiSU_Env::InfoDb.new @job="sqlite3 #{@db.sqlite.db} < #{@env.processing_path.sqlite}/#{@opt.fns}.sql" if @sql_type ==:sqlite File.new("#{@env.processing_path.sqlite}/#{@opt.fns}.sql",'w+') else File.new("#{@env.processing_path.postgresql}/#{@opt.fns}.sql",'w+') end elsif @opt.fns \ and @opt.fns.inspect =~/create/ nil #sort variations later else nil end else nil end file end end end __END__ #+END_SRC * db_dbi.rb #+BEGIN_SRC ruby :tangle "../lib/sisu/db_dbi.rb" # <> module SiSU_DbDBI require_relative 'db_columns' # db_columns.rb require_relative 'db_tests' # db_tests.rb require_relative 'db_create' # db_create.rb require_relative 'db_select' # db_select.rb require_relative 'db_indexes' # db_indexes.rb require_relative 'db_drop' # db_drop.rb require_relative 'db_remove' # db_remove.rb require_relative 'db_load_tuple' # db_load_tuple.rb require_relative 'db_import' # db_import.rb class ColumnSize < SiSU_DbColumns::ColumnSize # db_columns.rb end class Test < SiSU_DbTests::Test # db_tests.rb end class Create > module SiSU_DbText class Prepare def special_character_escape(str) str=str.to_s.gsub(/'/m,"''"). #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'") gsub(/(\\)/m,'\1\1'). #ok but with warnings, double backslash on sqlite #str.gsub!(/[\\]/m,'\\x5C') #ok but with warnings, but not for sqlite #str.gsub!(/(\\)/m,'\1') #ok for sqlite not for pgsql gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/m,"
\n"). gsub(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/m,''). #check gsub(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/m,'[image: \1] \2'). gsub(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/m,'\1\2'). gsub(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/m,'\1') end def clean_searchable_text_from_document_objects(arr) en=[] arr=(arr.is_a?(String)) ? [ arr ] : arr txt_arr=arr.each.map do |s| s=s.gsub(/#{Mx[:fa_o]}[a-z]{1,4}#{Mx[:fa_o_c]}/m,''). gsub(/#{Mx[:fa_c_o]}[a-z]{1,4}#{Mx[:fa_c]}/m,''). gsub(/
/m,' ') en << s.scan(/#{Mx[:en_a_o]}\s*(.+?)\s*#{Mx[:en_a_c]}/m) s=s.gsub(/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/m,''). gsub(/#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m,''). gsub(/ \s+/m,' ') #p s if s =~/[^ \nA-Za-z0-9'"`?!#@$%^&*=+,.;:\[\]()<>{}‹›|\\\/~_-]/ s end txt_arr=txt_arr << en txt=txt_arr.flatten.join("\n") special_character_escape(txt) end def clean_document_objects_body(arr) en=[] arr=(arr.is_a?(String)) ? [ arr ] : arr txt_arr=arr.each.map do |s| en << s.scan(/#{Mx[:en_a_o]}\s*(.+?)\s*#{Mx[:en_a_c]}/m) s=s. gsub(/#{Mx[:en_a_o]}\s*(\d+).+?#{Mx[:en_a_c]}/m, '\1'). gsub(/#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m,''). gsub(/ \s+/m,' ') s end en_arr=en.flatten.each.map do |e| e.sub(/^(\d+)\s*/,'\1 ') end txt_arr=txt_arr << en_arr txt=txt_arr.flatten.join("\n
") special_character_escape(txt) end def clean_searchable_text_from_document_source(arr) txt_arr,en=[],[] arr=(arr.is_a?(String)) ? arr.split(/\n+/m) : arr arr.each do |s| s=s.gsub(/([*\/_-])\{(.+?)\}\1/m,'\2'). gsub(/^(?:block|group|poem|code)\{/m,''). gsub(/^\}(?:block|group|poem|code)/m,''). gsub(/\A(?:@\S+:\s+.+)\Z/m,'') if s =~/^:A~/ if defined? @md.creator \ and defined? @md.creator.author \ and not @md.creator.author.empty? s=s.gsub(/@author/,@md.creator.author) else SiSU_Screen::Ansi.new( 'v', 'WARNING Document Author information missing; provide @creator: :author:', @md.fnb ).warn unless @md.opt.act[:quiet][:set]==:on end if defined? @md.title \ and defined? @md.title.full \ and not @md.title.full.empty? s=s.gsub(/@title/,@md.title.full) else SiSU_Screen::Ansi.new( 'v', 'WARNING Document Title missing; provide @title:', @md.fnb ).warn unless @md.opt.act[:quiet][:set]==:on end end s=s.gsub(/^(?:_[1-9]\*?|_\*)\s+/m,''). gsub(/^(?:[1-9]\~(\S+)?)\s+/m,''). gsub(/^(?::?[A-C]\~(\S+)?)\s+/m,''). gsub(/^%{1,3} .+/m,''). #removed even if contained in code block gsub(/
/m,' ') #en << s.scan(/~\{\s*(.+?)\s*\}~/m) s=s.gsub(/~\{.+?\}~/m,''). gsub(/ \s+/m,' ') ##special_character_escape(s) #p s if s =~/[^ \nA-Za-z0-9'"`?!#@$%^&*=+,.;:\[\]()<>{}‹›|\\\/~_-]/ s end txt_arr << arr << en txt=txt_arr.flatten.join("\n") txt=special_character_escape(txt) txt end def strip_markup(str) #define rules, make same as in dal clean str=str.gsub(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]'). gsub(/(?: \\;|#{Mx[:nbsp]})+/,' '). gsub(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1'). #tables gsub(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' '). #tables gsub(/#{Mx[:tc_p]}/u,' '). #tables tidy later gsub(/<.+?>/,''). gsub(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:file|ftp)\/\/:\S+ /,' [image] '). # else image names found in search gsub(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [image]'). # else image names found in search gsub(/\s\s+/,' '). strip end def unique_words(str) a=str.scan(/[a-zA-Z0-9\\\/_-]{2,}/) #a=str.scan(/\S+{2,}/) str=a.uniq.sort.join(' ') str end end end __END__ #+END_SRC * create drop import remove ** db_create.rb #+BEGIN_SRC ruby :tangle "../lib/sisu/db_create.rb" # <> module SiSU_DbCreate require_relative 'db_columns' # db_columns.rb class Create < SiSU_DbColumns::Columns require_relative 'se' # se.rb @@dl=nil def initialize(opt,conn,file,sql_type=:pg) @opt,@conn,@file,@sql_type=opt,conn,file,sql_type @cX=SiSU_Screen::Ansi.new(@opt.act[:color_state][:set]).cX @comment=(@sql_type==:pg) \ ? (SiSU_DbCreate::Comment.new(@conn,@sql_type)) : nil @@dl ||=SiSU_Env::InfoEnv.new.digest.length end def available DBI.available_drivers.each do |driver| puts "Driver: #{driver}" DBI.data_sources(driver).each do |dsn| puts "\tDatasource: #{dsn}" end end end def create_db @env=SiSU_Env::InfoEnv.new(@opt.fns) tell=(@sql_type==:sqlite) \ ? SiSU_Screen::Ansi.new( @opt.act[:color_state][:set], 'Create SQLite db tables in:', %{"#{@file}"} ) : SiSU_Screen::Ansi.new( @opt.act[:color_state][:set], 'Create pgSQL db tables in:', %{"#{Db[:name_prefix]}#{@env.path.base_markup_dir_stub}"} ) if (@opt.act[:verbose][:set]==:on \ || @opt.act[:verbose_plus][:set]==:on \ || @opt.act[:maintenance][:set]==:on) tell.dark_grey_title_hi end SiSU_Env::SystemCall.new.create_pg_db(@env.path.base_markup_dir_stub) if @sql_type==:pg #watch use of path.base_markup_dir_stub instead of stub end def output_dir? dir=SiSU_Env::InfoEnv.new('') if @opt.act[:sqlite][:set]==:on dir.path.webserv_stub_ensure end end def create_table def conn_exec(sql) if @sql_type==:pg conn_exec_pg(sql) elsif @sql_type==:sqlite conn_exec_sqlite(sql) end end def conn_exec_pg(sql) begin @conn.exec_params(sql) rescue if @conn.is_a?(NilClass) errmsg="No pg connection (check pg dependencies)" if @opt.act[:no_stop][:set]==:on SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia). error("#{errmsg}, proceeding without pg output (as requested)") else SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia). error("#{errmsg}, STOPPING") exit end end end end def conn_exec_sqlite(sql) begin @conn.execute(sql) rescue if @conn.is_a?(NilClass) errmsg="No sqlite3 connection (check sqlite3 dependencies)" if @opt.act[:no_stop][:set]==:on SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia). error("#{errmsg}, proceeding without sqlite output (as requested)") else SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia). error("#{errmsg}, STOPPING") exit end end end end def metadata_and_text if (@opt.act[:verbose_plus][:set]==:on \ or @opt.act[:maintenance][:set]==:on) print %{ currently using sisu_dbi module to be populated from document files create tables metadata_and_text data import through ruby transfer } end create_metadata_and_text=%{ CREATE TABLE metadata_and_text ( tid BIGINT PRIMARY KEY, /* title */ #{column.title.create_column} #{column.title_main.create_column} #{column.title_sub.create_column} #{column.title_short.create_column} #{column.title_edition.create_column} #{column.title_note.create_column} #{column.title_language.create_column} #{column.title_language_char.create_column} /* creator */ #{column.creator_author.create_column} #{column.creator_author_honorific.create_column} #{column.creator_author_nationality.create_column} #{column.creator_editor.create_column} #{column.creator_contributor.create_column} #{column.creator_illustrator.create_column} #{column.creator_photographer.create_column} #{column.creator_translator.create_column} #{column.creator_prepared_by.create_column} #{column.creator_digitized_by.create_column} #{column.creator_audio.create_column} #{column.creator_video.create_column} /* language */ #{column.language_document.create_column} #{column.language_document_char.create_column} #{column.language_original.create_column} #{column.language_original_char.create_column} /* date */ #{column.date_added_to_site.create_column} #{column.date_available.create_column} #{column.date_created.create_column} #{column.date_issued.create_column} #{column.date_modified.create_column} #{column.date_published.create_column} #{column.date_valid.create_column} #{column.date_translated.create_column} #{column.date_original_publication.create_column} #{column.date_generated.create_column} /* publisher */ #{column.publisher.create_column} /* original */ #{column.original_publisher.create_column} #{column.original_language.create_column} #{column.original_language_char.create_column} #{column.original_source.create_column} #{column.original_institution.create_column} #{column.original_nationality.create_column} /* rights */ #{column.rights_all.create_column} #{column.rights_copyright_text.create_column} #{column.rights_copyright_translation.create_column} #{column.rights_copyright_illustrations.create_column} #{column.rights_copyright_photographs.create_column} #{column.rights_copyright_preparation.create_column} #{column.rights_copyright_digitization.create_column} #{column.rights_copyright_audio.create_column} #{column.rights_copyright_video.create_column} #{column.rights_license.create_column} /* classify */ #{column.classify_topic_register.create_column} #{column.classify_subject.create_column} #{column.classify_loc.create_column} #{column.classify_dewey.create_column} #{column.classify_keywords.create_column} /* identifier */ #{column.identifier_oclc.create_column} #{column.identifier_isbn.create_column} /* notes */ #{column.notes_abstract.create_column} #{column.notes_description.create_column} #{column.notes_comment.create_column} #{column.notes_history.create_column} #{column.notes_coverage.create_column} #{column.notes_relation.create_column} /* column.notes_source.create_column */ #{column.notes_type.create_column} #{column.notes_format.create_column} #{column.notes_prefix.create_column} #{column.notes_prefix_a.create_column} #{column.notes_prefix_b.create_column} #{column.notes_suffix.create_column} /* src */ #{column.src_filename.create_column} #{column.src_fingerprint.create_column} #{column.src_filesize.create_column} #{column.src_word_count.create_column} #{column.src_txt.create_column} /* misc */ #{column.fulltext.create_column} #{column.links.create_column.gsub(/,$/,'')} /* subj VARCHAR(64) NULL, */ /* contact VARCHAR(100) NULL, */ /* information VARCHAR(100) NULL, */ /* types CHAR(1) NULL, */ /* writing_focus_nationality VARCHAR(100) NULL, */ ); } conn_exec(create_metadata_and_text) @comment.psql.metadata_and_text if @comment end def doc_objects # create doc_objects base if (@opt.act[:verbose_plus][:set]==:on \ or @opt.act[:maintenance][:set]==:on) print %{ to be populated from documents files create tables doc_objects data import through ruby transfer } end create_doc_objects=%{ CREATE TABLE doc_objects ( lid BIGINT PRIMARY KEY, metadata_tid BIGINT REFERENCES metadata_and_text, ocn SMALLINT, ocnd VARCHAR(6), ocns VARCHAR(6), clean TEXT NULL, body TEXT NULL, book_idx TEXT NULL, seg VARCHAR(256) NULL, lev_an VARCHAR(1), lev SMALLINT NULL, lev0 SMALLINT, lev1 SMALLINT, lev2 SMALLINT, lev3 SMALLINT, lev4 SMALLINT, lev5 SMALLINT, lev6 SMALLINT, lev7 SMALLINT, en_a SMALLINT NULL, en_z SMALLINT NULL, en_a_asterisk SMALLINT NULL, en_z_asterisk SMALLINT NULL, en_a_plus SMALLINT NULL, en_z_plus SMALLINT NULL, t_of VARCHAR(16), t_is VARCHAR(16), node VARCHAR(16) NULL, parent VARCHAR(16) NULL, digest_clean CHAR(#{@@dl}), digest_all CHAR(#{@@dl}), types CHAR(1) NULL ); } conn_exec(create_doc_objects) @comment.psql.doc_objects if @comment end def endnotes if (@opt.act[:verbose_plus][:set]==:on \ or @opt.act[:maintenance][:set]==:on) print %{ to be populated from document files create tables endnotes data import through ruby transfer } end create_endnotes=%{ CREATE TABLE endnotes ( nid BIGINT PRIMARY KEY, document_lid BIGINT REFERENCES doc_objects, nr SMALLINT, clean TEXT NULL, body TEXT NULL, ocn SMALLINT, ocnd VARCHAR(6), ocns VARCHAR(6), digest_clean CHAR(#{@@dl}), metadata_tid BIGINT REFERENCES metadata_and_text ); } conn_exec(create_endnotes) @comment.psql.endnotes if @comment end def endnotes_asterisk if (@opt.act[:verbose_plus][:set]==:on \ or @opt.act[:maintenance][:set]==:on) print %{ to be populated from document files create tables endnotes_asterisk data import through ruby transfer } end create_endnotes_asterisk=%{ CREATE TABLE endnotes_asterisk ( nid BIGINT PRIMARY KEY, document_lid BIGINT REFERENCES doc_objects, nr SMALLINT, clean TEXT NULL, body TEXT NULL, ocn SMALLINT, ocnd VARCHAR(6), ocns VARCHAR(6), digest_clean CHAR(#{@@dl}), metadata_tid BIGINT REFERENCES metadata_and_text ); } conn_exec(create_endnotes_asterisk) @comment.psql.endnotes_asterisk if @comment end def endnotes_plus if (@opt.act[:verbose_plus][:set]==:on \ or @opt.act[:maintenance][:set]==:on) print %{ to be populated from document files create tables endnotes_plus data import through ruby transfer } end create_endnotes_plus=%{ CREATE TABLE endnotes_plus ( nid BIGINT PRIMARY KEY, document_lid BIGINT REFERENCES doc_objects, nr SMALLINT, clean TEXT NULL, body TEXT NULL, ocn SMALLINT, ocnd VARCHAR(6), ocns VARCHAR(6), digest_clean CHAR(#{@@dl}), metadata_tid BIGINT REFERENCES metadata_and_text ); } conn_exec(create_endnotes_plus) @comment.psql.endnotes_plus if @comment end def urls # create doc_objects file links mapping if (@opt.act[:verbose_plus][:set]==:on \ or @opt.act[:maintenance][:set]==:on) print %{ currently using sisu_dbi module to be populated from doc_objects files create tables urls data import through ruby transfer } end create_urls=%{ CREATE TABLE urls ( metadata_tid BIGINT REFERENCES metadata_and_text, plaintext varchar(512), html_toc varchar(512), html_doc varchar(512), xhtml varchar(512), xml_sax varchar(512), xml_dom varchar(512), odf varchar(512), pdf_p varchar(512), pdf_l varchar(512), concordance varchar(512), latex_p varchar(512), latex_l varchar(512), digest varchar(512), manifest varchar(512), markup varchar(512), sisupod varchar(512) ); } conn_exec(create_urls) @comment.psql.urls if @comment end self end end class Comment < SiSU_DbColumns::Columns def initialize(conn,sql_type=:pg) @conn=conn if sql_type == :pg then psql end end def psql def conn_execute_array(sql_arr) @conn.transaction do |conn| sql_arr.each do |sql| conn.exec_params(sql) end end end def metadata_and_text sql_arr=[ %{COMMENT ON Table metadata_and_text IS 'contains SiSU metadata and fulltext for search (including source .sst if shared)';}, %{COMMENT ON COLUMN metadata_and_text.tid IS 'unique';}, %{#{column.title.column_comment}}, %{#{column.title_main.column_comment}}, %{#{column.title_sub.column_comment}}, %{#{column.title_short.column_comment}}, %{#{column.title_edition.column_comment}}, %{#{column.title_note.column_comment}}, %{#{column.title_language.column_comment}}, %{#{column.title_language_char.column_comment}}, %{#{column.creator_author.column_comment}}, %{#{column.creator_author_honorific.column_comment}}, %{#{column.creator_author_nationality.column_comment}}, %{#{column.creator_editor.column_comment}}, %{#{column.creator_contributor.column_comment}}, %{#{column.creator_illustrator.column_comment}}, %{#{column.creator_photographer.column_comment}}, %{#{column.creator_translator.column_comment}}, %{#{column.creator_prepared_by.column_comment}}, %{#{column.creator_digitized_by.column_comment}}, %{#{column.creator_audio.column_comment}}, %{#{column.creator_video.column_comment}}, %{#{column.language_document.column_comment}}, %{#{column.language_document_char.column_comment}}, %{#{column.language_original.column_comment}}, %{#{column.language_original_char.column_comment}}, %{#{column.date_added_to_site.column_comment}}, %{#{column.date_available.column_comment}}, %{#{column.date_created.column_comment}}, %{#{column.date_issued.column_comment}}, %{#{column.date_modified.column_comment}}, %{#{column.date_published.column_comment}}, %{#{column.date_valid.column_comment}}, %{#{column.date_translated.column_comment}}, %{#{column.date_original_publication.column_comment}}, %{#{column.date_generated.column_comment}}, %{#{column.publisher.column_comment}}, %{#{column.original_publisher.column_comment}}, %{#{column.original_language.column_comment}}, %{#{column.original_language_char.column_comment}}, %{#{column.original_source.column_comment}}, %{#{column.original_institution.column_comment}}, %{#{column.original_nationality.column_comment}}, %{#{column.rights_all.column_comment}}, %{#{column.rights_copyright_text.column_comment}}, %{#{column.rights_copyright_translation.column_comment}}, %{#{column.rights_copyright_illustrations.column_comment}}, %{#{column.rights_copyright_photographs.column_comment}}, %{#{column.rights_copyright_preparation.column_comment}}, %{#{column.rights_copyright_digitization.column_comment}}, %{#{column.rights_copyright_audio.column_comment}}, %{#{column.rights_copyright_video.column_comment}}, %{#{column.rights_license.column_comment}}, %{#{column.classify_topic_register.column_comment}}, %{#{column.classify_subject.column_comment}}, %{#{column.classify_loc.column_comment}}, %{#{column.classify_dewey.column_comment}}, %{#{column.classify_keywords.column_comment}}, %{#{column.identifier_oclc.column_comment}}, %{#{column.identifier_isbn.column_comment}}, %{#{column.notes_abstract.column_comment}}, %{#{column.notes_comment.column_comment}}, %{#{column.notes_description.column_comment}}, %{#{column.notes_history.column_comment}}, %{#{column.notes_coverage.column_comment}}, %{#{column.notes_relation.column_comment}}, %{#{column.notes_type.column_comment}}, %{#{column.notes_format.column_comment}}, %{#{column.notes_prefix.column_comment}}, %{#{column.notes_prefix_a.column_comment}}, %{#{column.notes_prefix_b.column_comment}}, %{#{column.notes_suffix.column_comment}}, %{#{column.src_filename.column_comment}}, %{#{column.src_fingerprint.column_comment}}, %{#{column.src_filesize.column_comment}}, %{#{column.src_word_count.column_comment}}, %{#{column.src_txt.column_comment}}, %{#{column.fulltext.column_comment}}, %{#{column.links.column_comment}}, ] conn_execute_array(sql_arr) end def doc_objects sql_arr=[ %{COMMENT ON Table doc_objects IS 'contains searchable text of SiSU document objects';}, %{COMMENT ON COLUMN doc_objects.lid IS 'unique';}, %{COMMENT ON COLUMN doc_objects.metadata_tid IS 'tie to title in metadata_and_text';}, %{COMMENT ON COLUMN doc_objects.lev_an IS 'doc level A-D 1-4';}, %{COMMENT ON COLUMN doc_objects.lev IS 'doc level 0-7 \d\~';}, %{COMMENT ON COLUMN doc_objects.seg IS 'segment name from level number 4 (lv 1)';}, %{COMMENT ON COLUMN doc_objects.ocn IS 'object citation number';}, %{COMMENT ON COLUMN doc_objects.en_a IS 'first endnote number in text object (eg. NULL or 34) (used with en_z to create range)';}, %{COMMENT ON COLUMN doc_objects.en_z IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a to create range)';}, %{COMMENT ON COLUMN doc_objects.en_a_asterisk IS 'first endnote number in text object (eg. NULL or 34) (used with en_z_asterisk to create range)';}, %{COMMENT ON COLUMN doc_objects.en_z_asterisk IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_asterisk to create range)';}, %{COMMENT ON COLUMN doc_objects.en_a_plus IS 'first endnote number in text object (eg. NULL or 34) (used with en_z_plus to create range)';}, %{COMMENT ON COLUMN doc_objects.en_z_plus IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_plus to create range)';}, %{COMMENT ON COLUMN doc_objects.types IS 'document types seg scroll';}, %{COMMENT ON COLUMN doc_objects.clean IS 'text object - substantive text: clean, stripped of markup';}, %{COMMENT ON COLUMN doc_objects.body IS 'text object - substantive text: light html markup';}, %{COMMENT ON COLUMN doc_objects.book_idx IS 'book index creation information for paragraph, if provided';}, %{COMMENT ON COLUMN doc_objects.lev0 IS 'document structure, level number 0';}, %{COMMENT ON COLUMN doc_objects.lev1 IS 'document structure, level number 1';}, %{COMMENT ON COLUMN doc_objects.lev2 IS 'document structure, level number 2';}, %{COMMENT ON COLUMN doc_objects.lev3 IS 'document structure, level number 3';}, %{COMMENT ON COLUMN doc_objects.lev4 IS 'document structure, level number 4';}, %{COMMENT ON COLUMN doc_objects.lev5 IS 'document structure, level number 5';}, %{COMMENT ON COLUMN doc_objects.lev6 IS 'document structure, level number 6';}, %{COMMENT ON COLUMN doc_objects.lev7 IS 'document structure, level number 7';}, %{COMMENT ON COLUMN doc_objects.t_of IS 'document structure, type of object (object is of)';}, %{COMMENT ON COLUMN doc_objects.t_is IS 'document structure, object is';}, %{COMMENT ON COLUMN doc_objects.node IS 'document structure, object node if heading';}, %{COMMENT ON COLUMN doc_objects.parent IS 'document structure, object parent (is a heading)';} ] conn_execute_array(sql_arr) end def endnotes sql_arr=[ %{COMMENT ON Table endnotes IS 'contains searchable text of SiSU documents endnotes';}, %{COMMENT ON COLUMN endnotes.nid IS 'unique';}, %{COMMENT ON COLUMN endnotes.document_lid IS 'ties to text block from which referenced';}, %{COMMENT ON COLUMN endnotes.nr IS 'endnote number ';}, %{COMMENT ON COLUMN endnotes.clean IS 'endnote substantive content, stripped of markup';}, %{COMMENT ON COLUMN endnotes.body IS 'endnote substantive content';}, %{COMMENT ON COLUMN endnotes.ocn IS 'object citation no# <\~(\d+)> from which endnote is referenced';}, %{COMMENT ON COLUMN doc_objects.metadata_tid IS 'tie to title in metadata_and_text - unique for each document';} ] conn_execute_array(sql_arr) end def endnotes_asterisk sql_arr=[ %{COMMENT ON Table endnotes_asterisk IS 'contains searchable text of SiSU documents endnotes marked with asterisk';}, %{COMMENT ON COLUMN endnotes_asterisk.nid IS 'unique';}, %{COMMENT ON COLUMN endnotes_asterisk.document_lid IS 'ties to text block from which referenced';}, %{COMMENT ON COLUMN endnotes_asterisk.nr IS 'endnote number ';}, %{COMMENT ON COLUMN endnotes_asterisk.clean IS 'endnote substantive content, stripped of markup';}, %{COMMENT ON COLUMN endnotes_asterisk.body IS 'endnote substantive content';}, %{COMMENT ON COLUMN endnotes_asterisk.ocn IS 'object citation no# <\~(\d+)> from which endnote is referenced';}, %{COMMENT ON COLUMN doc_objects.metadata_tid IS 'tie to title in metadata_and_text - unique for each document';} ] conn_execute_array(sql_arr) end def endnotes_plus sql_arr=[ %{COMMENT ON Table endnotes_plus IS 'contains searchable text of SiSU documents endnotes marked with plus';}, %{COMMENT ON COLUMN endnotes_plus.nid IS 'unique';}, %{COMMENT ON COLUMN endnotes_plus.document_lid IS 'ties to text block from which referenced';}, %{COMMENT ON COLUMN endnotes_plus.nr IS 'endnote number ';}, %{COMMENT ON COLUMN endnotes_plus.clean IS 'endnote substantive content, stripped of markup';}, %{COMMENT ON COLUMN endnotes_plus.body IS 'endnote substantive content';}, %{COMMENT ON COLUMN endnotes_plus.ocn IS 'object citation no# <\~(\d+)> from which endnote is referenced';}, %{COMMENT ON COLUMN doc_objects.metadata_tid IS 'tie to title in metadata_and_text - unique for each document';}, ] conn_execute_array(sql_arr) end def urls sql_arr=[ %{COMMENT ON Table urls IS 'contains base url links to different SiSU output';}, %{COMMENT ON COLUMN doc_objects.metadata_tid IS 'tie to title in metadata_and_text - unique for each document, the mapping of rows is one to one';}, %{COMMENT ON COLUMN urls.plaintext IS 'plaintext utf-8';}, %{COMMENT ON COLUMN urls.html_toc IS 'table of contents for segmented html document';}, %{COMMENT ON COLUMN urls.html_doc IS 'html document (scroll)';}, %{COMMENT ON COLUMN urls.xhtml IS 'xhtml document (scroll)';}, %{COMMENT ON COLUMN urls.xml_sax IS 'xml sax oriented document (scroll)';}, %{COMMENT ON COLUMN urls.xml_dom IS 'xml dom oriented document (scroll)';}, %{COMMENT ON COLUMN urls.odf IS 'opendocument format text';}, %{COMMENT ON COLUMN urls.pdf_p IS 'pdf portrait';}, %{COMMENT ON COLUMN urls.pdf_l IS 'pdf landscape';}, %{COMMENT ON COLUMN urls.concordance IS 'rudimentary document index linked to html';}, %{COMMENT ON COLUMN urls.latex_p IS 'latex portrait';}, %{COMMENT ON COLUMN urls.latex_l IS 'latex_landscape';}, %{COMMENT ON COLUMN urls.markup IS 'markup';}, %{COMMENT ON COLUMN urls.sisupod IS 'SiSU document format .tgz (all SiSU information on document)';}, ] conn_execute_array(sql_arr) end self end end end __END__ #+END_SRC ** db_drop.rb #+BEGIN_SRC ruby :tangle "../lib/sisu/db_drop.rb" # <> module SiSU_DbDrop require_relative 'utils_response' # utils_response.rb class Drop include SiSU_Response def initialize(opt,conn,db_info,sql_type) @opt,@conn,@db_info,@sql_type=opt,conn,db_info,sql_type case @sql_type when :sqlite cascade='' else cascade='CASCADE' end @drop_table=[ "DROP TABLE metadata_and_text #{cascade};", "DROP TABLE doc_objects #{cascade};", "DROP TABLE urls #{cascade};", "DROP TABLE endnotes #{cascade};", "DROP TABLE endnotes_asterisk #{cascade};", "DROP TABLE endnotes_plus #{cascade};", ] end def drop def tables #% drop all tables begin msg_sqlite="as not all disk space is recovered after dropping the database << #{@db_info.sqlite.db} >>, you may be better off deleting the file, and recreating it as necessary" case @sql_type when :sqlite puts msg_sqlite ans=response?('remove sql database?') if ans \ and File.exist?(@db_info.sqlite.db) @conn.close File.unlink(@db_info.sqlite.db) db=SiSU_Env::InfoDb.new conn=db.sqlite.conn_sqlite3 sdb=SiSU_DbDBI::Create.new(@opt,conn,@db_info,@sql_type) sdb_index=SiSU_DbDBI::Index.new(@opt,conn,@db_info,@sql_type) sdb.output_dir? begin sdb.create_db sdb.create_table.metadata_and_text sdb.create_table.doc_objects sdb.create_table.endnotes sdb.create_table.endnotes_asterisk sdb.create_table.endnotes_plus sdb.create_table.urls sdb_index.create_indexes rescue SiSU_Errors::Rescued.new($!,$@,'-D').location do __LINE__.to_s + ':' + __FILE__ end sdb.output_dir? end exit else @conn.transaction @drop_table.each do |d| begin @conn.exec_params(d) rescue next end end @conn.commit end when :pg @conn.transaction @drop_table.each do |d| begin @conn.exec_params(d) rescue next end end @conn.commit end rescue case @sql_type when :sqlite ans=response?('remove sql database?') if ans and File.exist?(@db_info.sqlite.db); File.unlink(@db_info.sqlite.db) end else @drop_table.each do |d| begin @conn.exec_params(d) rescue next end end end ensure end end def indexes def conn_execute_array(sql_arr) @conn.transaction do |conn| sql_arr.each do |sql| begin conn.exec_params(sql) rescue next end end end end def base #% drop base indexes print "\n drop documents common indexes\n" unless @opt.act[:quiet][:set]==:on sql_arr=[ %{DROP INDEX idx_title;}, %{DROP INDEX idx_author;}, %{DROP INDEX idx_filename;}, %{DROP INDEX idx_topics;}, %{DROP INDEX idx_ocn;}, %{DROP INDEX idx_digest_clean;}, %{DROP INDEX idx_digest_all;}, %{DROP INDEX idx_lev0;}, %{DROP INDEX idx_lev1;}, %{DROP INDEX idx_lev2;}, %{DROP INDEX idx_lev3;}, %{DROP INDEX idx_lev4;}, %{DROP INDEX idx_lev5;}, %{DROP INDEX idx_lev6;}, %{DROP INDEX idx_endnote_nr;}, %{DROP INDEX idx_digest_en;}, %{DROP INDEX idx_endnote_nr_asterisk;}, %{DROP INDEX idx_endnote_asterisk;}, %{DROP INDEX idx_digest_en_asterisk;}, %{DROP INDEX idx_endnote_nr_plus;}, %{DROP INDEX idx_endnote_plus;}, %{DROP INDEX idx_digest_en_plus}, ] conn_execute_array(sql_arr) end def text #% drop TEXT indexes, sqlite print "\n drop documents TEXT indexes\n" unless @opt.act[:quiet][:set]==:on sql_arr=[ %{DROP INDEX idx_clean;}, %{DROP INDEX idx_endnote}, ] conn_execute_array(sql_arr) end self end indexes.base @opt.act[:psql][:set]==:on ? '' : indexes.text self end end end __END__ #+END_SRC ** db_import.rb #+BEGIN_SRC ruby :tangle "../lib/sisu/db_import.rb" # <> module SiSU_DbImport require_relative 'db_columns' # db_columns.rb require_relative 'db_load_tuple' # db_load_tuple.rb require_relative 'db_sqltxt' # db_sqltxt.rb require_relative 'html_lite_shared' # html_lite_shared.rb class Import < SiSU_DbText::Prepare include SiSU_Param include SiSU_Screen include SiSU_DbAction @@dl=nil @@hname=nil attr_accessor :tp def initialize(opt,conn,file_maint,sql_type=:pg) @opt,@conn,@file_maint,@sql_type=opt,conn,file_maint,sql_type @cX=SiSU_Screen::Ansi.new(@opt.act[:color_state][:set]).cX @env=SiSU_Env::InfoEnv.new(@opt.fns) @dal="#{@env.processing_path.ao}" @fnb=if @opt.fns.empty? \ or @opt.selections.str.empty? '' else @md=SiSU_Param::Parameters.new(@opt).get @md.fnb end @fnc="#{@dal}/#{@opt.fns}.content.rbm" @@seg,@@seg_full='','' #create? consider placing field just before clean text as opposed to seg which contains seg(.html) name info seg_full would contain seg info for levels 5 & 6 where available eg seg_full may be 7.3 (level 5) and 7.3.1 (level 6) where seg is 7 @col=Hash.new('') @col[:ocn]='' @counter={} @db=SiSU_Env::InfoDb.new if @sql_type==:sqlite @driver_sqlite3= (@conn.inspect.match(/^(.{10})/)[1] \ == @db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]) \ ? true : false end sql='SELECT MAX(lid) FROM doc_objects' begin @col[:lid] ||=0 @col[:lid]=@driver_sqlite3 \ ? @conn.execute( sql ).join.to_i : @conn.exec( sql ).getvalue(0,0).to_i rescue if @opt.act[:maintenance][:set]==:on puts "#{__FILE__}:#{__LINE__}" end end @col[:lid]=0 if @col[:lid].nil? or @col[:lid].to_s.empty? sql='SELECT MAX(nid) FROM endnotes' begin @id_n=@driver_sqlite3 \ ? @conn.execute( sql ).join.to_i : @conn.exec( sql ).getvalue(0,0).to_i @id_n ||=0 rescue if @opt.act[:maintenance][:set]==:on puts "#{__FILE__}:#{__LINE__}" end end @id_n =0 if @col[:lid].nil? or @col[:lid].to_s.empty? @col[:lv0]=@col[:lv1]=@col[:lv2]=@col[:lv3]=@col[:lv4]=@col[:lv5]=@col[:lv6]=@col[:lv7]=0 @db=SiSU_Env::InfoDb.new @pdf_fn=SiSU_Env::FileOp.new(@md).base_filename @@dl ||=SiSU_Env::InfoEnv.new.digest.length end def marshal_load require_relative 'ao' # ao.rb @ao_array=SiSU_AO::Source.new(@opt).get # ao file drawn here if (@opt.act[:verbose][:set]==:on \ || @opt.act[:verbose_plus][:set]==:on \ || @opt.act[:maintenance][:set]==:on) SiSU_Screen::Ansi.new( @opt.act[:color_state][:set], "#{@db.psql.db}::#{@opt.fns}" ).puts_blue end SiSU_Screen::Ansi.new( @opt.act[:color_state][:set], 'Marshal Load', @fnc ).puts_grey if @opt.act[:verbose][:set]==:on select_first_match=%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.src_filename = '#{@md.fns}' AND metadata_and_text.language_document_char = '#{@opt.lng}' ;} # note, for .ssm: @md.fns (is set during runtime & is) != @opt.fns @md.opt.fns file_exist=if @sql_type==:sqlite begin @conn.get_first_value(select_first_match) rescue SQLite3::Exception => e # not tested puts "Exception occurred" puts e SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:yellow).mark( "\n" \ + 'Attempting to initialize db' + "\n" \ + 'Creating db tables' ) sdb={ create: SiSU_DbDBI::Create.new(@opt,@conn,@file_maint,@sql_type), index: SiSU_DbDBI::Index.new(@opt,@conn,@file_maint,@sql_type), } db_action(sdb).create end else begin @conn.exec(select_first_match).field_values("tid")[0] rescue PG::Error => e err=[ e.result.error_field( PG::Result::PG_DIAG_SEVERITY ), e.result.error_field( PG::Result::PG_DIAG_SQLSTATE ), e.result.error_field( PG::Result::PG_DIAG_MESSAGE_PRIMARY ), e.result.error_field( PG::Result::PG_DIAG_MESSAGE_DETAIL ), e.result.error_field( PG::Result::PG_DIAG_MESSAGE_HINT ), e.result.error_field( PG::Result::PG_DIAG_STATEMENT_POSITION ), e.result.error_field( PG::Result::PG_DIAG_INTERNAL_POSITION ), e.result.error_field( PG::Result::PG_DIAG_INTERNAL_QUERY ), e.result.error_field( PG::Result::PG_DIAG_CONTEXT ), e.result.error_field( PG::Result::PG_DIAG_SOURCE_FILE ), e.result.error_field( PG::Result::PG_DIAG_SOURCE_LINE ), e.result.error_field( PG::Result::PG_DIAG_SOURCE_FUNCTION ), ] p err if err[2] =~/relation "\S+?" does not exist/ \ or err.inspect =~/relation "\S+?" does not exist/ SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:yellow).mark( "\n" \ + err[2] + "\n" \ + 'Attempting to initialize db' + "\n" \ + 'Creating db tables' ) sdb={ create: SiSU_DbDBI::Create.new(@opt,@conn,@file_maint,@sql_type), index: SiSU_DbDBI::Index.new(@opt,@conn,@file_maint,@sql_type), } db_action(sdb).create retry end end end if not file_exist t_d=[] # transaction_data t_d << db_import_metadata t_d << db_import_documents(@ao_array) t_d << db_import_urls(@ao_array,@fnc) #import OID on/off t_d=t_d.flatten if (@opt.act[:verbose_plus][:set]==:on \ || @opt.act[:maintenance][:set]==:on) puts @conn.class if defined? @conn.class puts @conn.driver_name if defined? @conn.driver_name puts @conn.driver if defined? @conn.driver end begin #% sql if @sql_type==:sqlite @conn.transaction do |conn| t_d.each do |sql| conn.execute(sql) end end #also 'execute' works for sqlite #@conn.execute("BEGIN") # t_d.each do |sql| # @conn.execute(sql) # end #@conn.execute("COMMIT") else #'do' works for postgresql @conn.exec("BEGIN") t_d.each do |sql| @conn.exec(sql) end @conn.exec("COMMIT") end rescue SiSU_Errors::Rescued.new($!,$@,@opt.selections.str,@opt.fns).location do __LINE__.to_s + ':' + __FILE__ end sqlfn="#{@env.processing_path.sql}/#{@md.fnb}.sql" sql=File.new(sqlfn,'w') t_d.each {|i| sql.puts i} p sqlfn if @opt.act[:maintenance][:set]==:on puts sql p @conn.methods.sort puts "#{__FILE__}:#{__LINE__}" end ensure end else if file_exist @db=SiSU_Env::InfoDb.new puts "\nfile #{@opt.fns} in language code #{@opt.lng} already exists in database #{@db.psql.db}, use --update instead?" end end end def pf_db_import_transaction_open end def pf_db_import_transaction_close end def book_idx_hash_to_str(book_idx) book_idx=book_idx ? book_idx : '' book_idx_str,book_subidx_part='','' if not book_idx.empty? book_idx_str='' book_idx.each_pair do |k0,v0| book_idx_str << %{#{k0}+#{v0[:plus]}} book_subidx_part='' if v0[:sub].length > 0 v0[:sub].each do |subterms| subterms.each_pair do |k1,v1| book_subidx_part << %{\n #{k1}+#{v1[:plus]} | } end end book_idx_str=book_idx_str + ':' + book_subidx_part end end end book_idx_str end def db_import_metadata #% import documents - populate database if (@opt.act[:verbose][:set]==:on \ || @opt.act[:verbose_plus][:set]==:on \ || @opt.act[:maintenance][:set]==:on) print %{ #{@cX.grey}import documents dbi_unit #{@cX.off} } end @tp={} @md=SiSU_Param::Parameters.new(@opt).get #% sisutxt & fulltxt if FileTest.exist?(@md.fns) txt_arr=IO.readlines(@md.fns,'') src=txt_arr.join("\n") src=special_character_escape(src) @tp[:sisutxt_f],@tp[:sisutxt_i]='sisutxt, ',"'#{src}', " txt=clean_searchable_text_from_document_source(txt_arr) #txt=special_character_escape(txt) @tp[:fulltxt_f],@tp[:fulltxt_i]='fulltxt, ',"'#{txt}', " end #% title if defined? @md.title.full \ and @md.title.full=~/\S+/ # DublinCore 1 - title #@tp[:title]=@md.title.full #special_character_escape(@tp[:title]) #@tp[:title_f],@tp[:title_i]='title, ',"'#{@tp[:title]}', " sql='SELECT MAX(tid) FROM metadata_and_text;' begin @@id_t ||=0 id_t=@driver_sqlite3 \ ? @conn.execute( sql ).join.to_i # { |x| id_t=x.join.to_i } : @conn.exec( sql ).getvalue(0,0).to_i @@id_t=id_t if id_t rescue if @opt.act[:maintenance][:set]==:on puts "#{__FILE__} #{__LINE__}" end end @@id_t+=1 #bug related, needs to be performed once at start of file, but consider moving, as, placed here it means program will fail if document header lacks @title: if (@opt.act[:verbose][:set]==:on \ || @opt.act[:verbose_plus][:set]==:on \ || @opt.act[:maintenance][:set]==:on) puts %{\n#{@cX.grey}Processing file number#{@cX.off}: #{@cX.green}#{@@id_t}#{@@cX.off}} end end ################ CLEAR ############## SiSU_DbDBI::Test.new(self,@opt).verify #% import title names, filenames (tuple) t=SiSU_DbTuple::LoadMetadata.new(@conn,@@id_t,@md,@file_maint) tuple=t.tuple tuple end def db_import_documents(ao_array) #% import documents - populate main database table, import into substantive database tables (tuple) begin @col[:tid]=@@id_t @en,@en_ast,@en_pls,@tuple_array=[],[],[],[] @col[:en_a],@col[:en_z]=nil,nil ao_array.each do |data| data.obj=data.obj.gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1'). gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1'). gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1'). gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1'). gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1'). gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1'). gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1'). gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1'). gsub(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\1'). gsub(/#{Mx[:gl_o]}(●)#{Mx[:gl_c]}\s*/,'\1 '). gsub(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check @col[:seg]=@@seg if data.of ==:para \ || data.of ==:heading \ || data.of ==:heading_insert \ || data.of ==:block \ || data.of ==:group # regular text what of code-blocks grouped text etc. notedata=data.obj.dup #% :headings if data.is==:heading \ && (data.ln.inspect=~/[0-3]/) ( @col[:lev], txt,@col[:ocn], @col[:lev_an], @col[:ocnd],@col[:ocns], @col[:t_of],@col[:t_is], @col[:node],@col[:parent], @col[:digest_clean],@col[:digest_all]= data.ln, data.obj,data.ocn, data.lv, data.odv,data.osp, data.of,data.is, data.node,data.parent, '','' ) @col[:lid]+=1 txt=endnotes(txt).extract_any body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_minus @col[:body]=clean_document_objects_body(body) plaintext=@col[:body].dup plaintext=strip_markup(plaintext) @col[:plaintext]=clean_searchable_text_from_document_objects(plaintext) book_idx=book_idx_hash_to_str(data.idx) @col[:book_idx]=clean_searchable_text_from_document_objects(book_idx) if @en[0] then @en_a,@en_z=@en[0].first,@en[0].last end if @en_ast[0] then @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last end if @en_pls[0] then @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last end t=SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint) @tuple_array << t.tuple case @col[:lev] when /0/ then @col[:lv0]+=1 when /1/ then @col[:lv1]+=1 when /2/ then @col[:lv2]+=1 when /3/ then @col[:lv3]+=1 when /4/ then @col[:lv4]+=1 end @col[:lev]=@col[:plaintext]=@col[:body]='' elsif data.is==:heading \ && data.ln==4 ( @@seg,txt, @col[:ocn],@col[:lev_an], @col[:ocnd],@col[:ocns], @col[:t_of],@col[:t_is], @col[:node],@col[:parent], @col[:digest_clean],@col[:digest_all]= data.name,data.obj, data.ocn,data.lv, data.odv,data.osp, data.of,data.is, data.node,data.parent, '','' ) @col[:seg]=@@seg @col[:lv4]+=1 @col[:lid]+=1 @col[:lev]=4 @hname=if @col[:seg] \ and not @col[:seg].to_s.empty? @@hname=@col[:seg].to_s else @@hname end @env=SiSU_Env::InfoEnv.new(@md.fns) @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" txt=endnotes(txt).extract_any body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus @col[:body]=clean_document_objects_body(body) plaintext=@col[:body].dup plaintext=strip_markup(plaintext) @col[:plaintext]=clean_searchable_text_from_document_objects(plaintext) book_idx=book_idx_hash_to_str(data.idx) @col[:book_idx]=clean_searchable_text_from_document_objects(book_idx) @en_a,@en_z=@en[0].first,@en[0].last if @en[0] @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] t=SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint) @tuple_array << t.tuple @col[:lev]=@col[:plaintext]=@col[:body]='' elsif data.is==:heading \ && data.ln==5 ( txt, @col[:ocn],@col[:lev_an], @col[:ocnd],@col[:ocns], @col[:t_of],@col[:t_is], @col[:node],@col[:parent], @col[:digest_clean],@col[:digest_all]= data.obj, data.ocn,data.lv, data.odv,data.osp, data.of,data.is, data.node,data.parent, '','' ) @@seg_full=data.name if data.is==:heading \ && data.ln==5 \ && data.name #check data.name @@seg ||='' #nil # watch @col[:seg]=@@seg @col[:lv5]+=1 @col[:lid]+=1 @col[:lev]=5 @hname=if @col[:seg] \ and not @col[:seg].to_s.empty? @@hname=@col[:seg].to_s else @@hname end @env=SiSU_Env::InfoEnv.new(@md.fns) @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" txt=endnotes(txt).extract_any body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus @col[:body]=clean_document_objects_body(body) plaintext=@col[:body].dup plaintext=strip_markup(plaintext) @col[:plaintext]=clean_searchable_text_from_document_objects(plaintext) book_idx=book_idx_hash_to_str(data.idx) @col[:book_idx]=clean_searchable_text_from_document_objects(book_idx) @en_a,@en_z=@en[0].first,@en[0].last if @en[0] @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] t=SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint) @tuple_array << t.tuple @col[:lev]=@col[:plaintext]=@col[:body]='' elsif data.is==:heading \ && data.ln==6 txt, @col[:ocn],@col[:lev_an],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]= data.obj,data.ocn, data.lv, data.odv, data.osp, data.of, data.is, data.node, data.parent, '', '' @@seg_full=data.name if data.is==:heading && data.ln==6 && data.name #check data.name @@seg ||='' #nil # watch @col[:seg]=@@seg @col[:lv6]+=1 @col[:lid]+=1 @col[:lev]=6 @hname=if @col[:seg] \ and not @col[:seg].to_s.empty? @@hname=@col[:seg].to_s else @@hname end @env=SiSU_Env::InfoEnv.new(@md.fns) @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" txt=endnotes(txt).extract_any body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus @col[:body]=clean_document_objects_body(body) plaintext=@col[:body].dup plaintext=strip_markup(plaintext) @col[:plaintext]=clean_searchable_text_from_document_objects(plaintext) book_idx=book_idx_hash_to_str(data.idx) @col[:book_idx]=clean_searchable_text_from_document_objects(book_idx) @en_a,@en_z=@en[0].first,@en[0].last if @en[0] @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] t=SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint) @tuple_array << t.tuple @col[:lev]=@col[:plaintext]=@col[:body]='' elsif data.is==:heading \ && data.ln==7 txt, @col[:ocn],@col[:lev_an],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]= data.obj,data.ocn, data.lv, data.odv, data.osp, data.of, data.is, data.node, data.parent, '', '' @@seg_full=data.name if data.is==:heading && data.ln==7 && data.name #check data.name @@seg ||='' #nil # watch @col[:seg]=@@seg @col[:lv7]+=1 @col[:lid]+=1 @col[:lev]=7 @hname=if @col[:seg] \ and not @col[:seg].to_s.empty? @@hname=@col[:seg].to_s else @@hname end @env=SiSU_Env::InfoEnv.new(@md.fns) @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" txt=endnotes(txt).extract_any body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus @col[:body]=clean_document_objects_body(body) plaintext=@col[:body].dup plaintext=strip_markup(plaintext) @col[:plaintext]=clean_searchable_text_from_document_objects(plaintext) book_idx=book_idx_hash_to_str(data.idx) @col[:book_idx]=clean_searchable_text_from_document_objects(book_idx) @en_a,@en_z=@en[0].first,@en[0].last if @en[0] @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] t=SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint) @tuple_array << t.tuple @col[:lev]=@col[:plaintext]=@col[:body]='' #% :structure :layout :comment elsif data.of==:structure \ || data.of==:layout \ || data.of==:comment #added watch #% : else #% regular text @col[:lid]+=1 ( txt='' txt,@col[:ocn], @col[:ocnd],@col[:ocns], @col[:t_of],@col[:t_is], @col[:node],@col[:parent], @col[:digest_clean],@col[:digest_all], @col[:lev]= data.obj,data.ocn, data.odv,data.osp, data.of,data.is, '',data.parent, '','', 9 ) @hname=if @col[:seg] \ and not @col[:seg].to_s.empty? @@hname=@col[:seg].to_s else @@hname end @env=SiSU_Env::InfoEnv.new(@md.fns) @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" txt=endnotes(txt).extract_any if @sql_type==:pg \ and txt.size > (SiSU_DbColumns::ColumnSize.new.document_clean - 1) # examine pg build & remove limitation puts "\n\nTOO LARGE (TXT - see error log)\n\n" open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| error.puts("\n#{@opt.fns}\nTEXT BODY\n#{@col[:body].size} object #{@col[:ocn]} -> #{@col[:body].slice(0..500)}") end txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n} end @en_a,@en_z=@en[0].first,@en[0].last if @en[0] @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] body=if data.is==:table SiSU_FormatShared::CSS_Format.new(@md,data).html_table elsif data.is==:code SiSU_FormatShared::CSS_Format.new(@md,data).code elsif defined? data.indent \ and defined? data.hang \ and data.indent =~/[1-9]/ \ and data.indent == data.hang SiSU_FormatShared::CSS_Format.new(@md,data).indent(data.indent) elsif defined? data.indent \ and defined? data.hang \ and data.hang =~/[0-9]/ \ and data.indent != data.hang SiSU_FormatShared::CSS_Format.new(@md,data).hang_indent(data.hang,data.indent) else SiSU_FormatShared::CSS_Format.new(@md,data).norm end @col[:body]=clean_document_objects_body(body) plaintext=@col[:body].dup plaintext=strip_markup(plaintext) @col[:plaintext]=clean_searchable_text_from_document_objects(plaintext) book_idx=book_idx_hash_to_str(data.idx) @col[:book_idx]=clean_searchable_text_from_document_objects(book_idx) t=SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint) @tuple_array << t.tuple @en,@en_ast,@en_pls=[],[],[] @col[:en_a]=@col[:en_z]=nil @col[:lev]=@col[:plaintext]=@col[:body]=@col[:words]='' end if notedata =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ #% import into database endnotes tables endnote_array=notedata.scan(/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/) endnote_array.each do |inf| if inf[/#{Mx[:en_a_o]}\d+.+?#{Mx[:en_a_c]}/] if inf[/#{Mx[:en_a_o]}(\d+)(.+?)#{Mx[:en_a_c]}/] nr,txt,digest_clean=$1,$2.strip,0 end @id_n ||=0 @id_n+=1 txt=special_character_escape(txt) body=SiSU_FormatShared::CSS_Format.new(@md,data).endnote(nr,txt) txt=strip_markup(txt) if txt.size > (SiSU_DbColumns::ColumnSize.new.endnote_clean - 1) puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}") end txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n} end if txt en={ type: 'endnotes', id: @id_n, lid: @col[:lid], nr: nr, txt: txt, body: body, ocn: @col[:ocn], ocnd: @col[:ocnd], ocns: @col[:ocns], id_t: @@id_t, hash: digest_clean } t=SiSU_DbTuple::LoadEndnotes.new(@conn,en,@opt,@file_maint) @tuple_array << t.tuple end end end word_mode=notedata.scan(/\S+/) end if notedata =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ #% import into database endnotes tables endnote_array=notedata.scan(/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/) endnote_array.each do |inf| if inf[/#{Mx[:en_b_o]}\*\d+.+?#{Mx[:en_b_c]}/] # dal new endnotes 2003w31/1 if inf[/#{Mx[:en_b_o]}[*](\d+)(.+?)#{Mx[:en_b_c]}/] # dal new endnotes 2003w31/1 nr,txt,digest_clean=$1,$2.strip,0 end @id_n+=1 txt=special_character_escape(txt) body=SiSU_FormatShared::CSS_Format.new(@md,data).endnote(nr,txt) txt=strip_markup(txt) if txt.size > (SiSU_DbColumns::ColumnSize.new.endnote_clean - 1) puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}") end txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n} end if txt en={ type: 'endnotes_asterisk', id: @id_n, lid: @col[:lid], nr: nr, txt: txt, body: body, ocn: @col[:ocn], ocnd: @col[:ocnd], ocns: @col[:ocns], id_t: @@id_t, hash: digest_clean } t=SiSU_DbTuple::LoadEndnotes.new(@conn,en,@opt,@file_maint) @tuple_array << t.tuple end end end word_mode=notedata.scan(/\S+/) end if notedata =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ #% import into database endnotes tables endnote_array=notedata.scan(/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/) endnote_array.each do |inf| if inf[/#{Mx[:en_b_o]}\+\d+.+?#{Mx[:en_b_c]}/] # dal new endnotes 2003w31/1 if inf[/#{Mx[:en_b_o]}[+](\d+)(.+?)#{Mx[:en_b_c]}/] # dal new endnotes 2003w31/1 nr,txt,digest_clean=$1,$2.strip,0 end @id_n+=1 txt=special_character_escape(txt) body=SiSU_FormatShared::CSS_Format.new(@md,data).endnote(nr,txt) txt=strip_markup(txt) if txt.size > (SiSU_DbColumns::ColumnSize.new.endnote_clean - 1) puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}") end txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n} end if txt en={ type: 'endnotes_plus', id: @id_n, lid: @col[:lid], nr: nr, txt: txt, body: body, ocn: @col[:ocn], ocnd: @col[:ocnd], ocns: @col[:ocns], id_t: @@id_t, hash: digest_clean } t=SiSU_DbTuple::LoadEndnotes.new(@conn,en,@opt,@file_maint) @tuple_array << t.tuple end end end word_mode=notedata.scan(/\S+/) end end end rescue SiSU_Errors::Rescued.new($!,$@,@opt.selections.str,@opt.fns).location do __LINE__.to_s + ':' + __FILE__ end ensure end @tuple_array end def endnotes(txt) @txt=txt def extract_any if @txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ endnotes(@txt).range @en << endnotes(@txt).standard if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ @en_ast << endnotes(@txt).asterisk if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ @en_pls << endnotes(@txt).plus if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ @txt=endnotes(@txt).clean_text end @txt end def standard (@txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/) \ ? @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/) : nil end def asterisk (@txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/) \ ? @txt.scan(/#{Mx[:en_b_o]}[*](\d+).+?#{Mx[:en_b_c]}/) : nil end def plus (@txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/) \ ? @txt.scan(/#{Mx[:en_b_o]}[+](\d+).+?#{Mx[:en_b_c]}/) : nil end def clean_text(base_url=nil) @txt=if base_url @txt.gsub(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/,%{\\1}). gsub(/#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}/,%{\\1}). gsub(/#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/,%{\\1}) else @txt.gsub(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/,'\1'). gsub(/#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}/,'\1'). gsub(/#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/,'\1') end @txt end def range @col[:en_a]=@col[:en_z]=nil if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}|#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/ word_array=@txt.scan(/\S+/) word_array.each do |w| if w[/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/] # not tested since change 2003w31 @col[:en_a]=$1 unless @col[:en_a] @col[:en_z]=@col[:en_a].dup unless @col[:en_a] @col[:en_z]=$1 if @col[:en_a] end end end @col end self end def db_import_urls(dbi_unit,content) #% import documents OID - populate database begin @fnc=content @env=SiSU_Env::InfoEnv.new(@opt.fns) f,u={},{} if @fnb.empty? \ or @fnb.nil? p 'file output path error' #remove end if FileTest.file?("#{@md.file.output_path.txt.dir}/#{@md.file.base_filename.txt}")==true f[:txt],u[:txt]='plaintext,', "'#{@md.file.output_path.txt.url}/#{@md.file.base_filename.txt}'," end if FileTest.file?("#{@md.file.output_path.html_seg.dir}/#{@md.file.base_filename.html_segtoc}")==true f[:html_toc],u[:html_toc]='html_toc,', "'#{@md.file.output_path.html_seg.url}/#{@md.file.base_filename.html_segtoc}'," end if FileTest.file?("#{@md.file.output_path.html_scroll.dir}/#{@md.file.base_filename.html_scroll}")==true f[:html_doc],u[:html_doc]='html_doc,', "'#{@md.file.output_path.html_scroll.url}/#{@md.file.base_filename.html_scroll}'," end if FileTest.file?("#{@md.file.output_path.xhtml.dir}/#{@md.file.base_filename.xhtml}")==true f[:xhtml],u[:xhtml]='xhtml,', "'#{@md.file.output_path.xhtml.url}/#{@md.file.base_filename.xhtml}'," end if FileTest.file?("#{@md.file.output_path.xml_sax.dir}/#{@md.file.base_filename.xml_sax}")==true f[:xml_sax],u[:xml_sax]='xml_sax,', "'#{@md.file.output_path.xml_sax.url}/#{@md.file.base_filename.xml_sax}'," end if FileTest.file?("#{@md.file.output_path.xml_dom.dir}/#{@md.file.base_filename.xml_dom}")==true f[:xml_dom],u[:xml_dom]='xml_dom,', "'#{@md.file.output_path.xml_dom.url}/#{@md.file.base_filename.xml_dom}'," end if FileTest.file?("#{@md.file.output_path.epub.dir}/#{@md.file.base_filename.epub}")==true f[:epub],u[:epub]='epub,', "'#{@md.file.output_path.epub.url}/#{@md.file.base_filename.epub}'," end if FileTest.file?("#{@md.file.output_path.odt.dir}/#{@md.file.base_filename.odt}")==true f[:odf],u[:odf]='odf,', "'#{@md.file.output_path.odt.url}/#{@md.file.base_filename.odt}'," end if FileTest.file?("#{@md.file.output_path.pdf.dir}/#{@pdf_fn.pdf_p_a4}")==true #\ #or FileTest.file?("#{@md.file.output_path.pdf.dir}/#{@pdf_fn.pdf_p_letter}")==true f[:pdf_p],u[:pdf_p]='pdf_p,', "'#{@md.file.output_path.pdf.url}/#{@pdf_fn.pdf_p_a4}'," end if FileTest.file?("#{@md.file.output_path.pdf.dir}/#{@pdf_fn.pdf_l_a4}")==true #\ #or FileTest.file?("#{@md.file.output_path.pdf.dir}/#{@pdf_fn.pdf_l_letter}")==true f[:pdf_l],u[:pdf_l]='pdf_l,', "'#{@md.file.output_path.pdf.url}/#{@pdf_fn.pdf_l_a4}'," end if FileTest.file?("#{@md.file.output_path.html_concordance.dir}/#{@md.file.base_filename.html_concordance}")==true f[:concordance],u[:concordance]='concordance,', "'#{@md.file.output_path.html_concordance.url}/#{@md.file.base_filename.html_concordance}'," end #if FileTest.file?("#{@md.file.output_path.x.dir}/#{@md.file.base_filename.x}")==true # f[:latex_p],u[:latex_p]='latex_p,', "'#{@md.file.output_path.x.url}/#{@md.file.base_filename.x}'," #end ##if FileTest.file?("#{out}/#{@fnb}/#{@opt.fns}.tex")==true ## f[:latex_p],u[:latex_p]='latex_p,', "'#{base}/#{@fnb}/#{@opt.fns}.tex'," ##end #if FileTest.file?("#{@md.file.output_path.x.dir}/#{@md.file.base_filename.x}")==true # f[:latex_l],u[:latex_l]='latex_l,', "'#{@md.file.output_path.x.url}/#{@md.file.base_filename.x}'," #end ##if FileTest.file?("#{out}/#{@fnb}/#{@opt.fns}.landscape.tex")==true ## f[:latex_l],u[:latex_l]='latex_l,', "'#{base}/#{@fnb}/#{@opt}.fns}.landscape.tex'," ##end if FileTest.file?("#{@md.file.output_path.digest.dir}/#{@md.file.base_filename.digest}")==true f[:digest],u[:digest]='digest,', "'#{@md.file.output_path.digest.url}/#{@md.file.base_filename.digest}'," end if FileTest.file?("#{@md.file.output_path.manifest.dir}/#{@md.file.base_filename.manifest}")==true #revisit, was to be text, this is html f[:manifest],u[:manifest]='manifest,', "'#{@md.file.output_path.manifest.url}/#{@md.file.base_filename.manifest}'," end if FileTest.file?("#{@md.file.output_path.src.dir}/#{@md.file.base_filename.src}")==true f[:markup],u[:markup]='markup,', "'#{@md.file.output_path.src.url}/#{@md.file.base_filename.src}'," end if FileTest.file?("#{@md.file.output_path.sisupod.dir}/#{@md.file.base_filename.sisupod}")==true f[:sisupod],u[:sisupod]='sisupod,', "'#{@md.file.output_path.sisupod.url}/#{@md.file.base_filename.sisupod}'," end t=SiSU_DbTuple::LoadUrls.new(@conn,f,u,@@id_t,@opt,@file_maint) tuple=t.tuple rescue SiSU_Errors::Rescued.new($!,$@,@opt.selections.str,@opt.fns).location do __LINE__.to_s + ':' + __FILE__ end ensure end tuple end end end __END__ #+END_SRC ** db_remove.rb #+BEGIN_SRC ruby :tangle "../lib/sisu/db_remove.rb" # <> module SiSU_DbRemove class Remove include SiSU_DbAction def initialize(opt,conn,file,sql_type) @opt,@conn,@file,@sql_type=opt,conn,file,sql_type @md=SiSU_Param::Parameters.new(@opt).get @fnb=@md.fnb @db=SiSU_Env::InfoDb.new end def remove driver_sqlite3=if @sql_type==:sqlite (@conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]) \ ? true : false end del_id=if driver_sqlite3 begin remove_selected=%{ SELECT tid FROM metadata_and_text WHERE src_filename = '#{@md.fns}' AND metadata_and_text.language_document_char = '#{@opt.lng}' ;} # note, for .ssm: @md.fns (is set during runtime & is) != @opt.fns @md.opt.fns @conn.get_first_value(remove_selected).to_i rescue SQLite3::Exception => e #not tested puts "Exception occurred" SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia).mark(e.inspect) SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:yellow).mark( "\n" \ + 'Attempting to initialize db' + "\n" \ + 'Creating db tables' ) sdb={ create: SiSU_DbDBI::Create.new(@opt,@conn,@file_maint,@sql_type), index: SiSU_DbDBI::Index.new(@opt,@conn,@file_maint,@sql_type), } db_action(sdb).create end else begin remove_selected=%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.src_filename = '#{@md.fns}' AND metadata_and_text.language_document_char = '#{@opt.lng}' ;} # note, for .ssm: @md.fns (is set during runtime & is) != @opt.fns @md.opt.fns x=@conn.exec(remove_selected) x.field_values("tid")[0] rescue PG::Error => e err=[ e.result.error_field( PG::Result::PG_DIAG_SEVERITY ), e.result.error_field( PG::Result::PG_DIAG_SQLSTATE ), e.result.error_field( PG::Result::PG_DIAG_MESSAGE_PRIMARY ), e.result.error_field( PG::Result::PG_DIAG_MESSAGE_DETAIL ), e.result.error_field( PG::Result::PG_DIAG_MESSAGE_HINT ), e.result.error_field( PG::Result::PG_DIAG_STATEMENT_POSITION ), e.result.error_field( PG::Result::PG_DIAG_INTERNAL_POSITION ), e.result.error_field( PG::Result::PG_DIAG_INTERNAL_QUERY ), e.result.error_field( PG::Result::PG_DIAG_CONTEXT ), e.result.error_field( PG::Result::PG_DIAG_SOURCE_FILE ), e.result.error_field( PG::Result::PG_DIAG_SOURCE_LINE ), e.result.error_field( PG::Result::PG_DIAG_SOURCE_FUNCTION ), ] SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia).mark("\n" + err.inspect) if err[2] =~/relation "\S+?" does not exist/ \ or err.inspect =~/relation "\S+?" does not exist/ SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:yellow).mark( "\n" \ + err[2] + "\n" \ + 'Attempting to initialize db' + "\n" \ + 'Creating db tables' ) sdb={ create: SiSU_DbDBI::Create.new(@opt,@conn,@file_maint,@sql_type), index: SiSU_DbDBI::Index.new(@opt,@conn,@file_maint,@sql_type), } db_action(sdb).create end end end if del_id sql_entry=[ "DELETE FROM endnotes WHERE metadata_tid = '#{del_id}';", "DELETE FROM endnotes_asterisk WHERE metadata_tid = '#{del_id}';", "DELETE FROM endnotes_plus WHERE metadata_tid = '#{del_id}';", "DELETE FROM doc_objects WHERE metadata_tid = '#{del_id}';", "DELETE FROM urls WHERE metadata_tid = '#{del_id}';", "DELETE FROM metadata_and_text WHERE metadata_and_text.tid = '#{del_id}';", ] if driver_sqlite3 @conn.transaction sql_entry.each do |s| begin @conn.execute(s) rescue next end end @conn.commit if driver_sqlite3 else sql_entry.each do |s| begin @conn.exec_params(s) rescue next end end end if @opt.act[:maintenance][:set]==:on @file.puts sql_entry end else if (@opt.act[:verbose][:set]==:on \ || @opt.act[:verbose_plus][:set]==:on \ || @opt.act[:maintenance][:set]==:on) SiSU_Screen::Ansi.new( @opt.selections.str, "no such file in database #{@db.psql.db}::#{@opt.fns}" ).puts_grey end end end end end __END__ #+END_SRC * db_load_tuple.rb #+BEGIN_SRC ruby :tangle "../lib/sisu/db_load_tuple.rb" # <> module SiSU_DbTuple require_relative 'db_columns' # db_columns.rb class LoadDocuments require_relative 'dp' # dp.rb include SiSU_Param def initialize(conn,col,opt,file_maint) @conn,@col,@opt,@file_maint=conn,col,opt,file_maint @col[:lev]=@col[:lev].to_i unless @col[:lev].inspect=~/^[0-7]/ \ or @col[:lev]==0..7 @col[:lev]=9 end @col[:ocn]=0 unless @col[:ocn].inspect=~/\d+/ @cX=SiSU_Screen::Ansi.new(@opt.act[:color_state][:set]).cX end def tuple #% import line sql_entry=if @col[:en_a] "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, book_idx, ocn, ocnd, ocns, seg, lev0, lev1, lev2, lev3, lev4, lev5, lev6, lev7, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " + "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:book_idx]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv0]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:lv7]}', '#{@col[:en_a]}', '#{@col[:en_z]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" else "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, book_idx, ocn, ocnd, ocns, seg, lev0, lev1, lev2, lev3, lev4, lev5, lev6, lev7, t_of, t_is, node, parent, digest_clean, digest_all) " + "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:book_idx]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv0]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:lv7]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" end if @opt.act[:maintenance][:set]==:on puts @file_maint.inspect puts sql_entry @file_maint.puts sql_entry elsif @opt.act[:verbose_plus][:set]==:on puts sql_entry end if @opt.act[:verbose][:set]==:on if @col[:lev].inspect =~/[0-35-7]/ lev=case @col[:lev].inspect when /0/ then ':A' when /1/ then ':B' when /2/ then ':C' when /3/ then ':D' when /5/ then ' 2' when /6/ then ' 3' when /7/ then ' 4' end puts %{#{lev}>\t#{@col[:lv0]}\t#{@col[:lv1]}\t#{@col[:lv2]}\t#{@col[:lv3]}\t#{@col[:lv4]}\t#{@col[:lv5]}\t#{@col[:lv6]}\t#{@col[:lv7]}\t#{@col[:ocn]}\t#{@col[:node]}\t#{@col[:ocns]}} elsif @col[:lev].inspect =~/[4]/ puts %{ #{@cX.green}1>#{@cX.off}\t#{@col[:lv0]}\t#{@col[:lv1]}\t#{@col[:lv2]}\t#{@col[:lv3]}\t#{@col[:lv4]}\t#{@col[:lv5]}\t#{@col[:lv6]}\t#{@col[:lv7]}\t#{@col[:ocn]}\t#{@col[:node]}\t#{@col[:ocns]}\t#{@col[:seg]}} end end sql_entry end end class LoadMetadata #< SiSU_DbColumns::Columns def initialize(conn,id,md,file_maint) @conn,@id,@md,@file_maint=conn,id,md,file_maint @tp=SiSU_DbColumns::Columns.new(md) end def tuple sql_entry="INSERT INTO metadata_and_text ( #{@tp.column.title.tuple[0]} #{@tp.column.title_main.tuple[0]} #{@tp.column.title_sub.tuple[0]} #{@tp.column.title_short.tuple[0]} #{@tp.column.title_edition.tuple[0]} #{@tp.column.title_note.tuple[0]} #{@tp.column.title_language.tuple[0]} #{@tp.column.title_language_char.tuple[0]} #{@tp.column.creator_author.tuple[0]} #{@tp.column.creator_author_honorific.tuple[0]} #{@tp.column.creator_author_nationality.tuple[0]} #{@tp.column.creator_editor.tuple[0]} #{@tp.column.creator_contributor.tuple[0]} #{@tp.column.creator_illustrator.tuple[0]} #{@tp.column.creator_photographer.tuple[0]} #{@tp.column.creator_translator.tuple[0]} #{@tp.column.creator_prepared_by.tuple[0]} #{@tp.column.creator_digitized_by.tuple[0]} #{@tp.column.creator_audio.tuple[0]} #{@tp.column.creator_video.tuple[0]} #{@tp.column.language_document.tuple[0]} #{@tp.column.language_document_char.tuple[0]} #{@tp.column.language_original.tuple[0]} #{@tp.column.language_original_char.tuple[0]} #{@tp.column.date_added_to_site.tuple[0]} #{@tp.column.date_available.tuple[0]} #{@tp.column.date_created.tuple[0]} #{@tp.column.date_issued.tuple[0]} #{@tp.column.date_modified.tuple[0]} #{@tp.column.date_published.tuple[0]} #{@tp.column.date_valid.tuple[0]} #{@tp.column.date_translated.tuple[0]} #{@tp.column.date_original_publication.tuple[0]} #{@tp.column.date_generated.tuple[0]} #{@tp.column.publisher.tuple[0]} #{@tp.column.original_publisher.tuple[0]} #{@tp.column.original_language.tuple[0]} #{@tp.column.original_language_char.tuple[0]} #{@tp.column.original_source.tuple[0]} #{@tp.column.original_institution.tuple[0]} #{@tp.column.original_nationality.tuple[0]} #{@tp.column.rights_all.tuple[0]} #{@tp.column.rights_copyright_text.tuple[0]} #{@tp.column.rights_copyright_translation.tuple[0]} #{@tp.column.rights_copyright_illustrations.tuple[0]} #{@tp.column.rights_copyright_photographs.tuple[0]} #{@tp.column.rights_copyright_preparation.tuple[0]} #{@tp.column.rights_copyright_digitization.tuple[0]} #{@tp.column.rights_copyright_audio.tuple[0]} #{@tp.column.rights_copyright_video.tuple[0]} #{@tp.column.rights_license.tuple[0]} #{@tp.column.classify_topic_register.tuple[0]} #{@tp.column.classify_subject.tuple[0]} #{@tp.column.classify_loc.tuple[0]} #{@tp.column.classify_dewey.tuple[0]} #{@tp.column.classify_keywords.tuple[0]} #{@tp.column.identifier_oclc.tuple[0]} #{@tp.column.identifier_isbn.tuple[0]} #{@tp.column.notes_abstract.tuple[0]} #{@tp.column.notes_description.tuple[0]} #{@tp.column.notes_comment.tuple[0]} #{@tp.column.notes_history.tuple[0]} #{@tp.column.notes_format.tuple[0]} #{@tp.column.notes_relation.tuple[0]} #{@tp.column.notes_coverage.tuple[0]} #{@tp.column.notes_type.tuple[0]} #{@tp.column.notes_prefix.tuple[0]} #{@tp.column.notes_prefix_a.tuple[0]} #{@tp.column.notes_prefix_b.tuple[0]} #{@tp.column.notes_suffix.tuple[0]} #{@tp.column.src_filename.tuple[0]} #{@tp.column.src_fingerprint.tuple[0]} #{@tp.column.src_filesize.tuple[0]} #{@tp.column.src_word_count.tuple[0]} #{@tp.column.src_txt.tuple[0]} #{@tp.column.fulltext.tuple[0]} tid) " + "VALUES ( #{@tp.column.title.tuple[1]} #{@tp.column.title_main.tuple[1]} #{@tp.column.title_sub.tuple[1]} #{@tp.column.title_short.tuple[1]} #{@tp.column.title_edition.tuple[1]} #{@tp.column.title_note.tuple[1]} #{@tp.column.title_language.tuple[1]} #{@tp.column.title_language_char.tuple[1]} #{@tp.column.creator_author.tuple[1]} #{@tp.column.creator_author_honorific.tuple[1]} #{@tp.column.creator_author_nationality.tuple[1]} #{@tp.column.creator_editor.tuple[1]} #{@tp.column.creator_contributor.tuple[1]} #{@tp.column.creator_illustrator.tuple[1]} #{@tp.column.creator_photographer.tuple[1]} #{@tp.column.creator_translator.tuple[1]} #{@tp.column.creator_prepared_by.tuple[1]} #{@tp.column.creator_digitized_by.tuple[1]} #{@tp.column.creator_audio.tuple[1]} #{@tp.column.creator_video.tuple[1]} #{@tp.column.language_document.tuple[1]} #{@tp.column.language_document_char.tuple[1]} #{@tp.column.language_original.tuple[1]} #{@tp.column.language_original_char.tuple[1]} #{@tp.column.date_added_to_site.tuple[1]} #{@tp.column.date_available.tuple[1]} #{@tp.column.date_created.tuple[1]} #{@tp.column.date_issued.tuple[1]} #{@tp.column.date_modified.tuple[1]} #{@tp.column.date_published.tuple[1]} #{@tp.column.date_valid.tuple[1]} #{@tp.column.date_translated.tuple[1]} #{@tp.column.date_original_publication.tuple[1]} #{@tp.column.date_generated.tuple[1]} #{@tp.column.publisher.tuple[1]} #{@tp.column.original_publisher.tuple[1]} #{@tp.column.original_language.tuple[1]} #{@tp.column.original_language_char.tuple[1]} #{@tp.column.original_source.tuple[1]} #{@tp.column.original_institution.tuple[1]} #{@tp.column.original_nationality.tuple[1]} #{@tp.column.rights_all.tuple[1]} #{@tp.column.rights_copyright_text.tuple[1]} #{@tp.column.rights_copyright_translation.tuple[1]} #{@tp.column.rights_copyright_illustrations.tuple[1]} #{@tp.column.rights_copyright_photographs.tuple[1]} #{@tp.column.rights_copyright_preparation.tuple[1]} #{@tp.column.rights_copyright_digitization.tuple[1]} #{@tp.column.rights_copyright_audio.tuple[1]} #{@tp.column.rights_copyright_video.tuple[1]} #{@tp.column.rights_license.tuple[1]} #{@tp.column.classify_topic_register.tuple[1]} #{@tp.column.classify_subject.tuple[1]} #{@tp.column.classify_loc.tuple[1]} #{@tp.column.classify_dewey.tuple[1]} #{@tp.column.classify_keywords.tuple[1]} #{@tp.column.identifier_oclc.tuple[1]} #{@tp.column.identifier_isbn.tuple[1]} #{@tp.column.notes_abstract.tuple[1]} #{@tp.column.notes_comment.tuple[1]} #{@tp.column.notes_description.tuple[1]} #{@tp.column.notes_history.tuple[1]} #{@tp.column.notes_format.tuple[1]} #{@tp.column.notes_relation.tuple[1]} #{@tp.column.notes_coverage.tuple[1]} #{@tp.column.notes_type.tuple[1]} #{@tp.column.notes_prefix.tuple[1]} #{@tp.column.notes_prefix_a.tuple[1]} #{@tp.column.notes_prefix_b.tuple[1]} #{@tp.column.notes_suffix.tuple[1]} #{@tp.column.src_filename.tuple[1]} #{@tp.column.src_fingerprint.tuple[1]} #{@tp.column.src_filesize.tuple[1]} #{@tp.column.src_word_count.tuple[1]} #{@tp.column.src_txt.tuple[1]} #{@tp.column.fulltext.tuple[1]} #{@id} );" if @md.opt.act[:maintenance][:set]==:on puts "maintenance mode on: creating sql transaction file (for last transaction set (document) only):\n\t#{@file_maint.inspect}" @file_maint.puts sql_entry end sql_entry end end class LoadUrls def initialize(conn,f,u,id,opt,file_maint) @conn,@f,@u,@id,@opt,@file_maint=conn,f,u,id,opt,file_maint end def tuple sql_entry="INSERT INTO urls (#{@f[:txt]} #{@f[:html_toc]} #{@f[:html_doc]} #{@f[:xhtml]} #{@f[:xml_sax]} #{@f[:xml_dom]} #{@f[:odf]} #{@f[:pdf_p]} #{@f[:pdf_l]} #{@f[:concordance]} #{@f[:latex_p]} #{@f[:latex_l]} #{@f[:manifest]} #{@f[:digest]} #{@f[:markup]} #{@f[:sisupod]} metadata_tid) " + "VALUES (#{@u[:txt]} #{@u[:html_toc]} #{@u[:html_doc]} #{@u[:xhtml]} #{@u[:xml_sax]} #{@u[:xml_dom]} #{@u[:odf]} #{@u[:pdf_p]} #{@u[:pdf_l]} #{@u[:concordance]} #{@u[:latex_p]} #{@u[:latex_l]} #{@u[:manifest]} #{@u[:digest]} #{@u[:markup]} #{@u[:sisupod]} #{@id});" if @opt.act[:maintenance][:set]==:on @file_maint.puts sql_entry end sql_entry end end class LoadEndnotes def initialize(conn,en,opt,file_maint) @conn,@en,@opt,@file_maint=conn,en,opt,file_maint end def tuple sql_entry="INSERT INTO #{@en[:type]} (nid, document_lid, nr, clean, body, ocn, ocnd, ocns, metadata_tid, digest_clean) " + "VALUES ('#{@en[:id]}', '#{@en[:lid]}', '#{@en[:nr]}', '#{@en[:txt]}', '#{@en[:body]}', '#{@en[:ocn]}', '#{@en[:ocnd]}', '#{@en[:ocns]}', '#{@en[:id_t]}', '#{@en[:hash]}');" if @opt.act[:maintenance][:set]==:on @file_maint.puts sql_entry end sql_entry end end end __END__ #+END_SRC * db_select.rb #+BEGIN_SRC ruby :tangle "../lib/sisu/db_select.rb" # <> module SiSU_DbAction def db_action(sdb) @sdb=sdb def createdb @sdb[:create].output_dir? begin @sdb[:create].create_db rescue @sdb[:create].output_dir? end end def drop @sdb[:drop].drop.tables end def create @sdb[:create].output_dir? begin @sdb[:create].create_table.metadata_and_text @sdb[:create].create_table.doc_objects @sdb[:create].create_table.endnotes @sdb[:create].create_table.endnotes_asterisk @sdb[:create].create_table.endnotes_plus @sdb[:create].create_table.urls @sdb[:index].create_indexes rescue SiSU_Errors::Rescued.new($!,$@,'--sqlite').location @sdb[:create].output_dir? do __LINE__.to_s + ':' + __FILE__ end end end def import db_exist? @sdb[:import].marshal_load tell=case @sql_type when :sqlite SiSU_Screen::Ansi.new( @opt.act[:color_state][:set], "sqlite3 #{@db.sqlite.db} database?" ) when :pg SiSU_Screen::Ansi.new( @opt.act[:color_state][:set], "pgaccess or psql #{@db.psql.db} database?" ) else '???' end tell.puts_grey if @opt.act[:verbose][:set]==:on end def remove db_exist? @sdb[:remove_doc].remove end def update remove import end self end end module SiSU_DbSelect class Case include SiSU_DbAction def initialize(opt,conn='',sql_type=:pg) @opt,@conn,@sql_type=opt,conn,sql_type @db=SiSU_Env::InfoDb.new @file_maint=sql_maintenance_file @sdb={ create: SiSU_DbDBI::Create.new(@opt,@conn,@file_maint,@sql_type), index: SiSU_DbDBI::Index.new(@opt,@conn,@file_maint,@sql_type), drop: SiSU_DbDBI::Drop.new(@opt,@conn,@db,@sql_type), } if (@opt.act[:psql_import][:set]==:on \ || @opt.act[:psql_update][:set]==:on) \ or (@opt.act[:sqlite_import][:set]==:on \ || @opt.act[:sqlite_update][:set]==:on) @sdb[:import]=SiSU_DbDBI::Import.new(@opt,@conn,@file_maint,@sql_type) @sdb[:remove_doc]=SiSU_DbDBI::Remove.new(@opt,@conn,@file_maint,@sql_type) elsif (@opt.act[:psql_remove][:set]==:on \ or @opt.act[:sqlite_remove][:set]==:on) @sdb[:remove_doc]=SiSU_DbDBI::Remove.new(@opt,@conn,@file_maint,@sql_type) end end def db_exist? if @sql_type==:sqlite \ and (not (FileTest.file?(@db.sqlite.db)) \ or FileTest.zero?(@db.sqlite.db)) puts %{no connection with sqlite database established, you may need to run:\n} \ + %{ sisu --sqlite --createall\n} \ + %{ before attempting to populate the database} SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:yellow).mark( "\n" \ + 'Attempting to initialize db' + "\n" \ + 'Creating db tables' ) db_action(@sdb).create end if @conn.is_a?(NilClass) if @sql_type==:sqlite puts %{no connection with sqlite database established, you may need to run:\n} \ + %{ sisu --sqlite --createall\n} \ + %{ before attempting to populate the database} SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:yellow).mark( "\n" \ + 'Attempting to initialize db' + "\n" \ + 'Creating db tables' ) db_action(@sdb).create @db.sqlite.db else puts %{no connection with pg database established, you may need to run:\n} \ + %{ createdb "#{@db.psql.db}"\n} \ + %{ after that don't forget to run:\n} \ + %{ sisu --pg --createall\n} \ + %{ before attempting to populate the database} @db.psql.db end exit end end def sql_maintenance_file file=if @opt.act[:maintenance][:set]==:on if @opt.fns and not @opt.fns.empty? @env=SiSU_Env::InfoEnv.new(@opt.fns) if @opt.fns if @sql_type ==:sqlite puts "\n#{@env.processing_path.sqlite}/#{@opt.fns}.sql" end @db=SiSU_Env::InfoDb.new @job="sqlite3 #{@db.sqlite.db} < #{@env.processing_path.sqlite}/#{@opt.fns}.sql" if @sql_type ==:sqlite File.new("#{@env.processing_path.sqlite}/#{@opt.fns}.sql",'w+') else File.new("#{@env.processing_path.postgresql}/#{@opt.fns}.sql",'w+') end elsif @opt.fns \ and (@opt.act[:sqlite_create][:set] ==:on \ || @opt.act[:psql_create][:set] ==:on) nil #sort variations later else nil end else nil end file end def cases if @opt.act[:psql_drop][:set] ==:on \ or @opt.act[:sqlite_drop][:set] ==:on db_action(@sdb).drop end if @opt.act[:psql_createdb][:set] ==:on \ or @opt.act[:sqlite_createdb][:set] ==:on db_action(@sdb).createdb end if @opt.act[:psql_create][:set] ==:on \ or @opt.act[:sqlite_create][:set] ==:on db_action(@sdb).create end if @opt.act[:psql_update][:set] ==:on \ or @opt.act[:sqlite_update][:set] ==:on db_action(@sdb).update else if @opt.act[:psql_remove][:set] ==:on \ or @opt.act[:sqlite_remove][:set] ==:on db_action(@sdb).remove end if @opt.act[:psql_import][:set] ==:on \ or @opt.act[:sqlite_import][:set] ==:on db_action(@sdb).import end end end end end __END__ #+END_SRC * structure ** db_columns.rb #+BEGIN_SRC ruby :tangle "../lib/sisu/db_columns.rb" # <> module SiSU_DbColumns require_relative 'se' # se.rb require_relative 'db_sqltxt' # db_sqltxt.rb class Columns < SiSU_DbText::Prepare def initialize(md=nil) @md=md @db=SiSU_Env::InfoDb.new #watch @lang ||=SiSU_i18n::Languages.new if defined? md.opt.act \ and ((md.opt.act[:psql_import][:set]==:on \ || md.opt.act[:psql_update][:set]==:on) \ or (md.opt.act[:sqlite_import][:set]==:on \ || md.opt.act[:sqlite_update][:set]==:on)) \ and FileTest.exist?(md.fns) txt_arr=IO.readlines(md.fns,'') src=txt_arr.join("\n") if @db.share_source? @sisutxt=special_character_escape(src) else @sisutxt='' end @fulltext=clean_searchable_text_from_document_objects(txt_arr) else @sisutxt,@fulltext='','' end end #% structures #def column_define # def varchar(name,size) # "#{name} VARCHAR(#{size}) NULL," # end #end =begin #% title @title: :subtitle: :short: :edition: :language: :note: =end def column def title # DublinCore 1 - title def name 'title' end def create_column "#{name} VARCHAR(#{Db[:col_title]}) NOT NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata full document title [DC1]';} end def tuple if defined? @md.title.full \ and @md.title.full=~/\S+/ txt=@md.title.full txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def title_main def name 'title_main' end def create_column "#{name} VARCHAR(#{Db[:col_title_part]}) NOT NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata main document title';} end def tuple if defined? @md.title.main \ and @md.title.main=~/\S+/ txt=@md.title.main txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def title_sub def name 'title_sub' end def create_column "#{name} VARCHAR(#{Db[:col_title_part]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document subtitle';} end def tuple if defined? @md.title.sub \ and @md.title.sub=~/\S+/ txt=@md.title.sub txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def title_short def name 'title_short' end def create_column "#{name} VARCHAR(#{Db[:col_title_part]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document short title if any';} end def tuple if defined? @md.title.short \ and @md.title.short=~/\S+/ txt=@md.title.short txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def title_edition def name 'title_edition' end def create_column "#{name} VARCHAR(#{Db[:col_title_edition]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document edition (version)';} end def tuple if defined? @md.title.edition \ and @md.title.edition=~/\S+/ txt=@md.title.edition txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def title_note def name 'title_note' end def create_column "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document notes associated with title';} end def tuple if defined? @md.title.note \ and @md.title.note=~/\S+/ txt=@md.title.note txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def title_language def name 'title_language' end def create_column "#{name} VARCHAR(#{Db[:col_language]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document language [DC12]';} end def tuple if @lang.list[@md.opt.lng][:n] txt=@lang.list[@md.opt.lng][:n] txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def title_language_char # consider def name 'title_language_char' end def create_column "#{name} VARCHAR(#{Db[:col_language_char]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document language iso code';} end def tuple if defined? @md.opt.lng \ and @md.opt.lng=~/\S+/ txt=@md.opt.lng txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end =begin #% creator @creator: :author: :editor: :contributor: :illustrator: :photographer: :translator: :prepared_by: :digitized_by: :audio: :video: =end def creator_author # DublinCore 2 - creator/author (author) def name 'creator_author' end def create_column "#{name} VARCHAR(#{Db[:col_name]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document author (creator) [DC2]';} end def tuple if defined? @md.creator.author_detail \ and @md.creator.author_detail.is_a?(Array) \ and @md.creator.author_detail.length > 0 txt='' @md.creator.author_detail.each do |h| txt=txt + %{#{h[:the]}, #{h[:others]}; } end txt=txt.gsub(/[;, ]+\s*$/,'') txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def creator_author_honorific # consider def name 'creator_author_hon' end def create_column "#{name} VARCHAR(#{Db[:col_creator_misc_short]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document author honorific (title e.g, Ms. Dr. Prof.)';} end def tuple if defined? @md.creator.author_hon \ and @md.creator.author_hon=~/\S+/ txt=@md.creator.author_hon txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def creator_author_nationality # consider def name 'creator_author_nationality' end def create_column "#{name} VARCHAR(#{Db[:col_creator_misc_short]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata nationality of document author (creator)';} end def tuple if defined? @md.creator.author_nationality_detail \ and @md.creator.author_nationality=~/\S+/ txt=@md.creator.author_nationality_detail txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def creator_editor def name 'creator_editor' end def create_column "#{name} VARCHAR(#{Db[:col_name]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document editor name(s)';} end def tuple if defined? @md.creator.editor_detail \ and @md.creator.editor_detail.is_a?(Array) \ and @md.creator.editor_detail.length > 0 txt='' @md.creator.editor_detail.each do |h| txt=txt + %{#{h[:the]}, #{h[:others]}; } end txt=txt.gsub(/[;, ]+\s*$/,'') txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def creator_contributor # DublinCore 6 - contributor def name 'creator_contributor' end def create_column "#{name} VARCHAR(#{Db[:col_name]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document contributor name(s) [DC6]';} end def tuple if defined? @md.creator.contributor_detail \ and @md.creator.contributor_detail.is_a?(Array) \ and @md.creator.contributor_detail.length > 0 txt='' @md.creator.contributor_detail.each do |h| txt=txt + %{#{h[:the]}, #{h[:others]}; } end txt=txt.gsub(/[;, ]+\s*$/,'') txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def creator_illustrator def name 'creator_illustrator' end def create_column "#{name} VARCHAR(#{Db[:col_name]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document illustrator name(s)';} end def tuple if defined? @md.creator.illustrator_detail \ and @md.creator.illustrator_detail.is_a?(Array) \ and @md.creator.illustrator_detail.length > 0 txt='' @md.creator.illustrator_detail.each do |h| txt=txt + %{#{h[:the]}, #{h[:others]}; } end txt=txt.gsub(/[;, ]+\s*$/,'') txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def creator_photographer def name 'creator_photographer' end def create_column "#{name} VARCHAR(#{Db[:col_name]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document photographer name(s)';} end def tuple if defined? @md.creator.photographer_detail \ and @md.creator.photographer_detail.is_a?(Array) \ and @md.creator.photographer_detail.length > 0 txt='' @md.creator.photographer_detail.each do |h| txt=txt + %{#{h[:the]}, #{h[:others]}; } end txt=txt.gsub(/[;, ]+\s*$/,'') txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def creator_translator def name 'creator_translator' end def create_column "#{name} VARCHAR(#{Db[:col_name]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document translator name(s)';} end def tuple if defined? @md.creator.translator_detail \ and @md.creator.translator_detail.is_a?(Array) \ and @md.creator.translator_detail.length > 0 txt='' @md.creator.translator_detail.each do |h| txt=txt + %{#{h[:the]}, #{h[:others]}; } end txt=txt.gsub(/[;, ]+\s*$/,'') txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def creator_prepared_by def name 'creator_prepared_by' end def create_column "#{name} VARCHAR(#{Db[:col_name]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document prepared by name(s)';} end def tuple if defined? @md.creator.prepared_by_detail \ and @md.creator.prepared_by_detail.is_a?(Array) \ and @md.creator.prepared_by_detail.length > 0 txt='' @md.creator.prepared_by_detail.each do |h| txt=txt + %{#{h[:the]}, #{h[:others]}; } end txt=txt.gsub(/[;, ]+\s*$/,'') txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def creator_digitized_by def name 'creator_digitized_by' end def create_column "#{name} VARCHAR(#{Db[:col_name]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document digitized by name(s)';} end def tuple if defined? @md.creator.digitized_by_detail \ and @md.creator.digitized_by_detail.is_a?(Array) \ and @md.creator.digitized_by_detail.length > 0 txt='' @md.creator.digitized_by_detail.each do |h| txt=txt + %{#{h[:the]}, #{h[:others]}; } end txt=txt.gsub(/[;, ]+\s*$/,'') txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def creator_audio def name 'creator_audio' end def create_column "#{name} VARCHAR(#{Db[:col_name]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document audio by name(s)';} end def tuple if defined? @md.creator.audio_detail \ and @md.creator.audio_detail.is_a?(Array) \ and @md.creator.audio_detail.length > 0 txt='' @md.creator.audio_detail.each do |h| txt=txt + %{#{h[:the]}, #{h[:others]}; } end txt=txt.gsub(/[;, ]+\s*$/,'') txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def creator_video def name 'creator_video' end def create_column "#{name} VARCHAR(#{Db[:col_name]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document video by name(s)';} end def tuple if defined? @md.creator.video_detail \ and @md.creator.video_detail.is_a?(Array) \ and @md.creator.video_detail.length > 0 txt='' @md.creator.video_detail.each do |h| txt=txt + %{#{h[:the]}, #{h[:others]}; } end txt=txt.gsub(/[;, ]+\s*$/,'') txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end =begin #% language #taken from other fields @title: :language: @original: :language: #not available --> #@language: # :document: # :original: =end def language_document def name 'language_document' end def create_column "#{name} VARCHAR(#{Db[:col_language]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document language';} end def tuple if @lang.list[@md.opt.lng][:n] txt=@lang.list[@md.opt.lng][:n] txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def language_document_char def name 'language_document_char' end def create_column "#{name} VARCHAR(#{Db[:col_language_char]}) NOT NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document language';} end def tuple #modify check, is now required, SiSUv3d_ if defined? @md.opt.lng \ and @md.opt.lng=~/\S+/ txt=@md.opt.lng txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def language_original def name 'language_original' end def create_column "#{name} VARCHAR(#{Db[:col_language]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata original document/text language';} end def tuple if defined? @md.language.original \ and @md.language.original=~/\S+/ txt=@md.language.original txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def language_original_char def name 'language_original_char' end def create_column "#{name} VARCHAR(#{Db[:col_language_char]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document language';} end def tuple if defined? @md.language.original_char \ and @md.language.original_char=~/\S+/ txt=@md.language.original_char txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end =begin #% date @date: :added_to_site: :available: :created: :issued: :modified: :published: :valid: :translated: :original_publication: =end def date_added_to_site def name 'date_added_to_site' end def create_column "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," #"#{name} DATE," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata date added to site';} end def tuple if defined? @md.date.added_to_site \ and @md.date.added_to_site=~/\S+/ txt=@md.date.added_to_site txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def date_available def name 'date_available' end def create_column "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata date added to site [DC]';} end def tuple if defined? @md.date.available \ and @md.date.available=~/\S+/ txt=@md.date.available txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def date_created def name 'date_created' end def create_column "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata date created [DC]';} end def tuple if defined? @md.date.created \ and @md.date.created=~/\S+/ txt=@md.date.created txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def date_issued def name 'date_issued' end def create_column "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata date issued [DC]';} end def tuple if defined? @md.date.issued \ and @md.date.issued=~/\S+/ txt=@md.date.issued txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def date_modified def name 'date_modified' end def create_column "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata date modified [DC]';} end def tuple if defined? @md.date.modified \ and @md.date.modified=~/\S+/ txt=@md.date.modified txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def date_published def name 'date_published' end def create_column "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata date published [DC7]';} end def tuple if defined? @md.date.published \ and @md.date.published=~/\S+/ txt=@md.date.published txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def date_valid def name 'date_valid' end def create_column "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata date valid [DC]';} end def tuple if defined? @md.date.valid \ and @md.date.valid=~/\S+/ txt=@md.date.valid txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def date_translated def name 'date_translated' end def create_column "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata date translated';} end def tuple if defined? @md.date.translated \ and @md.date.translated=~/\S+/ txt=@md.date.translated txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def date_original_publication def name 'date_original_publication' end def create_column "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata date of original publication';} end def tuple if defined? @md.date.original_publication \ and @md.date.original_publication=~/\S+/ txt=@md.date.original_publication txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def date_generated def name 'date_generated' end def create_column #choose other representation of time "#{name} VARCHAR(30) NULL," #"#{name} VARCHAR(10) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata date of sisu generation of document, automatically populated';} end def tuple #choose other representation of time if defined? @md.generated \ and @md.generated.to_s=~/\S+/ txt=@md.generated.to_s txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end =begin #% publisher @publisher: =end def publisher def name 'publisher' end def create_column "#{name} VARCHAR(#{Db[:col_name]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document publisher [DC5]';} end def tuple if defined? @md.publisher \ and @md.publisher=~/\S+/ txt=@md.publisher txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end ##% current # def current_publisher # def name # 'current_publisher' # end # def size # 10 # end # def create_column # "#{name} VARCHAR(#{current_publisher.size}) NULL," # end # def tuple # t=if defined? @md.current.publisher \ # and @md.current.publisher=~/\S+/ # txt=@md.current.publisher # txt=special_character_escape(txt) # "'#{txt}', " # end # end # self # end =begin #% original @original: :publisher: #:date: #repeated under date :language: :institution: :nationality: :source: =end def original_publisher def name 'original_publisher' end def create_column "#{name} VARCHAR(#{Db[:col_name]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document original publisher [DC5]';} end def tuple if defined? @md.original.publisher \ and @md.original.publisher=~/\S+/ txt=@md.original.publisher txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def original_language def name 'original_language' end def create_column "#{name} VARCHAR(#{Db[:col_language]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document original language';} end def tuple if defined? @md.original.language \ and @md.original.language=~/\S+/ txt=@md.original.language txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def original_language_char # consider def name 'original_language_char' end def create_column "#{name} VARCHAR(#{Db[:col_language_char]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document original language iso character';} end def tuple if defined? @md.original.language_char \ and @md.original.language_char=~/\S+/ txt=@md.original.language_char txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def original_source def name 'original_source' end def create_column "#{name} VARCHAR(#{Db[:col_name]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document original source [DC11]';} end def tuple if defined? @md.original.source \ and @md.original.source=~/\S+/ txt=@md.original.source txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def original_institution def name 'original_institution' end def create_column "#{name} VARCHAR(#{Db[:col_name]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document original institution';} end def tuple if defined? @md.original.institution \ and @md.original.institution=~/\S+/ txt=@md.original.institution txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def original_nationality def name 'original_nationality' end def create_column "#{name} VARCHAR(#{Db[:col_language]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document original nationality';} end def tuple if defined? @md.original.nationality \ and @md.original.nationality=~/\S+/ txt=@md.original.nationality txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end =begin #% rights @rights: #:copyright: #mapped to :text: used where no other copyrights and included in :all: :text: :translation: :illustrations: :photographs: :preparation: :digitization: :audio: :video: :license: :all: =end def rights_all def name 'rights' end def create_column "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata rights associated with document (composite) [DC15]';} end def tuple if defined? @md.rights.all \ and @md.rights.all=~/\S+/ txt=@md.rights.all txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def rights_copyright_text def name 'rights_copyright_text' end def create_column "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata copyright associated for document text';} end def tuple if defined? @md.rights.copyright_text \ and @md.rights.copyright_text=~/\S+/ txt=@md.rights.copyright_text txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def rights_copyright_translation def name 'rights_copyright_translation' end def create_column "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata copyright associated for document text translation (if any)';} end def tuple if defined? @md.rights.copyright_translation \ and @md.rights.copyright_translation=~/\S+/ txt=@md.rights.copyright_translation txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def rights_copyright_illustrations def name 'rights_copyright_illustrations' end def create_column "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata copyright associated for document text illustrations (if any)';} end def tuple if defined? @md.rights.copyright_illustrations \ and @md.rights.copyright_illustrations=~/\S+/ txt=@md.rights.copyright_illustrations txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def rights_copyright_photographs def name 'rights_copyright_photographs' end def create_column "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata copyright associated for document text photographs (if any)';} end def tuple if defined? @md.rights.copyright_photographs \ and @md.rights.copyright_photographs=~/\S+/ txt=@md.rights.copyright_photographs txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def rights_copyright_preparation def name 'rights_copyright_preparation' end def create_column "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata copyright associated for document text preparation (if any)';} end def tuple if defined? @md.rights.copyright_preparation \ and @md.rights.copyright_preparation=~/\S+/ txt=@md.rights.copyright_preparation txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def rights_copyright_digitization def name 'rights_copyright_digitization' end def create_column "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata copyright associated for document text digitization (if any)';} end def tuple if defined? @md.rights.copyright_digitization \ and @md.rights.copyright_digitization=~/\S+/ txt=@md.rights.copyright_digitization txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def rights_copyright_audio def name 'rights_copyright_audio' end def create_column "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata copyright associated for document text audio (if any)';} end def tuple if defined? @md.rights.copyright_audio \ and @md.rights.copyright_audio=~/\S+/ txt=@md.rights.copyright_audio txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def rights_copyright_video def name 'rights_copyright_video' end def create_column "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata copyright associated for document text video (if any)';} end def tuple if defined? @md.rights.copyright_video \ and @md.rights.copyright_video=~/\S+/ txt=@md.rights.copyright_video txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def rights_license def name 'rights_license' end def create_column "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata license granted for use of document if any)';} end def tuple if defined? @md.rights.license \ and @md.rights.license=~/\S+/ txt=@md.rights.license txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end =begin #% identifier @identifier: :oclc: :isbn: =end def identifier_oclc def name 'identifier_oclc' end def create_column "#{name} VARCHAR(#{Db[:col_library]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata identifier document Online Computer Library Center number';} end def tuple if defined? @md.identifier.oclc \ and @md.identifier.oclc=~/\S+/ txt=@md.identifier.oclc txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def identifier_isbn def name 'identifier_isbn' end def create_column "#{name} VARCHAR(#{Db[:col_small]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata identifier document isbn (if any)';} end def tuple if defined? @md.identifier.isbn \ and @md.identifier.isbn=~/\S+/ txt=@md.identifier.isbn txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end =begin #% classify @classify: :topic_register: :subject: :keywords: :type: :loc: :dewey: =end def classify_topic_register def name 'classify_topic_register' end def create_column "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata classify document topic register (semi-structured document subject information)';} end def tuple if defined? @md.classify.topic_register \ and @md.classify.topic_register=~/\S+/ txt=@md.classify.topic_register txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def classify_subject def name 'classify_subject' end def create_column "#{name} VARCHAR(#{Db[:col_txt_long]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata classify document subject matter [DC3]';} end def tuple if defined? @md.classify.subject \ and @md.classify.subject=~/\S+/ txt=@md.classify.subject txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def classify_loc def name 'classify_loc' end def create_column "#{name} VARCHAR(#{Db[:col_library]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata classify document Library of Congress';} end def tuple if defined? @md.classify.loc \ and @md.classify.loc=~/\S+/ txt=@md.classify.loc txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def classify_dewey def name 'classify_dewey' end def create_column "#{name} VARCHAR(#{Db[:col_library]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata classify document Dewey';} end def tuple if defined? @md.classify.dewey \ and @md.classify.dewey=~/\S+/ txt=@md.classify.dewey txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def classify_keywords def name 'classify_keywords' end def create_column "#{name} VARCHAR(#{Db[:col_txt_long]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata classify document keywords';} end def tuple if defined? @md.classify.keywords \ and @md.classify.keywords=~/\S+/ txt=@md.classify.keywords txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end =begin #% notes @notes: :abstract: :description: :comment: :coverage: :relation: :format: :history: :prefix: :prefix_a: :prefix_b: :suffix: =end def notes_abstract def name 'notes_abstract' end def create_column "#{name} TEXT NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document notes abstract';} end def tuple if defined? @md.notes.abstract \ and @md.notes.abstract=~/\S+/ txt=@md.notes.abstract txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def notes_description def name 'notes_description' end def create_column "#{name} TEXT NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document notes description [DC4]';} end def tuple if defined? @md.notes.description \ and @md.notes.description=~/\S+/ txt=@md.notes.description txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def notes_comment def name 'notes_comment' end def create_column "#{name} TEXT NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document notes comment';} end def tuple if defined? @md.notes.comment \ and @md.notes.comment=~/\S+/ txt=@md.notes.comment txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def notes_coverage def name 'notes_coverage' end def create_column "#{name} VARCHAR(#{Db[:col_txt_short]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata classify document coverage [DC14]';} end def tuple if defined? @md.classify.coverage \ and @md.classify.coverage=~/\S+/ txt=@md.classify.coverage txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def notes_relation def name 'notes_relation' end def create_column "#{name} VARCHAR(#{Db[:col_txt_short]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata classify document relation [DC13]';} end def tuple if defined? @md.classify.relation \ and @md.classify.relation=~/\S+/ txt=@md.classify.relation txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def notes_history #check, consider removal def name 'notes_history' end def create_column "#{name} VARCHAR(#{Db[:col_txt_long]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document notes history';} end def tuple if defined? @md.notes.history \ and @md.notes.history=~/\S+/ txt=@md.notes.history txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def notes_type #check def name 'notes_type' end def create_column "#{name} VARCHAR(#{Db[:col_txt_long]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata notes document type [DC8]';} end def tuple if defined? @md.notes.type \ and @md.notes.type=~/\S+/ txt=@md.notes.type txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def notes_format def name 'notes_format' end def create_column "#{name} VARCHAR(#{Db[:col_txt_long]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata classify document format [DC9]';} end def tuple if defined? @md.classify.format \ and @md.classify.format=~/\S+/ txt=@md.classify.format txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def notes_prefix def name 'notes_prefix' end def create_column "#{name} TEXT NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document notes prefix';} end def tuple if defined? @md.notes.prefix \ and @md.notes.prefix=~/\S+/ txt=@md.notes.prefix txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def notes_prefix_a def name 'notes_prefix_a' end def create_column "#{name} TEXT NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document notes prefix_a';} end def tuple if defined? @md.notes.prefix_a \ and @md.notes.prefix_a=~/\S+/ txt=@md.notes.prefix_a txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def notes_prefix_b def name 'notes_prefix_b' end def create_column "#{name} TEXT NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document notes prefix_b';} end def tuple if defined? @md.notes.prefix_b \ and @md.notes.prefix_b=~/\S+/ txt=@md.notes.prefix_b txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def notes_suffix def name 'notes_suffix' end def create_column # keep text "#{name} TEXT NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document notes suffix';} end def tuple if defined? @md.notes.suffix \ and @md.notes.suffix=~/\S+/ txt=@md.notes.suffix txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end =begin #% src =end def src_filename def name 'src_filename' end def create_column "#{name} VARCHAR(#{Db[:col_filename]}) NOT NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'sisu markup source text filename';} end def tuple if defined? @md.fns \ and @md.fns=~/\S+/ txt=@md.fns txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def src_fingerprint def name 'src_fingerprint' #hash/digest, sha512, sha256 or md5 end def create_column "#{name} VARCHAR(#{Db[:col_digest]}) NULL," #"#{name} TEXT NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'sisu markup source text fingerprint, hash digest sha512, sha256 or md5';} end def tuple if defined? @md.dgst \ and @md.dgst.is_a?(Array) \ and @md.dgst[1]=~/\S+/ txt=@md.dgst[1] ["#{name}, ","'#{txt}', "] else ['',''] end end self end def src_filesize def name 'src_filesize' end def create_column "#{name} VARCHAR(#{Db[:col_filesize]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'sisu markup source text file size';} end def tuple if defined? @md.filesize \ and @md.filesize=~/\S+/ txt=@md.filesize txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def src_word_count def name 'src_word_count' end def create_column "#{name} TEXT NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'sisu markup source text word count';} end def tuple if defined? @md.wc_words \ and @md.wc_words.to_s=~/\S+/ txt=@md.wc_words txt=special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] end end self end def src_txt # consider naming sisusrc def name 'src_text' end def create_column "#{name} TEXT NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'sisu markup source text (if shared)';} end def tuple if ((@md.opt.act[:psql_import][:set]==:on \ || @md.opt.act[:psql_update][:set]==:on) \ or (@md.opt.act[:sqlite_import][:set]==:on \ || @md.opt.act[:sqlite_update][:set]==:on)) \ and FileTest.exist?(@md.fns) ["#{name}, ","'#{@sisutxt}', "] else ['',''] end end self end =begin #% misc @links: =end def fulltext def name 'fulltext' end def create_column "#{name} TEXT NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'document full text clean, searchable';} end def tuple if ((@md.opt.act[:psql_import][:set]==:on \ || @md.opt.act[:psql_update][:set]==:on) \ or (@md.opt.act[:sqlite_import][:set]==:on \ || @md.opt.act[:sqlite_update][:set]==:on)) \ and FileTest.exist?(@md.fns) ["#{name}, ","'#{@fulltext}', "] else ['',''] end end self end def links def name 'links' end def create_column "#{name} TEXT NULL," #"#{name} VARCHAR(#{links.size}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} IS 'metadata document links';} end #def tuple # #BUG HERE - links is an array of paired values :say :url # if defined? @md.links \ # and @md.links=~/\S+/ # txt=@md.links # txt=special_character_escape(txt) # ["#{name}, ","'#{txt}', "] # else ['',''] # end #end self end self end end class ColumnSize def document_clean # restriction not necessary 60000 end def document_body 16000 end def document_seg 120 end def document_seg_full 120 end def endnote_clean # restriction not necessary 60000 end def endnote_body 16000 end end end __END__ #+END_SRC ** db_indexes.rb #+BEGIN_SRC ruby :tangle "../lib/sisu/db_indexes.rb" # <> module SiSU_DbIndex class Index # create documents Indexes def initialize(opt,conn='',sql_type) def initialize(opt,conn,file,sql_type) @opt,@conn,@file,@sql_type=opt,conn,file,sql_type end def create_indexes # check added from pg not tested def conn_execute_sql_pg(conn,sql) conn.exec_params(sql) end def conn_execute_sql_sqlite(conn,sql) conn.execute(sql) end def conn_execute_sql(conn,sql) if @sql_type==:pg conn_execute_sql_pg(conn,sql) elsif @sql_type==:sqlite conn_execute_sql_sqlite(conn,sql) else end end def conn_execute_array(sql_arr) begin @conn.transaction do |conn| sql_arr.each do |sql| conn_execute_sql(conn,sql) end end rescue if @conn.is_a?(NilClass) errmsg="No sqlite3 connection (check sqlite3 dependencies)" if @opt.act[:no_stop][:set]==:on SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia). error("#{errmsg}, proceeding without sqlite output (as requested)") else SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia). error("#{errmsg}, STOPPING") exit end end end end def base if (@opt.act[:verbose_plus][:set]==:on \ || @opt.act[:maintenance][:set]==:on) print "\n create documents common indexes\n" end sql_arr=[ %{CREATE INDEX idx_ocn ON doc_objects(ocn);}, %{CREATE INDEX idx_digest_clean ON doc_objects(digest_clean);}, %{CREATE INDEX idx_digest_all ON doc_objects(digest_all);}, %{CREATE INDEX idx_lev0 ON doc_objects(lev0);}, %{CREATE INDEX idx_lev1 ON doc_objects(lev1);}, %{CREATE INDEX idx_lev2 ON doc_objects(lev2);}, %{CREATE INDEX idx_lev3 ON doc_objects(lev3);}, %{CREATE INDEX idx_lev4 ON doc_objects(lev4);}, %{CREATE INDEX idx_lev5 ON doc_objects(lev5);}, %{CREATE INDEX idx_lev6 ON doc_objects(lev6);}, %{CREATE INDEX idx_endnote_nr ON endnotes(nr);}, %{CREATE INDEX idx_digest_en ON endnotes(digest_clean);}, %{CREATE INDEX idx_endnote_nr_asterisk ON endnotes_asterisk(nr);}, %{CREATE INDEX idx_endnote_asterisk ON endnotes_asterisk(clean);}, %{CREATE INDEX idx_digest_en_asterisk ON endnotes_asterisk(digest_clean);}, %{CREATE INDEX idx_endnote_nr_plus ON endnotes_plus(nr);}, %{CREATE INDEX idx_endnote_plus ON endnotes_plus(clean);}, %{CREATE INDEX idx_digest_en_plus ON endnotes_plus(digest_clean);}, %{CREATE INDEX idx_title ON metadata_and_text(title);}, %{CREATE INDEX idx_author ON metadata_and_text(creator_author);}, %{CREATE INDEX idx_filename ON metadata_and_text(src_filename);}, %{CREATE INDEX idx_language ON metadata_and_text(language_document_char);}, %{CREATE INDEX idx_topics ON metadata_and_text(classify_topic_register)}, ] conn_execute_array(sql_arr) end def text if (@opt.act[:verbose_plus][:set]==:on \ || @opt.act[:maintenance][:set]==:on) print "\n create documents TEXT indexes\n" end sql_arr=[ %{CREATE INDEX idx_clean ON doc_objects(clean);}, %{CREATE INDEX idx_endnote ON endnotes(clean);} ] conn_execute_array(sql_arr) end base @opt.act[:psql][:set]==:on ? '' : text end end end __END__ #+END_SRC * db_tests.rb #+BEGIN_SRC ruby :tangle "../lib/sisu/db_tests.rb" # <> module SiSU_DbTests class Test def initialize(info,opt) @ck,@opt=info,opt unless @opt.act[:quiet][:set]==:on puts @ck.tp[:fns] if @ck.tp[:fns] and not @ck.tp[:fns].empty? puts @ck.tp[:title] if @ck.tp[:title] and not @ck.tp[:title].empty? puts @ck.tp[:creator] if @ck.tp[:creator] and not @ck.tp[:creator].empty? end end def verify unless @opt.act[:quiet][:set]==:on puts @ck.tp[:fns].length.to_s + ' checklength ' + @ck.tp[:fns] if @ck.tp[:fns] and @ck.tp[:fns].length >@ck.lt_filename puts @ck.tp[:title].length.to_s + ' checklength ' + @ck.tp[:title] if @ck.tp[:title] and @ck.tp[:title].length >@ck.lt_title puts @ck.tp[:subtitle].length.to_s + ' checklength ' + @ck.tp[:subtitle] if @ck.tp[:subtitle] and @ck.tp[:subtitle].length >@ck.lt_subtitle puts @ck.tp[:creator].length.to_s + ' checklength ' + @ck.tp[:creator] if @ck.tp[:creator] and @ck.tp[:creator].length >@ck.lt_creator puts @ck.tp[:author_title].length.to_s + ' checklength ' + @ck.tp[:author_title] if @ck.tp[:author_title] and @ck.tp[:author_title].length >@ck.lt_author_title puts @ck.tp[:illustrator].length.to_s + ' checklength ' + @ck.tp[:illustrator] if @ck.tp[:illustrator] and @ck.tp[:illustrator].length >@ck.lt_illustrator puts @ck.tp[:translator].length.to_s + ' checklength ' + @ck.tp[:translator] if @ck.tp[:translator] and @ck.tp[:translator].length >@ck.lt_translator puts @ck.tp[:prepared_by].length.to_s + ' checklength ' + @ck.tp[:prepared_by] if @ck.tp[:prepared_by] and @ck.tp[:prepared_by].length >@ck.lt_prepared_by puts @ck.tp[:digitized_by].length.to_s + ' checklength ' + @ck.tp[:digitized_by] if @ck.tp[:digitized_by] and @ck.tp[:digitized_by].length >@ck.lt_digitized_by puts @ck.tp[:subject].length.to_s + ' checklength ' + @ck.tp[:subject] if @ck.tp[:subject] and @ck.tp[:subject].length >@ck.lt_subject puts @ck.tp[:description].length.to_s + ' checklength ' + @ck.tp[:description] if @ck.tp[:description] and @ck.tp[:description].length >@ck.lt_description puts @ck.tp[:publisher].length.to_s + ' checklength ' + @ck.tp[:publisher] if @ck.tp[:publisher] and @ck.tp[:publisher].length >@ck.lt_publisher puts @ck.tp[:contributor].length.to_s + ' checklength ' + @ck.tp[:contributor] if @ck.tp[:contributor] and @ck.tp[:contributor].length >@ck.lt_contributor puts @ck.tp[:date].length.to_s + ' checklength ' + @ck.tp[:date] if @ck.tp[:date] and @ck.tp[:date].length >@ck.lt_date puts @ck.tp[:date_created].length.to_s + ' checklength ' + @ck.tp[:date_created] if @ck.tp[:date_created] and @ck.tp[:date_created].length >@ck.lt_date puts @ck.tp[:date_issued].length.to_s + ' checklength ' + @ck.tp[:date_issued] if @ck.tp[:date_issued] and @ck.tp[:date_issued].length >@ck.lt_date puts @ck.tp[:date_valid].length.to_s + ' checklength ' + @ck.tp[:date_valid] if @ck.tp[:date_valid] and @ck.tp[:date_valid].length >@ck.lt_date puts @ck.tp[:date_available].length.to_s + ' checklength ' + @ck.tp[:date_available] if @ck.tp[:date_available] and @ck.tp[:date_available].length >@ck.lt_date puts @ck.tp[:date_modified].length.to_s + ' checklength ' + @ck.tp[:date_modified] if @ck.tp[:date_modified] and @ck.tp[:date_modified].length >@ck.lt_date puts @ck.tp[:date_translated].length.to_s + ' checklength ' + @ck.tp[:date_translated] if @ck.tp[:date_translated] and @ck.tp[:date_translated].length >@ck.lt_date puts @ck.tp[:date_added_to_site].length.to_s + ' checklength ' + @ck.tp[:date_added_to_site] if @ck.tp[:date_added_to_site] and @ck.tp[:date_added_to_site].length >@ck.lt_date puts @ck.tp[:type].length.to_s + ' checklength ' + @ck.tp[:type] if @ck.tp[:type] and @ck.tp[:type].length >@ck.lt_type puts @ck.tp[:format].length.to_s + ' checklength ' + @ck.tp[:format] if @ck.tp[:format] and @ck.tp[:format].length >@ck.lt_format puts @ck.tp[:identifier].length.to_s + ' checklength ' + @ck.tp[:identifier] if @ck.tp[:identifier] and @ck.tp[:identifier].length >@ck.lt_identifier puts @ck.tp[:source].length.to_s + ' checklength ' + @ck.tp[:source] if @ck.tp[:source] and @ck.tp[:source].length >@ck.lt_source puts @ck.tp[:language].length.to_s + ' checklength ' + @ck.tp[:language] if @ck.tp[:language] and @ck.tp[:language].length >@ck.lt_language puts @ck.tp[:language_original].length.to_s + ' checklength ' + @ck.tp[:language_original] if @ck.tp[:language_original] and @ck.tp[:language_original].length >@ck.lt_language_original puts @ck.tp[:relation].length.to_s + ' checklength ' + @ck.tp[:relation] if @ck.tp[:relation] and @ck.tp[:relation].length >@ck.lt_relation puts @ck.tp[:coverage].length.to_s + ' checklength ' + @ck.tp[:coverage] if @ck.tp[:coverage] and @ck.tp[:coverage].length >@ck.lt_coverage puts @ck.tp[:rights].length.to_s + ' checklength ' + @ck.tp[:rights] if @ck.tp[:rights] and @ck.tp[:rights].length >@ck.lt_rights puts @ck.tp[:copyright].length.to_s + ' checklength ' + @ck.tp[:copyright] if @ck.tp[:copyright] and @ck.tp[:copyright].length >@ck.lt_copyright puts @ck.tp[:owner].length.to_s + ' checklength ' + @ck.tp[:owner] if @ck.tp[:owner] and @ck.tp[:owner].length >@ck.lt_owner puts @ck.tp[:keywords].length.to_s + ' checklength ' + @ck.tp[:keywords] if @ck.tp[:keywords] and @ck.tp[:keywords].length >@ck.lt_keywords puts @ck.tp[:abstract].length.to_s + ' checklength ' + @ck.tp[:abstract] if @ck.tp[:abstract] and @ck.tp[:abstract].length >@ck.lt_abstract puts @ck.tp[:comment].length.to_s + ' checklength ' + @ck.tp[:comment] if @ck.tp[:comment] and @ck.tp[:comment].length >@ck.lt_comment puts @ck.tp[:loc].length.to_s + ' checklength ' + @ck.tp[:loc] if @ck.tp[:loc] and @ck.tp[:loc].length >@ck.lt_loc puts @ck.tp[:dewey].length.to_s + ' checklength ' + @ck.tp[:dewey] if @ck.tp[:dewey] and @ck.tp[:dewey].length >@ck.lt_dewey puts @ck.tp[:isbn].length.to_s + ' checklength ' + @ck.tp[:isbn] if @ck.tp[:isbn] and @ck.tp[:isbn].length >@ck.lt_isbn puts @ck.tp[:pg].length.to_s + ' checklength ' + @ck.tp[:pg] if @ck.tp[:pg] and @ck.tp[:pg].length >@ck.lt_pg puts @ck.tp[:topic_register].length.to_s + ' checklength ' + @ck.tp[:topic_register] if @ck.tp[:topic_register] and @ck.tp[:topci_register].length >@ck.lt_topic_register puts @ck.tp[:date] if @ck.tp[:date] and not @ck.tp[:date].empty? and @ck.tp[:date] !~/\d\d-\d\d-\d\d/ end end end end __END__ #+END_SRC * document header #+NAME: sisu_document_header #+BEGIN_SRC text encoding: utf-8 - Name: SiSU - Description: documents, structuring, processing, publishing, search db sql - Author: Ralph Amissah - Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2019, 2020, 2021, Ralph Amissah, All Rights Reserved. - License: GPL 3 or later: SiSU, a framework for document structuring, publishing and search Copyright (C) Ralph Amissah This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . If you have Internet connection, the latest version of the GPL should be available at these locations: - SiSU uses: - Standard SiSU markup syntax, - Standard SiSU meta-markup syntax, and the - Standard SiSU object citation numbering and system - Homepages: - Git #+END_SRC