From 12479b344f23b23c51f7525bb3dd52b79ba4814e Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Fri, 16 Apr 2010 10:10:43 -0400 Subject: db, shared_html_lite, link back to footnote/endnote reference, fix --- lib/sisu/v2/shared_html_lite.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/sisu/v2/shared_html_lite.rb b/lib/sisu/v2/shared_html_lite.rb index d3455a9a..a972e379 100644 --- a/lib/sisu/v2/shared_html_lite.rb +++ b/lib/sisu/v2/shared_html_lite.rb @@ -170,7 +170,7 @@ module SiSU_Format_Shared txt=markup_note(en) < -#{nr}. #{txt} +#{nr}. #{txt}

GSUB end @@ -265,7 +265,7 @@ GSUB @tab="\t" @attrib=attrib @txt=txt - @lv=@notenumber=lv.to_s + @lv=lv.to_s @hname=hname.to_s @id=@ocn=id end -- cgit v1.2.3 From aa47d1db8596aa65746db05d369441d1def62aa4 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Fri, 16 Apr 2010 10:12:46 -0400 Subject: db sql table and column structure changes, name prefix "sisu_v2a_" resulting sisu version bump 2.1.0, plus other lesser fixes [Note: it is necessary to create new database and tables and to populate them] * db (sql) database table name and column structure changes, new pgsql db name prefix "sisu_v2a_" (version bump), continue to review (db_columns, db_create, db_import, db_sqltxt) * db remove and update fix, match filename for removal with = (not LIKE or ~) * db sqlite, issue with --recreate, bugfix (db_drop) --- lib/sisu/v2/constants.rb | 18 +- lib/sisu/v2/db_columns.rb | 2072 +++++++++++++++++++++++++++++++++++++++--- lib/sisu/v2/db_create.rb | 463 +++++----- lib/sisu/v2/db_drop.rb | 80 +- lib/sisu/v2/db_import.rb | 321 +------ lib/sisu/v2/db_indexes.rb | 24 +- lib/sisu/v2/db_load_tuple.rb | 176 +++- lib/sisu/v2/db_remove.rb | 10 +- lib/sisu/v2/db_select.rb | 18 +- lib/sisu/v2/db_sqltxt.rb | 115 +++ 10 files changed, 2581 insertions(+), 716 deletions(-) create mode 100644 lib/sisu/v2/db_sqltxt.rb (limited to 'lib') diff --git a/lib/sisu/v2/constants.rb b/lib/sisu/v2/constants.rb index 9abe9c0b..3fcb1e3a 100644 --- a/lib/sisu/v2/constants.rb +++ b/lib/sisu/v2/constants.rb @@ -111,7 +111,6 @@ Rx[:meta]=/#{Mx[:meta_o]}(\S+?)#{Mx[:meta_c]}/ Dx[:url_o]='‹'; Dx[:url_c]='›' Dx[:url_o_xml]='<'; Dx[:url_c_xml]='>' Dx[:rel_o]='‹'; Dx[:rel_c]='›' -Db[:name_prefix]="SiSU#{SiSU_version_dir}_" Tex[:backslash]="\\\\" Tex[:backslash]="\\\\" Tex[:tilde]='\\\\\\~' @@ -132,6 +131,23 @@ Px[:lv4]= '-' Px[:lv5]= '.' Px[:lv6]= '.' #Px[:lv5_6]= '.' +Db[:name_prefix]="SiSU#{SiSU_version_dir}a_" +Db[:name_prefix_db]="sisu_#{SiSU_version_dir}a_" +Db[:col_title]=800 +Db[:col_title_part]=400 +Db[:col_title_edition]=10 +Db[:col_name]=600 +Db[:col_creator_misc_short]=100 +Db[:col_language]=100 +Db[:col_language_char]=3 +Db[:col_date_text]=10 +Db[:col_classify_txt_long]=600 +Db[:col_classify_txt_short]=600 +Db[:col_classify_short]=200 +Db[:col_classify_identify]=256 +Db[:col_classify_library]=30 +Db[:col_classify_small]=16 +Db[:col_filename]=256 __END__ consider: 〔comment〕 diff --git a/lib/sisu/v2/db_columns.rb b/lib/sisu/v2/db_columns.rb index 1849a442..ee66c59e 100644 --- a/lib/sisu/v2/db_columns.rb +++ b/lib/sisu/v2/db_columns.rb @@ -58,154 +58,1934 @@ =end module SiSU_DB_columns - class Column_size - def lt_title - 600 - end - def lt_subtitle - 600 - end - def lt_author - 600 - end - def lt_author_title - 100 - end - def lt_author_nationality - 100 - end - def lt_illustrator - 600 - end - def lt_translator - 600 - end - def lt_prepared_by - 600 - end - def lt_digitized_by - 600 - end - def lt_subject - 600 - end - def lt_date - 10 - end - def lt_type - 600 - end - def lt_description - 2000 - end - def lt_publisher - 600 - end - def lt_contributor - 600 - end - def lt_format - 600 - end - def lt_identifier - 256 - end - def lt_source - 200 - end - def lt_language - 30 - end - def lt_language_char - 3 - end - def lt_language_original - 30 - end - def lt_language_original_char - 3 - end - def lt_relation - 100 - end - def lt_coverage - 100 - end - def lt_rights - 2000 - end - def lt_copyright - 2000 - end - def lt_owner - 600 - end - def lt_keywords - 600 - end - def lt_comment - 600 - end - def lt_loc - 30 - end - def lt_dewey - 30 - end - def lt_isbn - 16 - end - def lt_pg - 16 - end - def lt_abstract - 600 - end - def lt_skin - 100 + require "#{SiSU_lib}/sysenv" # sysenv.rb + require "#{SiSU_lib}/db_sqltxt" # db_sqltxt.rb + class Columns < SiSU_DB_text::Prepare + def initialize(md=nil) + @md=md + @db=SiSU_Env::Info_db.new #watch + if defined? md.mod \ + and md.mod.inspect=~/import|update/ \ + and FileTest.exist?(md.fns) + txt_arr=IO.readlines(md.fns,'') + src=txt_arr.join("\n") + if @db.share_source? + @sisutxt=special_character_escape(src) + else @sisutxt='' + end + @fulltext=clean_searchable_text(txt_arr) + else @sisutxt,@fulltext='','' + end end - def lt_markup - 100 - end - def lt_links - 100 - end - def lt_information - 100 - end - def lt_contact - 100 - end - def lt_suffix - 600 - end - def lt_filename - 256 - end - def lt_types - 1 - end - def lt_subj - 64 - end - def lt_orig_pub - 400 - end - def lt_orig_pub_date - 400 - end - def lt_orig_pub_institution - 200 - end - def lt_orig_pub_nationality - 200 - end - def lt_writing_focus_nationality - 100 - end - def lt_topic_register - 2000 +#% structures + #def column_define + # def varchar(name,size) + # "#{name} VARCHAR(#{size}) NULL," + # end + #end +=begin +#% title +@title: + :subtitle: + :short: + :edition: + :language: + :note: +=end + def column + def title # DublinCore 1 - title + def name + 'title' + end + def create_column + "#{name} VARCHAR(#{Db[:col_title]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata full document title [DC1]';} + end + def tuple + t=if defined? @md.title.full \ + and @md.title.full=~/\S+/ + txt=@md.title.full + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_main + def name + 'title_main' + end + def create_column + "#{name} VARCHAR(#{Db[:col_title_part]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata main document title';} + end + def tuple + t=if defined? @md.title.main \ + and @md.title.main=~/\S+/ + txt=@md.title.main + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_sub + def name + 'title_sub' + end + def create_column + "#{name} VARCHAR(#{Db[:col_title_part]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document subtitle';} + end + def tuple + t=if defined? @md.title.sub \ + and @md.title.sub=~/\S+/ + txt=@md.title.sub + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_short + def name + 'title_short' + end + def create_column + "#{name} VARCHAR(#{Db[:col_title_part]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document short title if any';} + end + def tuple + t=if defined? @md.title.short \ + and @md.title.short=~/\S+/ + txt=@md.title.short + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_edition + def name + 'title_edition' + end + def create_column + "#{name} VARCHAR(#{Db[:col_title_edition]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document edition (version)';} + end + def tuple + t=if defined? @md.title.edition \ + and @md.title.edition=~/\S+/ + txt=@md.title.edition + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_note + def name + 'title_note' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes associated with title';} + end + def tuple + t=if defined? @md.title.note \ + and @md.title.note=~/\S+/ + txt=@md.title.note + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_language + def name + 'title_language' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document language [DC12]';} + end + def tuple + t=if defined? @md.title.language \ + and @md.title.language=~/\S+/ + txt=@md.title.language + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_language_char # consider + def name + 'title_language_char' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language_char]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document language iso code';} + end + def tuple + t=if defined? @md.title.language_char \ + and @md.title.language_char=~/\S+/ + txt=@md.title.language_char + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% creator +@creator: + :author: + :contributor: + :illustrator: + :photographer: + :translator: + :prepared_by: + :digitized_by: + :audio: + :video: +=end + def creator_author # DublinCore 2 - creator/author (author) + def name + 'creator_author' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document author (creator) [DC2]';} + end + def tuple + t=if defined? @md.creator.author_detail \ + and @md.creator.author_detail.class==Array \ + and @md.creator.author_detail.length > 0 + txt='' + @md.creator.author_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_author_honorific # consider + def name + 'creator_author_hon' + end + def create_column + "#{name} VARCHAR(#{Db[:col_creator_misc_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document author honorific (title e.g, Ms. Dr. Prof.)';} + end + def tuple + t=if defined? @md.creator.author_hon \ + and @md.creator.author_hon=~/\S+/ + txt=@md.creator.author_hon + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_author_nationality # consider + def name + 'creator_author_nationality' + end + def create_column + "#{name} VARCHAR(#{Db[:col_creator_misc_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata nationality of document author (creator)';} + end + def tuple + t=if defined? @md.creator.author_nationality_detail \ + and @md.creator.author_nationality=~/\S+/ + txt=@md.creator.author_nationality_detail + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_contributor # DublinCore 6 - contributor + def name + 'creator_contributor' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document contributor name(s) [DC6]';} + end + def tuple + t=if defined? @md.creator.contributor_detail \ + and @md.creator.contributor_detail.class==Array \ + and @md.creator.contributor_detail.length > 0 + txt=@md.creator.contributor_detail #dc + txt='' + @md.creator.contributor_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_illustrator + def name + 'creator_illustrator' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document illustrator name(s)';} + end + def tuple + t=if defined? @md.creator.illustrator_detail \ + and @md.creator.illustrator_detail.class==Array \ + and @md.creator.illustrator_detail.length > 0 + txt=@md.creator.illustrator_detail + txt='' + @md.creator.illustrator_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_photographer + def name + 'creator_photographer' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document photographer name(s)';} + end + def tuple + t=if defined? @md.creator.photographer_detail \ + and @md.creator.photographer_detail.class==Array \ + and @md.creator.photographer_detail.length > 0 + txt=@md.creator.photographer_detail + txt='' + @md.creator.photographer_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_translator + def name + 'creator_translator' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document translator name(s)';} + end + def tuple + t=if defined? @md.creator.translator_detail \ + and @md.creator.translator_detail.class==Array \ + and @md.creator.translator_detail.length > 0 + txt='' + @md.creator.translator_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_prepared_by + def name + 'creator_prepared_by' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document prepared by name(s)';} + end + def tuple + t=if defined? @md.creator.prepared_by_detail \ + and @md.creator.prepared_by_detail.class==Array \ + and @md.creator.prepared_by_detail.length > 0 + txt=@md.creator.prepared_by_detail + txt='' + @md.creator.prepared_by_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_digitized_by + def name + 'creator_digitized_by' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document digitized by name(s)';} + end + def tuple + t=if defined? @md.creator.digitized_by_detail \ + and @md.creator.digitized_by_detail.class==Array \ + and @md.creator.digitized_by_detail.length > 0 + txt=@md.creator.digitized_by_detail + txt='' + @md.creator.digitized_by_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_audio + def name + 'creator_audio' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document audio by name(s)';} + end + def tuple + t=if defined? @md.creator.audio_detail \ + and @md.creator.audio_detail.class==Array \ + and @md.creator.audio_detail.length > 0 + txt=@md.creator.audio_detail + txt='' + @md.creator.audio_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_video + def name + 'creator_video' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document video by name(s)';} + end + def tuple + t=if defined? @md.creator.video_detail \ + and @md.creator.video_detail.class==Array \ + and @md.creator.video_detail.length > 0 + txt='' + @md.creator.video_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% language +#taken from other fields +@title: + :language: +@original: + :language: +#not available --> +#@language: +# :document: +# :original: +=end + def language_document + def name + 'language_document' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document language';} + end + def tuple + t=if defined? @md.language.document \ + and @md.language.document=~/\S+/ + txt=@md.language.document + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def language_document_char + def name + 'language_document_char' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language_char]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document language';} + end + def tuple + t=if defined? @md.language.document_char \ + and @md.language.document_char=~/\S+/ + txt=@md.language.document_char + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def language_original + def name + 'language_original' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata original document/text language';} + end + def tuple + t=if defined? @md.language.original \ + and @md.language.original=~/\S+/ + txt=@md.language.original + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def language_original_char + def name + 'language_original_char' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language_char]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document language';} + end + def tuple + t=if defined? @md.language.original_char \ + and @md.language.original_char=~/\S+/ + txt=@md.language.original_char + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% date +@date: + :added_to_site: + :available: + :created: + :issued: + :modified: + :published: + :valid: + :translated: + :original_publication: +=end + def date_added_to_site + def name + 'date_added_to_site' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + #"#{name} DATE," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date added to site';} + end + def tuple + t=if defined? @md.date.added_to_site \ + and @md.date.added_to_site=~/\S+/ + txt=@md.date.added_to_site + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_available + def name + 'date_available' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date added to site [DC]';} + end + def tuple + t=if defined? @md.date.available \ + and @md.date.available=~/\S+/ + txt=@md.date.available + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_created + def name + 'date_created' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date created [DC]';} + end + def tuple + t=if defined? @md.date.created \ + and @md.date.created=~/\S+/ + txt=@md.date.created + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_issued + def name + 'date_issued' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date issued [DC]';} + end + def tuple + t=if defined? @md.date.issued \ + and @md.date.issued=~/\S+/ + txt=@md.date.issued + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_modified + def name + 'date_modified' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date modified [DC]';} + end + def tuple + t=if defined? @md.date.modified \ + and @md.date.modified=~/\S+/ + txt=@md.date.modified + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_published + def name + 'date_published' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date published [DC7]';} + end + def tuple + t=if defined? @md.date.published \ + and @md.date.published=~/\S+/ + txt=@md.date.published + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_valid + def name + 'date_valid' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date valid [DC]';} + end + def tuple + t=if defined? @md.date.valid \ + and @md.date.valid=~/\S+/ + txt=@md.date.valid + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_translated + def name + 'date_translated' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date translated';} + end + def tuple + t=if defined? @md.date.translated \ + and @md.date.translated=~/\S+/ + txt=@md.date.translated + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_original_publication + def name + 'date_original_publication' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date of original publication';} + end + def tuple + t=if defined? @md.date.original_publication \ + and @md.date.original_publication=~/\S+/ + txt=@md.date.original_publication + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_generated + def name + 'date_generated' + end + def create_column #choose other representation of time + "#{name} VARCHAR(30) NULL," + #"#{name} VARCHAR(10) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date of sisu generation of document, automatically populated';} + end + def tuple #choose other representation of time + t=if defined? @md.generated \ + and @md.generated.to_s=~/\S+/ + txt=@md.generated.to_s + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% publisher +@publisher: +=end + def publisher + def name + 'publisher' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document publisher [DC5]';} + end + def tuple + t=if defined? @md.publisher \ + and @md.publisher=~/\S+/ + txt=@md.publisher + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +##% current +# def current_publisher +# def name +# 'current_publisher' +# end +# def size +# 10 +# end +# def create_column +# "#{name} VARCHAR(#{current_publisher.size}) NULL," +# end +# def tuple +# t=if defined? @md.current.publisher \ +# and @md.current.publisher=~/\S+/ +# txt=@md.current.publisher +# special_character_escape(txt) +# "'#{txt}', " +# end +# end +# self +# end +=begin +#% original +@original: + :publisher: + #:date: #repeated under date + :language: + :institution: + :nationality: + :source: +=end + def original_publisher + def name + 'original_publisher' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document original publisher [DC5]';} + end + def tuple + t=if defined? @md.original.publisher \ + and @md.original.publisher=~/\S+/ + txt=@md.original.publisher + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def original_language + def name + 'original_language' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document original language';} + end + def tuple + t=if defined? @md.original.language \ + and @md.original.language=~/\S+/ + txt=@md.original.language + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def original_language_char # consider + def name + 'original_language_char' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language_char]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document original language iso character';} + end + def tuple + t=if defined? @md.original.language_char \ + and @md.original.language_char=~/\S+/ + txt=@md.original.language_char + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def original_source + def name + 'original_source' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document original source [DC11]';} + end + def tuple + t=if defined? @md.original.source \ + and @md.original.source=~/\S+/ + txt=@md.original.source + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def original_institution + def name + 'original_institution' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document original institution';} + end + def tuple + t=if defined? @md.original.institution \ + and @md.original.institution=~/\S+/ + txt=@md.original.institution + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def original_nationality + def name + 'original_nationality' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document original nationality';} + end + def tuple + t=if defined? @md.original.nationality \ + and @md.original.nationality=~/\S+/ + txt=@md.original.nationality + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% rights +@rights: + #:copyright: #mapped to :text: used where no other copyrights and included in :all: + :text: + :translation: + :illustrations: + :photographs: + :preparation: + :digitization: + :audio: + :video: + :license: + :all: +=end + def rights_all + def name + 'rights' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata rights associated with document (composite) [DC15]';} + end + def tuple + t=if defined? @md.rights.all \ + and @md.rights.all=~/\S+/ + txt=@md.rights.all + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_text + def name + 'rights_copyright_text' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text';} + end + def tuple + t=if defined? @md.rights.copyright_text \ + and @md.rights.copyright_text=~/\S+/ + txt=@md.rights.copyright_text + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_translation + def name + 'rights_copyright_translation' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text translation (if any)';} + end + def tuple + t=if defined? @md.rights.copyright_translation \ + and @md.rights.copyright_translation=~/\S+/ + txt=@md.rights.copyright_translation + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_illustrations + def name + 'rights_copyright_illustrations' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text illustrations (if any)';} + end + def tuple + t=if defined? @md.rights.copyright_illustrations \ + and @md.rights.copyright_illustrations=~/\S+/ + txt=@md.rights.copyright_illustrations + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_photographs + def name + 'rights_copyright_photographs' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text photographs (if any)';} + end + def tuple + t=if defined? @md.rights.copyright_photographs \ + and @md.rights.copyright_photographs=~/\S+/ + txt=@md.rights.copyright_photographs + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_preparation + def name + 'rights_copyright_preparation' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text preparation (if any)';} + end + def tuple + t=if defined? @md.rights.copyright_preparation \ + and @md.rights.copyright_preparation=~/\S+/ + txt=@md.rights.copyright_preparation + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_digitization + def name + 'rights_copyright_digitization' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text digitization (if any)';} + end + def tuple + t=if defined? @md.rights.copyright_digitization \ + and @md.rights.copyright_digitization=~/\S+/ + txt=@md.rights.copyright_digitization + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_audio + def name + 'rights_copyright_audio' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text audio (if any)';} + end + def tuple + t=if defined? @md.rights.copyright_audio \ + and @md.rights.copyright_audio=~/\S+/ + txt=@md.rights.copyright_audio + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_video + def name + 'rights_copyright_video' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text video (if any)';} + end + def tuple + t=if defined? @md.rights.copyright_video \ + and @md.rights.copyright_video=~/\S+/ + txt=@md.rights.copyright_video + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_license + def name + 'rights_license' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata license granted for use of document if any)';} + end + def tuple + t=if defined? @md.rights.license \ + and @md.rights.license=~/\S+/ + txt=@md.rights.license + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% classify +@classify: + :topic_register: + :coverage: + :format: + :identifier: + :keywords: + :relation: + :subject: + :type: + :loc: + :dewey: + :pg: + :isbn: +=end + def classify_topic_register + def name + 'classify_topic_register' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_txt_long]}) NULL," + #"#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document topic register (semi-structured document subject information)';} + end + def tuple + t=if defined? @md.classify.topic_register \ + and @md.classify.topic_register=~/\S+/ + txt=@md.classify.topic_register + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_subject + def name + 'classify_subject' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_txt_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document subject matter [DC3]';} + end + def tuple + t=if defined? @md.classify.subject \ + and @md.classify.subject=~/\S+/ + txt=@md.classify.subject + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_type #check + def name + 'classify_type' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_txt_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document type [DC8]';} + end + def tuple + t=if defined? @md.classify.type \ + and @md.classify.type=~/\S+/ + txt=@md.classify.type + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_loc + def name + 'classify_loc' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_library]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document library of congress (if available)';} + end + def tuple + t=if defined? @md.classify.loc \ + and @md.classify.loc=~/\S+/ + txt=@md.classify.loc + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_dewey + def name + 'classify_dewey' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_library]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document dewey (if available)';} + end + def tuple + t=if defined? @md.classify.dewey \ + and @md.classify.dewey=~/\S+/ + txt=@md.classify.dewey + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_pg + def name + 'classify_pg' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_small]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document project gutenberg (if any)';} + end + def tuple + t=if defined? @md.classify.pg \ + and @md.classify.pg=~/\S+/ + txt=@md.classify.pg + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_isbn + def name + 'classify_isbn' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_small]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document isbn (if any)';} + end + def tuple + t=if defined? @md.classify.isbn \ + and @md.classify.isbn=~/\S+/ + txt=@md.classify.isbn + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_format + def name + 'classify_format' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_txt_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document format [DC9]';} + end + def tuple + t=if defined? @md.classify.format \ + and @md.classify.format=~/\S+/ + txt=@md.classify.format + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_identifier + def name + 'classify_identifier' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_identify]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document identifier [DC10]';} + end + def tuple + t=if defined? @md.classify.identifier \ + and @md.classify.identifier=~/\S+/ + txt=@md.classify.identifier + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_relation + def name + 'classify_relation' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document relation [DC13]';} + end + def tuple + t=if defined? @md.classify.relation \ + and @md.classify.relation=~/\S+/ + txt=@md.classify.relation + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_coverage + def name + 'classify_coverage' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document coverage [DC14]';} + end + def tuple + t=if defined? @md.classify.coverage \ + and @md.classify.coverage=~/\S+/ + txt=@md.classify.coverage + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_keywords + def name + 'classify_keywords' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_txt_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document keywords';} + end + def tuple + t=if defined? @md.classify.keywords \ + and @md.classify.keywords=~/\S+/ + txt=@md.classify.keywords + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% notes +@notes: + :abstract: + :comment: + :description: + :history: + :prefix: + :prefix_a: + :prefix_b: + :suffix: +=end + def notes_abstract + def name + 'notes_abstract' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes abstract';} + end + def tuple + t=if defined? @md.notes.abstract \ + and @md.notes.abstract=~/\S+/ + txt=@md.notes.abstract + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_comment + def name + 'notes_comment' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes comment';} + end + def tuple + t=if defined? @md.notes.comment \ + and @md.notes.comment=~/\S+/ + txt=@md.notes.comment + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_description + def name + 'notes_description' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes description [DC4]';} + end + def tuple + t=if defined? @md.notes.description \ + and @md.notes.description=~/\S+/ + txt=@md.notes.description + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_history #check, consider removal + def name + 'notes_history' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_txt_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes history';} + end + def tuple + t=if defined? @md.notes.history \ + and @md.notes.history=~/\S+/ + txt=@md.notes.history + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_prefix + def name + 'notes_prefix' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes prefix';} + end + def tuple + t=if defined? @md.notes.prefix \ + and @md.notes.prefix=~/\S+/ + txt=@md.notes.prefix + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_prefix_a + def name + 'notes_prefix_a' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes prefix_a';} + end + def tuple + t=if defined? @md.notes.prefix_a \ + and @md.notes.prefix_a=~/\S+/ + txt=@md.notes.prefix_a + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_prefix_b + def name + 'notes_prefix_b' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes prefix_b';} + end + def tuple + t=if defined? @md.notes.prefix_b \ + and @md.notes.prefix_b=~/\S+/ + txt=@md.notes.prefix_b + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_suffix + def name + 'notes_suffix' + end + def create_column # keep text + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes suffix';} + end + def tuple + t=if defined? @md.notes.suffix \ + and @md.notes.suffix=~/\S+/ + txt=@md.notes.suffix + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% misc +@make: + :skin: +@links: +=end + def filename + def name + 'filename' + end + def create_column + "#{name} VARCHAR(#{Db[:col_filename]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document filename';} + end + def tuple + t=if defined? @md.fns \ + and @md.fns=~/\S+/ + txt=@md.fns + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def sisutxt # consider naming sisusrc + def name + 'sisutxt' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'sisu markup text (if shared)';} + end + def tuple + t=if @md.mod.inspect=~/import|update/ \ + and FileTest.exist?(@md.fns) + ["#{name}, ","'#{@sisutxt}', "] + else ['',''] + end + end + self + end + def fulltext + def name + 'fulltext' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'document full text clean, searchable';} + end + def tuple + t=if @md.mod.inspect=~/import|update/ \ + and FileTest.exist?(@md.fns) + ["#{name}, ","'#{@fulltext}', "] + else ['',''] + end + end + self + end + def word_count + def name + 'word_count' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'document word count';} + end + def tuple + t=if defined? @md.wc_words \ + and @md.wc_words=~/\S+/ + txt=@md.wc_words + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def digest + def name + 'dgst' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'document hash digest sha256 (or md5)';} + end + def tuple + t=if defined? @md.dgst \ + and @md.dgst=~/\S+/ + txt=@md.dgst + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def skin_name #check + def name + 'skin_name' + end + def create_column + "#{name} VARCHAR(#{Db[:col_filename]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document skin name';} + end + def tuple + t=if defined? @md.notes.skin_name \ + and @md.notes.skin_name=~/\S+/ + txt=@md.notes.skin_name + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def skin # you likely want a separate table for skins + def name + 'skin' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document skin';} + end + def tuple + t=if defined? @md.skin \ + and @md.skin=~/\S+/ + txt=@md.skin + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def links + def name + 'links' + end + def create_column + "#{name} TEXT NULL," + #"#{name} VARCHAR(#{links.size}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document links';} + end + def tuple + t=if defined? @md.notes.links \ + and @md.notes.links=~/\S+/ + txt=@md.notes.links + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + self end + end + class Column_size def document_clean # restriction not necessary 60000 end diff --git a/lib/sisu/v2/db_create.rb b/lib/sisu/v2/db_create.rb index feba670c..8ed638dc 100644 --- a/lib/sisu/v2/db_create.rb +++ b/lib/sisu/v2/db_create.rb @@ -59,7 +59,7 @@ =end module SiSU_DB_create require "#{SiSU_lib}/db_columns" # db_columns.rb - class Create < SiSU_DB_columns::Column_size + class Create < SiSU_DB_columns::Columns require "#{SiSU_lib}/sysenv" # sysenv.rb @@dl=nil def initialize(opt,conn,file,sql_type='pg') @@ -82,7 +82,7 @@ module SiSU_DB_create @env=SiSU_Env::Info_env.new(@opt.fns) tell=SiSU_Screen::Ansi.new(@opt.cmd,'invert','Create PG db:',%{"SiSU_#{@env.path.stub_pwd}"}) tell.colorize unless @opt.cmd =~/q/ - SiSU_Env::System_call.new.create_pg_db(@env.path.stub_pwd) #watch use of path.stub_pwd instead of stub + SiSU_Env::System_call.new.create_pg_db(@env.path.stub_pwd) if @sql_type=='pg' #watch use of path.stub_pwd instead of stub end def output_dir? dir=SiSU_Env::Info_env.new('') @@ -91,102 +91,129 @@ module SiSU_DB_create end end def create_table - def metadata + def metadata_and_text print %{ currently using sisu dbi module - to be populated from documents files - create tables metadata + to be populated from document files + create tables metadata_and_text data import through ruby transfer } unless @opt.cmd =~/q/ @conn.execute(%{ - CREATE TABLE metadata ( + CREATE TABLE metadata_and_text ( tid BIGINT PRIMARY KEY, - title VARCHAR(#{lt_title}) NULL, - subtitle VARCHAR(#{lt_subtitle}) NULL, - author VARCHAR(#{lt_author}) NULL, -/* plan to replace creator field, currently used, with author field */ - creator VARCHAR(#{lt_author}) NULL, - author_title VARCHAR(#{lt_author_title}) NULL, - author_nationality VARCHAR(#{lt_author_nationality}) NULL, - illustrator VARCHAR(#{lt_illustrator}) NULL, - translator VARCHAR(#{lt_translator}) NULL, - subject VARCHAR(#{lt_subject}) NULL, - date VARCHAR(#{lt_date}) NULL, - date_added_to_site VARCHAR(#{lt_date}) NULL, - date_created VARCHAR(#{lt_date}) NULL, - date_issued VARCHAR(#{lt_date}) NULL, - date_available VARCHAR(#{lt_date}) NULL, - date_valid VARCHAR(#{lt_date}) NULL, - date_modified VARCHAR(#{lt_date}) NULL, - date_translated VARCHAR(#{lt_date}) NULL, -/* date DATE, */ -/* date_added_to_site DATE, */ -/* date_created DATE, */ -/* date_issued DATE, */ -/* date_available DATE, */ -/* date_valid DATE, */ -/* date_modified DATE, */ -/* date_translated DATE, */ - type VARCHAR(#{lt_type}) NULL, - description VARCHAR(#{lt_description}) NULL, - publisher VARCHAR(#{lt_publisher}) NULL, - contributor VARCHAR(#{lt_contributor}) NULL, - prepared_by VARCHAR(#{lt_prepared_by}) NULL, - digitized_by VARCHAR(#{lt_digitized_by}) NULL, - format VARCHAR(#{lt_format}) NULL, - identifier VARCHAR(#{lt_identifier}) NULL, - source VARCHAR(#{lt_source}) NULL, - language VARCHAR(#{lt_language}) NULL, - language_original VARCHAR(#{lt_language_original}) NULL, - relation VARCHAR(#{lt_relation}) NULL, - coverage VARCHAR(#{lt_coverage}) NULL, - rights VARCHAR(#{lt_rights}) NULL, - copyright VARCHAR(#{lt_copyright}) NULL, - owner VARCHAR(#{lt_owner}) NULL, - keywords VARCHAR(#{lt_keywords}) NULL, - comment VARCHAR(#{lt_comment}) NULL, - loc VARCHAR(#{lt_loc}) NULL, - dewey VARCHAR(#{lt_dewey}) NULL, - isbn VARCHAR(#{lt_isbn}) NULL, - pg VARCHAR(#{lt_pg}) NULL, - abstract VARCHAR(#{lt_abstract}) NULL, - prefix_a TEXT NULL, - prefix_b TEXT NULL, - skin VARCHAR(#{lt_skin}) NULL, - markup VARCHAR(#{lt_markup}) NULL, - links VARCHAR(#{lt_links}) NULL, - information VARCHAR(#{lt_information}) NULL, - contact VARCHAR(#{lt_contact}) NULL, - suffix VARCHAR(#{lt_suffix}) NULL, - filename VARCHAR(#{lt_filename}) NULL UNIQUE, - types CHAR(#{lt_types}) NULL, - subj VARCHAR(#{lt_subj}) NULL, - original_publication VARCHAR(#{lt_orig_pub}) NULL, - original_publication_date VARCHAR(#{lt_orig_pub_date}) NULL, - original_publication_institution VARCHAR(#{lt_orig_pub_institution}) NULL, - original_publication_nationality VARCHAR(#{lt_orig_pub_nationality}) NULL, - writing_focus_nationality VARCHAR(#{lt_writing_focus_nationality}) NULL, - topic_register VARCHAR(#{lt_topic_register}) NULL + /* title */ + #{column.title.create_column} + #{column.title_main.create_column} + #{column.title_sub.create_column} + #{column.title_short.create_column} + #{column.title_edition.create_column} + #{column.title_note.create_column} + #{column.title_language.create_column} + #{column.title_language_char.create_column} + /* creator */ + #{column.creator_author.create_column} + #{column.creator_author_honorific.create_column} + #{column.creator_author_nationality.create_column} + #{column.creator_contributor.create_column} + #{column.creator_illustrator.create_column} + #{column.creator_photographer.create_column} + #{column.creator_translator.create_column} + #{column.creator_prepared_by.create_column} + #{column.creator_digitized_by.create_column} + #{column.creator_audio.create_column} + #{column.creator_video.create_column} + /* language */ + #{column.language_document.create_column} + #{column.language_document_char.create_column} + #{column.language_original.create_column} + #{column.language_original_char.create_column} + /* date */ + #{column.date_added_to_site.create_column} + #{column.date_available.create_column} + #{column.date_created.create_column} + #{column.date_issued.create_column} + #{column.date_modified.create_column} + #{column.date_published.create_column} + #{column.date_valid.create_column} + #{column.date_translated.create_column} + #{column.date_original_publication.create_column} + #{column.date_generated.create_column} + /* publisher */ + #{column.publisher.create_column} + /* original */ + #{column.original_publisher.create_column} + #{column.original_language.create_column} + #{column.original_language_char.create_column} + #{column.original_source.create_column} + #{column.original_institution.create_column} + #{column.original_nationality.create_column} + /* rights */ + #{column.rights_all.create_column} + #{column.rights_copyright_text.create_column} + #{column.rights_copyright_translation.create_column} + #{column.rights_copyright_illustrations.create_column} + #{column.rights_copyright_photographs.create_column} + #{column.rights_copyright_preparation.create_column} + #{column.rights_copyright_digitization.create_column} + #{column.rights_copyright_audio.create_column} + #{column.rights_copyright_video.create_column} + #{column.rights_license.create_column} + /* classify */ + #{column.classify_topic_register.create_column} + #{column.classify_subject.create_column} + #{column.classify_type.create_column} + #{column.classify_loc.create_column} + #{column.classify_dewey.create_column} + #{column.classify_pg.create_column} + #{column.classify_isbn.create_column} + #{column.classify_format.create_column} + #{column.classify_identifier.create_column} + #{column.classify_relation.create_column} + #{column.classify_coverage.create_column} + #{column.classify_keywords.create_column} + /* notes */ + #{column.notes_abstract.create_column} + #{column.notes_comment.create_column} + #{column.notes_description.create_column} + #{column.notes_history.create_column} + #{column.notes_prefix.create_column} + #{column.notes_prefix_a.create_column} + #{column.notes_prefix_b.create_column} + #{column.notes_suffix.create_column} + /* misc */ + #{column.filename.create_column} + #{column.sisutxt.create_column} + #{column.fulltext.create_column} + #{column.word_count.create_column} + #{column.digest.create_column} + #{column.skin_name.create_column} + #{column.skin.create_column} + #{column.links.create_column.gsub(/,$/,'')} +/* subj VARCHAR(64) NULL, */ +/* contact VARCHAR(100) NULL, */ +/* information VARCHAR(100) NULL, */ +/* types CHAR(1) NULL, */ +/* writing_focus_nationality VARCHAR(100) NULL, */ ); }) - @comment.psql.metadata if @comment + @comment.psql.metadata_and_text if @comment end - def documents # create documents base + def doc_objects # create doc_objects base print %{ to be populated from documents files - create tables documents document_trade document_env + create tables doc_objects data import through ruby transfer } unless @opt.cmd =~/q/ @conn.execute(%{ - CREATE TABLE documents ( + CREATE TABLE doc_objects ( lid BIGINT PRIMARY KEY, - metadata_tid BIGINT REFERENCES metadata, + metadata_tid BIGINT REFERENCES metadata_and_text, ocn SMALLINT, ocnd VARCHAR(6), ocns VARCHAR(6), clean TEXT NULL, body TEXT NULL, - seg VARCHAR(#{document_seg}) NULL, + seg VARCHAR(120) NULL, lev_an VARCHAR(1), lev SMALLINT NULL, lev1 SMALLINT, @@ -210,7 +237,7 @@ module SiSU_DB_create types CHAR(1) NULL ); }) - @comment.psql.documents if @comment + @comment.psql.doc_objects if @comment end def endnotes print %{ @@ -221,7 +248,7 @@ module SiSU_DB_create @conn.execute(%{ CREATE TABLE endnotes ( nid BIGINT PRIMARY KEY, - document_lid BIGINT REFERENCES documents, + document_lid BIGINT REFERENCES doc_objects, nr SMALLINT, clean TEXT NULL, body TEXT NULL, @@ -229,7 +256,7 @@ module SiSU_DB_create ocnd VARCHAR(6), ocns VARCHAR(6), digest_clean CHAR(#{@@dl}), - metadata_tid BIGINT REFERENCES metadata + metadata_tid BIGINT REFERENCES metadata_and_text ); }) @comment.psql.endnotes if @comment @@ -243,7 +270,7 @@ module SiSU_DB_create @conn.execute(%{ CREATE TABLE endnotes_asterisk ( nid BIGINT PRIMARY KEY, - document_lid BIGINT REFERENCES documents, + document_lid BIGINT REFERENCES doc_objects, nr SMALLINT, clean TEXT NULL, body TEXT NULL, @@ -251,7 +278,7 @@ module SiSU_DB_create ocnd VARCHAR(6), ocns VARCHAR(6), digest_clean CHAR(#{@@dl}), - metadata_tid BIGINT REFERENCES metadata + metadata_tid BIGINT REFERENCES metadata_and_text ); }) @comment.psql.endnotes_asterisk if @comment @@ -265,7 +292,7 @@ module SiSU_DB_create @conn.execute(%{ CREATE TABLE endnotes_plus ( nid BIGINT PRIMARY KEY, - document_lid BIGINT REFERENCES documents, + document_lid BIGINT REFERENCES doc_objects, nr SMALLINT, clean TEXT NULL, body TEXT NULL, @@ -273,21 +300,21 @@ module SiSU_DB_create ocnd VARCHAR(6), ocns VARCHAR(6), digest_clean CHAR(#{@@dl}), - metadata_tid BIGINT REFERENCES metadata + metadata_tid BIGINT REFERENCES metadata_and_text ); }) @comment.psql.endnotes_plus if @comment end - def urls # create documents file links mapping + def urls # create doc_objects file links mapping print %{ currently using sisu dbi module - to be populated from documents files + to be populated from doc_objects files create tables urls data import through ruby transfer } unless @opt.cmd =~/q/ @conn.execute(%{ CREATE TABLE urls ( - metadata_tid BIGINT REFERENCES metadata, + metadata_tid BIGINT REFERENCES metadata_and_text, plaintext varchar(512), html_toc varchar(512), html_doc varchar(512), @@ -311,7 +338,7 @@ module SiSU_DB_create self end end - class Comment + class Comment < SiSU_DB_columns::Columns def initialize(conn,sql_type='pg') @conn=conn if sql_type =~ /pg/; psql @@ -325,162 +352,146 @@ module SiSU_DB_create end end end - def metadata + def metadata_and_text sql_arr=[ - %{COMMENT ON Table metadata - IS 'contains SiSU documents metadata with metadata';}, - %{COMMENT ON COLUMN metadata.tid + %{COMMENT ON Table metadata_and_text + IS 'contains SiSU metadata and fulltext for search (including source .sst if shared)';}, + %{COMMENT ON COLUMN metadata_and_text.tid IS 'unique';}, - %{COMMENT ON COLUMN metadata.filename - IS 'document filename';}, - %{COMMENT ON COLUMN metadata.title - IS 'metadata title (dublin core element 1)';}, - %{COMMENT ON COLUMN metadata.subtitle - IS 'document subtitle';}, - %{COMMENT ON COLUMN metadata.creator - IS 'metadata creator (dublin core element 2)';}, - %{COMMENT ON COLUMN metadata.author - IS 'metadata author (dublin core element 2)';}, - %{COMMENT ON COLUMN metadata.illustrator - IS 'metadata illustrator';}, - %{COMMENT ON COLUMN metadata.translator - IS 'metadata translator';}, - %{COMMENT ON COLUMN metadata.subject - IS 'metadata subject (dublin core element 3)';}, - %{COMMENT ON COLUMN metadata.date - IS 'metadata date (dublin core element 7)';}, - %{COMMENT ON COLUMN metadata.date_created - IS 'metadata date created (dublin core)';}, - %{COMMENT ON COLUMN metadata.date_issued - IS 'metadata date of issue (dublin core)';}, - %{COMMENT ON COLUMN metadata.date_available - IS 'metadata date available (dublin core)';}, - %{COMMENT ON COLUMN metadata.date_valid - IS 'metadata date valid (dublin core)';}, - %{COMMENT ON COLUMN metadata.date_modified - IS 'metadata date modified (dublin core)';}, - %{COMMENT ON COLUMN metadata.type - IS 'metadata type (dublin core element 8)';}, - %{COMMENT ON COLUMN metadata.description - IS 'metadata description (dublin core element 4)';}, - %{COMMENT ON COLUMN metadata.publisher - IS 'metadata publisher (dublin core element 5)';}, - %{COMMENT ON COLUMN metadata.contributor - IS 'metadata contributor (dublin core element 6)';}, - %{COMMENT ON COLUMN metadata.prepared_by - IS 'metadata markup prepared by';}, - %{COMMENT ON COLUMN metadata.digitized_by - IS 'metadata digitized by';}, - %{COMMENT ON COLUMN metadata.format - IS 'metadata format (dublin core element 9)';}, - %{COMMENT ON COLUMN metadata.identifier - IS 'metadata identifier (dublin core element 10)';}, - %{COMMENT ON COLUMN metadata.source - IS 'metadata source (dublin core element 11)';}, - %{COMMENT ON COLUMN metadata.language - IS 'metadata language (dublin core element 12)';}, - %{COMMENT ON COLUMN metadata.language_original - IS 'metadata original language';}, - %{COMMENT ON COLUMN metadata.relation - IS 'metadata (dublin core element 13)';}, - %{COMMENT ON COLUMN metadata.coverage - IS 'metadata coverage (dublin core element 14)';}, - %{COMMENT ON COLUMN metadata.rights - IS 'metadata rights / copyright / license (dublin core element 15)';}, - %{COMMENT ON COLUMN metadata.owner - IS 'metadata owner';}, - %{COMMENT ON COLUMN metadata.keywords - IS 'metadata keywords';}, - %{COMMENT ON COLUMN metadata.comment - IS 'metadata comment';}, - %{COMMENT ON COLUMN metadata.abstract - IS 'metadata abstract';}, - %{COMMENT ON COLUMN metadata.loc - IS 'metadata library of congress';}, - %{COMMENT ON COLUMN metadata.dewey - IS 'metadata dewey';}, - %{COMMENT ON COLUMN metadata.isbn - IS 'metadata isbn';}, - %{COMMENT ON COLUMN metadata.pg - IS 'metadata project gutenberg number';}, - %{COMMENT ON COLUMN metadata.prefix_a - IS 'metadata prefix';}, - %{COMMENT ON COLUMN metadata.prefix_b - IS 'metadata prefix';}, - %{COMMENT ON COLUMN metadata.skin - IS 'metadata sisu skin';}, - %{COMMENT ON COLUMN metadata.markup - IS 'metadata markup source';}, - %{COMMENT ON COLUMN metadata.links - IS 'metadata links';}, - %{COMMENT ON COLUMN metadata.information - IS 'metadata information';}, - %{COMMENT ON COLUMN metadata.contact - IS 'metadata contact';}, - %{COMMENT ON COLUMN metadata.suffix - IS 'metadata sisu suffix (output related)';}, - %{COMMENT ON COLUMN metadata.filename - IS 'metadata source filename';}, - %{COMMENT ON COLUMN metadata.types - IS 'document types scroll 1, seg 2, both 3';}, - %{COMMENT ON COLUMN metadata.subj - IS 'subject areas - no way to populate at present as not mapped';}, + %{#{column.title.column_comment}}, + %{#{column.title_main.column_comment}}, + %{#{column.title_sub.column_comment}}, + %{#{column.title_short.column_comment}}, + %{#{column.title_edition.column_comment}}, + %{#{column.title_note.column_comment}}, + %{#{column.title_language.column_comment}}, + %{#{column.title_language_char.column_comment}}, + %{#{column.creator_author.column_comment}}, + %{#{column.creator_author_honorific.column_comment}}, + %{#{column.creator_author_nationality.column_comment}}, + %{#{column.creator_contributor.column_comment}}, + %{#{column.creator_illustrator.column_comment}}, + %{#{column.creator_photographer.column_comment}}, + %{#{column.creator_translator.column_comment}}, + %{#{column.creator_prepared_by.column_comment}}, + %{#{column.creator_digitized_by.column_comment}}, + %{#{column.creator_audio.column_comment}}, + %{#{column.creator_video.column_comment}}, + %{#{column.language_document.column_comment}}, + %{#{column.language_document_char.column_comment}}, + %{#{column.language_original.column_comment}}, + %{#{column.language_original_char.column_comment}}, + %{#{column.date_added_to_site.column_comment}}, + %{#{column.date_available.column_comment}}, + %{#{column.date_created.column_comment}}, + %{#{column.date_issued.column_comment}}, + %{#{column.date_modified.column_comment}}, + %{#{column.date_published.column_comment}}, + %{#{column.date_valid.column_comment}}, + %{#{column.date_translated.column_comment}}, + %{#{column.date_original_publication.column_comment}}, + %{#{column.date_generated.column_comment}}, + %{#{column.publisher.column_comment}}, + %{#{column.original_publisher.column_comment}}, + %{#{column.original_language.column_comment}}, + %{#{column.original_language_char.column_comment}}, + %{#{column.original_source.column_comment}}, + %{#{column.original_institution.column_comment}}, + %{#{column.original_nationality.column_comment}}, + %{#{column.rights_all.column_comment}}, + %{#{column.rights_copyright_text.column_comment}}, + %{#{column.rights_copyright_translation.column_comment}}, + %{#{column.rights_copyright_illustrations.column_comment}}, + %{#{column.rights_copyright_photographs.column_comment}}, + %{#{column.rights_copyright_preparation.column_comment}}, + %{#{column.rights_copyright_digitization.column_comment}}, + %{#{column.rights_copyright_audio.column_comment}}, + %{#{column.rights_copyright_video.column_comment}}, + %{#{column.rights_license.column_comment}}, + %{#{column.classify_topic_register.column_comment}}, + %{#{column.classify_subject.column_comment}}, + %{#{column.classify_type.column_comment}}, + %{#{column.classify_loc.column_comment}}, + %{#{column.classify_dewey.column_comment}}, + %{#{column.classify_pg.column_comment}}, + %{#{column.classify_isbn.column_comment}}, + %{#{column.classify_format.column_comment}}, + %{#{column.classify_identifier.column_comment}}, + %{#{column.classify_relation.column_comment}}, + %{#{column.classify_coverage.column_comment}}, + %{#{column.classify_keywords.column_comment}}, + %{#{column.notes_abstract.column_comment}}, + %{#{column.notes_comment.column_comment}}, + %{#{column.notes_description.column_comment}}, + %{#{column.notes_history.column_comment}}, + %{#{column.notes_prefix.column_comment}}, + %{#{column.notes_prefix_a.column_comment}}, + %{#{column.notes_prefix_b.column_comment}}, + %{#{column.notes_suffix.column_comment}}, + %{#{column.filename.column_comment}}, + %{#{column.sisutxt.column_comment}}, + %{#{column.fulltext.column_comment}}, + %{#{column.word_count.column_comment}}, + %{#{column.digest.column_comment}}, + %{#{column.skin_name.column_comment}}, + %{#{column.skin.column_comment}}, + %{#{column.links.column_comment}}, ] conn_execute_array(sql_arr) end - def documents + def doc_objects sql_arr=[ - %{COMMENT ON Table documents - IS 'contains searchable text of SiSU documents';}, - %{COMMENT ON COLUMN documents.lid + %{COMMENT ON Table doc_objects + IS 'contains searchable text of SiSU document objects';}, + %{COMMENT ON COLUMN doc_objects.lid IS 'unique';}, - %{COMMENT ON COLUMN documents.metadata_tid - IS 'tie to title in metadata';}, - %{COMMENT ON COLUMN documents.lev_an + %{COMMENT ON COLUMN doc_objects.metadata_tid + IS 'tie to title in metadata_and_text';}, + %{COMMENT ON COLUMN doc_objects.lev_an IS 'doc level A-C 1-6';}, - %{COMMENT ON COLUMN documents.lev + %{COMMENT ON COLUMN doc_objects.lev IS 'doc level 1-6 \d\~';}, - %{COMMENT ON COLUMN documents.seg + %{COMMENT ON COLUMN doc_objects.seg IS 'segment name from level number 4 (lv 1)';}, - %{COMMENT ON COLUMN documents.ocn + %{COMMENT ON COLUMN doc_objects.ocn IS 'object citation number';}, - %{COMMENT ON COLUMN documents.en_a + %{COMMENT ON COLUMN doc_objects.en_a IS 'first endnote number in text object (eg. NULL or 34) (used with en_z to create range)';}, - %{COMMENT ON COLUMN documents.en_z + %{COMMENT ON COLUMN doc_objects.en_z IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a to create range)';}, - %{COMMENT ON COLUMN documents.en_a_asterisk + %{COMMENT ON COLUMN doc_objects.en_a_asterisk IS 'first endnote number in text object (eg. NULL or 34) (used with en_z_asterisk to create range)';}, - %{COMMENT ON COLUMN documents.en_z_asterisk + %{COMMENT ON COLUMN doc_objects.en_z_asterisk IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_asterisk to create range)';}, - %{COMMENT ON COLUMN documents.en_a_plus + %{COMMENT ON COLUMN doc_objects.en_a_plus IS 'first endnote number in text object (eg. NULL or 34) (used with en_z_plus to create range)';}, - %{COMMENT ON COLUMN documents.en_z_plus + %{COMMENT ON COLUMN doc_objects.en_z_plus IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_plus to create range)';}, - %{COMMENT ON COLUMN documents.types + %{COMMENT ON COLUMN doc_objects.types IS 'document types seg scroll';}, - %{COMMENT ON COLUMN documents.clean + %{COMMENT ON COLUMN doc_objects.clean IS 'text object - substantive text: clean, stripped of markup';}, - %{COMMENT ON COLUMN documents.body + %{COMMENT ON COLUMN doc_objects.body IS 'text object - substantive text: light html markup';}, - %{COMMENT ON COLUMN documents.lev1 + %{COMMENT ON COLUMN doc_objects.lev1 IS 'document structure, level number 1';}, - %{COMMENT ON COLUMN documents.lev2 + %{COMMENT ON COLUMN doc_objects.lev2 IS 'document structure, level number 2';}, - %{COMMENT ON COLUMN documents.lev3 + %{COMMENT ON COLUMN doc_objects.lev3 IS 'document structure, level number 3';}, - %{COMMENT ON COLUMN documents.lev4 + %{COMMENT ON COLUMN doc_objects.lev4 IS 'document structure, level number 4';}, - %{COMMENT ON COLUMN documents.lev5 + %{COMMENT ON COLUMN doc_objects.lev5 IS 'document structure, level number 5';}, - %{COMMENT ON COLUMN documents.lev6 + %{COMMENT ON COLUMN doc_objects.lev6 IS 'document structure, level number 6';}, - %{COMMENT ON COLUMN documents.t_of + %{COMMENT ON COLUMN doc_objects.t_of IS 'document structure, type of object (object is of)';}, - %{COMMENT ON COLUMN documents.t_is + %{COMMENT ON COLUMN doc_objects.t_is IS 'document structure, object is';}, - %{COMMENT ON COLUMN documents.node + %{COMMENT ON COLUMN doc_objects.node IS 'document structure, object node if heading';}, - %{COMMENT ON COLUMN documents.parent + %{COMMENT ON COLUMN doc_objects.parent IS 'document structure, object parent (is a heading)';} ] conn_execute_array(sql_arr) @@ -501,15 +512,15 @@ module SiSU_DB_create IS 'endnote substantive content';}, %{COMMENT ON COLUMN endnotes.ocn IS 'object citation no# <\~(\d+)> from which endnote is referenced';}, - %{COMMENT ON COLUMN documents.metadata_tid - IS 'tie to title in metadata - unique for each document';} + %{COMMENT ON COLUMN doc_objects.metadata_tid + IS 'tie to title in metadata_and_text - unique for each document';} ] conn_execute_array(sql_arr) end def endnotes_asterisk sql_arr=[ %{COMMENT ON Table endnotes_asterisk - IS 'contains searchable text of SiSU documents endnotes asterisk';}, + IS 'contains searchable text of SiSU documents endnotes marked with asterisk';}, %{COMMENT ON COLUMN endnotes_asterisk.nid IS 'unique';}, %{COMMENT ON COLUMN endnotes_asterisk.document_lid @@ -522,15 +533,15 @@ module SiSU_DB_create IS 'endnote substantive content';}, %{COMMENT ON COLUMN endnotes_asterisk.ocn IS 'object citation no# <\~(\d+)> from which endnote is referenced';}, - %{COMMENT ON COLUMN documents.metadata_tid - IS 'tie to title in metadata - unique for each document';} + %{COMMENT ON COLUMN doc_objects.metadata_tid + IS 'tie to title in metadata_and_text - unique for each document';} ] conn_execute_array(sql_arr) end def endnotes_plus sql_arr=[ %{COMMENT ON Table endnotes_plus - IS 'contains searchable text of SiSU documents endnotes';}, + IS 'contains searchable text of SiSU documents endnotes marked with plus';}, %{COMMENT ON COLUMN endnotes_plus.nid IS 'unique';}, %{COMMENT ON COLUMN endnotes_plus.document_lid @@ -543,8 +554,8 @@ module SiSU_DB_create IS 'endnote substantive content';}, %{COMMENT ON COLUMN endnotes_plus.ocn IS 'object citation no# <\~(\d+)> from which endnote is referenced';}, - %{COMMENT ON COLUMN documents.metadata_tid - IS 'tie to title in metadata - unique for each document';}, + %{COMMENT ON COLUMN doc_objects.metadata_tid + IS 'tie to title in metadata_and_text - unique for each document';}, ] conn_execute_array(sql_arr) end @@ -552,8 +563,8 @@ module SiSU_DB_create sql_arr=[ %{COMMENT ON Table urls IS 'contains base url links to different SiSU output';}, - %{COMMENT ON COLUMN documents.metadata_tid - IS 'tie to title in metadata - unique for each document, the mapping of rows is one to one';}, + %{COMMENT ON COLUMN doc_objects.metadata_tid + IS 'tie to title in metadata_and_text - unique for each document, the mapping of rows is one to one';}, %{COMMENT ON COLUMN urls.plaintext IS 'plaintext utf-8';}, %{COMMENT ON COLUMN urls.html_toc diff --git a/lib/sisu/v2/db_drop.rb b/lib/sisu/v2/db_drop.rb index 673c5f8f..7189da56 100644 --- a/lib/sisu/v2/db_drop.rb +++ b/lib/sisu/v2/db_drop.rb @@ -70,8 +70,8 @@ module SiSU_DB_drop cascade='CASCADE' end @drop_table=[ - "DROP TABLE metadata #{cascade};", - "DROP TABLE documents #{cascade};", + "DROP TABLE metadata_and_text #{cascade};", + "DROP TABLE doc_objects #{cascade};", "DROP TABLE urls #{cascade};", "DROP TABLE endnotes #{cascade};", "DROP TABLE endnotes_asterisk #{cascade};", @@ -84,15 +84,35 @@ module SiSU_DB_drop msg_sqlite="as not all disk space is recovered after dropping the database << #{@db_info.sqlite.db} >>, you may be better off deleting the file, and recreating it as necessary" case @sql_type when /sqlite/ - @conn.transaction - @drop_table.each do |d| - @conn.execute(d) - end - @conn.commit puts msg_sqlite ans=@ans.response?('remove sql database?') - if ans and File.exist?(@db_info.sqlite.db) + if ans \ + and File.exist?(@db_info.sqlite.db) + @conn.close File.unlink(@db_info.sqlite.db) + db=SiSU_Env::Info_db.new + conn=db.sqlite.conn_sqlite3 + sdb=SiSU_DB_DBI::Create.new(@opt,conn,@db_info,@sql_type) + sdb_index=SiSU_DB_DBI::Index.new(@opt,conn,@db_info,@sql_type) + sdb.output_dir? + begin + sdb.create_db + sdb.create_table.metadata_and_text + sdb.create_table.doc_objects + sdb.create_table.endnotes + sdb.create_table.endnotes_asterisk + sdb.create_table.endnotes_plus + sdb.create_table.urls + sdb_index.create_indexes + rescue; SiSU_Errors::Info_error.new($!,$@,'-D').error; @sdb.output_dir? + end + exit + else + @conn.transaction + @drop_table.each do |d| + @conn.execute(d) + end + @conn.commit end else @drop_table.each do |d| @@ -115,33 +135,33 @@ module SiSU_DB_drop end def indexes #% drop all indexes #@conn.do(%{ - # DROP INDEX object_nr ON documents(ocn); - # DROP INDEX body ON documents(body); - # DROP INDEX clean ON documents(clean); - # DROP INDEX lev1 ON documents(lev1); - # DROP INDEX lev2 ON documents(lev2); - # DROP INDEX lev3 ON documents(lev3); - # DROP INDEX lev4 ON documents(lev4); - # DROP INDEX lev5 ON documents(lev5); - # DROP INDEX lev6 ON documents(lev6); + # DROP INDEX object_nr ON doc_objects(ocn); + # DROP INDEX body ON doc_objects(body); + # DROP INDEX clean ON doc_objects(clean); + # DROP INDEX lev1 ON doc_objects(lev1); + # DROP INDEX lev2 ON doc_objects(lev2); + # DROP INDEX lev3 ON doc_objects(lev3); + # DROP INDEX lev4 ON doc_objects(lev4); + # DROP INDEX lev5 ON doc_objects(lev5); + # DROP INDEX lev6 ON doc_objects(lev6); # DROP INDEX endnote_nr ON endnotes(nr); # DROP INDEX endnote ON endnotes(body); - # DROP INDEX title ON metadata(title); - # DROP INDEX filename ON metadata(filename) + # DROP INDEX title ON metadata_and_text(title); + # DROP INDEX filename ON metadata_and_text(filename) # /* - # DROP INDEX object_nr ON documents(ocn) CASCADE; - # DROP INDEX body ON documents(body) CASCADE; - # DROP INDEX clean ON documents(clean) CASCADE; - # DROP INDEX lev1 ON documents(lev1) CASCADE; - # DROP INDEX lev2 ON documents(lev2) CASCADE; - # DROP INDEX lev3 ON documents(lev3) CASCADE; - # DROP INDEX lev4 ON documents(lev4) CASCADE; - # DROP INDEX lev5 ON documents(lev5) CASCADE; - # DROP INDEX lev6 ON documents(lev6) CASCADE; + # DROP INDEX object_nr ON doc_objects(ocn) CASCADE; + # DROP INDEX body ON doc_objects(body) CASCADE; + # DROP INDEX clean ON doc_objects(clean) CASCADE; + # DROP INDEX lev1 ON doc_objects(lev1) CASCADE; + # DROP INDEX lev2 ON doc_objects(lev2) CASCADE; + # DROP INDEX lev3 ON doc_objects(lev3) CASCADE; + # DROP INDEX lev4 ON doc_objects(lev4) CASCADE; + # DROP INDEX lev5 ON doc_objects(lev5) CASCADE; + # DROP INDEX lev6 ON doc_objects(lev6) CASCADE; # DROP INDEX endnote_nr ON endnotes(nr) CASCADE; # DROP INDEX endnote ON endnotes(body) CASCADE; - # DROP INDEX title ON metadata(title) CASCADE; - # DROP INDEX filename ON metadata(filename) CASCADE + # DROP INDEX title ON metadata_and_text(title) CASCADE; + # DROP INDEX filename ON metadata_and_text(filename) CASCADE # */ #}) end diff --git a/lib/sisu/v2/db_import.rb b/lib/sisu/v2/db_import.rb index 1f795e68..5610a1d0 100644 --- a/lib/sisu/v2/db_import.rb +++ b/lib/sisu/v2/db_import.rb @@ -60,9 +60,10 @@ module SiSU_DB_import require "#{SiSU_lib}/db_columns" # db_columns.rb require "#{SiSU_lib}/db_load_tuple" # db_load_tuple.rb + require "#{SiSU_lib}/db_sqltxt" # db_sqltxt.rb require "#{SiSU_lib}/shared_html_lite" # shared_html_lite.rb require 'sqlite3' - class Import < SiSU_DB_columns::Column_size + class Import < SiSU_DB_text::Prepare include SiSU_Param include SiSU_Screen @@dl=nil @@ -86,7 +87,7 @@ module SiSU_DB_import @counter={} @db=SiSU_Env::Info_db.new @driver_sqlite3=(@conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]) ? true : false - sql='SELECT MAX(lid) FROM documents' + sql='SELECT MAX(lid) FROM doc_objects' begin @col[:lid] ||=0 @col[:lid]=if @driver_sqlite3 @@ -122,7 +123,7 @@ module SiSU_DB_import tell.print_grey if @opt.cmd =~/v/ file_exist=if @sql_type=~/sqlite/; nil else - @conn.select_one(%{ SELECT metadata.tid FROM metadata WHERE metadata.filename ~ '#{@opt.fns}'; }) + @conn.select_one(%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.filename = '#{@opt.fns}'; }) end if (@sql_type!~/sqlite/ and not file_exist) \ or @sql_type=~/sqlite/ @@ -192,28 +193,6 @@ module SiSU_DB_import end end end - def special_character_escape(str) - str.gsub!(/'/,"''") #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'") - str.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"
\n") - str.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check - str.gsub!(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/,'[image: \1] \2') - str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2') - str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,'\1') - str - end - def strip_markup(str) #define rules, make same as in dal clean - str.gsub!(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]') - str.gsub!(/(?: \\;|#{Mx[:nbsp]})+/,' ') - str.gsub!(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1') #tables - str.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' ') #tables - str.gsub!(/#{Mx[:tc_p]}/u,' ') #tables tidy later - str.gsub!(/<.+?>/,'') - str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:file|ftp)\/\/:\S+ /,' [image] ') # else image names found in search - str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [image]') # else image names found in search - str.gsub!(/\s\s+/,' ') - str.strip! - str - end def pf_db_import_transaction_open end def pf_db_import_transaction_close @@ -222,12 +201,23 @@ module SiSU_DB_import print %{ #{@cX.grey}import documents dbi_unit #{@cX.off} } unless @opt.cmd =~/q/ @tp={} @md=SiSU_Param::Parameters.new(@opt).get +#% sisutxt & fulltxt + if FileTest.exist?(@md.fns) + txt_arr=IO.readlines(@md.fns,'') + src=txt_arr.join("\n") + src=special_character_escape(src) + @tp[:sisutxt_f],@tp[:sisutxt_i]='sisutxt, ',"'#{src}', " + txt=clean_searchable_text(txt_arr) + #special_character_escape(txt) + @tp[:fulltxt_f],@tp[:fulltxt_i]='fulltxt, ',"'#{txt}', " + end +#% title if defined? @md.title.full \ and @md.title.full=~/\S+/ # DublinCore 1 - title - @tp[:title]=@md.title.full - special_character_escape(@tp[:title]) - @tp[:title_f],@tp[:title_i]='title, ',"'#{@tp[:title]}', " - sql='SELECT MAX(tid) FROM metadata' + #@tp[:title]=@md.title.full + #special_character_escape(@tp[:title]) + #@tp[:title_f],@tp[:title_i]='title, ',"'#{@tp[:title]}', " + sql='SELECT MAX(tid) FROM metadata_and_text' begin @@id_t ||=0 id_t=if @driver_sqlite3 @@ -242,220 +232,9 @@ module SiSU_DB_import @@id_t+=1 #bug related, needs to be performed once at start of file, but consider moving, as, placed here it means program will fail if document header lacks @title: puts %{\n#{@cX.grey}Processing file number#{@cX.off}: #{@cX.green}#{@@id_t}#{@@cX.off}} unless @opt.cmd =~/q/ end - if defined? @md.creator.author \ - and @md.creator.author=~/\S+/ # DublinCore 2 - creator/author (author) - txt=@md.creator.author #dc - special_character_escape(txt) - @tp[:creator_f],@tp[:creator_i]='creator, ',"'#{txt}', " - end - if defined? @md.creator.contributor \ - and @md.creator.contributor=~/\S+/ # DublinCore 6 - contributor - txt=@md.creator.contributor #dc - special_character_escape(txt) - @tp[:contributor_f],@tp[:contributor_i]='contributor, ',"'#{txt}', " - end - if defined? @md.creator.translator \ - and @md.creator.translator=~/\S+/ - txt=@md.creator.translator - special_character_escape(txt) - @tp[:translator_f],@tp[:translator_i]='translator, ',"'#{txt}', " - end - if defined? @md.creator.illustrator \ - and @md.creator.illustrator=~/\S+/ - txt=@md.creator.illustrator - special_character_escape(txt) - @tp[:illustrator_f],@tp[:illustrator_i]='illustrator, ',"'#{txt}', " - end - if defined? @md.publisher \ - and @md.publisher - txt=@md.publisher #dc - special_character_escape(txt) - @tp[:publisher_f],@tp[:publisher_i]='publisher, ',"'#{txt}', " - end - if defined? @md.creator.prepared_by \ - and @md.creator.prepared_by=~/\S+/ - txt=@md.creator.prepared_by - special_character_escape(txt) - @tp[:prepared_by_f],@tp[:prepared_by_i]='prepared_by, ',"'#{txt}', " - end - if defined? @md.creator.digitized_by \ - and @md.creator.digitized_by=~/\S+/ - txt=@md.creator.digitized_by - special_character_escape(txt) - @tp[:digitized_by_f],@tp[:digitized_by_i]='digitized_by, ',"'#{txt}', " - end - if defined? @md.classify.subject \ - and @md.classify.subject=~/\S+/ # DublinCore 3 - subject (us library of congress, eric or udc, or schema???) - txt=@md.classify.subject #dc - special_character_escape(txt) - @tp[:subject_f],@tp[:subject_i]='subject, ',"'#{txt}', " - end - if defined? @md.notes.description \ - and @md.notes.description=~/\S+/ # DublinCore 4 - description - txt=@md.notes.description #dc - special_character_escape(txt) - @tp[:description_f],@tp[:description_i]='description, ',"'#{txt}', " - end - if defined? @md.classify.subject \ - and @md.classify.subject=~/\S+/ # DublinCore 8 - type (genre eg. report, convention etc) - txt=@md.classify.abstract - special_character_escape(txt) - @tp[:abstract_f],@tp[:abstract_i]='abstract, ',"'#{txt}', " - end - if defined? @md.rights.all \ - and @md.rights.all=~/\S+/ # DublinCore 15 - rights - txt=@md.rights.all #dc - special_character_escape(txt) - @tp[:rights_f],@tp[:rights_i]='rights, ',"'#{txt}', " - end - if defined? @md.date.published \ - and @md.date.published=~/\S+/ # DublinCore 7 - date year-mm-dd - txt=@md.date.published #dc - special_character_escape(txt) - @tp[:date_f],@tp[:date_i]='date, ',"'#{txt}', " - end - if defined? @md.date.created \ - and @md.date.created=~/\S+/ - txt=@md.date.created #dc - special_character_escape(txt) - @tp[:date_created_f],@tp[:date_created_i]='date_created, ',"'#{txt}', " - end - if defined? @md.date.issued \ - and @md.date.issued=~/\S+/ - txt=@md.date.issued #dc - special_character_escape(txt) - @tp[:date_issued_f],@tp[:date_issued_i]='date_issued, ',"'#{txt}', " - end - if defined? @md.date.available \ - and @md.date.available=~/\S+/ - txt=@md.date.available #dc - special_character_escape(txt) - @tp[:date_available_f],@tp[:date_available_i]='date_available, ',"'#{txt}', " - end - if defined? @md.date.modified \ - and @md.date.modified=~/\S+/ - txt=@md.date.modified #dc - special_character_escape(txt) - @tp[:date_modified_f],@tp[:date_modified_i]='date_modified, ',"'#{txt}', " - end - if defined? @md.date.valid \ - and @md.date.valid=~/\S+/ - txt=@md.date.valid #dc - special_character_escape(txt) - @tp[:date_valid_f],@tp[:date_valid_i]='date_valid, ',"'#{txt}', " - end - if defined? @md.title.language \ - and @md.title.language=~/\S+/ - txt=@md.title.language - special_character_escape(txt) - @tp[:language_f],@tp[:language_i]='language, ',"'#{txt}', " - end - if defined? @md.original.language \ - and @md.original.language=~/\S+/ - txt=@md.original.language - special_character_escape(txt) - @tp[:language_original_f],@tp[:language_original_i]='language_original, ',"'#{txt}', " - end - if defined? @md.classify.format \ - and @md.classify.format=~/\S+/ # DublinCore 9 - format (use your mime type) - txt=@md.classify.format #dc - special_character_escape(txt) - @tp[:format_f],@tp[:format_i]='format, ',"'#{txt}', " - end - if defined? @md.classify.identifier \ - and @md.classify.identifier=~/\S+/ # DublinCore 10 - identifier (your identifier, could use urn which is free) - txt=@md.classify.identifier #dc - special_character_escape(txt) - @tp[:identifier_f],@tp[:identifier_i]='identifier, ',"'#{txt}', " - end - if defined? @md.original.source \ - and @md.original.source=~/\S+/ # DublinCore 11 - source (document source) - txt=@md.original.source #dc - special_character_escape(txt) - @tp[:source_f],@tp[:source_i]='source, ',"'#{txt}', " - end - if defined? @md.classify.relation \ - and @md.classify.relation=~/\S+/ # DublinCore 13 - relation - txt=@md.classify.relation #dc - special_character_escape(txt) - @tp[:relation_f],@tp[:relation_i]='relation, ',"'#{txt}', " - end - if defined? @md.classify.coverage \ - and @md.classify.coverage=~/\S+/ # DublinCore 14 - coverage - txt=@md.classify.coverage #dc - special_character_escape(txt) - @tp[:coverage_f],@tp[:coverage_i]='coverage, ',"'#{txt}', " - end - if defined? @md.classify.keywords \ - and @md.classify.keywords=~/\S+/ - txt=@md.classify.keywords - special_character_escape(txt) - @tp[:keywords_f],@tp[:keywords_i]='keywords, ',"'#{txt}', " - end - if defined? @md.notes.comment \ - and @md.notes.comment=~/\S+/ - txt=@md.notes.comments - special_character_escape(txt) - @tp[:comments_f],@tp[:comments_i]='comments, ',"'#{txt}', " - end - if defined? @md.classify.loc \ - and @md.classify.loc=~/\S+/ - txt=@md.classify.loc - special_character_escape(txt) - @tp[:cls_loc_f],@tp[:cls_loc_i]='cls_loc, ',"'#{txt}', " - end - if defined? @md.classify.dewey \ - and @md.classify.dewey=~/\S+/ - txt=@md.classify.dewey - special_character_escape(txt) - @tp[:cls_dewey_f],@tp[:cls_dewey_i]='cls_dewey, ',"'#{txt}', " - end - if defined? @md.classify.pg \ - and @md.classify.pg=~/\S+/ - txt=@md.classify.pg - special_character_escape(txt) - @tp[:cls_pg_f],@tp[:cls_pg_i]='cls_pg, ',"'#{txt}', " - end - if defined? @md.classify.isbn \ - and @md.classify.isbn=~/\S+/ - txt=@md.classify.isbn - special_character_escape(txt) - @tp[:cls_isbn_f],@tp[:cls_isbn_i]='cls_isbn, ',"'#{txt}', " - end - if defined? @md.notes.prefix_a \ - and @md.notes.prefix_a=~/\S+/ - txt=@md.notes.prefix_a - special_character_escape(txt) - @tp[:prefix_a_f],@tp[:prefix_a_i]='prefix_a, ',"'#{txt}', " - end - if defined? @md.notes.prefix_b \ - and @md.notes.prefix_b=~/\S+/ - txt=@md.notes.prefix_b - special_character_escape(txt) - @tp[:prefix_b_f],@tp[:prefix_b_i]='prefix_b, ',"'#{txt}', " - end - if defined? @md.fns \ - and @md.fns=~/\S+/ - txt=@md.fns - special_character_escape(txt) - @tp[:fns_f],@tp[:fns_i]="filename, ","'#{txt}', " - end - if @md.wc_words; txt=@md.wc_words - @tp[:wc_words_f],@tp[:wc_words_i]='wc_words, ',"'#{txt}', " - end - if defined? @md.dgst \ - and @md.dgst.class==Array - txt=@md.dgst[1] - @tp[:dgst_f],@tp[:dgst_i]='dgst, ',"'#{txt}', " - end - if @md.sc_date; txt=@md.sc_date - @tp[:sc_date_f],@tp[:sc_date_i]='sc_date, ',"'#{txt}', " - end - if @md.generated; txt=@md.generated - @tp[:generated_f],@tp[:generated_i]='generated, ',"'#{@txt}', " - end + ################ CLEAR ############## SiSU_DB_DBI::Test.new(self,@opt).verify #% import title names, filenames (tuple) - t=SiSU_DB_tuple::Load_metadata.new(@conn,@tp,@@id_t,@opt,@file) + t=SiSU_DB_tuple::Load_metadata.new(@conn,@@id_t,@md,@file) tuple=t.tuple tuple end @@ -482,13 +261,7 @@ module SiSU_DB_import and data.ln.inspect=~/[123]/ @col[:lev],txt,@col[:ocn],@col[:lev_an],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]=data.ln,data.obj,data.ocn,data.lv,data.odv,data.osp,data.of,data.is,data.node,data.parent,'','' @col[:lid]+=1 - if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ - endnotes(txt).range - @en << endnotes(txt).standard if txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ - @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ - @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_o]}/ - txt=endnotes(txt).clean_text - end + txt=endnotes(txt).extract_any @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_minus special_character_escape(@col[:body]) @col[:plaintext]=@col[:body].dup @@ -521,13 +294,7 @@ module SiSU_DB_import end @env=SiSU_Env::Info_env.new(@md.fns) @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" - if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ - endnotes(txt).range - @en << endnotes(txt).standard if txt =~ /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ - @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ - @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ - txt=endnotes(txt).clean_text(@base_url) - end + txt=endnotes(txt).extract_any @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus special_character_escape(@col[:body]) @col[:plaintext]=@col[:body].dup @@ -553,13 +320,7 @@ module SiSU_DB_import end @env=SiSU_Env::Info_env.new(@md.fns) @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" - if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ - endnotes(txt).range - @en << endnotes(txt).standard if txt =~ /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ - @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ - @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ - txt=endnotes(txt).clean_text(@base_url) - end + txt=endnotes(txt).extract_any @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus special_character_escape(@col[:body]) @col[:plaintext]=@col[:body].dup @@ -585,13 +346,7 @@ module SiSU_DB_import end @env=SiSU_Env::Info_env.new(@md.fns) @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" - if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ - endnotes(txt).range - @en << endnotes(txt).standard if txt =~ /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ - @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ - @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ - txt=endnotes(txt).clean_text(@base_url) - end + txt=endnotes(txt).extract_any @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus special_character_escape(@col[:body]) @col[:plaintext]=@col[:body].dup @@ -613,15 +368,9 @@ module SiSU_DB_import end @env=SiSU_Env::Info_env.new(@md.fns) @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" - if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ - endnotes(txt).range - @en << endnotes(txt).standard if txt =~ /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ - @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ - @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ - txt=endnotes(txt).clean_text(@base_url) - end + txt=endnotes(txt).extract_any if @sql_type=~/pg/ \ - and txt.size > (document_clean - 1) #% examine pg build & remove limitation + and txt.size > (SiSU_DB_columns::Column_size.new.document_clean - 1) #% examine pg build & remove limitation puts "\n\nTOO LARGE (TXT - see error log)\n\n" open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| error.puts("\n#{@opt.fns}\nTEXT BODY\n#{@col[:body].size} object #{@col[:ocn]} -> #{@col[:body].slice(0..500)}") @@ -660,7 +409,7 @@ module SiSU_DB_import #special_character_escape(body) #special_character_escape(txt) strip_markup(txt) - if txt.size > (endnote_clean - 1) + if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1) puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}") @@ -698,7 +447,7 @@ module SiSU_DB_import special_character_escape(txt) body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt) strip_markup(txt) - if txt.size > (endnote_clean - 1) + if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1) puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}") @@ -736,7 +485,7 @@ module SiSU_DB_import special_character_escape(txt) body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt) strip_markup(txt) - if txt.size > (endnote_clean - 1) + if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1) puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}") @@ -772,6 +521,16 @@ module SiSU_DB_import end def endnotes(txt) @txt=txt + def extract_any + if @txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ + endnotes(@txt).range + @en << endnotes(@txt).standard if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ + @en_ast << endnotes(@txt).asterisk if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ + @en_pls << endnotes(@txt).plus if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_o]}/ + @txt=endnotes(@txt).clean_text + end + @txt + end def standard x=if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/; @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/) else nil diff --git a/lib/sisu/v2/db_indexes.rb b/lib/sisu/v2/db_indexes.rb index a64fb362..3cbcc20c 100644 --- a/lib/sisu/v2/db_indexes.rb +++ b/lib/sisu/v2/db_indexes.rb @@ -73,15 +73,15 @@ module SiSU_DB_index def base print "\n create documents common indexes\n" unless @opt.cmd =~/q/ sql_arr=[ - %{CREATE INDEX object_nr ON documents(ocn);}, - %{CREATE INDEX digest_clean ON documents(digest_clean);}, - %{CREATE INDEX digest_all ON documents(digest_all);}, - %{CREATE INDEX lev1 ON documents(lev1);}, - %{CREATE INDEX lev2 ON documents(lev2);}, - %{CREATE INDEX lev3 ON documents(lev3);}, - %{CREATE INDEX lev4 ON documents(lev4);}, - %{CREATE INDEX lev5 ON documents(lev5);}, - %{CREATE INDEX lev6 ON documents(lev6);}, + %{CREATE INDEX object_nr ON doc_objects(ocn);}, + %{CREATE INDEX digest_clean ON doc_objects(digest_clean);}, + %{CREATE INDEX digest_all ON doc_objects(digest_all);}, + %{CREATE INDEX lev1 ON doc_objects(lev1);}, + %{CREATE INDEX lev2 ON doc_objects(lev2);}, + %{CREATE INDEX lev3 ON doc_objects(lev3);}, + %{CREATE INDEX lev4 ON doc_objects(lev4);}, + %{CREATE INDEX lev5 ON doc_objects(lev5);}, + %{CREATE INDEX lev6 ON doc_objects(lev6);}, %{CREATE INDEX endnote_nr ON endnotes(nr);}, %{CREATE INDEX digest_en ON endnotes(digest_clean);}, %{CREATE INDEX endnote_nr_asterisk ON endnotes_asterisk(nr);}, @@ -90,15 +90,15 @@ module SiSU_DB_index %{CREATE INDEX endnote_nr_plus ON endnotes_plus(nr);}, %{CREATE INDEX endnote_plus ON endnotes_plus(clean);}, %{CREATE INDEX digest_en_plus ON endnotes_plus(digest_clean);}, - %{CREATE INDEX title ON metadata(title);}, - %{CREATE INDEX filename ON metadata(filename)}, + %{CREATE INDEX title ON metadata_and_text(title);}, + %{CREATE INDEX filename ON metadata_and_text(filename)}, ] conn_execute_array(sql_arr) end def text print "\n create documents text indexes\n" unless @opt.cmd =~/q/ sql_arr=[ - %{CREATE INDEX clean ON documents(clean);}, + %{CREATE INDEX clean ON doc_objects(clean);}, %{CREATE INDEX endnote ON endnotes(clean);} ] conn_execute_array(sql_arr) diff --git a/lib/sisu/v2/db_load_tuple.rb b/lib/sisu/v2/db_load_tuple.rb index 2fc3a455..cc00b74a 100644 --- a/lib/sisu/v2/db_load_tuple.rb +++ b/lib/sisu/v2/db_load_tuple.rb @@ -58,6 +58,7 @@ =end module SiSU_DB_tuple + require "#{SiSU_lib}/db_columns" # db_columns.rb class Load_documents require "#{SiSU_lib}/param" # param.rb include SiSU_Param @@ -78,10 +79,10 @@ module SiSU_DB_tuple end def tuple #% import line sql_entry=if @col[:en_a] - "INSERT INTO documents (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " + + "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " + "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:en_a]}', '#{@col[:en_z]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" else - "INSERT INTO documents (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, t_of, t_is, node, parent, digest_clean, digest_all) " + + "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, t_of, t_is, node, parent, digest_clean, digest_all) " + "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" end if @opt.cmd =~/M/ @@ -113,13 +114,174 @@ module SiSU_DB_tuple sql_entry end end - class Load_metadata - def initialize(conn,tp,id,opt,file) - @conn,@tp,@id,@opt,@file=conn,tp,id,opt,file + class Load_metadata #< SiSU_DB_columns::Columns + def initialize(conn,id,md,file) + @conn,@id,@opt,@file=conn,id,md,file + @tp=SiSU_DB_columns::Columns.new(md) end def tuple - sql_entry="INSERT INTO metadata (#{@tp[:fns_f]} #{@tp[:suffix_f]} #{@tp[:title_f]} #{@tp[:subtitle_f]} #{@tp[:creator_f]} #{@tp[:illustrator_f]} #{@tp[:translator_f]} #{@tp[:subject_f]} #{@tp[:description_f]} #{@tp[:publisher_f]} #{@tp[:contributor_f]} #{@tp[:prepared_by_f]} #{@tp[:digitized_by_f]} #{@tp[:date_f]} #{@tp[:date_created_f]} #{@tp[:date_issued_f]} #{@tp[:date_valid_f]} #{@tp[:date_available_f]} #{@tp[:date_modified_f]} #{@tp[:type_f]} #{@tp[:format_f]} #{@tp[:identifier_f]} #{@tp[:source_f]} #{@tp[:language_f]} #{@tp[:language_original_f]} #{@tp[:relation_f]} #{@tp[:coverage_f]} #{@tp[:rights_f]} #{@tp[:copyright_f]} #{@tp[:owner_f]} #{@tp[:keywords_f]} #{@tp[:abstract_f]} #{@tp[:comment_f]} #{@tp[:loc_f]} #{@tp[:dewey_f]} #{@tp[:isbn_f]} #{@tp[:pg_f]} #{@tp[:prefix_a_f]} #{@tp[:prefix_b_f]} tid) " + - "VALUES (#{@tp[:fns_i]} #{@tp[:suffix_i]} #{@tp[:title_i]} #{@tp[:subtitle_i]} #{@tp[:creator_i]} #{@tp[:illustrator_i]} #{@tp[:translator_i]} #{@tp[:subject_i]} #{@tp[:description_i]} #{@tp[:publisher_i]} #{@tp[:contributor_i]} #{@tp[:prepared_by_i]} #{@tp[:digitized_by_i]} #{@tp[:date_i]} #{@tp[:date_created_i]} #{@tp[:date_issued_i]} #{@tp[:date_valid_i]} #{@tp[:date_available_i]} #{@tp[:date_modified_i]} #{@tp[:type_i]} #{@tp[:format_i]} #{@tp[:identifier_i]} #{@tp[:source_i]} #{@tp[:language_i]} #{@tp[:language_original_i]} #{@tp[:relation_i]} #{@tp[:coverage_i]} #{@tp[:rights_i]} #{@tp[:copyright_i]} #{@tp[:owner_i]} #{@tp[:keywords_i]} #{@tp[:abstract_i]} #{@tp[:comment_i]} #{@tp[:loc_i]} #{@tp[:dewey_i]} #{@tp[:isbn_i]} #{@tp[:pg_i]} #{@tp[:prefix_a_i]} #{@tp[:prefix_b_i]} #{@id});" + sql_entry="INSERT INTO metadata_and_text ( +#{@tp.column.title.tuple[0]} +#{@tp.column.title_main.tuple[0]} +#{@tp.column.title_sub.tuple[0]} +#{@tp.column.title_short.tuple[0]} +#{@tp.column.title_edition.tuple[0]} +#{@tp.column.title_note.tuple[0]} +#{@tp.column.title_language.tuple[0]} +#{@tp.column.title_language_char.tuple[0]} +#{@tp.column.creator_author.tuple[0]} +#{@tp.column.creator_author_honorific.tuple[0]} +#{@tp.column.creator_author_nationality.tuple[0]} +#{@tp.column.creator_contributor.tuple[0]} +#{@tp.column.creator_illustrator.tuple[0]} +#{@tp.column.creator_photographer.tuple[0]} +#{@tp.column.creator_translator.tuple[0]} +#{@tp.column.creator_prepared_by.tuple[0]} +#{@tp.column.creator_digitized_by.tuple[0]} +#{@tp.column.creator_audio.tuple[0]} +#{@tp.column.creator_video.tuple[0]} +#{@tp.column.language_document.tuple[0]} +#{@tp.column.language_document_char.tuple[0]} +#{@tp.column.language_original.tuple[0]} +#{@tp.column.language_original_char.tuple[0]} +#{@tp.column.date_added_to_site.tuple[0]} +#{@tp.column.date_available.tuple[0]} +#{@tp.column.date_created.tuple[0]} +#{@tp.column.date_issued.tuple[0]} +#{@tp.column.date_modified.tuple[0]} +#{@tp.column.date_published.tuple[0]} +#{@tp.column.date_valid.tuple[0]} +#{@tp.column.date_translated.tuple[0]} +#{@tp.column.date_original_publication.tuple[0]} +#{@tp.column.date_generated.tuple[0]} +#{@tp.column.publisher.tuple[0]} +#{@tp.column.original_publisher.tuple[0]} +#{@tp.column.original_language.tuple[0]} +#{@tp.column.original_language_char.tuple[0]} +#{@tp.column.original_source.tuple[0]} +#{@tp.column.original_institution.tuple[0]} +#{@tp.column.original_nationality.tuple[0]} +#{@tp.column.rights_all.tuple[0]} +#{@tp.column.rights_copyright_text.tuple[0]} +#{@tp.column.rights_copyright_translation.tuple[0]} +#{@tp.column.rights_copyright_illustrations.tuple[0]} +#{@tp.column.rights_copyright_photographs.tuple[0]} +#{@tp.column.rights_copyright_preparation.tuple[0]} +#{@tp.column.rights_copyright_digitization.tuple[0]} +#{@tp.column.rights_copyright_audio.tuple[0]} +#{@tp.column.rights_copyright_video.tuple[0]} +#{@tp.column.rights_license.tuple[0]} +#{@tp.column.classify_topic_register.tuple[0]} +#{@tp.column.classify_subject.tuple[0]} +#{@tp.column.classify_type.tuple[0]} +#{@tp.column.classify_loc.tuple[0]} +#{@tp.column.classify_dewey.tuple[0]} +#{@tp.column.classify_pg.tuple[0]} +#{@tp.column.classify_isbn.tuple[0]} +#{@tp.column.classify_format.tuple[0]} +#{@tp.column.classify_identifier.tuple[0]} +#{@tp.column.classify_relation.tuple[0]} +#{@tp.column.classify_coverage.tuple[0]} +#{@tp.column.classify_keywords.tuple[0]} +#{@tp.column.notes_abstract.tuple[0]} +#{@tp.column.notes_comment.tuple[0]} +#{@tp.column.notes_description.tuple[0]} +#{@tp.column.notes_history.tuple[0]} +#{@tp.column.notes_prefix.tuple[0]} +#{@tp.column.notes_prefix_a.tuple[0]} +#{@tp.column.notes_prefix_b.tuple[0]} +#{@tp.column.notes_suffix.tuple[0]} +#{@tp.column.filename.tuple[0]} +#{@tp.column.sisutxt.tuple[0]} +#{@tp.column.fulltext.tuple[0]} +#{@tp.column.word_count.tuple[0]} +#{@tp.column.digest.tuple[0]} +#{@tp.column.skin_name.tuple[0]} +#{@tp.column.skin.tuple[0]} +#{@tp.column.links.tuple[0]} +tid) +" + + "VALUES ( +#{@tp.column.title.tuple[1]} +#{@tp.column.title_main.tuple[1]} +#{@tp.column.title_sub.tuple[1]} +#{@tp.column.title_short.tuple[1]} +#{@tp.column.title_edition.tuple[1]} +#{@tp.column.title_note.tuple[1]} +#{@tp.column.title_language.tuple[1]} +#{@tp.column.title_language_char.tuple[1]} +#{@tp.column.creator_author.tuple[1]} +#{@tp.column.creator_author_honorific.tuple[1]} +#{@tp.column.creator_author_nationality.tuple[1]} +#{@tp.column.creator_contributor.tuple[1]} +#{@tp.column.creator_illustrator.tuple[1]} +#{@tp.column.creator_photographer.tuple[1]} +#{@tp.column.creator_translator.tuple[1]} +#{@tp.column.creator_prepared_by.tuple[1]} +#{@tp.column.creator_digitized_by.tuple[1]} +#{@tp.column.creator_audio.tuple[1]} +#{@tp.column.creator_video.tuple[1]} +#{@tp.column.language_document.tuple[1]} +#{@tp.column.language_document_char.tuple[1]} +#{@tp.column.language_original.tuple[1]} +#{@tp.column.language_original_char.tuple[1]} +#{@tp.column.date_added_to_site.tuple[1]} +#{@tp.column.date_available.tuple[1]} +#{@tp.column.date_created.tuple[1]} +#{@tp.column.date_issued.tuple[1]} +#{@tp.column.date_modified.tuple[1]} +#{@tp.column.date_published.tuple[1]} +#{@tp.column.date_valid.tuple[1]} +#{@tp.column.date_translated.tuple[1]} +#{@tp.column.date_original_publication.tuple[1]} +#{@tp.column.date_generated.tuple[1]} +#{@tp.column.publisher.tuple[1]} +#{@tp.column.original_publisher.tuple[1]} +#{@tp.column.original_language.tuple[1]} +#{@tp.column.original_language_char.tuple[1]} +#{@tp.column.original_source.tuple[1]} +#{@tp.column.original_institution.tuple[1]} +#{@tp.column.original_nationality.tuple[1]} +#{@tp.column.rights_all.tuple[1]} +#{@tp.column.rights_copyright_text.tuple[1]} +#{@tp.column.rights_copyright_translation.tuple[1]} +#{@tp.column.rights_copyright_illustrations.tuple[1]} +#{@tp.column.rights_copyright_photographs.tuple[1]} +#{@tp.column.rights_copyright_preparation.tuple[1]} +#{@tp.column.rights_copyright_digitization.tuple[1]} +#{@tp.column.rights_copyright_audio.tuple[1]} +#{@tp.column.rights_copyright_video.tuple[1]} +#{@tp.column.rights_license.tuple[1]} +#{@tp.column.classify_topic_register.tuple[1]} +#{@tp.column.classify_subject.tuple[1]} +#{@tp.column.classify_type.tuple[1]} +#{@tp.column.classify_loc.tuple[1]} +#{@tp.column.classify_dewey.tuple[1]} +#{@tp.column.classify_pg.tuple[1]} +#{@tp.column.classify_isbn.tuple[1]} +#{@tp.column.classify_format.tuple[1]} +#{@tp.column.classify_identifier.tuple[1]} +#{@tp.column.classify_relation.tuple[1]} +#{@tp.column.classify_coverage.tuple[1]} +#{@tp.column.classify_keywords.tuple[1]} +#{@tp.column.notes_abstract.tuple[1]} +#{@tp.column.notes_comment.tuple[1]} +#{@tp.column.notes_description.tuple[1]} +#{@tp.column.notes_history.tuple[1]} +#{@tp.column.notes_prefix.tuple[1]} +#{@tp.column.notes_prefix_a.tuple[1]} +#{@tp.column.notes_prefix_b.tuple[1]} +#{@tp.column.notes_suffix.tuple[1]} +#{@tp.column.filename.tuple[1]} +#{@tp.column.sisutxt.tuple[1]} +#{@tp.column.fulltext.tuple[1]} +#{@tp.column.word_count.tuple[1]} +#{@tp.column.digest.tuple[1]} +#{@tp.column.skin_name.tuple[1]} +#{@tp.column.skin.tuple[1]} +#{@tp.column.links.tuple[1]} +#{@id} +);" if @opt.cmd =~/M/ puts "maintenance mode on: creating sql transaction file (for last transaction set (document) only):\n\t#{@file.inspect}" @file.puts sql_entry diff --git a/lib/sisu/v2/db_remove.rb b/lib/sisu/v2/db_remove.rb index 99640cdf..0a51b892 100644 --- a/lib/sisu/v2/db_remove.rb +++ b/lib/sisu/v2/db_remove.rb @@ -68,19 +68,19 @@ module SiSU_DB_remove def remove driver_sqlite3=(@conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]) ? true : false del_id=if driver_sqlite3 - @conn.get_first_value(%{ SELECT tid FROM metadata WHERE filename LIKE '#{@opt.fns}'; }).to_i + @conn.get_first_value(%{ SELECT tid FROM metadata_and_text WHERE filename = '#{@opt.fns}'; }).to_i else - x=@conn.select_one(%{ SELECT tid FROM metadata WHERE filename LIKE '#{@opt.fns}'; }) - del=x ? (x.join.to_i) : nil + x=@conn.select_one(%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.filename = '#{@opt.fns}'; }) + x ? (x.join.to_i) : nil end if del_id sql_entry=[ "DELETE FROM endnotes WHERE metadata_tid = '#{del_id}';", "DELETE FROM endnotes_asterisk WHERE metadata_tid = '#{del_id}';", "DELETE FROM endnotes_plus WHERE metadata_tid = '#{del_id}';", - "DELETE FROM documents WHERE metadata_tid = '#{del_id}';", + "DELETE FROM doc_objects WHERE metadata_tid = '#{del_id}';", "DELETE FROM urls WHERE metadata_tid = '#{del_id}';", - "DELETE FROM metadata WHERE tid = '#{del_id}';", + "DELETE FROM metadata_and_text WHERE metadata_and_text.tid = '#{del_id}';", ] if driver_sqlite3 @conn.transaction diff --git a/lib/sisu/v2/db_select.rb b/lib/sisu/v2/db_select.rb index 33441b45..1ac9195f 100644 --- a/lib/sisu/v2/db_select.rb +++ b/lib/sisu/v2/db_select.rb @@ -69,6 +69,8 @@ module SiSU_DB_select if @opt.mod.inspect =~/update|import/ @sdb_import=SiSU_DB_DBI::Import.new(@opt,@conn,@file,@sql_type) @sdb_remove_doc=SiSU_DB_DBI::Remove.new(@opt,@conn,@file) + elsif @opt.mod.inspect =~/remove/ + @sdb_remove_doc=SiSU_DB_DBI::Remove.new(@opt,@conn,@file) end end def sql_maintenance_file @@ -98,8 +100,8 @@ module SiSU_DB_select when /^--(?:init(?:ialize)?|create(?:all)?)$/ @sdb.output_dir? begin - @sdb.create_table.metadata - @sdb.create_table.documents + @sdb.create_table.metadata_and_text + @sdb.create_table.doc_objects @sdb.create_table.endnotes @sdb.create_table.endnotes_asterisk @sdb.create_table.endnotes_plus @@ -110,8 +112,8 @@ module SiSU_DB_select when /^--createtable(s)?$/ @sdb.output_dir? begin - @sdb.create_table.metadata - @sdb.create_table.documents + @sdb.create_table.metadata_and_text + @sdb.create_table.doc_objects @sdb.create_table.endnotes @sdb.create_table.endnotes_asterisk @sdb.create_table.endnotes_plus @@ -123,8 +125,8 @@ module SiSU_DB_select @sdb.output_dir? begin @sdb_no.drop.tables - @sdb.create_table.metadata - @sdb.create_table.documents + @sdb.create_table.metadata_and_text + @sdb.create_table.doc_objects @sdb.create_table.endnotes @sdb.create_table.endnotes_asterisk @sdb.create_table.endnotes_plus @@ -135,13 +137,13 @@ module SiSU_DB_select when /^--cr(eate)?lex$/ @sdb.output_dir? begin - @sdb.create_table.documents + @sdb.create_table.doc_objects rescue; @sdb.output_dir? end when /^--cr(eate)?metadata$/ @sdb.output_dir? begin - @sdb.create_table.metadata + @sdb.create_table.metadata_and_text rescue; @sdb.output_dir? end when /^--import$/ diff --git a/lib/sisu/v2/db_sqltxt.rb b/lib/sisu/v2/db_sqltxt.rb new file mode 100644 index 00000000..f120b95f --- /dev/null +++ b/lib/sisu/v2/db_sqltxt.rb @@ -0,0 +1,115 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + #___# + + * Author: Ralph Amissah + + * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007, 2008, 2009, 2010 Ralph Amissah All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: system environment, resource control and configuration details + +=end +module SiSU_DB_text + class Prepare + def special_character_escape(str) + str.gsub!(/'/,"''") #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'") + str.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"
\n") + str.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check + str.gsub!(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/,'[image: \1] \2') + str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2') + str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,'\1') + str + end + def clean_searchable_text(arr) #produce clean, searchable, plaintext from document source + txt_arr,en=[],[] + arr.each do |s| + s.gsub!(/([*\/_-])\{(.+?)\}\1/,'\2') + s.gsub!(/^(?:group|poem|code)\{/,''); s.gsub!(/^\}(?:group|poem|code)/,'') + s.gsub!(/\A(?:@\S+:\s+.+)\Z/m,'') + if s =~/^:A~/ + s.gsub!(/@author/,@md.creator.author) + s.gsub!(/@title/,@md.title.full) + end + s.gsub!(/^(?:_[1-9]\*?|_\*)\s+/,'') + s.gsub!(/^(?:[1-9]\~(\S+)?)\s+/,'') + s.gsub!(/^(?::?[A-C]\~(\S+)?)\s+/,'') + s.gsub!(/^%{1,3} .+/,'') #removed even if contained in code block + s.gsub!(/
/,' ') + en << s.scan(/~\{\s*(.+?)\s*\}~/) + s.gsub!(/~\{.+?\}~/,'') + s.gsub!(/ \s+/,' ') + #special_character_escape(s) + s + end + txt_arr << arr << en + #txt_arr=txt_arr.flatten + txt=txt_arr.flatten.join("\n") + txt=special_character_escape(txt) + txt + end + def strip_markup(str) #define rules, make same as in dal clean + str.gsub!(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]') + str.gsub!(/(?: \\;|#{Mx[:nbsp]})+/,' ') + str.gsub!(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1') #tables + str.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' ') #tables + str.gsub!(/#{Mx[:tc_p]}/u,' ') #tables tidy later + str.gsub!(/<.+?>/,'') + str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:file|ftp)\/\/:\S+ /,' [image] ') # else image names found in search + str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [image]') # else image names found in search + str.gsub!(/\s\s+/,' ') + str.strip! + str + end + end +end +__END__ + -- cgit v1.2.3 From dbc227d7f5f164f1bb584295581f7a98ecac1292 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Fri, 16 Apr 2010 10:15:34 -0400 Subject: param checks metadata string lengths against set db column sizes * param, checks metadata string lengths against set db column sizes, drops entries that are too long with warning * param, classify "populated" a fix --- lib/sisu/v2/param.rb | 278 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 207 insertions(+), 71 deletions(-) (limited to 'lib') diff --git a/lib/sisu/v2/param.rb b/lib/sisu/v2/param.rb index 9d1f03e9..c50a55ee 100644 --- a/lib/sisu/v2/param.rb +++ b/lib/sisu/v2/param.rb @@ -152,6 +152,21 @@ module SiSU_Param def initialize(str) @s=str end + def validate_length(s,l,n) + #s=(s.length <= l) ? s : nil + s=if s.class==String \ + and s.length <= l + s + elsif s.class==NilClass; nil + elsif s.class !=String + puts "#{n} is #{s.class}: programming error, String expected #{__FILE__}:#{__LINE__}" + s + else + tell=SiSU_Screen::Ansi.new('v',"#{n} length #{s.length} exceeds set db field length #{l}, metadata dropped") + tell.warn + nil + end + end def name_format(name) if name name.strip! @@ -214,49 +229,81 @@ module SiSU_Param a=@s.split(/[ ]*\n[ ]*/m) @h=build_hash(a) def main - @h['main'] + s=@h['main'] + l,n=Db[:col_title_part],'title.main' + validate_length(s,l,n) end def sub - @h['subtitle'] + s=@h['subtitle'] + l,n=Db[:col_title_part],'title.subtitle' + validate_length(s,l,n) end def edition - @h['edition'] + s=@h['edition'] + l,n=Db[:col_title_edition],'title.edition' + validate_length(s,l,n) end def note - @h['note'] + @h['note'] #TEXT end def short s=(@h['short'] ? @h['short'] : @h['main']) + l,n=Db[:col_title_part],'title.short' + validate_length(s,l,n) end def full s=(@h['subtitle'] ? (@h['main'] + ' - ' + @h['subtitle']) : @h['main']) + l,n=Db[:col_title],'title.full' + validate_length(s,l,n) + end + def language + s=@h['language'] + l,n=Db[:col_language],'title.language' + validate_length(s,l,n) + end + def language_char + s=@h['language_char'] + l,n=Db[:col_language_char],'title.language_char' + validate_length(s,l,n) end self end - def creator #there are sub categories that need to be catered for and sometimes more than one author etc. + def creator #there are sub categories that need to be catered for and sometimes more than one author etc.; implement array.to_s.length validation test later, current test on string approximate as string is not used a=@s.split(/[ ]*\n[ ]*/m) @h=build_hash(a) def author @h['author']=(@h['author'] ? @h['author'] : @h['main']) names=name_format(@h['author']) - names[:name_str] + s=names[:name_str] + l,n=Db[:col_name],'creator.author' + validate_length(s,l,n) end - def author_detail + def author_detail s=(@h['author'] ? @h['author'] : @h['main']) names=name_format(s) names[:name_a_h] end - def translator - names=(@h['translator'] ? name_format(@h['translator']) : nil) - (names.class==Hash) ? names[:name_str] : nil + def contributor + names=(@h['contributor'] ? name_format(@h['contributor']) : nil) + s=(names.class==Hash) ? names[:name_str] : nil + s=if s + l,n=Db[:col_name],'creator.author' + validate_length(s,l,n) + else nil + end end - def translator_detail - names=(@h['translator'] ? name_format(@h['translator']) : nil) + def contributor_detail + names=(@h['contributor'] ? name_format(@h['contributor']) : nil) (names.class==Hash) ? names[:name_a_h] : nil end def illustrator names=(@h['illustrator'] ? name_format(@h['illustrator']) : nil) - (names.class==Hash) ? names[:name_str] : nil + s=(names.class==Hash) ? names[:name_str] : nil + s=if s + l,n=Db[:col_name],'creator.illustrator' + validate_length(s,l,n) + else nil + end end def illustrator_detail names=(@h['illustrator'] ? name_format(@h['illustrator']) : nil) @@ -264,15 +311,38 @@ module SiSU_Param end def photographer names=(@h['photographer'] ? name_format(@h['photographer']) : nil) - (names.class==Hash) ? names[:name_str] : nil + s=(names.class==Hash) ? names[:name_str] : nil + s=if s + l,n=Db[:col_name],'creator.photographer' + validate_length(s,l,n) + else nil + end end def photographer_detail names=(@h['photographer'] ? name_format(@h['photographer']) : nil) (names.class==Hash) ? names[:name_a_h] : nil end + def translator + names=(@h['translator'] ? name_format(@h['translator']) : nil) + s=(names.class==Hash) ? names[:name_str] : nil + s=if s + l,n=Db[:col_name],'creator.translator' + validate_length(s,l,n) + else nil + end + end + def translator_detail + names=(@h['translator'] ? name_format(@h['translator']) : nil) + (names.class==Hash) ? names[:name_a_h] : nil + end def audio names=(@h['audio'] ? name_format(@h['audio']) : nil) - (names.class==Hash) ? names[:name_str] : nil + s=(names.class==Hash) ? names[:name_str] : nil + s=if s + l,n=Db[:col_name],'creator.audio' + validate_length(s,l,n) + else nil + end end def audio_detail names=(@h['audio'] ? name_format(@h['audio']) : nil) @@ -280,7 +350,12 @@ module SiSU_Param end def digitized_by names=(@h['digitized_by'] ? name_format(@h['digitized_by']) : nil) - (names.class==Hash) ? names[:name_str] : nil + s=(names.class==Hash) ? names[:name_str] : nil + s=if s + l,n=Db[:col_name],'creator.digitized_by' + validate_length(s,l,n) + else nil + end end def digitized_by_detail names=(@h['digitized_by'] ? name_format(@h['digitized_by']) : nil) @@ -288,27 +363,24 @@ module SiSU_Param end def prepared_by names=(@h['prepared_by'] ? name_format(@h['prepared_by']) : nil) - (names.class==Hash) ? names[:name_str] : nil + s=(names.class==Hash) ? names[:name_str] : nil + s=if s + l,n=Db[:col_name],'creator.prepared_by' + validate_length(s,l,n) + else nil + end end def prepared_by_detail names=(@h['prepared_by'] ? name_format(@h['prepared_by']) : nil) names=name_format(@h['prepared_by']) (names.class==Hash) ? names[:name_a_h] : nil end - def contributor - names=(@h['contributor'] ? name_format(@h['contributor']) : nil) - (names.class==Hash) ? names[:name_str] : nil - end - def contributor_detail - names=(@h['contributor'] ? name_format(@h['contributor']) : nil) - (names.class==Hash) ? names[:name_a_h] : nil - end self end def rights a=@s.split(/[ ]*\n[ ]*/m) @h=build_hash(a) - def copyright + def copyright # TEXT used db sql def text #you may wish to expand to take from all r=if @h['copyright'] @h['copyright'] @@ -376,80 +448,127 @@ module SiSU_Param def classify a=@s.split(/[ ]*\n[ ]*/m) @h=build_hash(a) - def topic_register - @h['topic_register'] + def coverage + s=@h['coverage'] + l,n=Db[:col_classify_short],'classify.coverage' + validate_length(s,l,n) + end + def relation + s=@h['relation'] + l,n=Db[:col_classify_short],'classify.short' + validate_length(s,l,n) end def subject - @h['subject'] + s=@h['subject'] + l,n=Db[:col_classify_txt_short],'classify.subject' + validate_length(s,l,n) + end + def topic_register + s=@h['topic_register'] + l,n=Db[:col_classify_txt_long],'classify.topic_register' + validate_length(s,l,n) end def type - @h['type'] + s=@h['type'] + l,n=Db[:col_classify_txt_short],'classify.type' + validate_length(s,l,n) end def identifier - @h['identifier'] - end - def isbn - @h['isbn'] - end - def dewey - @h['dewey'] + s=@h['identifier'] + l,n=Db[:col_classify_identify],'classify.identifier' + validate_length(s,l,n) end def loc - @h['loc'] + s=@h['loc'] + l,n=Db[:col_classify_library],'classify.loc' + validate_length(s,l,n) end def dewey - @h['dewey'] + s=@h['dewey'] + l,n=Db[:col_classify_library],'classify.dewey' + validate_length(s,l,n) end def pg - @h['pg'] - end - def relation - @h['relation'] + s=@h['pg'] + l,n=Db[:col_classify_small],'classify.pg' + validate_length(s,l,n) end - def coverage - @h['coverage'] + def isbn + s=@h['isbn'] + l,n=Db[:col_classify_small],'classify.isbn' + validate_length(s,l,n) end self end def publisher a=@s.split(/[ ]*\n[ ]*/m) @h=build_hash(a) - @h['main'] + s=@h['main'] + l,n=Db[:col_name],'publisher' + validate_length(s,l,n) end def date a=@s.split(/[ ]*\n[ ]*/m) @h=build_hash(a) - def published - @h['published']=(@h['published'] ? @h['published'] : @h['main']) + def added_to_site + s=@h['added_to_site'] + l,n=Db[:col_date_text],'date.added_to_site' + validate_length(s,l,n) end def available - @h['available'] + s=@h['available'] + l,n=Db[:col_date_text],'date.available' + validate_length(s,l,n) end def created - @h['created'] + s=@h['created'] + l,n=Db[:col_date_text],'date.created' + validate_length(s,l,n) end def issued - @h['issued'] + s=@h['issued'] + l,n=Db[:col_date_text],'date.issued' + validate_length(s,l,n) end def modified - @h['modified'] + s=@h['modified'] + l,n=Db[:col_date_text],'date.modified' + validate_length(s,l,n) end - def valid - @h['valid'] + def published + s=@h['published']=(@h['published'] ? @h['published'] : @h['main']) + l,n=Db[:col_date_text],'date.published' + validate_length(s,l,n) end - def added_to_site - @h['added_to_site'] + def valid + s=@h['valid'] + l,n=Db[:col_date_text],'date.valid' + validate_length(s,l,n) end self end - def language + def language # as things stand this should really be populated from title.language and original.language, resolve a=@s.split(/[ ]*\n[ ]*/m) @h=build_hash(a) def document - @h['document']=(@h['document'] ? @h['document'] : @h['main']) + s=@h['document']=(@h['document'] ? @h['document'] : @h['main']) + l,n=Db[:col_language],'language.document' + validate_length(s,l,n) + end + def document_char + s=@h['document_char']=(@h['document_char'] ? @h['document_char'] : nil) + l,n=Db[:col_language_char],'language.document_char' + validate_length(s,l,n) end def original - @h['original'] + s=@h['original'] + l,n=Db[:col_language],'language.original' + validate_length(s,l,n) + end + def original_char + s=@h['original_char'] + l,n=Db[:col_language_char],'language.original_char' + validate_length(s,l,n) end self end @@ -479,7 +598,9 @@ module SiSU_Param @h['num_top'] end def breaks - pagebreaks=((@h['breaks'] =~/;/) ? (@h['breaks'].split(/;\s*/)) : [ @h['breaks'] ]) + pagebreaks=((@h['breaks'] =~/;/) \ + ? (@h['breaks'].split(/;\s*/)) \ + : [ @h['breaks'] ]) page_new,page_break=nil,nil pagebreaks.each do |x| page_new=x[/(:?[\dA-C],?)+/] if x=~/new|clear/ @@ -555,19 +676,34 @@ module SiSU_Param a=@s.split(/[ ]*\n[ ]*/m) @h=build_hash(a) def publisher - @h['publisher'] + s=@h['publisher'] + l,n=Db[:col_name],'original.publisher' + validate_length(s,l,n) end def language - @h['language'] + s=@h['language'] + l,n=Db[:col_language],'original.language' + validate_length(s,l,n) + end + def language_char + s=@h['language_char'] + l,n=Db[:col_language_char],'original.language_char' + validate_length(s,l,n) end def source - @h['source'] + s=@h['source'] + l,n=Db[:col_name],'original.source' + validate_length(s,l,n) end def institution - @h['institution'] + s=@h['institution'] + l,n=Db[:col_name],'original.institution' + validate_length(s,l,n) end def nationality - @h['nationality'] + s=@h['nationality'] + l,n=Db[:col_language],'original.nationality' + validate_length(s,l,n) end self end @@ -578,12 +714,12 @@ module SiSU_Param def notes a=@s.split(/[ ]*\n[ ]*/m) @h=build_hash(a) - def comment - @h['comment'] - end def abstract @h['abstract'] end + def comment + @h['comment'] + end def description @h['description'] end @@ -600,9 +736,9 @@ module SiSU_Param @doc={ :lv=>[] } @doc[:fns],@doc[:fnb],@doc[:scr_suffix]='','','' @@publisher='SiSU scribe' - attr_accessor :cmd,:make,:mod,:env,:fn,:fns,:fnb,:fnn,:fnt,:fnl,:flv,:fnz,:fnstex,:ocn,:sfx_src,:pdf,:file_type,:dir_out,:dir_tex,:dir_lout,:txt_path,:site_skin,:sisu,:sisu_version,:ruby_version,:title,:subtitle,:full_title,:html_title,:subtitle_tex,:creator,:author_home,:author,:author_title,:author_nationality,:authors,:authorship,:translator,:illustrator,:prepared_by,:digitized_by,:subject,:description,:publisher,:contributor,:date,:date_created,:date_issued,:date_available,:date_valid,:date_modified,:date_translated,:date_added_to_site,:date_scheme,:date_created_scheme,:date_issued_scheme,:date_available_scheme,:date_valid_scheme,:date_modified_scheme,:type,:format,:identifier,:source,:language,:language_original,:relation,:coverage,:rights,:keywords,:comments,:abstract,:cls_loc,:cls_dewey,:cls_pg,:cls_isbn,:papersize,:papersize_array,:toc,:lv1,:lv2,:lv3,:lv4,:lv5,:lv6,:lvs,:pagenew,:pagebreak,:num_top,:toc_lev_limit,:flag_endnotes,:flag_auto_endnotes,:flag_separate_endnotes,:flag_separate_endnotes_make,:markup,:markup_instruction,:markup_version,:markup_declared,:flag_tables,:vocabulary,:doc_skin,:doc_css,:yaml,:lnk,:prefix_a,:prefix_b,:suffix,:information,:contact,:icon,:image,:ad_url,:ad_png,:ad_alt,:ad_began,:flag_promo,:promo,:ad_home,:stmp,:stmpd,:sc_filename,:sc_number,:sc_date,:sc_time,:sc_info,:yamladdr,:locale,:wc_lines,:wc_words,:wc_bytes,:file_encoding,:file_size,:user,:home,:hostname,:pwd,:firstseg,:programs,:author_copymark,:lang,:en,:dgst,:dgst_skin,:generated,:tags,:tag_array,:concord_make,:seg_names,:seg_autoname_safe,:set_header_title,:set_heading_top,:set_heading_seg,:heading_seg_first,:heading_seg_first_flag,:base_program,:ec,:opt,:sem_tag,:book_idx,:topic_register,:topic_register_array,:original_publication,:original_publication_date,:original_publication_nationality,:original_publication_institution,:writing_focus,:audio,:daisy + attr_accessor :cmd,:make,:mod,:env,:fn,:fns,:fnb,:fnn,:fnt,:fnl,:flv,:fnz,:fnstex,:ocn,:sfx_src,:pdf,:file_type,:dir_out,:dir_tex,:dir_lout,:txt_path,:site_skin,:sisu,:sisu_version,:ruby_version,:title,:subtitle,:full_title,:html_title,:subtitle_tex,:creator,:classify,:author_home,:author,:author_title,:author_nationality,:authors,:authorship,:translator,:illustrator,:prepared_by,:digitized_by,:subject,:description,:publisher,:contributor,:date,:date_created,:date_issued,:date_available,:date_valid,:date_modified,:date_translated,:date_added_to_site,:date_scheme,:date_created_scheme,:date_issued_scheme,:date_available_scheme,:date_valid_scheme,:date_modified_scheme,:type,:format,:identifier,:source,:language,:language_original,:relation,:coverage,:rights,:keywords,:comments,:abstract,:cls_loc,:cls_dewey,:cls_pg,:cls_isbn,:papersize,:papersize_array,:toc,:lv1,:lv2,:lv3,:lv4,:lv5,:lv6,:lvs,:pagenew,:pagebreak,:num_top,:toc_lev_limit,:flag_endnotes,:flag_auto_endnotes,:flag_separate_endnotes,:flag_separate_endnotes_make,:markup,:markup_instruction,:markup_version,:markup_declared,:flag_tables,:vocabulary,:doc_skin,:doc_css,:yaml,:lnk,:prefix_a,:prefix_b,:suffix,:information,:contact,:icon,:image,:ad_url,:ad_png,:ad_alt,:ad_began,:flag_promo,:promo,:ad_home,:stmp,:stmpd,:sc_filename,:sc_number,:sc_date,:sc_time,:sc_info,:yamladdr,:locale,:wc_lines,:wc_words,:wc_bytes,:file_encoding,:file_size,:user,:home,:hostname,:pwd,:firstseg,:programs,:author_copymark,:lang,:en,:dgst,:dgst_skin,:generated,:tags,:tag_array,:concord_make,:seg_names,:seg_autoname_safe,:set_header_title,:set_heading_top,:set_heading_seg,:heading_seg_first,:heading_seg_first_flag,:base_program,:ec,:opt,:sem_tag,:book_idx,:topic_register,:topic_register_array,:original_publication,:original_publication_date,:original_publication_nationality,:original_publication_institution,:writing_focus,:audio,:daisy def initialize(fns_array,opt) - @env=@fn=@fns=@fnb=@fnn=@fnt=@fnl=@flv=@fnz=@fnstex=@ocn=@sfx_src=@pdf=@file_type=@dir_out=@dir_tex=@dir_lout=@txt_path=@make=@flag_endnotes=@flag_auto_endnotes=@flag_separate_endnotes=@flag_separate_endnotes_make=@site_skin=@sisu=@sisu_version=@ruby_version=@title=@subtitle=@full_title=@html_title=@subtitle_tex=@creator=@author_home=@author=@author_title=@author_nationality=@translator=@illustrator=@prepared_by=@digitized_by=@subject=@description=@publisher=@contributor=@date=@date_created=@date_issued=@date_available=@date_valid=@date_modified=@date_translated=@date_added_to_site=@date_scheme=@date_created_scheme=@date_issued_scheme=@date_available_scheme=@date_valid_scheme=@date_modified_scheme=@type=@format=@identifier=@source=@language=@language_original=@relation=@coverage=@rights=@keywords=@comments=@abstract=@cls_loc=@cls_dewey=@cls_pg=@cls_isbn=@papersize=@toc=@lv1=@lv2=@lv3=@lv4=@lv5=@lv6=@pagenew=@pagebreak=@num_top=@toc_lev_limit=@flag_tables=@vocabulary=@doc_skin=@doc_css=@yaml=@lnk=@prefix_a=@prefix_b=@suffix=@information=@contact=@icon=@ad_url=@ad_png=@ad_alt=@ad_began=@promo=@ad_home=@stmp=@stmpd=@sc_filename=@sc_number=@sc_date=@sc_time=@sc_info=@yamladdr=@locale=@wc_lines=@wc_words=@wc_bytes=@file_encoding=@file_size=@firstseg=@programs=@author_copymark=@lang=@en=@dgst=@dgst_skin=@generated=@heading_seg_first=@base_program=@topic_register=@original_publication_details=@original_publication=@original_publication_date=@original_publication_nationality=@original_publication_institution=@writing_focus=@audio=nil + @env=@fn=@fns=@fnb=@fnn=@fnt=@fnl=@flv=@fnz=@fnstex=@ocn=@sfx_src=@pdf=@file_type=@dir_out=@dir_tex=@dir_lout=@txt_path=@make=@flag_endnotes=@flag_auto_endnotes=@flag_separate_endnotes=@flag_separate_endnotes_make=@site_skin=@sisu=@sisu_version=@ruby_version=@title=@subtitle=@full_title=@html_title=@subtitle_tex=@creator=@classify=@author_home=@author=@author_title=@author_nationality=@translator=@illustrator=@prepared_by=@digitized_by=@subject=@description=@publisher=@contributor=@date=@date_created=@date_issued=@date_available=@date_valid=@date_modified=@date_translated=@date_added_to_site=@date_scheme=@date_created_scheme=@date_issued_scheme=@date_available_scheme=@date_valid_scheme=@date_modified_scheme=@type=@format=@identifier=@source=@language=@language_original=@relation=@coverage=@rights=@keywords=@comments=@abstract=@cls_loc=@cls_dewey=@cls_pg=@cls_isbn=@papersize=@toc=@lv1=@lv2=@lv3=@lv4=@lv5=@lv6=@pagenew=@pagebreak=@num_top=@toc_lev_limit=@flag_tables=@vocabulary=@doc_skin=@doc_css=@yaml=@lnk=@prefix_a=@prefix_b=@suffix=@information=@contact=@icon=@ad_url=@ad_png=@ad_alt=@ad_began=@promo=@ad_home=@stmp=@stmpd=@sc_filename=@sc_number=@sc_date=@sc_time=@sc_info=@yamladdr=@locale=@wc_lines=@wc_words=@wc_bytes=@file_encoding=@file_size=@firstseg=@programs=@author_copymark=@lang=@en=@dgst=@dgst_skin=@generated=@heading_seg_first=@base_program=@topic_register=@original_publication_details=@original_publication=@original_publication_date=@original_publication_nationality=@original_publication_institution=@writing_focus=@audio=nil @data,@fns,@cmd,@mod,@opt=fns_array,opt.fns,opt.cmd,opt.mod,opt #@data used as data @flag_tables,@set_header_title,@set_heading_top,@set_heading_seg,@heading_seg_first_flag,@flag_promo,@book_idx=false,false,false,false,false,false,false @seg_autoname_safe=true @@ -748,7 +884,7 @@ module SiSU_Param @authorship=@author=@creator.author @authors=@creator.author_detail when /^@classify:(.+)/m; classify=$1 - Md.new($1.strip).classify + @classify=Md.new($1.strip).classify when /^@publisher:\s+(.+)/m @publisher=Md.new($1.strip).current_publisher when /^@original:(.+)/m -- cgit v1.2.3 From 221cfa4043a1e9423a6ed4222fb41ef68b8fd73d Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Fri, 16 Apr 2010 10:16:59 -0400 Subject: db, sysenv, sisurc.yml determine whether to share sisu markup source in db --- lib/sisu/v2/sysenv.rb | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/sisu/v2/sysenv.rb b/lib/sisu/v2/sysenv.rb index 88195630..c857468f 100644 --- a/lib/sisu/v2/sysenv.rb +++ b/lib/sisu/v2/sysenv.rb @@ -2685,6 +2685,12 @@ WOK @rc=@@rc ||=Get_init.instance.yamlrc @defaults=Info_env.new.defaults end + def share_source? + ((defined? @rc['db']['share_source']) \ + && @rc['db']['share_source']==true) \ + ? @rc['db']['share_source'] \ + : false + end def engine def default ((defined? @rc['db']['engine']['default']) \ -- cgit v1.2.3 From c9d42a44f05828b507ceeae95c9d31ecfff9dbda Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Fri, 16 Apr 2010 10:17:53 -0400 Subject: cgi sample search form (cgi_sql_common): take account of db structure changes; match limit options * update to work with database structure changes * add radio buttons to set increased match limit --- lib/sisu/v2/cgi_pgsql.rb | 10 +-- lib/sisu/v2/cgi_sql_common.rb | 184 +++++++++++++++++++++++++----------------- lib/sisu/v2/cgi_sqlite.rb | 6 +- 3 files changed, 119 insertions(+), 81 deletions(-) (limited to 'lib') diff --git a/lib/sisu/v2/cgi_pgsql.rb b/lib/sisu/v2/cgi_pgsql.rb index db2d8267..493c39d4 100644 --- a/lib/sisu/v2/cgi_pgsql.rb +++ b/lib/sisu/v2/cgi_pgsql.rb @@ -68,7 +68,7 @@ module SiSU_CGI_pgsql @image_src="#{@env.url.webserv_cgi}/_sisu/image_sys" @common=SiSU_CGI_sql::SiSU_CGI_common.new(@webserv,@opt.cmd,@image_src,@env) @db=SiSU_Env::Info_db.new - @cgi_file_name="sisu_#{SiSU_version_dir}_pgsql.cgi" + @cgi_file_name="#{Db[:name_prefix_db]}pgsql.cgi" end def pgsql serve=[] @@ -177,7 +177,7 @@ module SiSU_CGI_pgsql @search_text,@search_endnotes=[],[] search[:text].each{|x| @search_text << "#{x} AND " } @search_text=@search_text.join.gsub!(/AND\s+$/,'') #watch - @search_text.gsub!(/(documents\.clean~[*]?\(\s*'[^']+'\s*\)\s+(?:(?:AND|OR)\s+documents\.clean~[*]?\(\s*'[^']+'\s*\))+)/,'(\1)') + @search_text.gsub!(/(doc_objects\.clean~[*]?\(\s*'[^']+'\s*\)\s+(?:(?:AND|OR)\s+doc_objects\.clean~[*]?\(\s*'[^']+'\s*\))+)/,'(\1)') search[:endnotes].each{|x| @search_endnotes << "#{x} AND " } @search_endnotes=@search_endnotes.join.gsub!(/AND\s+$/,'') #watch @search_endnotes.gsub!(/(endnotes\.clean~\(\s*'[^']+'\s*\)\s+(?:(?:AND|OR)\s+endnotes\.clean~\(\s*'[^']+'\s*\))+)/,'(\1)') @@ -189,7 +189,7 @@ module SiSU_CGI_pgsql def sql_select_body limit ||=@@limit offset ||=@@offset - @sql_statement[:body]=%{SELECT metadata.title, metadata.subtitle, metadata.creator, metadata.filename, metadata.suffix, documents.body, documents.seg, documents.ocn, metadata.tid FROM documents, metadata WHERE (#{@search_text}) AND documents.metadata_tid = metadata.tid ORDER BY metadata.title, metadata.filename, documents.ocn} + @sql_statement[:body]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.filename, metadata_and_text.notes_suffix, doc_objects.body, doc_objects.seg, doc_objects.ocn, metadata_and_text.tid FROM doc_objects, metadata_and_text WHERE (#{@search_text}) AND doc_objects.metadata_tid = metadata_and_text.tid ORDER BY metadata_and_text.title, metadata_and_text.filename, doc_objects.ocn} @sql_statement[:range]=%{LIMIT #{limit} OFFSET #{offset} ;} select=@sql_statement[:body] + ' ' + @sql_statement[:range] select @@ -197,7 +197,7 @@ module SiSU_CGI_pgsql def sql_select_endnotes limit ||=@@limit offset ||=@@offset - @sql_statement[:endnotes]=%{SELECT metadata.title, metadata.subtitle, metadata.creator, metadata.filename, endnotes.body, endnotes.nr, endnotes.ocn, endnotes.metadata_tid FROM metadata, endnotes WHERE (#{@search_endnotes}) AND metadata.tid = endnotes.metadata_tid ORDER BY metadata.title, metadata.filename, endnotes.nr} + @sql_statement[:endnotes]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.filename, endnotes.body, endnotes.nr, endnotes.ocn, endnotes.metadata_tid FROM metadata_and_text, endnotes WHERE (#{@search_endnotes}) AND metadata_and_text.tid = endnotes.metadata_tid ORDER BY metadata_and_text.title, metadata_and_text.filename, endnotes.nr} @sql_statement[:range]=%{LIMIT #{limit} OFFSET #{offset} ;} select=@sql_statement[:endnotes] + ' ' + @sql_statement[:range] select @@ -219,7 +219,7 @@ module SiSU_CGI_pgsql end def buttons1_pgsql <<-'WOK_SQL' - case sensitive + case sensitive WOK_SQL end def dbi_connect diff --git a/lib/sisu/v2/cgi_sql_common.rb b/lib/sisu/v2/cgi_sql_common.rb index 0c3977c4..e536551f 100644 --- a/lib/sisu/v2/cgi_sql_common.rb +++ b/lib/sisu/v2/cgi_sql_common.rb @@ -125,19 +125,19 @@ module SiSU_CGI_sql def header1 <<-'WOK_SQL' #Common TOP - @@limit,@@offset=1000,0 + @@offset=0 @base="#{@hosturl_db}/cgi-bin/#{@version}.cgi" @@canned_search_url=@base @color_heading='#DDFFAA' @color_match='#ffff48' class Form - def initialize(base,search_field,selected_db,checked_index,checked_text,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can='') + def initialize(base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can='') search_note='' if checked_searched !~/\S/ the_can='' if checked_url !~/\S/ search_field='' if checked_echo !~/\S/ - @base,@search_field,@selected_db,@checked_index,@checked_text,@checked_tip,@checked_stats,@checked_searched,@checked_url,@checked_case,@checked_echo,@checked_sql,@checked_all,@checked_none,@checked_selected,@checked_default,@search_note,@the_can=base,search_field,selected_db,checked_index,checked_text,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can + @base,@search_field,@selected_db,@result_type,@checked_sql_limit,@checked_tip,@checked_stats,@checked_searched,@checked_url,@checked_case,@checked_echo,@checked_sql,@checked_all,@checked_none,@checked_selected,@checked_default,@search_note,@the_can=base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can @tip=if checked_tip =~/\S/ - 'text:__; keywords:__; title:__; author:__; subject:__; description:__; publisher:__; contributor:__; date:__; type:__; format:__; identifier:__; source:__; language:__; relation:__; coverage:__; rights:__; comment:__; abstract:__; filename:__;
' + 'text:__; fulltxt:__; keywords:__; title:__; author:__; topic_register:__; subject:__; description:__; publisher:__; contributor:__; date:__; type:__; format:__; identifier:__; source:__; language:__; relation:__; coverage:__; rights:__; comment:__; abstract:__; filename:__;
' else '' end end @@ -195,12 +195,18 @@ module SiSU_CGI_sql <<-'WOK_SQL' - index - text / grep + index + text / grep WOK_SQL end def buttons2 <<-'WOK_SQL' +
+ match limit: + 1,000 + 2,000 + 5,000 + 10,000
echo query result stats @@ -226,13 +232,14 @@ module SiSU_CGI_sql def search_request <<-'WOK_SQL' class Search_request #% search_for - attr_accessor :text1,:keywords,:title,:author,:subject,:description,:publisher,:contributor,:date,:type,:format,:identifier,:source,:language,:relation,:coverage,:rights,:comment,:abstract,:owner,:date_created,:date_issued,:date_modified,:date_available,:date_valid,:filename + attr_accessor :text1,:fulltext,:keywords,:title,:author,:topic_register,:subject,:description,:publisher,:contributor,:date,:type,:format,:identifier,:source,:language,:relation,:coverage,:rights,:comment,:abstract,:owner,:date_created,:date_issued,:date_modified,:date_available,:date_valid,:filename def initialize(search_field='',q='') @search_field,@q=search_field,q - @text1=@keywords=@title=@author=@subject=@description=@publisher=@contributor=@date=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@comment=@abstract=@owner=@date_created=@date_issued=@date_modified=@date_available=@date_valid=@filename='' + @text1=@fulltext=@keywords=@title=@author=@topic_register=@subject=@description=@publisher=@contributor=@date=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@comment=@abstract=@owner=@date_created=@date_issued=@date_modified=@date_available=@date_valid=@filename='' if @search_field=~/\S/ @text1=text_to_match('text:') - @keywords=text_to_match('key(?:words?)?:') + @fulltext=text_to_match('fulltxt:') + @topic_register=text_to_match('topic_register:') @title=text_to_match('title:') # DublinCore 1 - title @author=text_to_match('(?:author|creator)s?:') # DublinCore 2 - creator/author @subject=text_to_match('subj(?:ect)?:') # DublinCore 3 - subject @@ -248,6 +255,7 @@ module SiSU_CGI_sql @relation=text_to_match('relation:') # DublinCore 13 - relation @coverage=text_to_match('coverage:') # DublinCore 14 - coverage @rights=text_to_match('rights:') # DublinCore 15 - rights + @keywords=text_to_match('key(?:words?)?:') @comment=text_to_match('comment:') @abstract=text_to_match('abs(?:tract)?:') @owner=text_to_match('owner:') @@ -257,12 +265,14 @@ module SiSU_CGI_sql @date_available=text_to_match('date_available:') @date_valid=text_to_match('date_valid:') @filename=text_to_match('filename:') - @text1=text_to_match unless @keywords or @author or @title or @text1 or @comment or @abstract or @rights or @subject or @publisher or @date or @filename + @text1=text_to_match unless @keywords or @author or @title or @text1 or @fulltext or @comment or @abstract or @rights or @subject or @publisher or @date or @filename or @topic_register else @text1=q['s1'] if q['s1']=~/\S/ + @fulltext=q['ft'] if q['ft']=~/\S/ @keywords=q['key'] if q['key']=~/\S/ @title=q['ti'] if q['ti']=~/\S/ @author=q['au'] if q['au']=~/\S/ + @topic_register=q['tr'] if q['tr']=~/\S/ @subject=q['sj'] if q['sj']=~/\S/ @description=q['dsc'] if q['dsc']=~/\S/ @publisher=q['pb'] if q['pb']=~/\S/ @@ -327,99 +337,109 @@ module SiSU_CGI_sql cse=if c =~/\S/; true else false end - st=Dbi_search_string.new('documents.clean',search_for.text1,q['s1'],cse).string + st=Dbi_search_string.new('doc_objects.clean',search_for.text1,q['s1'],cse).string se=Dbi_search_string.new('endnotes.clean',search_for.text1,q['s1'],cse).string @text_search_flag=st[:flag] if st[:flag] search[:text] << st[:search] search[:endnotes] << se[:search] end - st=Dbi_search_string.new('metadata.keywords',search_for.keywords,q['key'],cse).string + st=Dbi_search_string.new('metadata_and_text.fulltext',search_for.fulltext,q['ft'],cse).string + if st[:flag] + search[:text] << st[:search] + search[:endnotes] << st[:search] + end + st=Dbi_search_string.new('metadata_and_text.classify_keywords',search_for.keywords,q['key'],cse).string + if st[:flag] + search[:text] << st[:search] + search[:endnotes] << st[:search] + end + st=Dbi_search_string.new('metadata_and_text.title',search_for.title,q['ti'],cse).string if st[:flag] search[:text] << st[:search] search[:endnotes] << st[:search] end - st=Dbi_search_string.new('metadata.title',search_for.title,q['ti'],cse).string + st=Dbi_search_string.new('metadata_and_text.creator_author',search_for.author,q['au'],cse).string if st[:flag] search[:text] << st[:search] search[:endnotes] << st[:search] end - st=Dbi_search_string.new('metadata.creator',search_for.author,q['au'],cse).string + st=Dbi_search_string.new('metadata_and_text.classify_topic_register',search_for.topic_register,q['tr'],cse).string if st[:flag] search[:text] << st[:search] search[:endnotes] << st[:search] end - st=Dbi_search_string.new('metadata.subject',search_for.subject,q['sj'],cse).string + st=Dbi_search_string.new('metadata_and_text.classify_subject',search_for.subject,q['sj'],cse).string if st[:flag] search[:text] << st[:search] search[:endnotes] << st[:search] end - st=Dbi_search_string.new('metadata.description',search_for.description,q['dsc'],cse).string + st=Dbi_search_string.new('metadata_and_text.notes_description',search_for.description,q['dsc'],cse).string if st[:flag] search[:text] << st[:search] search[:endnotes] << st[:search] end - st=Dbi_search_string.new('metadata.publisher',search_for.publisher,q['pb'],cse).string + st=Dbi_search_string.new('metadata_and_text.publisher',search_for.publisher,q['pb'],cse).string if st[:flag] search[:text] << st[:search] search[:endnotes] << st[:search] end - st=Dbi_search_string.new('metadata.contributor',search_for.contributor,q['cntr'],cse).string + st=Dbi_search_string.new('metadata_and_text.creator_contributor',search_for.contributor,q['cntr'],cse).string if st[:flag] search[:text] << st[:search] search[:endnotes] << st[:search] end - st=Dbi_search_string.new('metadata.date',search_for.date,q['dt'],cse).string + st=Dbi_search_string.new('metadata_and_text.date_published',search_for.date,q['dt'],cse).string if st[:flag] search[:text] << st[:search] search[:endnotes] << st[:search] end - st=Dbi_search_string.new('metadata.type',search_for.type,q['ty'],cse).string + st=Dbi_search_string.new('metadata_and_text.classify_type',search_for.type,q['ty'],cse).string if st[:flag] search[:text] << st[:search] search[:endnotes] << st[:search] end - st=Dbi_search_string.new('metadata.identifier',search_for.identifier,q['id'],cse).string + st=Dbi_search_string.new('metadata_and_text.classify_identifier',search_for.identifier,q['id'],cse).string if st[:flag] search[:text] << st[:search] search[:endnotes] << st[:search] end - st=Dbi_search_string.new('metadata.source',search_for.source,q['src'],cse).string + st=Dbi_search_string.new('metadata_and_text.original_source',search_for.source,q['src'],cse).string if st[:flag] search[:text] << st[:search] search[:endnotes] << st[:search] end - st=Dbi_search_string.new('metadata.language',search_for.language,q['lang'],cse).string + st=Dbi_search_string.new('metadata_and_text.title_language',search_for.language,q['lang'],cse).string if st[:flag] search[:text] << st[:search] search[:endnotes] << st[:search] end - st=Dbi_search_string.new('metadata.relation',search_for.relation,q['rel'],cse).string + st=Dbi_search_string.new('metadata_and_text.classify_relation',search_for.relation,q['rel'],cse).string if st[:flag] search[:text] << st[:search] search[:endnotes] << st[:search] end - st=Dbi_search_string.new('metadata.coverage',search_for.coverage,q['cov'],cse).string + st=Dbi_search_string.new('metadata_and_text.classify_coverage',search_for.coverage,q['cov'],cse).string if st[:flag] search[:text] << st[:search] search[:endnotes] << st[:search] end - st=Dbi_search_string.new('metadata.rights',search_for.rights,q['cr'],cse).string + st=Dbi_search_string.new('metadata_and_text.rights_all',search_for.rights,q['cr'],cse).string if st[:flag] search[:text] << st[:search] search[:endnotes] << st[:search] end - st=Dbi_search_string.new('metadata.comment',search_for.comment,q['co'],cse).string + st=Dbi_search_string.new('metadata_and_text.notes_comment',search_for.comment,q['co'],cse).string if st[:flag] search[:text] << st[:search] search[:endnotes] << st[:search] end - st=Dbi_search_string.new('metadata.abstract',search_for.abstract,q['ab'],cse).string + st=Dbi_search_string.new('metadata_and_text.notes_abstract',search_for.abstract,q['ab'],cse).string if st[:flag] search[:text] << st[:search] search[:endnotes] << st[:search] end - st=Dbi_search_string.new('metadata.filename',search_for.filename,q['fns'],cse).string + st=Dbi_search_string.new('metadata_and_text.filename',search_for.filename,q['fns'],cse).string if st[:flag] search[:text] << st[:search] search[:endnotes] << st[:search] @@ -433,31 +453,31 @@ module SiSU_CGI_sql def sql_offset @@offset end - def sql_limit + def sql_match_limit @@limit end def sql_canned_search - @offset_next=sql_offset.to_i + sql_limit.to_i - @offset_previous=sql_offset.to_i - sql_limit.to_i + @offset_next=sql_offset.to_i + sql_match_limit.to_i + @offset_previous=sql_offset.to_i - sql_match_limit.to_i def current - @@canned_search_url.to_s + '<d=' + sql_limit.to_s + '&off=' + sql_offset.to_s + @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + sql_offset.to_s end def next - @@canned_search_url.to_s + '<d=' + sql_limit.to_s + '&off=' + @offset_next.to_s + @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_next.to_s end def previous @offset_previous >= 0 \ - ? (@@canned_search_url.to_s + '<d=' + sql_limit.to_s + '&off=' + @offset_previous.to_s) \ + ? (@@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_previous.to_s) \ : '' end def start - @@canned_search_url.to_s + '<d=' + sql_limit.to_s + '&off=' + 0.to_s + @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + 0.to_s end self end def pre_next(beyond_limit,img) can=sql_canned_search - page=(sql_offset.to_i + sql_limit.to_i)/sql_limit.to_i + page=(sql_offset.to_i + sql_match_limit.to_i)/sql_match_limit.to_i if beyond_limit if page.to_s =~ /^1$/ %{

@@ -630,8 +650,23 @@ module SiSU_CGI_sql "#{@db_name_prefix}#{@stub}" end checked_url,checked_stats,checked_searched,checked_tip,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,selected_db='','','','','','','','','' - if cgi['view']=~/text/; checked_index,checked_text='','checked' - else checked_index,checked_text='checked','' + if cgi['view']=~/text/ + result_type={:index=>'',:text=>'checked'} + else + result_type={:index=>'checked',:text=>''} + end + @@limit=if cgi['sql_match_limit'].to_s=~/10000/ + checked_sql_limit={:l1k=>'',:l2k=>'',:l5k=>'',:l10k=>'checked'} + '10000' + elsif cgi['sql_match_limit'].to_s=~/5000/ + checked_sql_limit={:l1k=>'',:l2k=>'',:l5k=>'checked',:l10k=>''} + '5000' + elsif cgi['sql_match_limit'].to_s=~/2000/ + checked_sql_limit={:l1k=>'',:l2k=>'checked',:l5k=>'',:l10k=>''} + '2000' + else + checked_sql_limit={:l1k=>'checked',:l2k=>'',:l5k=>'',:l10k=>''} + '1000' end checked_echo='checked' if cgi['echo'] =~/\S/ checked_stats='checked' if cgi['stats'] =~/\S/ @@ -666,11 +701,13 @@ module SiSU_CGI_sql @search_for=Search_request.new(search_field,q) #.analyze #% search_for #% searches #Canned_search.new(@base,@search_for.text1,cgi) - if @search_for.text1=~/\S+/ or @search_for.author=~/\S+/ #and search_field =~/\S/ + if @search_for.text1=~/\S+/ or @search_for.fulltext=~/\S+/ or @search_for.author=~/\S+/ or @search_for.topic_register=~/\S+/ #and search_field =~/\S/ s1='s1=' + CGI.escape(@search_for.text1) if @search_for.text1=~/\S/ + ft='&ft=' + CGI.escape(@search_for.fulltext) if @search_for.fulltext=~/\S/ key='key=' + CGI.escape(@search_for.keywords) if @search_for.keywords=~/\S/ ti='&ti=' + CGI.escape(@search_for.title) if @search_for.title=~/\S/ au='&au=' + CGI.escape(@search_for.author) if @search_for.author=~/\S/ + tr='&tr=' + CGI.escape(@search_for.topic_register) if @search_for.topic_register=~/\S/ sj='&sj=' + CGI.escape(@search_for.subject) if @search_for.subject=~/\S/ dsc='&dsc=' + CGI.escape(@search_for.description) if @search_for.description=~/\S/ pb='&pb=' + CGI.escape(@search_for.publisher) if @search_for.publisher=~/\S/ @@ -692,15 +729,16 @@ module SiSU_CGI_sql dtv='&dtv=' + CGI.escape(@search_for.date_valid) if @search_for.date_valid=~/\S/ fns='&fns=' + CGI.escape(@search_for.filename) if @search_for.filename=~/\S/ @@canned_search_url=if checked_all =~/checked/ - "#{@base}?#{s1}#{key}#{ti}#{au}#{sj}#{dsc}#{pb}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}&a=1" - else "#{@base}?#{s1}#{key}#{ti}#{au}#{sj}#{dsc}#{pb}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}" + "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}&a=1" + else "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}" end - @canned_base_url="#{@base}?#{s1}&db=#{cgi['db']}" + mod=ft=~/\S+/ ? (ft.gsub(/ft/,'s1')) : s1 + @canned_base_url="#{@base}?#{mod}&db=#{cgi['db']}" if checked_case=~/\S/ - @search[:text][1]=%{documents.clean~'#{@search_for.text1}'} #s1 + @search[:text][1]=%{doc_objects.clean~'#{@search_for.text1}'} #s1 @search[:endnotes][1]=%{endnotes.clean~'#{@search_for.text1}'} #s1 else - @search[:text][1]=%{documents.clean~*'#{@search_for.text1}'} #s1 + @search[:text][1]=%{doc_objects.clean~*'#{@search_for.text1}'} #s1 @search[:endnotes][1]=%{endnotes.clean~*'#{@search_for.text1}'} #s1 end canned_note='search url:' @@ -711,12 +749,14 @@ module SiSU_CGI_sql if search_field =~/\S+/ analyze_format=search_field.gsub(/\s*\n/,'; ') elsif checked_all =~/checked/ or checked_url =~/checked/ - canned_search=@@canned_search_url.scan(/(?:s1|au|ti|fns)=[^&]+/) + canned_search=@@canned_search_url.scan(/(?:s1|ft|au|ti|fns|tr)=[^&]+/) af=canned_search.join('; ') af.gsub!(/s1=/,'text: ') + af.gsub!(/ft=/,'fulltxt: ') af.gsub!(/au=/,'author: ') af.gsub!(/ti=/,'title: ') af.gsub!(/fns=/,'filename: ') + af.gsub!(/tr=/,'topic_register: ') af.gsub!(/%2B/,' ') analyze_format=af st=af.split(/\s*;\s*/) @@ -725,37 +765,39 @@ module SiSU_CGI_sql green=%{} canned_search_url_txt=CGI.escapeHTML(@@canned_search_url) the_can=%{#{canned_note} #{canned_search_url_txt}
} - p_text=p_keywords=p_title=p_author=p_subject=p_description=p_publisher=p_contributor=p_date=p_type=p_format=p_identifier=p_source=p_language=p_relation=p_coverage=p_rights=p_comment=p_abstract=p_subject=p_filename='' + p_text=p_fulltext=p_keywords=p_title=p_author=p_topic_register=p_subject=p_description=p_publisher=p_contributor=p_date=p_type=p_format=p_identifier=p_source=p_language=p_relation=p_coverage=p_rights=p_comment=p_abstract=p_filename='' + p_filename=%{filename: #{green}#{@search_for.filename}

} if @search_for.filename =~/\S+/ p_text=%{text: #{green}#{@search_for.text1}
} if @search_for.text1 =~/\S+/ - p_keywords=%{keywords: #{green}#{@search_for.keywords}
} if @search_for.keywords =~/\S+/ + p_fulltext=%{fulltxt: #{green}#{@search_for.fulltext}
} if @search_for.fulltext =~/\S+/ p_title=%{title: #{green}#{@search_for.title}
} if @search_for.title =~/\S+/ p_author=%{author: #{green}#{@search_for.author}
} if @search_for.author =~/\S+/ - p_subject=%{subject: #{green}#{@search_for.subject}
} if @search_for.subject =~/\S+/ - p_description=%{description: #{green}#{@search_for.description}
} if @search_for.description =~/\S+/ - p_publisher=%{publisher: #{green}#{@search_for.publisher}
} if @search_for.publisher =~/\S+/ p_contributor=%{contributor: #{green}#{@search_for.contributor}
} if @search_for.contributor =~/\S+/ p_date=%{date: #{green}#{@search_for.date}
} if @search_for.date =~/\S+/ + p_rights=%{rights: #{green}#{@search_for.rights}
} if @search_for.rights =~/\S+/ + p_topic_register=%{topic_register: #{green}#{@search_for.topic_register}
} if @search_for.topic_register =~/\S+/ + p_subject=%{subject: #{green}#{@search_for.subject}
} if @search_for.subject =~/\S+/ + p_keywords=%{keywords: #{green}#{@search_for.keywords}
} if @search_for.keywords =~/\S+/ + p_identifier=%{identifier: #{green}#{@search_for.identifier}
} if @search_for.identifier =~/\S+/ p_type=%{type: #{green}#{@search_for.type}
} if @search_for.type =~/\S+/ p_format=%{format: #{green}#{@search_for.format}
} if @search_for.format =~/\S+/ - p_identifier=%{identifier: #{green}#{@search_for.identifier}
} if @search_for.identifier =~/\S+/ - p_source=%{source: #{green}#{@search_for.source}
} if @search_for.source =~/\S+/ - p_language=%{language: #{green}#{@search_for.language}
} if @search_for.language =~/\S+/ p_relation=%{relation: #{green}#{@search_for.relation}
} if @search_for.relation =~/\S+/ p_coverage=%{coverage: #{green}#{@search_for.coverage}
} if @search_for.coverage =~/\S+/ - p_rights=%{rights: #{green}#{@search_for.rights}
} if @search_for.rights =~/\S+/ - p_comment=%{comment: #{green}#{@search_for.comment}
} if @search_for.comment =~/\S+/ + p_description=%{description: #{green}#{@search_for.description}
} if @search_for.description =~/\S+/ p_abstract=%{abstract: #{green}#{@search_for.abstract}
} if @search_for.abstract =~/\S+/ - p_filename=%{filename: #{green}#{@search_for.filename}
} if @search_for.filename =~/\S+/ + p_comment=%{comment: #{green}#{@search_for.comment}
} if @search_for.comment =~/\S+/ + p_publisher=%{publisher: #{green}#{@search_for.publisher}
} if @search_for.publisher =~/\S+/ + p_source=%{source: #{green}#{@search_for.source}
} if @search_for.source =~/\S+/ + p_language=%{language: #{green}#{@search_for.language}
} if @search_for.language =~/\S+/ search_note=<<-WOK database: #{green}#{@db}; selected view: #{green}#{cgi['view']} search string: "#{green}#{analyze_format}"
- #{p_text} #{p_keywords} #{p_title} #{p_author} #{p_subject} #{p_description} #{p_publisher} #{p_contributor} #{p_date} #{p_type} #{p_format} #{p_identifier} #{p_source} #{p_language} #{p_relation} #{p_coverage} #{p_rights} #{p_comment} #{p_abstract} #{p_filename} + #{p_text} #{p_fulltext} #{p_keywords} #{p_title} #{p_author} #{p_topic_register} #{p_subject} #{p_description} #{p_publisher} #{p_contributor} #{p_date} #{p_type} #{p_format} #{p_identifier} #{p_source} #{p_language} #{p_relation} #{p_coverage} #{p_rights} #{p_comment} #{p_abstract} #{p_filename} WOK #eg = %{canned search e.g.:
#{url}
find: #{analyze}
database: #{database}} #dbi_canning - @header=Form.new(@base,search_field,selected_db,checked_index,checked_text,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can).submission_form #% form + @header=Form.new(@base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can).submission_form #% form unless q['s1'] =~/\S/ or q['au'] =~/\S/ or @search[:text][1] =~/\S/ print "Content-type: text/html\n\n" puts (@header+@tail) @@ -765,10 +807,10 @@ module SiSU_CGI_sql else 'Unavailable' end if checked_case=~/\S/ - @search[:text]<<%{documents.clean~'#{CGI.unescape(s1)}'} + @search[:text]<<%{doc_objects.clean~'#{CGI.unescape(s1)}'} @search[:endnotes]<<%{endnotes.clean~'#{CGI.unescape(s1)}'} else - @search[:text]<<%{documents.clean~*'#{CGI.unescape(s1)}'} + @search[:text]<<%{doc_objects.clean~*'#{CGI.unescape(s1)}'} @search[:endnotes]<<%{endnotes.clean~*'#{CGI.unescape(s1)}'} end #dbi_request @@ -802,13 +844,11 @@ module SiSU_CGI_sql end #metadata_found_body if c['tid'].to_i != oldtid.to_i - ti=if c['subtitle'] =~/\S+/; "#{c['title']} - #{c['subtitle']}" - else c['title'] - end + ti=c['title'] can_txt_srch=if cgi['view']=~/index/; %{search } else %{search } end - title=%{toc html #{ti} by #{c['creator']} #{can_txt_srch}toc html epub pdf portrait pdf landscape odf manifest
} if file_suffix=~/s/ #hmm watch file_suffix + title=%{toc html #{ti} by #{c['creator_author']} #{can_txt_srch}toc html epub pdf portrait pdf landscape odf manifest
} if file_suffix=~/s/ #hmm watch file_suffix if @text_search_flag; title='

'+title else title='
'+title end @@ -875,7 +915,7 @@ module SiSU_CGI_sql end @counters_txt=if @counter_txt_doc > 0 if checked_stats =~/\S/ - @@lt_t=if @counter_txt_ocn==dbi_statement.sql_limit.to_i + @@lt_t=if @counter_txt_ocn==dbi_statement.sql_match_limit.to_i over_limit='over the limit set of' over_this_number='more than' true @@ -904,13 +944,11 @@ module SiSU_CGI_sql #metadata_found_endnotes if @text_search_flag if e['metadata_tid'].to_i != oldtid.to_i - ti=if e['subtitle'] =~/\S+/; "#{e['title']} - #{e['subtitle']}" - else e['title'] - end + ti=e['title'] can_txt_srch=if cgi['view']=~/index/; %{search } else %{search } end - title=%{

toc html #{ti} by #{e['creator']} #{can_txt_srch}toc html epub pdf portrait pdf landscape odf manifest
} if file_suffix=~/s/ + title=%{

toc html #{ti} by #{e['creator_author']} #{can_txt_srch}toc html epub pdf portrait pdf landscape odf manifest
} if file_suffix=~/s/ @counter_endn_doc+=1 oldtid=e['metadata_tid'].to_i else title = '' @@ -929,7 +967,7 @@ module SiSU_CGI_sql end @counters_endn=if @counter_endn_doc > 0 if checked_stats =~/\S/ - @@lt_e=if @counter_endn_ocn==dbi_statement.sql_limit.to_i + @@lt_e=if @counter_endn_ocn==dbi_statement.sql_match_limit.to_i over_limit='over the limit set of' over_this_number='more than' true @@ -946,14 +984,14 @@ module SiSU_CGI_sql end end offset=dbi_statement.sql_offset.to_s - limit=dbi_statement.sql_limit.to_s + limit=dbi_statement.sql_match_limit.to_s @@lt_t ||=false; @@lt_e ||=false canned=if (@@lt_t or @@lt_e) dbi_statement.pre_next(true,@image_src).to_s else dbi_statement.pre_next(false,@image_src).to_s end - limit=dbi_statement.sql_limit.to_s + limit=dbi_statement.sql_match_limit.to_s cgi.out{@header + @counters_txt + @counters_endn + @body_main.join + @endnotes.join + canned + @tail} #% print cgi_output_header+counters+body+endnotes end rescue Exception => e diff --git a/lib/sisu/v2/cgi_sqlite.rb b/lib/sisu/v2/cgi_sqlite.rb index 7af99b9d..947cc80a 100644 --- a/lib/sisu/v2/cgi_sqlite.rb +++ b/lib/sisu/v2/cgi_sqlite.rb @@ -167,7 +167,7 @@ module SiSU_CGI_sqlite @search_text,@search_endnotes=[],[] search[:text].each{|x| @search_text << "#{x} AND " } @search_text=@search_text.join.gsub!(/AND\s+$/,'') - @search_text.gsub!(/(documents\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\)\s+(?:(?:AND|OR)\s+documents\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\))+)/,'(\1)') + @search_text.gsub!(/(doc_objects\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\)\s+(?:(?:AND|OR)\s+doc_objects\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\))+)/,'(\1)') search[:endnotes].each{|x| @search_endnotes << "#{x} AND " } @search_endnotes=@search_endnotes.join.gsub!(/AND\s+$/,'') @search_text.gsub!(/(endnotes\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\)\s+(?:(?:AND|OR)\s+endnotes\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\))+)/,'(\1)') @@ -179,7 +179,7 @@ module SiSU_CGI_sqlite def sql_select_body limit ||=@@limit offset ||=@@offset - @sql_statement[:body]=%{SELECT metadata.title, metadata.subtitle, metadata.creator, metadata.filename, metadata.suffix, documents.body, documents.seg, documents.ocn, metadata.tid FROM documents, metadata WHERE #{@search_text} AND documents.metadata_tid = metadata.tid ORDER BY metadata.title, metadata.filename, documents.ocn} + @sql_statement[:body]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.filename, metadata_and_text.notes_suffix, doc_objects.body, doc_objects.seg, doc_objects.ocn, metadata_and_text.tid FROM doc_objects, metadata_and_text WHERE #{@search_text} AND doc_objects.metadata_tid = metadata_and_text.tid ORDER BY metadata_and_text.title, metadata_and_text.filename, doc_objects.ocn} @sql_statement[:range]=%{LIMIT #{limit} OFFSET #{offset} ;} select=@sql_statement[:body] + ' ' + @sql_statement[:range] select @@ -187,7 +187,7 @@ module SiSU_CGI_sqlite def sql_select_endnotes limit ||=@@limit offset ||=@@offset - @sql_statement[:endnotes]= %{SELECT metadata.title, metadata.subtitle, metadata.creator, metadata.filename, endnotes.body, endnotes.nr, endnotes.ocn, endnotes.metadata_tid FROM metadata, endnotes WHERE #{@search_endnotes} AND metadata.tid = endnotes.metadata_tid ORDER BY metadata.title, metadata.filename, endnotes.nr} + @sql_statement[:endnotes]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.filename, endnotes.body, endnotes.nr, endnotes.ocn, endnotes.metadata_tid FROM metadata_and_text, endnotes WHERE #{@search_endnotes} AND metadata_and_text.tid = endnotes.metadata_tid ORDER BY metadata_and_text.title, metadata_and_text.filename, endnotes.nr} @sql_statement[:range]=%{LIMIT #{limit} OFFSET #{offset} ;} select=@sql_statement[:endnotes] + ' ' + @sql_statement[:range] select -- cgit v1.2.3