aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Ralph Amissah <ralph@amissah.com> 2010-04-16 10:12:46 -0400
committer: Ralph Amissah <ralph@amissah.com> 2010-04-16 10:15:13 -0400
commit: aa47d1db8596aa65746db05d369441d1def62aa4 (patch)
tree: c1f1ab145389abd7e30b083f05e6adbcfcfc4dbe
parent: db, shared_html_lite, link back to footnote/endnote reference, fix (diff)
db: sql table and column structure changes, name prefix "sisu_v2a_", resulting in sisu version bump to 2.1.0, plus other lesser fixes
[Note: it is necessary to create a new database and tables, and to populate them]
* db (sql): database table name and column structure changes; new pgsql db name prefix "sisu_v2a_" (version bump); continue to review (db_columns, db_create, db_import, db_sqltxt)
* db remove and update, fix: match filename for removal with = (not LIKE or ~)
* db sqlite: issue with --recreate, bugfix (db_drop)
-rw-r--r--  lib/sisu/v2/constants.rb      18
-rw-r--r--  lib/sisu/v2/db_columns.rb     2072
-rw-r--r--  lib/sisu/v2/db_create.rb      463
-rw-r--r--  lib/sisu/v2/db_drop.rb        80
-rw-r--r--  lib/sisu/v2/db_import.rb      321
-rw-r--r--  lib/sisu/v2/db_indexes.rb     24
-rw-r--r--  lib/sisu/v2/db_load_tuple.rb  176
-rw-r--r--  lib/sisu/v2/db_remove.rb      10
-rw-r--r--  lib/sisu/v2/db_select.rb      18
-rw-r--r--  lib/sisu/v2/db_sqltxt.rb      115
10 files changed, 2581 insertions, 716 deletions
diff --git a/lib/sisu/v2/constants.rb b/lib/sisu/v2/constants.rb
index 9abe9c0b..3fcb1e3a 100644
--- a/lib/sisu/v2/constants.rb
+++ b/lib/sisu/v2/constants.rb
@@ -111,7 +111,6 @@ Rx[:meta]=/#{Mx[:meta_o]}(\S+?)#{Mx[:meta_c]}/
Dx[:url_o]='‹'; Dx[:url_c]='›'
Dx[:url_o_xml]='&lt;'; Dx[:url_c_xml]='&gt;'
Dx[:rel_o]='‹'; Dx[:rel_c]='›'
-Db[:name_prefix]="SiSU#{SiSU_version_dir}_"
Tex[:backslash]="\\\\"
Tex[:backslash]="\\\\"
Tex[:tilde]='\\\\\\~'
@@ -132,6 +131,23 @@ Px[:lv4]= '-'
Px[:lv5]= '.'
Px[:lv6]= '.'
#Px[:lv5_6]= '.'
+Db[:name_prefix]="SiSU#{SiSU_version_dir}a_"
+Db[:name_prefix_db]="sisu_#{SiSU_version_dir}a_"
+Db[:col_title]=800
+Db[:col_title_part]=400
+Db[:col_title_edition]=10
+Db[:col_name]=600
+Db[:col_creator_misc_short]=100
+Db[:col_language]=100
+Db[:col_language_char]=3
+Db[:col_date_text]=10
+Db[:col_classify_txt_long]=600
+Db[:col_classify_txt_short]=600
+Db[:col_classify_short]=200
+Db[:col_classify_identify]=256
+Db[:col_classify_library]=30
+Db[:col_classify_small]=16
+Db[:col_filename]=256
__END__
consider:
〔comment〕
diff --git a/lib/sisu/v2/db_columns.rb b/lib/sisu/v2/db_columns.rb
index 1849a442..ee66c59e 100644
--- a/lib/sisu/v2/db_columns.rb
+++ b/lib/sisu/v2/db_columns.rb
@@ -58,154 +58,1934 @@
=end
module SiSU_DB_columns
- class Column_size
- def lt_title
- 600
- end
- def lt_subtitle
- 600
- end
- def lt_author
- 600
- end
- def lt_author_title
- 100
- end
- def lt_author_nationality
- 100
- end
- def lt_illustrator
- 600
- end
- def lt_translator
- 600
- end
- def lt_prepared_by
- 600
- end
- def lt_digitized_by
- 600
- end
- def lt_subject
- 600
- end
- def lt_date
- 10
- end
- def lt_type
- 600
- end
- def lt_description
- 2000
- end
- def lt_publisher
- 600
- end
- def lt_contributor
- 600
- end
- def lt_format
- 600
- end
- def lt_identifier
- 256
- end
- def lt_source
- 200
- end
- def lt_language
- 30
- end
- def lt_language_char
- 3
- end
- def lt_language_original
- 30
- end
- def lt_language_original_char
- 3
- end
- def lt_relation
- 100
- end
- def lt_coverage
- 100
- end
- def lt_rights
- 2000
- end
- def lt_copyright
- 2000
- end
- def lt_owner
- 600
- end
- def lt_keywords
- 600
- end
- def lt_comment
- 600
- end
- def lt_loc
- 30
- end
- def lt_dewey
- 30
- end
- def lt_isbn
- 16
- end
- def lt_pg
- 16
- end
- def lt_abstract
- 600
- end
- def lt_skin
- 100
+ require "#{SiSU_lib}/sysenv" # sysenv.rb
+ require "#{SiSU_lib}/db_sqltxt" # db_sqltxt.rb
+ class Columns < SiSU_DB_text::Prepare
+ def initialize(md=nil)
+ @md=md
+ @db=SiSU_Env::Info_db.new #watch
+ if defined? md.mod \
+ and md.mod.inspect=~/import|update/ \
+ and FileTest.exist?(md.fns)
+ txt_arr=IO.readlines(md.fns,'')
+ src=txt_arr.join("\n")
+ if @db.share_source?
+ @sisutxt=special_character_escape(src)
+ else @sisutxt=''
+ end
+ @fulltext=clean_searchable_text(txt_arr)
+ else @sisutxt,@fulltext='',''
+ end
end
- def lt_markup
- 100
- end
- def lt_links
- 100
- end
- def lt_information
- 100
- end
- def lt_contact
- 100
- end
- def lt_suffix
- 600
- end
- def lt_filename
- 256
- end
- def lt_types
- 1
- end
- def lt_subj
- 64
- end
- def lt_orig_pub
- 400
- end
- def lt_orig_pub_date
- 400
- end
- def lt_orig_pub_institution
- 200
- end
- def lt_orig_pub_nationality
- 200
- end
- def lt_writing_focus_nationality
- 100
- end
- def lt_topic_register
- 2000
+#% structures
+ #def column_define
+ # def varchar(name,size)
+ # "#{name} VARCHAR(#{size}) NULL,"
+ # end
+ #end
+=begin
+#% title
+@title:
+ :subtitle:
+ :short:
+ :edition:
+ :language:
+ :note:
+=end
+ def column
+ def title # DublinCore 1 - title
+ def name
+ 'title'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_title]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata full document title [DC1]';}
+ end
+ def tuple
+ t=if defined? @md.title.full \
+ and @md.title.full=~/\S+/
+ txt=@md.title.full
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def title_main
+ def name
+ 'title_main'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_title_part]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata main document title';}
+ end
+ def tuple
+ t=if defined? @md.title.main \
+ and @md.title.main=~/\S+/
+ txt=@md.title.main
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def title_sub
+ def name
+ 'title_sub'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_title_part]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document subtitle';}
+ end
+ def tuple
+ t=if defined? @md.title.sub \
+ and @md.title.sub=~/\S+/
+ txt=@md.title.sub
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def title_short
+ def name
+ 'title_short'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_title_part]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document short title if any';}
+ end
+ def tuple
+ t=if defined? @md.title.short \
+ and @md.title.short=~/\S+/
+ txt=@md.title.short
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def title_edition
+ def name
+ 'title_edition'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_title_edition]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document edition (version)';}
+ end
+ def tuple
+ t=if defined? @md.title.edition \
+ and @md.title.edition=~/\S+/
+ txt=@md.title.edition
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def title_note
+ def name
+ 'title_note'
+ end
+ def create_column
+ "#{name} TEXT NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document notes associated with title';}
+ end
+ def tuple
+ t=if defined? @md.title.note \
+ and @md.title.note=~/\S+/
+ txt=@md.title.note
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def title_language
+ def name
+ 'title_language'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_language]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document language [DC12]';}
+ end
+ def tuple
+ t=if defined? @md.title.language \
+ and @md.title.language=~/\S+/
+ txt=@md.title.language
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def title_language_char # consider
+ def name
+ 'title_language_char'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_language_char]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document language iso code';}
+ end
+ def tuple
+ t=if defined? @md.title.language_char \
+ and @md.title.language_char=~/\S+/
+ txt=@md.title.language_char
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+=begin
+#% creator
+@creator:
+ :author:
+ :contributor:
+ :illustrator:
+ :photographer:
+ :translator:
+ :prepared_by:
+ :digitized_by:
+ :audio:
+ :video:
+=end
+ def creator_author # DublinCore 2 - creator/author (author)
+ def name
+ 'creator_author'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_name]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document author (creator) [DC2]';}
+ end
+ def tuple
+ t=if defined? @md.creator.author_detail \
+ and @md.creator.author_detail.class==Array \
+ and @md.creator.author_detail.length > 0
+ txt=''
+ @md.creator.author_detail.each do |h|
+ txt=txt + %{#{h[:the]}, #{h[:others]}; }
+ end
+ txt.gsub!(/[;, ]+\s*$/,'')
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def creator_author_honorific # consider
+ def name
+ 'creator_author_hon'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_creator_misc_short]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document author honorific (title e.g, Ms. Dr. Prof.)';}
+ end
+ def tuple
+ t=if defined? @md.creator.author_hon \
+ and @md.creator.author_hon=~/\S+/
+ txt=@md.creator.author_hon
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def creator_author_nationality # consider
+ def name
+ 'creator_author_nationality'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_creator_misc_short]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata nationality of document author (creator)';}
+ end
+ def tuple
+ t=if defined? @md.creator.author_nationality_detail \
+ and @md.creator.author_nationality=~/\S+/
+ txt=@md.creator.author_nationality_detail
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def creator_contributor # DublinCore 6 - contributor
+ def name
+ 'creator_contributor'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_name]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document contributor name(s) [DC6]';}
+ end
+ def tuple
+ t=if defined? @md.creator.contributor_detail \
+ and @md.creator.contributor_detail.class==Array \
+ and @md.creator.contributor_detail.length > 0
+ txt=@md.creator.contributor_detail #dc
+ txt=''
+ @md.creator.contributor_detail.each do |h|
+ txt=txt + %{#{h[:the]}, #{h[:others]}; }
+ end
+ txt.gsub!(/[;, ]+\s*$/,'')
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def creator_illustrator
+ def name
+ 'creator_illustrator'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_name]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document illustrator name(s)';}
+ end
+ def tuple
+ t=if defined? @md.creator.illustrator_detail \
+ and @md.creator.illustrator_detail.class==Array \
+ and @md.creator.illustrator_detail.length > 0
+ txt=@md.creator.illustrator_detail
+ txt=''
+ @md.creator.illustrator_detail.each do |h|
+ txt=txt + %{#{h[:the]}, #{h[:others]}; }
+ end
+ txt.gsub!(/[;, ]+\s*$/,'')
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def creator_photographer
+ def name
+ 'creator_photographer'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_name]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document photographer name(s)';}
+ end
+ def tuple
+ t=if defined? @md.creator.photographer_detail \
+ and @md.creator.photographer_detail.class==Array \
+ and @md.creator.photographer_detail.length > 0
+ txt=@md.creator.photographer_detail
+ txt=''
+ @md.creator.photographer_detail.each do |h|
+ txt=txt + %{#{h[:the]}, #{h[:others]}; }
+ end
+ txt.gsub!(/[;, ]+\s*$/,'')
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def creator_translator
+ def name
+ 'creator_translator'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_name]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document translator name(s)';}
+ end
+ def tuple
+ t=if defined? @md.creator.translator_detail \
+ and @md.creator.translator_detail.class==Array \
+ and @md.creator.translator_detail.length > 0
+ txt=''
+ @md.creator.translator_detail.each do |h|
+ txt=txt + %{#{h[:the]}, #{h[:others]}; }
+ end
+ txt.gsub!(/[;, ]+\s*$/,'')
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def creator_prepared_by
+ def name
+ 'creator_prepared_by'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_name]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document prepared by name(s)';}
+ end
+ def tuple
+ t=if defined? @md.creator.prepared_by_detail \
+ and @md.creator.prepared_by_detail.class==Array \
+ and @md.creator.prepared_by_detail.length > 0
+ txt=@md.creator.prepared_by_detail
+ txt=''
+ @md.creator.prepared_by_detail.each do |h|
+ txt=txt + %{#{h[:the]}, #{h[:others]}; }
+ end
+ txt.gsub!(/[;, ]+\s*$/,'')
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def creator_digitized_by
+ def name
+ 'creator_digitized_by'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_name]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document digitized by name(s)';}
+ end
+ def tuple
+ t=if defined? @md.creator.digitized_by_detail \
+ and @md.creator.digitized_by_detail.class==Array \
+ and @md.creator.digitized_by_detail.length > 0
+ txt=@md.creator.digitized_by_detail
+ txt=''
+ @md.creator.digitized_by_detail.each do |h|
+ txt=txt + %{#{h[:the]}, #{h[:others]}; }
+ end
+ txt.gsub!(/[;, ]+\s*$/,'')
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def creator_audio
+ def name
+ 'creator_audio'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_name]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document audio by name(s)';}
+ end
+ def tuple
+ t=if defined? @md.creator.audio_detail \
+ and @md.creator.audio_detail.class==Array \
+ and @md.creator.audio_detail.length > 0
+ txt=@md.creator.audio_detail
+ txt=''
+ @md.creator.audio_detail.each do |h|
+ txt=txt + %{#{h[:the]}, #{h[:others]}; }
+ end
+ txt.gsub!(/[;, ]+\s*$/,'')
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def creator_video
+ def name
+ 'creator_video'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_name]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document video by name(s)';}
+ end
+ def tuple
+ t=if defined? @md.creator.video_detail \
+ and @md.creator.video_detail.class==Array \
+ and @md.creator.video_detail.length > 0
+ txt=''
+ @md.creator.video_detail.each do |h|
+ txt=txt + %{#{h[:the]}, #{h[:others]}; }
+ end
+ txt.gsub!(/[;, ]+\s*$/,'')
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+=begin
+#% language
+#taken from other fields
+@title:
+ :language:
+@original:
+ :language:
+#not available -->
+#@language:
+# :document:
+# :original:
+=end
+ def language_document
+ def name
+ 'language_document'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_language]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document language';}
+ end
+ def tuple
+ t=if defined? @md.language.document \
+ and @md.language.document=~/\S+/
+ txt=@md.language.document
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def language_document_char
+ def name
+ 'language_document_char'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_language_char]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document language';}
+ end
+ def tuple
+ t=if defined? @md.language.document_char \
+ and @md.language.document_char=~/\S+/
+ txt=@md.language.document_char
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def language_original
+ def name
+ 'language_original'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_language]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata original document/text language';}
+ end
+ def tuple
+ t=if defined? @md.language.original \
+ and @md.language.original=~/\S+/
+ txt=@md.language.original
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def language_original_char
+ def name
+ 'language_original_char'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_language_char]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document language';}
+ end
+ def tuple
+ t=if defined? @md.language.original_char \
+ and @md.language.original_char=~/\S+/
+ txt=@md.language.original_char
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+=begin
+#% date
+@date:
+ :added_to_site:
+ :available:
+ :created:
+ :issued:
+ :modified:
+ :published:
+ :valid:
+ :translated:
+ :original_publication:
+=end
+ def date_added_to_site
+ def name
+ 'date_added_to_site'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_date_text]}) NULL,"
+ #"#{name} DATE,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata date added to site';}
+ end
+ def tuple
+ t=if defined? @md.date.added_to_site \
+ and @md.date.added_to_site=~/\S+/
+ txt=@md.date.added_to_site
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def date_available
+ def name
+ 'date_available'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_date_text]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata date added to site [DC]';}
+ end
+ def tuple
+ t=if defined? @md.date.available \
+ and @md.date.available=~/\S+/
+ txt=@md.date.available
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def date_created
+ def name
+ 'date_created'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_date_text]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata date created [DC]';}
+ end
+ def tuple
+ t=if defined? @md.date.created \
+ and @md.date.created=~/\S+/
+ txt=@md.date.created
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def date_issued
+ def name
+ 'date_issued'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_date_text]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata date issued [DC]';}
+ end
+ def tuple
+ t=if defined? @md.date.issued \
+ and @md.date.issued=~/\S+/
+ txt=@md.date.issued
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def date_modified
+ def name
+ 'date_modified'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_date_text]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata date modified [DC]';}
+ end
+ def tuple
+ t=if defined? @md.date.modified \
+ and @md.date.modified=~/\S+/
+ txt=@md.date.modified
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def date_published
+ def name
+ 'date_published'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_date_text]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata date published [DC7]';}
+ end
+ def tuple
+ t=if defined? @md.date.published \
+ and @md.date.published=~/\S+/
+ txt=@md.date.published
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def date_valid
+ def name
+ 'date_valid'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_date_text]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata date valid [DC]';}
+ end
+ def tuple
+ t=if defined? @md.date.valid \
+ and @md.date.valid=~/\S+/
+ txt=@md.date.valid
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def date_translated
+ def name
+ 'date_translated'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_date_text]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata date translated';}
+ end
+ def tuple
+ t=if defined? @md.date.translated \
+ and @md.date.translated=~/\S+/
+ txt=@md.date.translated
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def date_original_publication
+ def name
+ 'date_original_publication'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_date_text]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata date of original publication';}
+ end
+ def tuple
+ t=if defined? @md.date.original_publication \
+ and @md.date.original_publication=~/\S+/
+ txt=@md.date.original_publication
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def date_generated
+ def name
+ 'date_generated'
+ end
+ def create_column #choose other representation of time
+ "#{name} VARCHAR(30) NULL,"
+ #"#{name} VARCHAR(10) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata date of sisu generation of document, automatically populated';}
+ end
+ def tuple #choose other representation of time
+ t=if defined? @md.generated \
+ and @md.generated.to_s=~/\S+/
+ txt=@md.generated.to_s
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+=begin
+#% publisher
+@publisher:
+=end
+ def publisher
+ def name
+ 'publisher'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_name]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document publisher [DC5]';}
+ end
+ def tuple
+ t=if defined? @md.publisher \
+ and @md.publisher=~/\S+/
+ txt=@md.publisher
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+##% current
+# def current_publisher
+# def name
+# 'current_publisher'
+# end
+# def size
+# 10
+# end
+# def create_column
+# "#{name} VARCHAR(#{current_publisher.size}) NULL,"
+# end
+# def tuple
+# t=if defined? @md.current.publisher \
+# and @md.current.publisher=~/\S+/
+# txt=@md.current.publisher
+# special_character_escape(txt)
+# "'#{txt}', "
+# end
+# end
+# self
+# end
+=begin
+#% original
+@original:
+ :publisher:
+ #:date: #repeated under date
+ :language:
+ :institution:
+ :nationality:
+ :source:
+=end
+ def original_publisher
+ def name
+ 'original_publisher'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_name]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document original publisher [DC5]';}
+ end
+ def tuple
+ t=if defined? @md.original.publisher \
+ and @md.original.publisher=~/\S+/
+ txt=@md.original.publisher
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def original_language
+ def name
+ 'original_language'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_language]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document original language';}
+ end
+ def tuple
+ t=if defined? @md.original.language \
+ and @md.original.language=~/\S+/
+ txt=@md.original.language
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def original_language_char # consider
+ def name
+ 'original_language_char'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_language_char]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document original language iso character';}
+ end
+ def tuple
+ t=if defined? @md.original.language_char \
+ and @md.original.language_char=~/\S+/
+ txt=@md.original.language_char
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def original_source
+ def name
+ 'original_source'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_name]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document original source [DC11]';}
+ end
+ def tuple
+ t=if defined? @md.original.source \
+ and @md.original.source=~/\S+/
+ txt=@md.original.source
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def original_institution
+ def name
+ 'original_institution'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_name]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document original institution';}
+ end
+ def tuple
+ t=if defined? @md.original.institution \
+ and @md.original.institution=~/\S+/
+ txt=@md.original.institution
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def original_nationality
+ def name
+ 'original_nationality'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_language]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document original nationality';}
+ end
+ def tuple
+ t=if defined? @md.original.nationality \
+ and @md.original.nationality=~/\S+/
+ txt=@md.original.nationality
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+=begin
+#% rights
+@rights:
+ #:copyright: #mapped to :text: used where no other copyrights and included in :all:
+ :text:
+ :translation:
+ :illustrations:
+ :photographs:
+ :preparation:
+ :digitization:
+ :audio:
+ :video:
+ :license:
+ :all:
+=end
+ def rights_all
+ def name
+ 'rights'
+ end
+ def create_column
+ "#{name} TEXT NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata rights associated with document (composite) [DC15]';}
+ end
+ def tuple
+ t=if defined? @md.rights.all \
+ and @md.rights.all=~/\S+/
+ txt=@md.rights.all
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def rights_copyright_text
+ def name
+ 'rights_copyright_text'
+ end
+ def create_column
+ "#{name} TEXT NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata copyright associated for document text';}
+ end
+ def tuple
+ t=if defined? @md.rights.copyright_text \
+ and @md.rights.copyright_text=~/\S+/
+ txt=@md.rights.copyright_text
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def rights_copyright_translation
+ def name
+ 'rights_copyright_translation'
+ end
+ def create_column
+ "#{name} TEXT NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata copyright associated for document text translation (if any)';}
+ end
+ def tuple
+ t=if defined? @md.rights.copyright_translation \
+ and @md.rights.copyright_translation=~/\S+/
+ txt=@md.rights.copyright_translation
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def rights_copyright_illustrations
+ def name
+ 'rights_copyright_illustrations'
+ end
+ def create_column
+ "#{name} TEXT NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata copyright associated for document text illustrations (if any)';}
+ end
+ def tuple
+ t=if defined? @md.rights.copyright_illustrations \
+ and @md.rights.copyright_illustrations=~/\S+/
+ txt=@md.rights.copyright_illustrations
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def rights_copyright_photographs
+ def name
+ 'rights_copyright_photographs'
+ end
+ def create_column
+ "#{name} TEXT NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata copyright associated for document text photographs (if any)';}
+ end
+ def tuple
+ t=if defined? @md.rights.copyright_photographs \
+ and @md.rights.copyright_photographs=~/\S+/
+ txt=@md.rights.copyright_photographs
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def rights_copyright_preparation
+ def name
+ 'rights_copyright_preparation'
+ end
+ def create_column
+ "#{name} TEXT NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata copyright associated for document text preparation (if any)';}
+ end
+ def tuple
+ t=if defined? @md.rights.copyright_preparation \
+ and @md.rights.copyright_preparation=~/\S+/
+ txt=@md.rights.copyright_preparation
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def rights_copyright_digitization
+ def name
+ 'rights_copyright_digitization'
+ end
+ def create_column
+ "#{name} TEXT NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata copyright associated for document text digitization (if any)';}
+ end
+ def tuple
+ t=if defined? @md.rights.copyright_digitization \
+ and @md.rights.copyright_digitization=~/\S+/
+ txt=@md.rights.copyright_digitization
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def rights_copyright_audio
+ def name
+ 'rights_copyright_audio'
+ end
+ def create_column
+ "#{name} TEXT NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata copyright associated for document text audio (if any)';}
+ end
+ def tuple
+ t=if defined? @md.rights.copyright_audio \
+ and @md.rights.copyright_audio=~/\S+/
+ txt=@md.rights.copyright_audio
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def rights_copyright_video
+ def name
+ 'rights_copyright_video'
+ end
+ def create_column
+ "#{name} TEXT NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata copyright associated for document text video (if any)';}
+ end
+ def tuple
+ t=if defined? @md.rights.copyright_video \
+ and @md.rights.copyright_video=~/\S+/
+ txt=@md.rights.copyright_video
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def rights_license
+ def name
+ 'rights_license'
+ end
+ def create_column
+ "#{name} TEXT NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata license granted for use of document if any)';}
+ end
+ def tuple
+ t=if defined? @md.rights.license \
+ and @md.rights.license=~/\S+/
+ txt=@md.rights.license
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+=begin
+#% classify
+@classify:
+ :topic_register:
+ :coverage:
+ :format:
+ :identifier:
+ :keywords:
+ :relation:
+ :subject:
+ :type:
+ :loc:
+ :dewey:
+ :pg:
+ :isbn:
+=end
+ # Column `classify_topic_register`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.classify_topic_register.create_column).
+ def classify_topic_register
+   def name; 'classify_topic_register' end
+   # Column definition fragment (trailing comma included); width comes from Db.
+   def create_column
+     width=Db[:col_classify_txt_long]
+     "#{name} VARCHAR(#{width}) NULL,"
+     #"#{name} TEXT NULL,"
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata classify document topic register (semi-structured document subject information)';}
+   end
+   # Returns [name fragment, quoted value fragment]; two empty strings when
+   # the metadata field is absent or blank.
+   def tuple
+     has_value=defined?(@md.classify.topic_register) && @md.classify.topic_register=~/\S+/
+     return ['',''] unless has_value
+     val=@md.classify.topic_register
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+ # Column `classify_subject`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.classify_subject.create_column).
+ def classify_subject
+   def name; 'classify_subject' end
+   # Column definition fragment (trailing comma included); width comes from Db.
+   def create_column
+     width=Db[:col_classify_txt_short]
+     "#{name} VARCHAR(#{width}) NULL,"
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata classify document subject matter [DC3]';}
+   end
+   # Returns [name fragment, quoted value fragment]; two empty strings when
+   # the metadata field is absent or blank.
+   def tuple
+     has_value=defined?(@md.classify.subject) && @md.classify.subject=~/\S+/
+     return ['',''] unless has_value
+     val=@md.classify.subject
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+ # Column `classify_type`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.classify_type.create_column).
+ def classify_type #check
+   def name; 'classify_type' end
+   # Column definition fragment (trailing comma included); width comes from Db.
+   def create_column
+     width=Db[:col_classify_txt_short]
+     "#{name} VARCHAR(#{width}) NULL,"
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata classify document type [DC8]';}
+   end
+   # Returns [name fragment, quoted value fragment]; two empty strings when
+   # the metadata field is absent or blank.
+   def tuple
+     has_value=defined?(@md.classify.type) && @md.classify.type=~/\S+/
+     return ['',''] unless has_value
+     val=@md.classify.type
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+ # Column `classify_loc`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.classify_loc.create_column).
+ def classify_loc
+   def name; 'classify_loc' end
+   # Column definition fragment (trailing comma included); width comes from Db.
+   def create_column
+     width=Db[:col_classify_library]
+     "#{name} VARCHAR(#{width}) NULL,"
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata classify document library of congress (if available)';}
+   end
+   # Returns [name fragment, quoted value fragment]; two empty strings when
+   # the metadata field is absent or blank.
+   def tuple
+     has_value=defined?(@md.classify.loc) && @md.classify.loc=~/\S+/
+     return ['',''] unless has_value
+     val=@md.classify.loc
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+ # Column `classify_dewey`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.classify_dewey.create_column).
+ def classify_dewey
+   def name; 'classify_dewey' end
+   # Column definition fragment (trailing comma included); width comes from Db.
+   def create_column
+     width=Db[:col_classify_library]
+     "#{name} VARCHAR(#{width}) NULL,"
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata classify document dewey (if available)';}
+   end
+   # Returns [name fragment, quoted value fragment]; two empty strings when
+   # the metadata field is absent or blank.
+   def tuple
+     has_value=defined?(@md.classify.dewey) && @md.classify.dewey=~/\S+/
+     return ['',''] unless has_value
+     val=@md.classify.dewey
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+ # Column `classify_pg`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.classify_pg.create_column).
+ def classify_pg
+   def name; 'classify_pg' end
+   # Column definition fragment (trailing comma included); width comes from Db.
+   def create_column
+     width=Db[:col_classify_small]
+     "#{name} VARCHAR(#{width}) NULL,"
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata classify document project gutenberg (if any)';}
+   end
+   # Returns [name fragment, quoted value fragment]; two empty strings when
+   # the metadata field is absent or blank.
+   def tuple
+     has_value=defined?(@md.classify.pg) && @md.classify.pg=~/\S+/
+     return ['',''] unless has_value
+     val=@md.classify.pg
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+ # Column `classify_isbn`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.classify_isbn.create_column).
+ def classify_isbn
+   def name; 'classify_isbn' end
+   # Column definition fragment (trailing comma included); width comes from Db.
+   def create_column
+     width=Db[:col_classify_small]
+     "#{name} VARCHAR(#{width}) NULL,"
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata classify document isbn (if any)';}
+   end
+   # Returns [name fragment, quoted value fragment]; two empty strings when
+   # the metadata field is absent or blank.
+   def tuple
+     has_value=defined?(@md.classify.isbn) && @md.classify.isbn=~/\S+/
+     return ['',''] unless has_value
+     val=@md.classify.isbn
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+ # Column `classify_format`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.classify_format.create_column).
+ def classify_format
+   def name; 'classify_format' end
+   # Column definition fragment (trailing comma included); width comes from Db.
+   def create_column
+     width=Db[:col_classify_txt_short]
+     "#{name} VARCHAR(#{width}) NULL,"
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata classify document format [DC9]';}
+   end
+   # Returns [name fragment, quoted value fragment]; two empty strings when
+   # the metadata field is absent or blank.
+   def tuple
+     has_value=defined?(@md.classify.format) && @md.classify.format=~/\S+/
+     return ['',''] unless has_value
+     val=@md.classify.format
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+ # Column `classify_identifier`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.classify_identifier.create_column).
+ def classify_identifier
+   def name; 'classify_identifier' end
+   # Column definition fragment (trailing comma included); width comes from Db.
+   def create_column
+     width=Db[:col_classify_identify]
+     "#{name} VARCHAR(#{width}) NULL,"
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata classify document identifier [DC10]';}
+   end
+   # Returns [name fragment, quoted value fragment]; two empty strings when
+   # the metadata field is absent or blank.
+   def tuple
+     has_value=defined?(@md.classify.identifier) && @md.classify.identifier=~/\S+/
+     return ['',''] unless has_value
+     val=@md.classify.identifier
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+ # Column `classify_relation`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.classify_relation.create_column).
+ def classify_relation
+   def name; 'classify_relation' end
+   # Column definition fragment (trailing comma included); width comes from Db.
+   def create_column
+     width=Db[:col_classify_short]
+     "#{name} VARCHAR(#{width}) NULL,"
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata classify document relation [DC13]';}
+   end
+   # Returns [name fragment, quoted value fragment]; two empty strings when
+   # the metadata field is absent or blank.
+   def tuple
+     has_value=defined?(@md.classify.relation) && @md.classify.relation=~/\S+/
+     return ['',''] unless has_value
+     val=@md.classify.relation
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+ # Column `classify_coverage`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.classify_coverage.create_column).
+ def classify_coverage
+   def name; 'classify_coverage' end
+   # Column definition fragment (trailing comma included); width comes from Db.
+   def create_column
+     width=Db[:col_classify_short]
+     "#{name} VARCHAR(#{width}) NULL,"
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata classify document coverage [DC14]';}
+   end
+   # Returns [name fragment, quoted value fragment]; two empty strings when
+   # the metadata field is absent or blank.
+   def tuple
+     has_value=defined?(@md.classify.coverage) && @md.classify.coverage=~/\S+/
+     return ['',''] unless has_value
+     val=@md.classify.coverage
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+ # Column `classify_keywords`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.classify_keywords.create_column).
+ def classify_keywords
+   def name; 'classify_keywords' end
+   # Column definition fragment (trailing comma included); width comes from Db.
+   def create_column
+     width=Db[:col_classify_txt_short]
+     "#{name} VARCHAR(#{width}) NULL,"
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata classify document keywords';}
+   end
+   # Returns [name fragment, quoted value fragment]; two empty strings when
+   # the metadata field is absent or blank.
+   def tuple
+     has_value=defined?(@md.classify.keywords) && @md.classify.keywords=~/\S+/
+     return ['',''] unless has_value
+     val=@md.classify.keywords
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+=begin
+#% notes
+@notes:
+ :abstract:
+ :comment:
+ :description:
+ :history:
+ :prefix:
+ :prefix_a:
+ :prefix_b:
+ :suffix:
+=end
+ # Column `notes_abstract`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.notes_abstract.create_column).
+ def notes_abstract
+   def name; 'notes_abstract' end
+   # Column definition fragment (trailing comma included).
+   def create_column
+     name + ' TEXT NULL,'
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document notes abstract';}
+   end
+   # Returns [name fragment, quoted value fragment]; two empty strings when
+   # the metadata field is absent or blank.
+   def tuple
+     has_value=defined?(@md.notes.abstract) && @md.notes.abstract=~/\S+/
+     return ['',''] unless has_value
+     val=@md.notes.abstract
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+ # Column `notes_comment`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.notes_comment.create_column).
+ def notes_comment
+   def name; 'notes_comment' end
+   # Column definition fragment (trailing comma included).
+   def create_column
+     name + ' TEXT NULL,'
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document notes comment';}
+   end
+   # Returns [name fragment, quoted value fragment]; two empty strings when
+   # the metadata field is absent or blank.
+   def tuple
+     has_value=defined?(@md.notes.comment) && @md.notes.comment=~/\S+/
+     return ['',''] unless has_value
+     val=@md.notes.comment
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+ # Column `notes_description`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.notes_description.create_column).
+ def notes_description
+   def name; 'notes_description' end
+   # Column definition fragment (trailing comma included).
+   def create_column
+     name + ' TEXT NULL,'
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document notes description [DC4]';}
+   end
+   # Returns [name fragment, quoted value fragment]; two empty strings when
+   # the metadata field is absent or blank.
+   def tuple
+     has_value=defined?(@md.notes.description) && @md.notes.description=~/\S+/
+     return ['',''] unless has_value
+     val=@md.notes.description
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+ # Column `notes_history`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.notes_history.create_column).
+ def notes_history #check, consider removal
+   def name; 'notes_history' end
+   # Column definition fragment (trailing comma included); width comes from Db.
+   def create_column
+     width=Db[:col_classify_txt_short]
+     "#{name} VARCHAR(#{width}) NULL,"
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document notes history';}
+   end
+   # Returns [name fragment, quoted value fragment]; two empty strings when
+   # the metadata field is absent or blank.
+   def tuple
+     has_value=defined?(@md.notes.history) && @md.notes.history=~/\S+/
+     return ['',''] unless has_value
+     val=@md.notes.history
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+ # Column `notes_prefix`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.notes_prefix.create_column).
+ def notes_prefix
+   def name; 'notes_prefix' end
+   # Column definition fragment (trailing comma included).
+   def create_column
+     name + ' TEXT NULL,'
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document notes prefix';}
+   end
+   # Returns [name fragment, quoted value fragment]; two empty strings when
+   # the metadata field is absent or blank.
+   def tuple
+     has_value=defined?(@md.notes.prefix) && @md.notes.prefix=~/\S+/
+     return ['',''] unless has_value
+     val=@md.notes.prefix
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+ # Column `notes_prefix_a`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.notes_prefix_a.create_column).
+ def notes_prefix_a
+   def name; 'notes_prefix_a' end
+   # Column definition fragment (trailing comma included).
+   def create_column
+     name + ' TEXT NULL,'
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document notes prefix_a';}
+   end
+   # Returns [name fragment, quoted value fragment]; two empty strings when
+   # the metadata field is absent or blank.
+   def tuple
+     has_value=defined?(@md.notes.prefix_a) && @md.notes.prefix_a=~/\S+/
+     return ['',''] unless has_value
+     val=@md.notes.prefix_a
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+ # Column `notes_prefix_b`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.notes_prefix_b.create_column).
+ def notes_prefix_b
+   def name; 'notes_prefix_b' end
+   # Column definition fragment (trailing comma included).
+   def create_column
+     name + ' TEXT NULL,'
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document notes prefix_b';}
+   end
+   # Returns [name fragment, quoted value fragment]; two empty strings when
+   # the metadata field is absent or blank.
+   def tuple
+     has_value=defined?(@md.notes.prefix_b) && @md.notes.prefix_b=~/\S+/
+     return ['',''] unless has_value
+     val=@md.notes.prefix_b
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+ # Column `notes_suffix`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.notes_suffix.create_column).
+ def notes_suffix
+   def name; 'notes_suffix' end
+   # Column definition fragment (trailing comma included).
+   def create_column # keep text
+     name + ' TEXT NULL,'
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document notes suffix';}
+   end
+   # Returns [name fragment, quoted value fragment]; two empty strings when
+   # the metadata field is absent or blank.
+   def tuple
+     has_value=defined?(@md.notes.suffix) && @md.notes.suffix=~/\S+/
+     return ['',''] unless has_value
+     val=@md.notes.suffix
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+=begin
+#% misc
+@make:
+ :skin:
+@links:
+=end
+ # Column `filename`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.filename.create_column).
+ def filename
+   def name; 'filename' end
+   # Column definition fragment (trailing comma included); width comes from Db.
+   def create_column
+     width=Db[:col_filename]
+     "#{name} VARCHAR(#{width}) NULL,"
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document filename';}
+   end
+   # Returns [name fragment, quoted value fragment] built from @md.fns;
+   # two empty strings when it is absent or blank.
+   def tuple
+     has_value=defined?(@md.fns) && @md.fns=~/\S+/
+     return ['',''] unless has_value
+     val=@md.fns
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+ # Column `sisutxt`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.sisutxt.create_column).
+ def sisutxt # consider naming sisusrc
+   def name; 'sisutxt' end
+   # Column definition fragment (trailing comma included).
+   def create_column
+     name + ' TEXT NULL,'
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'sisu markup text (if shared)';}
+   end
+   # Returns [name fragment, quoted @sisutxt fragment] when @md.mod mentions
+   # import or update and the file @md.fns exists; otherwise two empty strings.
+   # NOTE(review): @sisutxt is interpolated without special_character_escape
+   # here - confirm it is escaped where it is assigned.
+   def tuple
+     wanted=@md.mod.inspect=~/import|update/ && FileTest.exist?(@md.fns)
+     return ['',''] unless wanted
+     ["#{name}, ","'#{@sisutxt}', "]
+   end
+   self
+ end
+ # Column `fulltext`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.fulltext.create_column).
+ def fulltext
+   def name; 'fulltext' end
+   # Column definition fragment (trailing comma included).
+   def create_column
+     name + ' TEXT NULL,'
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'document full text clean, searchable';}
+   end
+   # Returns [name fragment, quoted @fulltext fragment] when @md.mod mentions
+   # import or update and the file @md.fns exists; otherwise two empty strings.
+   # NOTE(review): @fulltext is interpolated without special_character_escape
+   # here - confirm it is escaped where it is assigned.
+   def tuple
+     wanted=@md.mod.inspect=~/import|update/ && FileTest.exist?(@md.fns)
+     return ['',''] unless wanted
+     ["#{name}, ","'#{@fulltext}', "]
+   end
+   self
+ end
+ # Column `word_count`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.word_count.create_column).
+ def word_count
+   def name; 'word_count' end
+   # Column definition fragment (trailing comma included).
+   def create_column
+     name + ' TEXT NULL,'
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'document word count';}
+   end
+   # Returns [name fragment, quoted value fragment] built from @md.wc_words;
+   # two empty strings when it is absent or blank.
+   # NOTE(review): the =~ test only matches if wc_words is a String - confirm
+   # it is not stored as an Integer.
+   def tuple
+     has_value=defined?(@md.wc_words) && @md.wc_words=~/\S+/
+     return ['',''] unless has_value
+     val=@md.wc_words
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+ # Column accessor `digest` (the column itself is named `dgst`): calling this
+ # (re)defines name/create_column/column_comment/tuple for the column and
+ # returns self, so calls chain (e.g. column.digest.create_column).
+ def digest
+   def name; 'dgst' end
+   # Column definition fragment (trailing comma included).
+   def create_column
+     name + ' TEXT NULL,'
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'document hash digest sha256 (or md5)';}
+   end
+   # Returns [name fragment, quoted value fragment] built from @md.dgst;
+   # two empty strings when it is absent or blank.
+   def tuple
+     has_value=defined?(@md.dgst) && @md.dgst=~/\S+/
+     return ['',''] unless has_value
+     val=@md.dgst
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+ # Column `skin_name`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.skin_name.create_column).
+ def skin_name #check
+   def name; 'skin_name' end
+   # Column definition fragment (trailing comma included); width comes from Db.
+   def create_column
+     width=Db[:col_filename]
+     "#{name} VARCHAR(#{width}) NULL,"
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document skin name';}
+   end
+   # Returns [name fragment, quoted value fragment]; two empty strings when
+   # the metadata field is absent or blank.
+   # NOTE(review): reads @md.notes.skin_name although the section header lists
+   # skin under @make - confirm this is the intended source.
+   def tuple
+     has_value=defined?(@md.notes.skin_name) && @md.notes.skin_name=~/\S+/
+     return ['',''] unless has_value
+     val=@md.notes.skin_name
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+ # Column `skin`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.skin.create_column).
+ def skin # you likely want a separate table for skins
+   def name; 'skin' end
+   # Column definition fragment (trailing comma included).
+   def create_column
+     name + ' TEXT NULL,'
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document skin';}
+   end
+   # Returns [name fragment, quoted value fragment] built from @md.skin;
+   # two empty strings when it is absent or blank.
+   def tuple
+     has_value=defined?(@md.skin) && @md.skin=~/\S+/
+     return ['',''] unless has_value
+     val=@md.skin
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+ # Column `links`: calling this (re)defines
+ # name/create_column/column_comment/tuple for the column and returns self,
+ # so calls chain (e.g. column.links.create_column).
+ def links
+   def name; 'links' end
+   # Column definition fragment (trailing comma included).
+   def create_column
+     name + ' TEXT NULL,'
+     #"#{name} VARCHAR(#{links.size}) NULL,"
+   end
+   def column_comment
+     %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'metadata document links';}
+   end
+   # Returns [name fragment, quoted value fragment]; two empty strings when
+   # the metadata field is absent or blank.
+   # NOTE(review): reads @md.notes.links although the section header lists
+   # @links as its own heading - confirm this is the intended source.
+   def tuple
+     has_value=defined?(@md.notes.links) && @md.notes.links=~/\S+/
+     return ['',''] unless has_value
+     val=@md.notes.links
+     special_character_escape(val) # return value unused; presumably escapes val in place - TODO confirm
+     ["#{name}, ","'#{val}', "]
+   end
+   self
+ end
+ self
end
+ end
+ class Column_size
def document_clean # restriction not necessary
60000
end
diff --git a/lib/sisu/v2/db_create.rb b/lib/sisu/v2/db_create.rb
index feba670c..8ed638dc 100644
--- a/lib/sisu/v2/db_create.rb
+++ b/lib/sisu/v2/db_create.rb
@@ -59,7 +59,7 @@
=end
module SiSU_DB_create
require "#{SiSU_lib}/db_columns" # db_columns.rb
- class Create < SiSU_DB_columns::Column_size
+ class Create < SiSU_DB_columns::Columns
require "#{SiSU_lib}/sysenv" # sysenv.rb
@@dl=nil
def initialize(opt,conn,file,sql_type='pg')
@@ -82,7 +82,7 @@ module SiSU_DB_create
@env=SiSU_Env::Info_env.new(@opt.fns)
tell=SiSU_Screen::Ansi.new(@opt.cmd,'invert','Create PG db:',%{"SiSU_#{@env.path.stub_pwd}"})
tell.colorize unless @opt.cmd =~/q/
- SiSU_Env::System_call.new.create_pg_db(@env.path.stub_pwd) #watch use of path.stub_pwd instead of stub
+ SiSU_Env::System_call.new.create_pg_db(@env.path.stub_pwd) if @sql_type=='pg' #watch use of path.stub_pwd instead of stub
end
def output_dir?
dir=SiSU_Env::Info_env.new('')
@@ -91,102 +91,129 @@ module SiSU_DB_create
end
end
def create_table
- def metadata
+ def metadata_and_text
print %{
currently using sisu dbi module
- to be populated from documents files
- create tables metadata
+ to be populated from document files
+ create tables metadata_and_text
data import through ruby transfer
} unless @opt.cmd =~/q/
@conn.execute(%{
- CREATE TABLE metadata (
+ CREATE TABLE metadata_and_text (
tid BIGINT PRIMARY KEY,
- title VARCHAR(#{lt_title}) NULL,
- subtitle VARCHAR(#{lt_subtitle}) NULL,
- author VARCHAR(#{lt_author}) NULL,
-/* plan to replace creator field, currently used, with author field */
- creator VARCHAR(#{lt_author}) NULL,
- author_title VARCHAR(#{lt_author_title}) NULL,
- author_nationality VARCHAR(#{lt_author_nationality}) NULL,
- illustrator VARCHAR(#{lt_illustrator}) NULL,
- translator VARCHAR(#{lt_translator}) NULL,
- subject VARCHAR(#{lt_subject}) NULL,
- date VARCHAR(#{lt_date}) NULL,
- date_added_to_site VARCHAR(#{lt_date}) NULL,
- date_created VARCHAR(#{lt_date}) NULL,
- date_issued VARCHAR(#{lt_date}) NULL,
- date_available VARCHAR(#{lt_date}) NULL,
- date_valid VARCHAR(#{lt_date}) NULL,
- date_modified VARCHAR(#{lt_date}) NULL,
- date_translated VARCHAR(#{lt_date}) NULL,
-/* date DATE, */
-/* date_added_to_site DATE, */
-/* date_created DATE, */
-/* date_issued DATE, */
-/* date_available DATE, */
-/* date_valid DATE, */
-/* date_modified DATE, */
-/* date_translated DATE, */
- type VARCHAR(#{lt_type}) NULL,
- description VARCHAR(#{lt_description}) NULL,
- publisher VARCHAR(#{lt_publisher}) NULL,
- contributor VARCHAR(#{lt_contributor}) NULL,
- prepared_by VARCHAR(#{lt_prepared_by}) NULL,
- digitized_by VARCHAR(#{lt_digitized_by}) NULL,
- format VARCHAR(#{lt_format}) NULL,
- identifier VARCHAR(#{lt_identifier}) NULL,
- source VARCHAR(#{lt_source}) NULL,
- language VARCHAR(#{lt_language}) NULL,
- language_original VARCHAR(#{lt_language_original}) NULL,
- relation VARCHAR(#{lt_relation}) NULL,
- coverage VARCHAR(#{lt_coverage}) NULL,
- rights VARCHAR(#{lt_rights}) NULL,
- copyright VARCHAR(#{lt_copyright}) NULL,
- owner VARCHAR(#{lt_owner}) NULL,
- keywords VARCHAR(#{lt_keywords}) NULL,
- comment VARCHAR(#{lt_comment}) NULL,
- loc VARCHAR(#{lt_loc}) NULL,
- dewey VARCHAR(#{lt_dewey}) NULL,
- isbn VARCHAR(#{lt_isbn}) NULL,
- pg VARCHAR(#{lt_pg}) NULL,
- abstract VARCHAR(#{lt_abstract}) NULL,
- prefix_a TEXT NULL,
- prefix_b TEXT NULL,
- skin VARCHAR(#{lt_skin}) NULL,
- markup VARCHAR(#{lt_markup}) NULL,
- links VARCHAR(#{lt_links}) NULL,
- information VARCHAR(#{lt_information}) NULL,
- contact VARCHAR(#{lt_contact}) NULL,
- suffix VARCHAR(#{lt_suffix}) NULL,
- filename VARCHAR(#{lt_filename}) NULL UNIQUE,
- types CHAR(#{lt_types}) NULL,
- subj VARCHAR(#{lt_subj}) NULL,
- original_publication VARCHAR(#{lt_orig_pub}) NULL,
- original_publication_date VARCHAR(#{lt_orig_pub_date}) NULL,
- original_publication_institution VARCHAR(#{lt_orig_pub_institution}) NULL,
- original_publication_nationality VARCHAR(#{lt_orig_pub_nationality}) NULL,
- writing_focus_nationality VARCHAR(#{lt_writing_focus_nationality}) NULL,
- topic_register VARCHAR(#{lt_topic_register}) NULL
+ /* title */
+ #{column.title.create_column}
+ #{column.title_main.create_column}
+ #{column.title_sub.create_column}
+ #{column.title_short.create_column}
+ #{column.title_edition.create_column}
+ #{column.title_note.create_column}
+ #{column.title_language.create_column}
+ #{column.title_language_char.create_column}
+ /* creator */
+ #{column.creator_author.create_column}
+ #{column.creator_author_honorific.create_column}
+ #{column.creator_author_nationality.create_column}
+ #{column.creator_contributor.create_column}
+ #{column.creator_illustrator.create_column}
+ #{column.creator_photographer.create_column}
+ #{column.creator_translator.create_column}
+ #{column.creator_prepared_by.create_column}
+ #{column.creator_digitized_by.create_column}
+ #{column.creator_audio.create_column}
+ #{column.creator_video.create_column}
+ /* language */
+ #{column.language_document.create_column}
+ #{column.language_document_char.create_column}
+ #{column.language_original.create_column}
+ #{column.language_original_char.create_column}
+ /* date */
+ #{column.date_added_to_site.create_column}
+ #{column.date_available.create_column}
+ #{column.date_created.create_column}
+ #{column.date_issued.create_column}
+ #{column.date_modified.create_column}
+ #{column.date_published.create_column}
+ #{column.date_valid.create_column}
+ #{column.date_translated.create_column}
+ #{column.date_original_publication.create_column}
+ #{column.date_generated.create_column}
+ /* publisher */
+ #{column.publisher.create_column}
+ /* original */
+ #{column.original_publisher.create_column}
+ #{column.original_language.create_column}
+ #{column.original_language_char.create_column}
+ #{column.original_source.create_column}
+ #{column.original_institution.create_column}
+ #{column.original_nationality.create_column}
+ /* rights */
+ #{column.rights_all.create_column}
+ #{column.rights_copyright_text.create_column}
+ #{column.rights_copyright_translation.create_column}
+ #{column.rights_copyright_illustrations.create_column}
+ #{column.rights_copyright_photographs.create_column}
+ #{column.rights_copyright_preparation.create_column}
+ #{column.rights_copyright_digitization.create_column}
+ #{column.rights_copyright_audio.create_column}
+ #{column.rights_copyright_video.create_column}
+ #{column.rights_license.create_column}
+ /* classify */
+ #{column.classify_topic_register.create_column}
+ #{column.classify_subject.create_column}
+ #{column.classify_type.create_column}
+ #{column.classify_loc.create_column}
+ #{column.classify_dewey.create_column}
+ #{column.classify_pg.create_column}
+ #{column.classify_isbn.create_column}
+ #{column.classify_format.create_column}
+ #{column.classify_identifier.create_column}
+ #{column.classify_relation.create_column}
+ #{column.classify_coverage.create_column}
+ #{column.classify_keywords.create_column}
+ /* notes */
+ #{column.notes_abstract.create_column}
+ #{column.notes_comment.create_column}
+ #{column.notes_description.create_column}
+ #{column.notes_history.create_column}
+ #{column.notes_prefix.create_column}
+ #{column.notes_prefix_a.create_column}
+ #{column.notes_prefix_b.create_column}
+ #{column.notes_suffix.create_column}
+ /* misc */
+ #{column.filename.create_column}
+ #{column.sisutxt.create_column}
+ #{column.fulltext.create_column}
+ #{column.word_count.create_column}
+ #{column.digest.create_column}
+ #{column.skin_name.create_column}
+ #{column.skin.create_column}
+ #{column.links.create_column.gsub(/,$/,'')}
+/* subj VARCHAR(64) NULL, */
+/* contact VARCHAR(100) NULL, */
+/* information VARCHAR(100) NULL, */
+/* types CHAR(1) NULL, */
+/* writing_focus_nationality VARCHAR(100) NULL, */
);
})
- @comment.psql.metadata if @comment
+ @comment.psql.metadata_and_text if @comment
end
- def documents # create documents base
+ def doc_objects # create doc_objects base
print %{
to be populated from documents files
- create tables documents document_trade document_env
+ create tables doc_objects
data import through ruby transfer
} unless @opt.cmd =~/q/
@conn.execute(%{
- CREATE TABLE documents (
+ CREATE TABLE doc_objects (
lid BIGINT PRIMARY KEY,
- metadata_tid BIGINT REFERENCES metadata,
+ metadata_tid BIGINT REFERENCES metadata_and_text,
ocn SMALLINT,
ocnd VARCHAR(6),
ocns VARCHAR(6),
clean TEXT NULL,
body TEXT NULL,
- seg VARCHAR(#{document_seg}) NULL,
+ seg VARCHAR(120) NULL,
lev_an VARCHAR(1),
lev SMALLINT NULL,
lev1 SMALLINT,
@@ -210,7 +237,7 @@ module SiSU_DB_create
types CHAR(1) NULL
);
})
- @comment.psql.documents if @comment
+ @comment.psql.doc_objects if @comment
end
def endnotes
print %{
@@ -221,7 +248,7 @@ module SiSU_DB_create
@conn.execute(%{
CREATE TABLE endnotes (
nid BIGINT PRIMARY KEY,
- document_lid BIGINT REFERENCES documents,
+ document_lid BIGINT REFERENCES doc_objects,
nr SMALLINT,
clean TEXT NULL,
body TEXT NULL,
@@ -229,7 +256,7 @@ module SiSU_DB_create
ocnd VARCHAR(6),
ocns VARCHAR(6),
digest_clean CHAR(#{@@dl}),
- metadata_tid BIGINT REFERENCES metadata
+ metadata_tid BIGINT REFERENCES metadata_and_text
);
})
@comment.psql.endnotes if @comment
@@ -243,7 +270,7 @@ module SiSU_DB_create
@conn.execute(%{
CREATE TABLE endnotes_asterisk (
nid BIGINT PRIMARY KEY,
- document_lid BIGINT REFERENCES documents,
+ document_lid BIGINT REFERENCES doc_objects,
nr SMALLINT,
clean TEXT NULL,
body TEXT NULL,
@@ -251,7 +278,7 @@ module SiSU_DB_create
ocnd VARCHAR(6),
ocns VARCHAR(6),
digest_clean CHAR(#{@@dl}),
- metadata_tid BIGINT REFERENCES metadata
+ metadata_tid BIGINT REFERENCES metadata_and_text
);
})
@comment.psql.endnotes_asterisk if @comment
@@ -265,7 +292,7 @@ module SiSU_DB_create
@conn.execute(%{
CREATE TABLE endnotes_plus (
nid BIGINT PRIMARY KEY,
- document_lid BIGINT REFERENCES documents,
+ document_lid BIGINT REFERENCES doc_objects,
nr SMALLINT,
clean TEXT NULL,
body TEXT NULL,
@@ -273,21 +300,21 @@ module SiSU_DB_create
ocnd VARCHAR(6),
ocns VARCHAR(6),
digest_clean CHAR(#{@@dl}),
- metadata_tid BIGINT REFERENCES metadata
+ metadata_tid BIGINT REFERENCES metadata_and_text
);
})
@comment.psql.endnotes_plus if @comment
end
- def urls # create documents file links mapping
+ def urls # create doc_objects file links mapping
print %{
currently using sisu dbi module
- to be populated from documents files
+ to be populated from doc_objects files
create tables urls
data import through ruby transfer
} unless @opt.cmd =~/q/
@conn.execute(%{
CREATE TABLE urls (
- metadata_tid BIGINT REFERENCES metadata,
+ metadata_tid BIGINT REFERENCES metadata_and_text,
plaintext varchar(512),
html_toc varchar(512),
html_doc varchar(512),
@@ -311,7 +338,7 @@ module SiSU_DB_create
self
end
end
- class Comment
+ class Comment < SiSU_DB_columns::Columns
def initialize(conn,sql_type='pg')
@conn=conn
if sql_type =~ /pg/; psql
@@ -325,162 +352,146 @@ module SiSU_DB_create
end
end
end
- def metadata
+ def metadata_and_text
sql_arr=[
- %{COMMENT ON Table metadata
- IS 'contains SiSU documents metadata with metadata';},
- %{COMMENT ON COLUMN metadata.tid
+ %{COMMENT ON Table metadata_and_text
+ IS 'contains SiSU metadata and fulltext for search (including source .sst if shared)';},
+ %{COMMENT ON COLUMN metadata_and_text.tid
IS 'unique';},
- %{COMMENT ON COLUMN metadata.filename
- IS 'document filename';},
- %{COMMENT ON COLUMN metadata.title
- IS 'metadata title (dublin core element 1)';},
- %{COMMENT ON COLUMN metadata.subtitle
- IS 'document subtitle';},
- %{COMMENT ON COLUMN metadata.creator
- IS 'metadata creator (dublin core element 2)';},
- %{COMMENT ON COLUMN metadata.author
- IS 'metadata author (dublin core element 2)';},
- %{COMMENT ON COLUMN metadata.illustrator
- IS 'metadata illustrator';},
- %{COMMENT ON COLUMN metadata.translator
- IS 'metadata translator';},
- %{COMMENT ON COLUMN metadata.subject
- IS 'metadata subject (dublin core element 3)';},
- %{COMMENT ON COLUMN metadata.date
- IS 'metadata date (dublin core element 7)';},
- %{COMMENT ON COLUMN metadata.date_created
- IS 'metadata date created (dublin core)';},
- %{COMMENT ON COLUMN metadata.date_issued
- IS 'metadata date of issue (dublin core)';},
- %{COMMENT ON COLUMN metadata.date_available
- IS 'metadata date available (dublin core)';},
- %{COMMENT ON COLUMN metadata.date_valid
- IS 'metadata date valid (dublin core)';},
- %{COMMENT ON COLUMN metadata.date_modified
- IS 'metadata date modified (dublin core)';},
- %{COMMENT ON COLUMN metadata.type
- IS 'metadata type (dublin core element 8)';},
- %{COMMENT ON COLUMN metadata.description
- IS 'metadata description (dublin core element 4)';},
- %{COMMENT ON COLUMN metadata.publisher
- IS 'metadata publisher (dublin core element 5)';},
- %{COMMENT ON COLUMN metadata.contributor
- IS 'metadata contributor (dublin core element 6)';},
- %{COMMENT ON COLUMN metadata.prepared_by
- IS 'metadata markup prepared by';},
- %{COMMENT ON COLUMN metadata.digitized_by
- IS 'metadata digitized by';},
- %{COMMENT ON COLUMN metadata.format
- IS 'metadata format (dublin core element 9)';},
- %{COMMENT ON COLUMN metadata.identifier
- IS 'metadata identifier (dublin core element 10)';},
- %{COMMENT ON COLUMN metadata.source
- IS 'metadata source (dublin core element 11)';},
- %{COMMENT ON COLUMN metadata.language
- IS 'metadata language (dublin core element 12)';},
- %{COMMENT ON COLUMN metadata.language_original
- IS 'metadata original language';},
- %{COMMENT ON COLUMN metadata.relation
- IS 'metadata (dublin core element 13)';},
- %{COMMENT ON COLUMN metadata.coverage
- IS 'metadata coverage (dublin core element 14)';},
- %{COMMENT ON COLUMN metadata.rights
- IS 'metadata rights / copyright / license (dublin core element 15)';},
- %{COMMENT ON COLUMN metadata.owner
- IS 'metadata owner';},
- %{COMMENT ON COLUMN metadata.keywords
- IS 'metadata keywords';},
- %{COMMENT ON COLUMN metadata.comment
- IS 'metadata comment';},
- %{COMMENT ON COLUMN metadata.abstract
- IS 'metadata abstract';},
- %{COMMENT ON COLUMN metadata.loc
- IS 'metadata library of congress';},
- %{COMMENT ON COLUMN metadata.dewey
- IS 'metadata dewey';},
- %{COMMENT ON COLUMN metadata.isbn
- IS 'metadata isbn';},
- %{COMMENT ON COLUMN metadata.pg
- IS 'metadata project gutenberg number';},
- %{COMMENT ON COLUMN metadata.prefix_a
- IS 'metadata prefix';},
- %{COMMENT ON COLUMN metadata.prefix_b
- IS 'metadata prefix';},
- %{COMMENT ON COLUMN metadata.skin
- IS 'metadata sisu skin';},
- %{COMMENT ON COLUMN metadata.markup
- IS 'metadata markup source';},
- %{COMMENT ON COLUMN metadata.links
- IS 'metadata links';},
- %{COMMENT ON COLUMN metadata.information
- IS 'metadata information';},
- %{COMMENT ON COLUMN metadata.contact
- IS 'metadata contact';},
- %{COMMENT ON COLUMN metadata.suffix
- IS 'metadata sisu suffix (output related)';},
- %{COMMENT ON COLUMN metadata.filename
- IS 'metadata source filename';},
- %{COMMENT ON COLUMN metadata.types
- IS 'document types scroll 1, seg 2, both 3';},
- %{COMMENT ON COLUMN metadata.subj
- IS 'subject areas - no way to populate at present as not mapped';},
+ %{#{column.title.column_comment}},
+ %{#{column.title_main.column_comment}},
+ %{#{column.title_sub.column_comment}},
+ %{#{column.title_short.column_comment}},
+ %{#{column.title_edition.column_comment}},
+ %{#{column.title_note.column_comment}},
+ %{#{column.title_language.column_comment}},
+ %{#{column.title_language_char.column_comment}},
+ %{#{column.creator_author.column_comment}},
+ %{#{column.creator_author_honorific.column_comment}},
+ %{#{column.creator_author_nationality.column_comment}},
+ %{#{column.creator_contributor.column_comment}},
+ %{#{column.creator_illustrator.column_comment}},
+ %{#{column.creator_photographer.column_comment}},
+ %{#{column.creator_translator.column_comment}},
+ %{#{column.creator_prepared_by.column_comment}},
+ %{#{column.creator_digitized_by.column_comment}},
+ %{#{column.creator_audio.column_comment}},
+ %{#{column.creator_video.column_comment}},
+ %{#{column.language_document.column_comment}},
+ %{#{column.language_document_char.column_comment}},
+ %{#{column.language_original.column_comment}},
+ %{#{column.language_original_char.column_comment}},
+ %{#{column.date_added_to_site.column_comment}},
+ %{#{column.date_available.column_comment}},
+ %{#{column.date_created.column_comment}},
+ %{#{column.date_issued.column_comment}},
+ %{#{column.date_modified.column_comment}},
+ %{#{column.date_published.column_comment}},
+ %{#{column.date_valid.column_comment}},
+ %{#{column.date_translated.column_comment}},
+ %{#{column.date_original_publication.column_comment}},
+ %{#{column.date_generated.column_comment}},
+ %{#{column.publisher.column_comment}},
+ %{#{column.original_publisher.column_comment}},
+ %{#{column.original_language.column_comment}},
+ %{#{column.original_language_char.column_comment}},
+ %{#{column.original_source.column_comment}},
+ %{#{column.original_institution.column_comment}},
+ %{#{column.original_nationality.column_comment}},
+ %{#{column.rights_all.column_comment}},
+ %{#{column.rights_copyright_text.column_comment}},
+ %{#{column.rights_copyright_translation.column_comment}},
+ %{#{column.rights_copyright_illustrations.column_comment}},
+ %{#{column.rights_copyright_photographs.column_comment}},
+ %{#{column.rights_copyright_preparation.column_comment}},
+ %{#{column.rights_copyright_digitization.column_comment}},
+ %{#{column.rights_copyright_audio.column_comment}},
+ %{#{column.rights_copyright_video.column_comment}},
+ %{#{column.rights_license.column_comment}},
+ %{#{column.classify_topic_register.column_comment}},
+ %{#{column.classify_subject.column_comment}},
+ %{#{column.classify_type.column_comment}},
+ %{#{column.classify_loc.column_comment}},
+ %{#{column.classify_dewey.column_comment}},
+ %{#{column.classify_pg.column_comment}},
+ %{#{column.classify_isbn.column_comment}},
+ %{#{column.classify_format.column_comment}},
+ %{#{column.classify_identifier.column_comment}},
+ %{#{column.classify_relation.column_comment}},
+ %{#{column.classify_coverage.column_comment}},
+ %{#{column.classify_keywords.column_comment}},
+ %{#{column.notes_abstract.column_comment}},
+ %{#{column.notes_comment.column_comment}},
+ %{#{column.notes_description.column_comment}},
+ %{#{column.notes_history.column_comment}},
+ %{#{column.notes_prefix.column_comment}},
+ %{#{column.notes_prefix_a.column_comment}},
+ %{#{column.notes_prefix_b.column_comment}},
+ %{#{column.notes_suffix.column_comment}},
+ %{#{column.filename.column_comment}},
+ %{#{column.sisutxt.column_comment}},
+ %{#{column.fulltext.column_comment}},
+ %{#{column.word_count.column_comment}},
+ %{#{column.digest.column_comment}},
+ %{#{column.skin_name.column_comment}},
+ %{#{column.skin.column_comment}},
+ %{#{column.links.column_comment}},
]
conn_execute_array(sql_arr)
end
- def documents
+ def doc_objects
sql_arr=[
- %{COMMENT ON Table documents
- IS 'contains searchable text of SiSU documents';},
- %{COMMENT ON COLUMN documents.lid
+ %{COMMENT ON Table doc_objects
+ IS 'contains searchable text of SiSU document objects';},
+ %{COMMENT ON COLUMN doc_objects.lid
IS 'unique';},
- %{COMMENT ON COLUMN documents.metadata_tid
- IS 'tie to title in metadata';},
- %{COMMENT ON COLUMN documents.lev_an
+ %{COMMENT ON COLUMN doc_objects.metadata_tid
+ IS 'tie to title in metadata_and_text';},
+ %{COMMENT ON COLUMN doc_objects.lev_an
IS 'doc level A-C 1-6';},
- %{COMMENT ON COLUMN documents.lev
+ %{COMMENT ON COLUMN doc_objects.lev
IS 'doc level 1-6 \d\~';},
- %{COMMENT ON COLUMN documents.seg
+ %{COMMENT ON COLUMN doc_objects.seg
IS 'segment name from level number 4 (lv 1)';},
- %{COMMENT ON COLUMN documents.ocn
+ %{COMMENT ON COLUMN doc_objects.ocn
IS 'object citation number';},
- %{COMMENT ON COLUMN documents.en_a
+ %{COMMENT ON COLUMN doc_objects.en_a
IS 'first endnote number in text object (eg. NULL or 34) (used with en_z to create range)';},
- %{COMMENT ON COLUMN documents.en_z
+ %{COMMENT ON COLUMN doc_objects.en_z
IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a to create range)';},
- %{COMMENT ON COLUMN documents.en_a_asterisk
+ %{COMMENT ON COLUMN doc_objects.en_a_asterisk
IS 'first endnote number in text object (eg. NULL or 34) (used with en_z_asterisk to create range)';},
- %{COMMENT ON COLUMN documents.en_z_asterisk
+ %{COMMENT ON COLUMN doc_objects.en_z_asterisk
IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_asterisk to create range)';},
- %{COMMENT ON COLUMN documents.en_a_plus
+ %{COMMENT ON COLUMN doc_objects.en_a_plus
IS 'first endnote number in text object (eg. NULL or 34) (used with en_z_plus to create range)';},
- %{COMMENT ON COLUMN documents.en_z_plus
+ %{COMMENT ON COLUMN doc_objects.en_z_plus
IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_plus to create range)';},
- %{COMMENT ON COLUMN documents.types
+ %{COMMENT ON COLUMN doc_objects.types
IS 'document types seg scroll';},
- %{COMMENT ON COLUMN documents.clean
+ %{COMMENT ON COLUMN doc_objects.clean
IS 'text object - substantive text: clean, stripped of markup';},
- %{COMMENT ON COLUMN documents.body
+ %{COMMENT ON COLUMN doc_objects.body
IS 'text object - substantive text: light html markup';},
- %{COMMENT ON COLUMN documents.lev1
+ %{COMMENT ON COLUMN doc_objects.lev1
IS 'document structure, level number 1';},
- %{COMMENT ON COLUMN documents.lev2
+ %{COMMENT ON COLUMN doc_objects.lev2
IS 'document structure, level number 2';},
- %{COMMENT ON COLUMN documents.lev3
+ %{COMMENT ON COLUMN doc_objects.lev3
IS 'document structure, level number 3';},
- %{COMMENT ON COLUMN documents.lev4
+ %{COMMENT ON COLUMN doc_objects.lev4
IS 'document structure, level number 4';},
- %{COMMENT ON COLUMN documents.lev5
+ %{COMMENT ON COLUMN doc_objects.lev5
IS 'document structure, level number 5';},
- %{COMMENT ON COLUMN documents.lev6
+ %{COMMENT ON COLUMN doc_objects.lev6
IS 'document structure, level number 6';},
- %{COMMENT ON COLUMN documents.t_of
+ %{COMMENT ON COLUMN doc_objects.t_of
IS 'document structure, type of object (object is of)';},
- %{COMMENT ON COLUMN documents.t_is
+ %{COMMENT ON COLUMN doc_objects.t_is
IS 'document structure, object is';},
- %{COMMENT ON COLUMN documents.node
+ %{COMMENT ON COLUMN doc_objects.node
IS 'document structure, object node if heading';},
- %{COMMENT ON COLUMN documents.parent
+ %{COMMENT ON COLUMN doc_objects.parent
IS 'document structure, object parent (is a heading)';}
]
conn_execute_array(sql_arr)
@@ -501,15 +512,15 @@ module SiSU_DB_create
IS 'endnote substantive content';},
%{COMMENT ON COLUMN endnotes.ocn
IS 'object citation no# <\~(\d+)> from which endnote is referenced';},
- %{COMMENT ON COLUMN documents.metadata_tid
- IS 'tie to title in metadata - unique for each document';}
+ %{COMMENT ON COLUMN doc_objects.metadata_tid
+ IS 'tie to title in metadata_and_text - unique for each document';}
]
conn_execute_array(sql_arr)
end
def endnotes_asterisk
sql_arr=[
%{COMMENT ON Table endnotes_asterisk
- IS 'contains searchable text of SiSU documents endnotes asterisk';},
+ IS 'contains searchable text of SiSU documents endnotes marked with asterisk';},
%{COMMENT ON COLUMN endnotes_asterisk.nid
IS 'unique';},
%{COMMENT ON COLUMN endnotes_asterisk.document_lid
@@ -522,15 +533,15 @@ module SiSU_DB_create
IS 'endnote substantive content';},
%{COMMENT ON COLUMN endnotes_asterisk.ocn
IS 'object citation no# <\~(\d+)> from which endnote is referenced';},
- %{COMMENT ON COLUMN documents.metadata_tid
- IS 'tie to title in metadata - unique for each document';}
+ %{COMMENT ON COLUMN doc_objects.metadata_tid
+ IS 'tie to title in metadata_and_text - unique for each document';}
]
conn_execute_array(sql_arr)
end
def endnotes_plus
sql_arr=[
%{COMMENT ON Table endnotes_plus
- IS 'contains searchable text of SiSU documents endnotes';},
+ IS 'contains searchable text of SiSU documents endnotes marked with plus';},
%{COMMENT ON COLUMN endnotes_plus.nid
IS 'unique';},
%{COMMENT ON COLUMN endnotes_plus.document_lid
@@ -543,8 +554,8 @@ module SiSU_DB_create
IS 'endnote substantive content';},
%{COMMENT ON COLUMN endnotes_plus.ocn
IS 'object citation no# <\~(\d+)> from which endnote is referenced';},
- %{COMMENT ON COLUMN documents.metadata_tid
- IS 'tie to title in metadata - unique for each document';},
+ %{COMMENT ON COLUMN doc_objects.metadata_tid
+ IS 'tie to title in metadata_and_text - unique for each document';},
]
conn_execute_array(sql_arr)
end
@@ -552,8 +563,8 @@ module SiSU_DB_create
sql_arr=[
%{COMMENT ON Table urls
IS 'contains base url links to different SiSU output';},
- %{COMMENT ON COLUMN documents.metadata_tid
- IS 'tie to title in metadata - unique for each document, the mapping of rows is one to one';},
+ %{COMMENT ON COLUMN doc_objects.metadata_tid
+ IS 'tie to title in metadata_and_text - unique for each document, the mapping of rows is one to one';},
%{COMMENT ON COLUMN urls.plaintext
IS 'plaintext utf-8';},
%{COMMENT ON COLUMN urls.html_toc
diff --git a/lib/sisu/v2/db_drop.rb b/lib/sisu/v2/db_drop.rb
index 673c5f8f..7189da56 100644
--- a/lib/sisu/v2/db_drop.rb
+++ b/lib/sisu/v2/db_drop.rb
@@ -70,8 +70,8 @@ module SiSU_DB_drop
cascade='CASCADE'
end
@drop_table=[
- "DROP TABLE metadata #{cascade};",
- "DROP TABLE documents #{cascade};",
+ "DROP TABLE metadata_and_text #{cascade};",
+ "DROP TABLE doc_objects #{cascade};",
"DROP TABLE urls #{cascade};",
"DROP TABLE endnotes #{cascade};",
"DROP TABLE endnotes_asterisk #{cascade};",
@@ -84,15 +84,35 @@ module SiSU_DB_drop
msg_sqlite="as not all disk space is recovered after dropping the database << #{@db_info.sqlite.db} >>, you may be better off deleting the file, and recreating it as necessary"
case @sql_type
when /sqlite/
- @conn.transaction
- @drop_table.each do |d|
- @conn.execute(d)
- end
- @conn.commit
puts msg_sqlite
ans=@ans.response?('remove sql database?')
- if ans and File.exist?(@db_info.sqlite.db)
+ if ans \
+ and File.exist?(@db_info.sqlite.db)
+ @conn.close
File.unlink(@db_info.sqlite.db)
+ db=SiSU_Env::Info_db.new
+ conn=db.sqlite.conn_sqlite3
+ sdb=SiSU_DB_DBI::Create.new(@opt,conn,@db_info,@sql_type)
+ sdb_index=SiSU_DB_DBI::Index.new(@opt,conn,@db_info,@sql_type)
+ sdb.output_dir?
+ begin #% rebuild the sqlite db that was just unlinked: database, tables, then indexes
+   sdb.create_db
+   sdb.create_table.metadata_and_text
+   sdb.create_table.doc_objects
+   sdb.create_table.endnotes
+   sdb.create_table.endnotes_asterisk
+   sdb.create_table.endnotes_plus
+   sdb.create_table.urls
+   sdb_index.create_indexes
+ rescue; SiSU_Errors::Info_error.new($!,$@,'-D').error; sdb.output_dir? # local sdb (created above), not @sdb which is not set in this scope
+ end
+ exit
+ else
+ @conn.transaction
+ @drop_table.each do |d|
+ @conn.execute(d)
+ end
+ @conn.commit
end
else
@drop_table.each do |d|
@@ -115,33 +135,33 @@ module SiSU_DB_drop
end
def indexes #% drop all indexes
#@conn.do(%{
- # DROP INDEX object_nr ON documents(ocn);
- # DROP INDEX body ON documents(body);
- # DROP INDEX clean ON documents(clean);
- # DROP INDEX lev1 ON documents(lev1);
- # DROP INDEX lev2 ON documents(lev2);
- # DROP INDEX lev3 ON documents(lev3);
- # DROP INDEX lev4 ON documents(lev4);
- # DROP INDEX lev5 ON documents(lev5);
- # DROP INDEX lev6 ON documents(lev6);
+ # DROP INDEX object_nr ON doc_objects(ocn);
+ # DROP INDEX body ON doc_objects(body);
+ # DROP INDEX clean ON doc_objects(clean);
+ # DROP INDEX lev1 ON doc_objects(lev1);
+ # DROP INDEX lev2 ON doc_objects(lev2);
+ # DROP INDEX lev3 ON doc_objects(lev3);
+ # DROP INDEX lev4 ON doc_objects(lev4);
+ # DROP INDEX lev5 ON doc_objects(lev5);
+ # DROP INDEX lev6 ON doc_objects(lev6);
# DROP INDEX endnote_nr ON endnotes(nr);
# DROP INDEX endnote ON endnotes(body);
- # DROP INDEX title ON metadata(title);
- # DROP INDEX filename ON metadata(filename)
+ # DROP INDEX title ON metadata_and_text(title);
+ # DROP INDEX filename ON metadata_and_text(filename)
# /*
- # DROP INDEX object_nr ON documents(ocn) CASCADE;
- # DROP INDEX body ON documents(body) CASCADE;
- # DROP INDEX clean ON documents(clean) CASCADE;
- # DROP INDEX lev1 ON documents(lev1) CASCADE;
- # DROP INDEX lev2 ON documents(lev2) CASCADE;
- # DROP INDEX lev3 ON documents(lev3) CASCADE;
- # DROP INDEX lev4 ON documents(lev4) CASCADE;
- # DROP INDEX lev5 ON documents(lev5) CASCADE;
- # DROP INDEX lev6 ON documents(lev6) CASCADE;
+ # DROP INDEX object_nr ON doc_objects(ocn) CASCADE;
+ # DROP INDEX body ON doc_objects(body) CASCADE;
+ # DROP INDEX clean ON doc_objects(clean) CASCADE;
+ # DROP INDEX lev1 ON doc_objects(lev1) CASCADE;
+ # DROP INDEX lev2 ON doc_objects(lev2) CASCADE;
+ # DROP INDEX lev3 ON doc_objects(lev3) CASCADE;
+ # DROP INDEX lev4 ON doc_objects(lev4) CASCADE;
+ # DROP INDEX lev5 ON doc_objects(lev5) CASCADE;
+ # DROP INDEX lev6 ON doc_objects(lev6) CASCADE;
# DROP INDEX endnote_nr ON endnotes(nr) CASCADE;
# DROP INDEX endnote ON endnotes(body) CASCADE;
- # DROP INDEX title ON metadata(title) CASCADE;
- # DROP INDEX filename ON metadata(filename) CASCADE
+ # DROP INDEX title ON metadata_and_text(title) CASCADE;
+ # DROP INDEX filename ON metadata_and_text(filename) CASCADE
# */
#})
end
diff --git a/lib/sisu/v2/db_import.rb b/lib/sisu/v2/db_import.rb
index 1f795e68..5610a1d0 100644
--- a/lib/sisu/v2/db_import.rb
+++ b/lib/sisu/v2/db_import.rb
@@ -60,9 +60,10 @@
module SiSU_DB_import
require "#{SiSU_lib}/db_columns" # db_columns.rb
require "#{SiSU_lib}/db_load_tuple" # db_load_tuple.rb
+ require "#{SiSU_lib}/db_sqltxt" # db_sqltxt.rb
require "#{SiSU_lib}/shared_html_lite" # shared_html_lite.rb
require 'sqlite3'
- class Import < SiSU_DB_columns::Column_size
+ class Import < SiSU_DB_text::Prepare
include SiSU_Param
include SiSU_Screen
@@dl=nil
@@ -86,7 +87,7 @@ module SiSU_DB_import
@counter={}
@db=SiSU_Env::Info_db.new
@driver_sqlite3=(@conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]) ? true : false
- sql='SELECT MAX(lid) FROM documents'
+ sql='SELECT MAX(lid) FROM doc_objects'
begin
@col[:lid] ||=0
@col[:lid]=if @driver_sqlite3
@@ -122,7 +123,7 @@ module SiSU_DB_import
tell.print_grey if @opt.cmd =~/v/
file_exist=if @sql_type=~/sqlite/; nil
else
- @conn.select_one(%{ SELECT metadata.tid FROM metadata WHERE metadata.filename ~ '#{@opt.fns}'; })
+ @conn.select_one(%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.filename = '#{@opt.fns}'; })
end
if (@sql_type!~/sqlite/ and not file_exist) \
or @sql_type=~/sqlite/
@@ -192,28 +193,6 @@ module SiSU_DB_import
end
end
end
- def special_character_escape(str)
- str.gsub!(/'/,"''") #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'")
- str.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"<br />\n")
- str.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check
- str.gsub!(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/,'[image: \1] \2')
- str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2')
- str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,'\1')
- str
- end
- def strip_markup(str) #define rules, make same as in dal clean
- str.gsub!(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]')
- str.gsub!(/(?:&nbsp\\;|#{Mx[:nbsp]})+/,' ')
- str.gsub!(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1') #tables
- str.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' ') #tables
- str.gsub!(/#{Mx[:tc_p]}/u,' ') #tables tidy later
- str.gsub!(/<.+?>/,'')
- str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:file|ftp)\/\/:\S+ /,' [image] ') # else image names found in search
- str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [image]') # else image names found in search
- str.gsub!(/\s\s+/,' ')
- str.strip!
- str
- end
def pf_db_import_transaction_open
end
def pf_db_import_transaction_close
@@ -222,12 +201,23 @@ module SiSU_DB_import
print %{ #{@cX.grey}import documents dbi_unit #{@cX.off} } unless @opt.cmd =~/q/
@tp={}
@md=SiSU_Param::Parameters.new(@opt).get
+#% sisutxt & fulltxt
+ if FileTest.exist?(@md.fns)
+ txt_arr=IO.readlines(@md.fns,'')
+ src=txt_arr.join("\n")
+ src=special_character_escape(src)
+ @tp[:sisutxt_f],@tp[:sisutxt_i]='sisutxt, ',"'#{src}', "
+ txt=clean_searchable_text(txt_arr)
+ #special_character_escape(txt)
+ @tp[:fulltxt_f],@tp[:fulltxt_i]='fulltxt, ',"'#{txt}', "
+ end
+#% title
if defined? @md.title.full \
and @md.title.full=~/\S+/ # DublinCore 1 - title
- @tp[:title]=@md.title.full
- special_character_escape(@tp[:title])
- @tp[:title_f],@tp[:title_i]='title, ',"'#{@tp[:title]}', "
- sql='SELECT MAX(tid) FROM metadata'
+ #@tp[:title]=@md.title.full
+ #special_character_escape(@tp[:title])
+ #@tp[:title_f],@tp[:title_i]='title, ',"'#{@tp[:title]}', "
+ sql='SELECT MAX(tid) FROM metadata_and_text'
begin
@@id_t ||=0
id_t=if @driver_sqlite3
@@ -242,220 +232,9 @@ module SiSU_DB_import
@@id_t+=1 #bug related, needs to be performed once at start of file, but consider moving, as, placed here it means program will fail if document header lacks @title:
puts %{\n#{@cX.grey}Processing file number#{@cX.off}: #{@cX.green}#{@@id_t}#{@@cX.off}} unless @opt.cmd =~/q/
end
- if defined? @md.creator.author \
- and @md.creator.author=~/\S+/ # DublinCore 2 - creator/author (author)
- txt=@md.creator.author #dc
- special_character_escape(txt)
- @tp[:creator_f],@tp[:creator_i]='creator, ',"'#{txt}', "
- end
- if defined? @md.creator.contributor \
- and @md.creator.contributor=~/\S+/ # DublinCore 6 - contributor
- txt=@md.creator.contributor #dc
- special_character_escape(txt)
- @tp[:contributor_f],@tp[:contributor_i]='contributor, ',"'#{txt}', "
- end
- if defined? @md.creator.translator \
- and @md.creator.translator=~/\S+/
- txt=@md.creator.translator
- special_character_escape(txt)
- @tp[:translator_f],@tp[:translator_i]='translator, ',"'#{txt}', "
- end
- if defined? @md.creator.illustrator \
- and @md.creator.illustrator=~/\S+/
- txt=@md.creator.illustrator
- special_character_escape(txt)
- @tp[:illustrator_f],@tp[:illustrator_i]='illustrator, ',"'#{txt}', "
- end
- if defined? @md.publisher \
- and @md.publisher
- txt=@md.publisher #dc
- special_character_escape(txt)
- @tp[:publisher_f],@tp[:publisher_i]='publisher, ',"'#{txt}', "
- end
- if defined? @md.creator.prepared_by \
- and @md.creator.prepared_by=~/\S+/
- txt=@md.creator.prepared_by
- special_character_escape(txt)
- @tp[:prepared_by_f],@tp[:prepared_by_i]='prepared_by, ',"'#{txt}', "
- end
- if defined? @md.creator.digitized_by \
- and @md.creator.digitized_by=~/\S+/
- txt=@md.creator.digitized_by
- special_character_escape(txt)
- @tp[:digitized_by_f],@tp[:digitized_by_i]='digitized_by, ',"'#{txt}', "
- end
- if defined? @md.classify.subject \
- and @md.classify.subject=~/\S+/ # DublinCore 3 - subject (us library of congress, eric or udc, or schema???)
- txt=@md.classify.subject #dc
- special_character_escape(txt)
- @tp[:subject_f],@tp[:subject_i]='subject, ',"'#{txt}', "
- end
- if defined? @md.notes.description \
- and @md.notes.description=~/\S+/ # DublinCore 4 - description
- txt=@md.notes.description #dc
- special_character_escape(txt)
- @tp[:description_f],@tp[:description_i]='description, ',"'#{txt}', "
- end
- if defined? @md.classify.subject \
- and @md.classify.subject=~/\S+/ # DublinCore 8 - type (genre eg. report, convention etc)
- txt=@md.classify.abstract
- special_character_escape(txt)
- @tp[:abstract_f],@tp[:abstract_i]='abstract, ',"'#{txt}', "
- end
- if defined? @md.rights.all \
- and @md.rights.all=~/\S+/ # DublinCore 15 - rights
- txt=@md.rights.all #dc
- special_character_escape(txt)
- @tp[:rights_f],@tp[:rights_i]='rights, ',"'#{txt}', "
- end
- if defined? @md.date.published \
- and @md.date.published=~/\S+/ # DublinCore 7 - date year-mm-dd
- txt=@md.date.published #dc
- special_character_escape(txt)
- @tp[:date_f],@tp[:date_i]='date, ',"'#{txt}', "
- end
- if defined? @md.date.created \
- and @md.date.created=~/\S+/
- txt=@md.date.created #dc
- special_character_escape(txt)
- @tp[:date_created_f],@tp[:date_created_i]='date_created, ',"'#{txt}', "
- end
- if defined? @md.date.issued \
- and @md.date.issued=~/\S+/
- txt=@md.date.issued #dc
- special_character_escape(txt)
- @tp[:date_issued_f],@tp[:date_issued_i]='date_issued, ',"'#{txt}', "
- end
- if defined? @md.date.available \
- and @md.date.available=~/\S+/
- txt=@md.date.available #dc
- special_character_escape(txt)
- @tp[:date_available_f],@tp[:date_available_i]='date_available, ',"'#{txt}', "
- end
- if defined? @md.date.modified \
- and @md.date.modified=~/\S+/
- txt=@md.date.modified #dc
- special_character_escape(txt)
- @tp[:date_modified_f],@tp[:date_modified_i]='date_modified, ',"'#{txt}', "
- end
- if defined? @md.date.valid \
- and @md.date.valid=~/\S+/
- txt=@md.date.valid #dc
- special_character_escape(txt)
- @tp[:date_valid_f],@tp[:date_valid_i]='date_valid, ',"'#{txt}', "
- end
- if defined? @md.title.language \
- and @md.title.language=~/\S+/
- txt=@md.title.language
- special_character_escape(txt)
- @tp[:language_f],@tp[:language_i]='language, ',"'#{txt}', "
- end
- if defined? @md.original.language \
- and @md.original.language=~/\S+/
- txt=@md.original.language
- special_character_escape(txt)
- @tp[:language_original_f],@tp[:language_original_i]='language_original, ',"'#{txt}', "
- end
- if defined? @md.classify.format \
- and @md.classify.format=~/\S+/ # DublinCore 9 - format (use your mime type)
- txt=@md.classify.format #dc
- special_character_escape(txt)
- @tp[:format_f],@tp[:format_i]='format, ',"'#{txt}', "
- end
- if defined? @md.classify.identifier \
- and @md.classify.identifier=~/\S+/ # DublinCore 10 - identifier (your identifier, could use urn which is free)
- txt=@md.classify.identifier #dc
- special_character_escape(txt)
- @tp[:identifier_f],@tp[:identifier_i]='identifier, ',"'#{txt}', "
- end
- if defined? @md.original.source \
- and @md.original.source=~/\S+/ # DublinCore 11 - source (document source)
- txt=@md.original.source #dc
- special_character_escape(txt)
- @tp[:source_f],@tp[:source_i]='source, ',"'#{txt}', "
- end
- if defined? @md.classify.relation \
- and @md.classify.relation=~/\S+/ # DublinCore 13 - relation
- txt=@md.classify.relation #dc
- special_character_escape(txt)
- @tp[:relation_f],@tp[:relation_i]='relation, ',"'#{txt}', "
- end
- if defined? @md.classify.coverage \
- and @md.classify.coverage=~/\S+/ # DublinCore 14 - coverage
- txt=@md.classify.coverage #dc
- special_character_escape(txt)
- @tp[:coverage_f],@tp[:coverage_i]='coverage, ',"'#{txt}', "
- end
- if defined? @md.classify.keywords \
- and @md.classify.keywords=~/\S+/
- txt=@md.classify.keywords
- special_character_escape(txt)
- @tp[:keywords_f],@tp[:keywords_i]='keywords, ',"'#{txt}', "
- end
- if defined? @md.notes.comment \
- and @md.notes.comment=~/\S+/
- txt=@md.notes.comments
- special_character_escape(txt)
- @tp[:comments_f],@tp[:comments_i]='comments, ',"'#{txt}', "
- end
- if defined? @md.classify.loc \
- and @md.classify.loc=~/\S+/
- txt=@md.classify.loc
- special_character_escape(txt)
- @tp[:cls_loc_f],@tp[:cls_loc_i]='cls_loc, ',"'#{txt}', "
- end
- if defined? @md.classify.dewey \
- and @md.classify.dewey=~/\S+/
- txt=@md.classify.dewey
- special_character_escape(txt)
- @tp[:cls_dewey_f],@tp[:cls_dewey_i]='cls_dewey, ',"'#{txt}', "
- end
- if defined? @md.classify.pg \
- and @md.classify.pg=~/\S+/
- txt=@md.classify.pg
- special_character_escape(txt)
- @tp[:cls_pg_f],@tp[:cls_pg_i]='cls_pg, ',"'#{txt}', "
- end
- if defined? @md.classify.isbn \
- and @md.classify.isbn=~/\S+/
- txt=@md.classify.isbn
- special_character_escape(txt)
- @tp[:cls_isbn_f],@tp[:cls_isbn_i]='cls_isbn, ',"'#{txt}', "
- end
- if defined? @md.notes.prefix_a \
- and @md.notes.prefix_a=~/\S+/
- txt=@md.notes.prefix_a
- special_character_escape(txt)
- @tp[:prefix_a_f],@tp[:prefix_a_i]='prefix_a, ',"'#{txt}', "
- end
- if defined? @md.notes.prefix_b \
- and @md.notes.prefix_b=~/\S+/
- txt=@md.notes.prefix_b
- special_character_escape(txt)
- @tp[:prefix_b_f],@tp[:prefix_b_i]='prefix_b, ',"'#{txt}', "
- end
- if defined? @md.fns \
- and @md.fns=~/\S+/
- txt=@md.fns
- special_character_escape(txt)
- @tp[:fns_f],@tp[:fns_i]="filename, ","'#{txt}', "
- end
- if @md.wc_words; txt=@md.wc_words
- @tp[:wc_words_f],@tp[:wc_words_i]='wc_words, ',"'#{txt}', "
- end
- if defined? @md.dgst \
- and @md.dgst.class==Array
- txt=@md.dgst[1]
- @tp[:dgst_f],@tp[:dgst_i]='dgst, ',"'#{txt}', "
- end
- if @md.sc_date; txt=@md.sc_date
- @tp[:sc_date_f],@tp[:sc_date_i]='sc_date, ',"'#{txt}', "
- end
- if @md.generated; txt=@md.generated
- @tp[:generated_f],@tp[:generated_i]='generated, ',"'#{@txt}', "
- end
+ ################ CLEAR ##############
SiSU_DB_DBI::Test.new(self,@opt).verify #% import title names, filenames (tuple)
- t=SiSU_DB_tuple::Load_metadata.new(@conn,@tp,@@id_t,@opt,@file)
+ t=SiSU_DB_tuple::Load_metadata.new(@conn,@@id_t,@md,@file)
tuple=t.tuple
tuple
end
@@ -482,13 +261,7 @@ module SiSU_DB_import
and data.ln.inspect=~/[123]/
@col[:lev],txt,@col[:ocn],@col[:lev_an],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]=data.ln,data.obj,data.ocn,data.lv,data.odv,data.osp,data.of,data.is,data.node,data.parent,'',''
@col[:lid]+=1
- if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/
- endnotes(txt).range
- @en << endnotes(txt).standard if txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/
- @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/
- @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_o]}/
- txt=endnotes(txt).clean_text
- end
+ txt=endnotes(txt).extract_any
@col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_minus
special_character_escape(@col[:body])
@col[:plaintext]=@col[:body].dup
@@ -521,13 +294,7 @@ module SiSU_DB_import
end
@env=SiSU_Env::Info_env.new(@md.fns)
@base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
- if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/
- endnotes(txt).range
- @en << endnotes(txt).standard if txt =~ /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/
- @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/
- @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/
- txt=endnotes(txt).clean_text(@base_url)
- end
+ txt=endnotes(txt).extract_any
@col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus
special_character_escape(@col[:body])
@col[:plaintext]=@col[:body].dup
@@ -553,13 +320,7 @@ module SiSU_DB_import
end
@env=SiSU_Env::Info_env.new(@md.fns)
@base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
- if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/
- endnotes(txt).range
- @en << endnotes(txt).standard if txt =~ /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/
- @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/
- @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/
- txt=endnotes(txt).clean_text(@base_url)
- end
+ txt=endnotes(txt).extract_any
@col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus
special_character_escape(@col[:body])
@col[:plaintext]=@col[:body].dup
@@ -585,13 +346,7 @@ module SiSU_DB_import
end
@env=SiSU_Env::Info_env.new(@md.fns)
@base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
- if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/
- endnotes(txt).range
- @en << endnotes(txt).standard if txt =~ /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/
- @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/
- @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/
- txt=endnotes(txt).clean_text(@base_url)
- end
+ txt=endnotes(txt).extract_any
@col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus
special_character_escape(@col[:body])
@col[:plaintext]=@col[:body].dup
@@ -613,15 +368,9 @@ module SiSU_DB_import
end
@env=SiSU_Env::Info_env.new(@md.fns)
@base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
- if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/
- endnotes(txt).range
- @en << endnotes(txt).standard if txt =~ /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/
- @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/
- @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/
- txt=endnotes(txt).clean_text(@base_url)
- end
+ txt=endnotes(txt).extract_any
if @sql_type=~/pg/ \
- and txt.size > (document_clean - 1) #% examine pg build & remove limitation
+ and txt.size > (SiSU_DB_columns::Column_size.new.document_clean - 1) #% examine pg build & remove limitation
puts "\n\nTOO LARGE (TXT - see error log)\n\n"
open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
error.puts("\n#{@opt.fns}\nTEXT BODY\n#{@col[:body].size} object #{@col[:ocn]} -> #{@col[:body].slice(0..500)}")
@@ -660,7 +409,7 @@ module SiSU_DB_import
#special_character_escape(body)
#special_character_escape(txt)
strip_markup(txt)
- if txt.size > (endnote_clean - 1)
+ if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)
puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}")
@@ -698,7 +447,7 @@ module SiSU_DB_import
special_character_escape(txt)
body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt)
strip_markup(txt)
- if txt.size > (endnote_clean - 1)
+ if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)
puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}")
@@ -736,7 +485,7 @@ module SiSU_DB_import
special_character_escape(txt)
body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt)
strip_markup(txt)
- if txt.size > (endnote_clean - 1)
+ if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)
puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}")
@@ -772,6 +521,16 @@ module SiSU_DB_import
end
def endnotes(txt)
@txt=txt
+ def extract_any #% if @txt holds endnote markup: set range, collect notes into @en/@en_ast/@en_pls, replace @txt with clean_text result; returns @txt
+   if @txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ # any endnote, standard or asterisk/plus
+     endnotes(@txt).range
+     @en << endnotes(@txt).standard if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/
+     @en_ast << endnotes(@txt).asterisk if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/
+     @en_pls << endnotes(@txt).plus if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ # closing marker is en_b_c (was en_b_o), as for asterisk notes
+     @txt=endnotes(@txt).clean_text # NOTE(review): some replaced call sites passed @base_url to clean_text - confirm none is needed
+   end
+   @txt # unchanged when no endnote markup was found
+ end
def standard
x=if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/; @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/)
else nil
diff --git a/lib/sisu/v2/db_indexes.rb b/lib/sisu/v2/db_indexes.rb
index a64fb362..3cbcc20c 100644
--- a/lib/sisu/v2/db_indexes.rb
+++ b/lib/sisu/v2/db_indexes.rb
@@ -73,15 +73,15 @@ module SiSU_DB_index
def base
print "\n create documents common indexes\n" unless @opt.cmd =~/q/
sql_arr=[
- %{CREATE INDEX object_nr ON documents(ocn);},
- %{CREATE INDEX digest_clean ON documents(digest_clean);},
- %{CREATE INDEX digest_all ON documents(digest_all);},
- %{CREATE INDEX lev1 ON documents(lev1);},
- %{CREATE INDEX lev2 ON documents(lev2);},
- %{CREATE INDEX lev3 ON documents(lev3);},
- %{CREATE INDEX lev4 ON documents(lev4);},
- %{CREATE INDEX lev5 ON documents(lev5);},
- %{CREATE INDEX lev6 ON documents(lev6);},
+ %{CREATE INDEX object_nr ON doc_objects(ocn);},
+ %{CREATE INDEX digest_clean ON doc_objects(digest_clean);},
+ %{CREATE INDEX digest_all ON doc_objects(digest_all);},
+ %{CREATE INDEX lev1 ON doc_objects(lev1);},
+ %{CREATE INDEX lev2 ON doc_objects(lev2);},
+ %{CREATE INDEX lev3 ON doc_objects(lev3);},
+ %{CREATE INDEX lev4 ON doc_objects(lev4);},
+ %{CREATE INDEX lev5 ON doc_objects(lev5);},
+ %{CREATE INDEX lev6 ON doc_objects(lev6);},
%{CREATE INDEX endnote_nr ON endnotes(nr);},
%{CREATE INDEX digest_en ON endnotes(digest_clean);},
%{CREATE INDEX endnote_nr_asterisk ON endnotes_asterisk(nr);},
@@ -90,15 +90,15 @@ module SiSU_DB_index
%{CREATE INDEX endnote_nr_plus ON endnotes_plus(nr);},
%{CREATE INDEX endnote_plus ON endnotes_plus(clean);},
%{CREATE INDEX digest_en_plus ON endnotes_plus(digest_clean);},
- %{CREATE INDEX title ON metadata(title);},
- %{CREATE INDEX filename ON metadata(filename)},
+ %{CREATE INDEX title ON metadata_and_text(title);},
+ %{CREATE INDEX filename ON metadata_and_text(filename)},
]
conn_execute_array(sql_arr)
end
def text
print "\n create documents text indexes\n" unless @opt.cmd =~/q/
sql_arr=[
- %{CREATE INDEX clean ON documents(clean);},
+ %{CREATE INDEX clean ON doc_objects(clean);},
%{CREATE INDEX endnote ON endnotes(clean);}
]
conn_execute_array(sql_arr)
diff --git a/lib/sisu/v2/db_load_tuple.rb b/lib/sisu/v2/db_load_tuple.rb
index 2fc3a455..cc00b74a 100644
--- a/lib/sisu/v2/db_load_tuple.rb
+++ b/lib/sisu/v2/db_load_tuple.rb
@@ -58,6 +58,7 @@
=end
module SiSU_DB_tuple
+ require "#{SiSU_lib}/db_columns" # db_columns.rb
class Load_documents
require "#{SiSU_lib}/param" # param.rb
include SiSU_Param
@@ -78,10 +79,10 @@ module SiSU_DB_tuple
end
def tuple #% import line
sql_entry=if @col[:en_a]
- "INSERT INTO documents (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " +
+ "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " +
"VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:en_a]}', '#{@col[:en_z]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');"
else
- "INSERT INTO documents (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, t_of, t_is, node, parent, digest_clean, digest_all) " +
+ "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, t_of, t_is, node, parent, digest_clean, digest_all) " +
"VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');"
end
if @opt.cmd =~/M/
@@ -113,13 +114,174 @@ module SiSU_DB_tuple
sql_entry
end
end
- class Load_metadata
- def initialize(conn,tp,id,opt,file)
- @conn,@tp,@id,@opt,@file=conn,tp,id,opt,file
+ class Load_metadata #< SiSU_DB_columns::Columns
+ def initialize(conn,id,md,file)
+ @conn,@id,@opt,@file=conn,id,md,file
+ @tp=SiSU_DB_columns::Columns.new(md)
end
def tuple
- sql_entry="INSERT INTO metadata (#{@tp[:fns_f]} #{@tp[:suffix_f]} #{@tp[:title_f]} #{@tp[:subtitle_f]} #{@tp[:creator_f]} #{@tp[:illustrator_f]} #{@tp[:translator_f]} #{@tp[:subject_f]} #{@tp[:description_f]} #{@tp[:publisher_f]} #{@tp[:contributor_f]} #{@tp[:prepared_by_f]} #{@tp[:digitized_by_f]} #{@tp[:date_f]} #{@tp[:date_created_f]} #{@tp[:date_issued_f]} #{@tp[:date_valid_f]} #{@tp[:date_available_f]} #{@tp[:date_modified_f]} #{@tp[:type_f]} #{@tp[:format_f]} #{@tp[:identifier_f]} #{@tp[:source_f]} #{@tp[:language_f]} #{@tp[:language_original_f]} #{@tp[:relation_f]} #{@tp[:coverage_f]} #{@tp[:rights_f]} #{@tp[:copyright_f]} #{@tp[:owner_f]} #{@tp[:keywords_f]} #{@tp[:abstract_f]} #{@tp[:comment_f]} #{@tp[:loc_f]} #{@tp[:dewey_f]} #{@tp[:isbn_f]} #{@tp[:pg_f]} #{@tp[:prefix_a_f]} #{@tp[:prefix_b_f]} tid) " +
- "VALUES (#{@tp[:fns_i]} #{@tp[:suffix_i]} #{@tp[:title_i]} #{@tp[:subtitle_i]} #{@tp[:creator_i]} #{@tp[:illustrator_i]} #{@tp[:translator_i]} #{@tp[:subject_i]} #{@tp[:description_i]} #{@tp[:publisher_i]} #{@tp[:contributor_i]} #{@tp[:prepared_by_i]} #{@tp[:digitized_by_i]} #{@tp[:date_i]} #{@tp[:date_created_i]} #{@tp[:date_issued_i]} #{@tp[:date_valid_i]} #{@tp[:date_available_i]} #{@tp[:date_modified_i]} #{@tp[:type_i]} #{@tp[:format_i]} #{@tp[:identifier_i]} #{@tp[:source_i]} #{@tp[:language_i]} #{@tp[:language_original_i]} #{@tp[:relation_i]} #{@tp[:coverage_i]} #{@tp[:rights_i]} #{@tp[:copyright_i]} #{@tp[:owner_i]} #{@tp[:keywords_i]} #{@tp[:abstract_i]} #{@tp[:comment_i]} #{@tp[:loc_i]} #{@tp[:dewey_i]} #{@tp[:isbn_i]} #{@tp[:pg_i]} #{@tp[:prefix_a_i]} #{@tp[:prefix_b_i]} #{@id});"
+ sql_entry="INSERT INTO metadata_and_text (
+#{@tp.column.title.tuple[0]}
+#{@tp.column.title_main.tuple[0]}
+#{@tp.column.title_sub.tuple[0]}
+#{@tp.column.title_short.tuple[0]}
+#{@tp.column.title_edition.tuple[0]}
+#{@tp.column.title_note.tuple[0]}
+#{@tp.column.title_language.tuple[0]}
+#{@tp.column.title_language_char.tuple[0]}
+#{@tp.column.creator_author.tuple[0]}
+#{@tp.column.creator_author_honorific.tuple[0]}
+#{@tp.column.creator_author_nationality.tuple[0]}
+#{@tp.column.creator_contributor.tuple[0]}
+#{@tp.column.creator_illustrator.tuple[0]}
+#{@tp.column.creator_photographer.tuple[0]}
+#{@tp.column.creator_translator.tuple[0]}
+#{@tp.column.creator_prepared_by.tuple[0]}
+#{@tp.column.creator_digitized_by.tuple[0]}
+#{@tp.column.creator_audio.tuple[0]}
+#{@tp.column.creator_video.tuple[0]}
+#{@tp.column.language_document.tuple[0]}
+#{@tp.column.language_document_char.tuple[0]}
+#{@tp.column.language_original.tuple[0]}
+#{@tp.column.language_original_char.tuple[0]}
+#{@tp.column.date_added_to_site.tuple[0]}
+#{@tp.column.date_available.tuple[0]}
+#{@tp.column.date_created.tuple[0]}
+#{@tp.column.date_issued.tuple[0]}
+#{@tp.column.date_modified.tuple[0]}
+#{@tp.column.date_published.tuple[0]}
+#{@tp.column.date_valid.tuple[0]}
+#{@tp.column.date_translated.tuple[0]}
+#{@tp.column.date_original_publication.tuple[0]}
+#{@tp.column.date_generated.tuple[0]}
+#{@tp.column.publisher.tuple[0]}
+#{@tp.column.original_publisher.tuple[0]}
+#{@tp.column.original_language.tuple[0]}
+#{@tp.column.original_language_char.tuple[0]}
+#{@tp.column.original_source.tuple[0]}
+#{@tp.column.original_institution.tuple[0]}
+#{@tp.column.original_nationality.tuple[0]}
+#{@tp.column.rights_all.tuple[0]}
+#{@tp.column.rights_copyright_text.tuple[0]}
+#{@tp.column.rights_copyright_translation.tuple[0]}
+#{@tp.column.rights_copyright_illustrations.tuple[0]}
+#{@tp.column.rights_copyright_photographs.tuple[0]}
+#{@tp.column.rights_copyright_preparation.tuple[0]}
+#{@tp.column.rights_copyright_digitization.tuple[0]}
+#{@tp.column.rights_copyright_audio.tuple[0]}
+#{@tp.column.rights_copyright_video.tuple[0]}
+#{@tp.column.rights_license.tuple[0]}
+#{@tp.column.classify_topic_register.tuple[0]}
+#{@tp.column.classify_subject.tuple[0]}
+#{@tp.column.classify_type.tuple[0]}
+#{@tp.column.classify_loc.tuple[0]}
+#{@tp.column.classify_dewey.tuple[0]}
+#{@tp.column.classify_pg.tuple[0]}
+#{@tp.column.classify_isbn.tuple[0]}
+#{@tp.column.classify_format.tuple[0]}
+#{@tp.column.classify_identifier.tuple[0]}
+#{@tp.column.classify_relation.tuple[0]}
+#{@tp.column.classify_coverage.tuple[0]}
+#{@tp.column.classify_keywords.tuple[0]}
+#{@tp.column.notes_abstract.tuple[0]}
+#{@tp.column.notes_comment.tuple[0]}
+#{@tp.column.notes_description.tuple[0]}
+#{@tp.column.notes_history.tuple[0]}
+#{@tp.column.notes_prefix.tuple[0]}
+#{@tp.column.notes_prefix_a.tuple[0]}
+#{@tp.column.notes_prefix_b.tuple[0]}
+#{@tp.column.notes_suffix.tuple[0]}
+#{@tp.column.filename.tuple[0]}
+#{@tp.column.sisutxt.tuple[0]}
+#{@tp.column.fulltext.tuple[0]}
+#{@tp.column.word_count.tuple[0]}
+#{@tp.column.digest.tuple[0]}
+#{@tp.column.skin_name.tuple[0]}
+#{@tp.column.skin.tuple[0]}
+#{@tp.column.links.tuple[0]}
+tid)
+" +
+ "VALUES (
+#{@tp.column.title.tuple[1]}
+#{@tp.column.title_main.tuple[1]}
+#{@tp.column.title_sub.tuple[1]}
+#{@tp.column.title_short.tuple[1]}
+#{@tp.column.title_edition.tuple[1]}
+#{@tp.column.title_note.tuple[1]}
+#{@tp.column.title_language.tuple[1]}
+#{@tp.column.title_language_char.tuple[1]}
+#{@tp.column.creator_author.tuple[1]}
+#{@tp.column.creator_author_honorific.tuple[1]}
+#{@tp.column.creator_author_nationality.tuple[1]}
+#{@tp.column.creator_contributor.tuple[1]}
+#{@tp.column.creator_illustrator.tuple[1]}
+#{@tp.column.creator_photographer.tuple[1]}
+#{@tp.column.creator_translator.tuple[1]}
+#{@tp.column.creator_prepared_by.tuple[1]}
+#{@tp.column.creator_digitized_by.tuple[1]}
+#{@tp.column.creator_audio.tuple[1]}
+#{@tp.column.creator_video.tuple[1]}
+#{@tp.column.language_document.tuple[1]}
+#{@tp.column.language_document_char.tuple[1]}
+#{@tp.column.language_original.tuple[1]}
+#{@tp.column.language_original_char.tuple[1]}
+#{@tp.column.date_added_to_site.tuple[1]}
+#{@tp.column.date_available.tuple[1]}
+#{@tp.column.date_created.tuple[1]}
+#{@tp.column.date_issued.tuple[1]}
+#{@tp.column.date_modified.tuple[1]}
+#{@tp.column.date_published.tuple[1]}
+#{@tp.column.date_valid.tuple[1]}
+#{@tp.column.date_translated.tuple[1]}
+#{@tp.column.date_original_publication.tuple[1]}
+#{@tp.column.date_generated.tuple[1]}
+#{@tp.column.publisher.tuple[1]}
+#{@tp.column.original_publisher.tuple[1]}
+#{@tp.column.original_language.tuple[1]}
+#{@tp.column.original_language_char.tuple[1]}
+#{@tp.column.original_source.tuple[1]}
+#{@tp.column.original_institution.tuple[1]}
+#{@tp.column.original_nationality.tuple[1]}
+#{@tp.column.rights_all.tuple[1]}
+#{@tp.column.rights_copyright_text.tuple[1]}
+#{@tp.column.rights_copyright_translation.tuple[1]}
+#{@tp.column.rights_copyright_illustrations.tuple[1]}
+#{@tp.column.rights_copyright_photographs.tuple[1]}
+#{@tp.column.rights_copyright_preparation.tuple[1]}
+#{@tp.column.rights_copyright_digitization.tuple[1]}
+#{@tp.column.rights_copyright_audio.tuple[1]}
+#{@tp.column.rights_copyright_video.tuple[1]}
+#{@tp.column.rights_license.tuple[1]}
+#{@tp.column.classify_topic_register.tuple[1]}
+#{@tp.column.classify_subject.tuple[1]}
+#{@tp.column.classify_type.tuple[1]}
+#{@tp.column.classify_loc.tuple[1]}
+#{@tp.column.classify_dewey.tuple[1]}
+#{@tp.column.classify_pg.tuple[1]}
+#{@tp.column.classify_isbn.tuple[1]}
+#{@tp.column.classify_format.tuple[1]}
+#{@tp.column.classify_identifier.tuple[1]}
+#{@tp.column.classify_relation.tuple[1]}
+#{@tp.column.classify_coverage.tuple[1]}
+#{@tp.column.classify_keywords.tuple[1]}
+#{@tp.column.notes_abstract.tuple[1]}
+#{@tp.column.notes_comment.tuple[1]}
+#{@tp.column.notes_description.tuple[1]}
+#{@tp.column.notes_history.tuple[1]}
+#{@tp.column.notes_prefix.tuple[1]}
+#{@tp.column.notes_prefix_a.tuple[1]}
+#{@tp.column.notes_prefix_b.tuple[1]}
+#{@tp.column.notes_suffix.tuple[1]}
+#{@tp.column.filename.tuple[1]}
+#{@tp.column.sisutxt.tuple[1]}
+#{@tp.column.fulltext.tuple[1]}
+#{@tp.column.word_count.tuple[1]}
+#{@tp.column.digest.tuple[1]}
+#{@tp.column.skin_name.tuple[1]}
+#{@tp.column.skin.tuple[1]}
+#{@tp.column.links.tuple[1]}
+#{@id}
+);"
if @opt.cmd =~/M/
puts "maintenance mode on: creating sql transaction file (for last transaction set (document) only):\n\t#{@file.inspect}"
@file.puts sql_entry
diff --git a/lib/sisu/v2/db_remove.rb b/lib/sisu/v2/db_remove.rb
index 99640cdf..0a51b892 100644
--- a/lib/sisu/v2/db_remove.rb
+++ b/lib/sisu/v2/db_remove.rb
@@ -68,19 +68,19 @@ module SiSU_DB_remove
def remove
driver_sqlite3=(@conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]) ? true : false
del_id=if driver_sqlite3
- @conn.get_first_value(%{ SELECT tid FROM metadata WHERE filename LIKE '#{@opt.fns}'; }).to_i
+ @conn.get_first_value(%{ SELECT tid FROM metadata_and_text WHERE filename = '#{@opt.fns}'; }).to_i
else
- x=@conn.select_one(%{ SELECT tid FROM metadata WHERE filename LIKE '#{@opt.fns}'; })
- del=x ? (x.join.to_i) : nil
+ x=@conn.select_one(%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.filename = '#{@opt.fns}'; })
+ x ? (x.join.to_i) : nil
end
if del_id
sql_entry=[
"DELETE FROM endnotes WHERE metadata_tid = '#{del_id}';",
"DELETE FROM endnotes_asterisk WHERE metadata_tid = '#{del_id}';",
"DELETE FROM endnotes_plus WHERE metadata_tid = '#{del_id}';",
- "DELETE FROM documents WHERE metadata_tid = '#{del_id}';",
+ "DELETE FROM doc_objects WHERE metadata_tid = '#{del_id}';",
"DELETE FROM urls WHERE metadata_tid = '#{del_id}';",
- "DELETE FROM metadata WHERE tid = '#{del_id}';",
+ "DELETE FROM metadata_and_text WHERE metadata_and_text.tid = '#{del_id}';",
]
if driver_sqlite3
@conn.transaction
diff --git a/lib/sisu/v2/db_select.rb b/lib/sisu/v2/db_select.rb
index 33441b45..1ac9195f 100644
--- a/lib/sisu/v2/db_select.rb
+++ b/lib/sisu/v2/db_select.rb
@@ -69,6 +69,8 @@ module SiSU_DB_select
if @opt.mod.inspect =~/update|import/
@sdb_import=SiSU_DB_DBI::Import.new(@opt,@conn,@file,@sql_type)
@sdb_remove_doc=SiSU_DB_DBI::Remove.new(@opt,@conn,@file)
+ elsif @opt.mod.inspect =~/remove/
+ @sdb_remove_doc=SiSU_DB_DBI::Remove.new(@opt,@conn,@file)
end
end
def sql_maintenance_file
@@ -98,8 +100,8 @@ module SiSU_DB_select
when /^--(?:init(?:ialize)?|create(?:all)?)$/
@sdb.output_dir?
begin
- @sdb.create_table.metadata
- @sdb.create_table.documents
+ @sdb.create_table.metadata_and_text
+ @sdb.create_table.doc_objects
@sdb.create_table.endnotes
@sdb.create_table.endnotes_asterisk
@sdb.create_table.endnotes_plus
@@ -110,8 +112,8 @@ module SiSU_DB_select
when /^--createtable(s)?$/
@sdb.output_dir?
begin
- @sdb.create_table.metadata
- @sdb.create_table.documents
+ @sdb.create_table.metadata_and_text
+ @sdb.create_table.doc_objects
@sdb.create_table.endnotes
@sdb.create_table.endnotes_asterisk
@sdb.create_table.endnotes_plus
@@ -123,8 +125,8 @@ module SiSU_DB_select
@sdb.output_dir?
begin
@sdb_no.drop.tables
- @sdb.create_table.metadata
- @sdb.create_table.documents
+ @sdb.create_table.metadata_and_text
+ @sdb.create_table.doc_objects
@sdb.create_table.endnotes
@sdb.create_table.endnotes_asterisk
@sdb.create_table.endnotes_plus
@@ -135,13 +137,13 @@ module SiSU_DB_select
when /^--cr(eate)?lex$/
@sdb.output_dir?
begin
- @sdb.create_table.documents
+ @sdb.create_table.doc_objects
rescue; @sdb.output_dir?
end
when /^--cr(eate)?metadata$/
@sdb.output_dir?
begin
- @sdb.create_table.metadata
+ @sdb.create_table.metadata_and_text
rescue; @sdb.output_dir?
end
when /^--import$/
diff --git a/lib/sisu/v2/db_sqltxt.rb b/lib/sisu/v2/db_sqltxt.rb
new file mode 100644
index 00000000..f120b95f
--- /dev/null
+++ b/lib/sisu/v2/db_sqltxt.rb
@@ -0,0 +1,115 @@
+# coding: utf-8
+=begin
+
+ * Name: SiSU
+
+ * Description: a framework for document structuring, publishing and search
+ #___#
+
+ * Author: Ralph Amissah
+
+ * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2007, 2008, 2009, 2010 Ralph Amissah All Rights Reserved.
+
+ * License: GPL 3 or later:
+
+ SiSU, a framework for document structuring, publishing and search
+
+ Copyright (C) Ralph Amissah
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program. If not, see <http://www.gnu.org/licenses/>.
+
+ If you have Internet connection, the latest version of the GPL should be
+ available at these locations:
+ <http://www.fsf.org/licensing/licenses/gpl.html>
+ <http://www.gnu.org/copyleft/gpl.html>
+
+ <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
+ <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
+ <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>
+
+ * SiSU uses:
+ * Standard SiSU markup syntax,
+ * Standard SiSU meta-markup syntax, and the
+ * Standard SiSU object citation numbering and system
+
+ * Homepages:
+ <http://www.jus.uio.no/sisu>
+ <http://www.sisudoc.org>
+
+ * Download:
+ <http://www.jus.uio.no/sisu/SiSU/download.html>
+
+ * Ralph Amissah
+ <ralph@amissah.com>
+ <ralph.amissah@gmail.com>
+
+ ** Description: preparation of text for sql (special character escaping, markup stripping, clean searchable text)
+
+=end
+module SiSU_DB_text
+  # Text preparation helpers for sql entries: quote escaping, removal of
+  # internal markup, and assembly of plain searchable text.
+  # Every method here edits the string(s) it is given in place (gsub!) and
+  # returns the resulting string.
+  class Prepare
+    # Makes a string safe to place inside a single-quoted sql literal and
+    # reduces internal markup to plain text.
+    #
+    # str:: string to clean; modified in place.
+    # Returns the same (modified) string.
+    def special_character_escape(str)
+      str.gsub!(/'/,"''") #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'")
+      # line-break markers become an html break followed by a newline
+      str.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"<br />\n")
+      str.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check
+      # image link (png/jpg, optional WxH size) -> "[image: name] caption"
+      str.gsub!(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/,'[image: \1] \2')
+      # link to a file:// or ftp:// target -> link text, keeping trailing punctuation
+      str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2')
+      # link with a marked-up url -> link text only
+      str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,'\1')
+      str
+    end
+    # Produces clean, searchable plaintext from document source.
+    #
+    # arr:: array of source strings; each element is modified in place.
+    # Returns one string: the cleaned elements, followed by the note bodies
+    # collected from ~{ ... }~ spans, joined with newlines and passed through
+    # special_character_escape.
+    #
+    # NOTE(review): @md is read below but is not assigned anywhere in this
+    # class; presumably a subclass sets it before calling - verify against
+    # callers.
+    def clean_searchable_text(arr) #produce clean, searchable, plaintext from document source
+      txt_arr,en=[],[]
+      arr.each do |s|
+        # x{text}x (x one of * / _ -) -> text
+        s.gsub!(/([*\/_-])\{(.+?)\}\1/,'\2')
+        # opening and closing markers of group/poem/code blocks
+        s.gsub!(/^(?:group|poem|code)\{/,''); s.gsub!(/^\}(?:group|poem|code)/,'')
+        # an element consisting solely of an "@name: value" header is emptied
+        s.gsub!(/\A(?:@\S+:\s+.+)\Z/m,'')
+        if s =~/^:A~/
+          s.gsub!(/@author/,@md.creator.author)
+          s.gsub!(/@title/,@md.title.full)
+        end
+        # leading markers: _1 .. _9 (optionally followed by *), _*, 1~name .. 9~name, :A~ .. :C~
+        s.gsub!(/^(?:_[1-9]\*?|_\*)\s+/,'')
+        s.gsub!(/^(?:[1-9]\~(\S+)?)\s+/,'')
+        s.gsub!(/^(?::?[A-C]\~(\S+)?)\s+/,'')
+        s.gsub!(/^%{1,3} .+/,'') #removed even if contained in code block
+        s.gsub!(/<br>/,' ')
+        # collect the bodies of ~{ ... }~ spans before removing them from the text
+        en << s.scan(/~\{\s*(.+?)\s*\}~/)
+        s.gsub!(/~\{.+?\}~/,'')
+        s.gsub!(/ \s+/,' ')
+        #special_character_escape(s)
+        s
+      end
+      txt_arr << arr << en
+      #txt_arr=txt_arr.flatten
+      txt=txt_arr.flatten.join("\n")
+      txt=special_character_escape(txt)
+      txt
+    end
+    # Strips internal markup from a string so that only plain text remains.
+    #
+    # str:: string to clean; modified in place.
+    # Returns the same (modified) string, whitespace-collapsed and stripped.
+    def strip_markup(str) #define rules, make same as in dal clean
+      str.gsub!(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]')
+      str.gsub!(/(?:&nbsp\\;|#{Mx[:nbsp]})+/,' ')
+      str.gsub!(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1') #tables
+      str.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' ') #tables
+      str.gsub!(/#{Mx[:tc_p]}/u,' ') #tables tidy later
+      str.gsub!(/<.+?>/,'')
+      # the scheme separator must be "://" as in special_character_escape; the
+      # transposed "//:" form could never match a file or ftp url
+      str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+ /,' [image] ') # else image names found in search
+      str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [image]') # else image names found in search
+      str.gsub!(/\s\s+/,' ')
+      str.strip!
+      str
+    end
+  end
+end
+__END__
+