aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/sisu/v2/db_import.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sisu/v2/db_import.rb')
-rw-r--r--lib/sisu/v2/db_import.rb131
1 files changed, 67 insertions, 64 deletions
diff --git a/lib/sisu/v2/db_import.rb b/lib/sisu/v2/db_import.rb
index 5610a1d0..0e2db8e3 100644
--- a/lib/sisu/v2/db_import.rb
+++ b/lib/sisu/v2/db_import.rb
@@ -86,14 +86,17 @@ module SiSU_DB_import
@col[:ocn]=''
@counter={}
@db=SiSU_Env::Info_db.new
- @driver_sqlite3=(@conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]) ? true : false
+ if @sql_type=='sqlite'
+ @driver_sqlite3=(@conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]) \
+ ? true \
+ : false
+ end
sql='SELECT MAX(lid) FROM doc_objects'
begin
@col[:lid] ||=0
- @col[:lid]=if @driver_sqlite3
- @conn.execute( sql ).join.to_i
- else @conn.execute( sql ) { |x| x.fetch_all.to_s.to_i }
- end
+ @col[:lid]=@driver_sqlite3 \
+ ? @conn.execute( sql ).join.to_i \
+ : @conn.execute( sql ) { |x| x.fetch_all.to_s.to_i }
rescue
puts "#{__FILE__}:#{__LINE__}" if @opt.cmd =~/M/
end
@@ -101,11 +104,9 @@ module SiSU_DB_import
sql='SELECT MAX(nid) FROM endnotes'
begin
@id_n ||=0
- @id_n=if @driver_sqlite3
- @conn.execute( sql ).join.to_i
- else
- @id_n=@conn.execute( sql ) { |x| x.fetch_all.to_s.to_i }
- end
+ @id_n=@driver_sqlite3 \
+ ? @conn.execute( sql ).join.to_i \
+ : @id_n=@conn.execute( sql ) { |x| x.fetch_all.to_s.to_i }
rescue
puts "#{__FILE__}:#{__LINE__}" if @opt.cmd =~/M/
end
@@ -121,12 +122,11 @@ module SiSU_DB_import
tell.puts_blue unless @opt.cmd =~/q/
tell=SiSU_Screen::Ansi.new(@opt.cmd,'Marshal Load',@fnc)
tell.print_grey if @opt.cmd =~/v/
- file_exist=if @sql_type=~/sqlite/; nil
- else
- @conn.select_one(%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.filename = '#{@opt.fns}'; })
- end
- if (@sql_type!~/sqlite/ and not file_exist) \
- or @sql_type=~/sqlite/
+ select_first_match=%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.src_filename = '#{@opt.fns}'; }
+ file_exist=@sql_type=~/sqlite/ \
+ ? @conn.get_first_value(select_first_match) \
+ : @conn.select_one(select_first_match)
+ if not file_exist
t_d=[] # transaction_data
t_d << db_import_metadata
t_d << db_import_documents(@dal_array)
@@ -265,7 +265,8 @@ module SiSU_DB_import
@col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_minus
special_character_escape(@col[:body])
@col[:plaintext]=@col[:body].dup
- strip_markup(@col[:plaintext])
+ @col[:plaintext]=strip_markup(@col[:plaintext])
+ @col[:plaintext]=clean_searchable_text(@col[:plaintext])
if @en[0]; @en_a,@en_z=@en[0].first,@en[0].last
end
if @en_ast[0]; @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last
@@ -298,7 +299,8 @@ module SiSU_DB_import
@col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus
special_character_escape(@col[:body])
@col[:plaintext]=@col[:body].dup
- strip_markup(@col[:plaintext])
+ @col[:plaintext]=strip_markup(@col[:plaintext])
+ @col[:plaintext]=clean_searchable_text(@col[:plaintext])
@en_a,@en_z=@en[0].first,@en[0].last if @en[0]
@en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
@en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
@@ -324,7 +326,8 @@ module SiSU_DB_import
@col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus
special_character_escape(@col[:body])
@col[:plaintext]=@col[:body].dup
- strip_markup(@col[:plaintext])
+ @col[:plaintext]=strip_markup(@col[:plaintext])
+ @col[:plaintext]=clean_searchable_text(@col[:plaintext])
@en_a,@en_z=@en[0].first,@en[0].last if @en[0]
@en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
@en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
@@ -350,7 +353,8 @@ module SiSU_DB_import
@col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus
special_character_escape(@col[:body])
@col[:plaintext]=@col[:body].dup
- strip_markup(@col[:plaintext])
+ @col[:plaintext]=strip_markup(@col[:plaintext])
+ @col[:plaintext]=clean_searchable_text(@col[:plaintext])
@en_a,@en_z=@en[0].first,@en[0].last if @en[0]
@en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
@en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
@@ -389,12 +393,13 @@ module SiSU_DB_import
end
special_character_escape(@col[:body])
@col[:plaintext]=@col[:body].dup
- strip_markup(@col[:plaintext])
+ @col[:plaintext]=strip_markup(@col[:plaintext])
+ @col[:plaintext]=clean_searchable_text(@col[:plaintext])
t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file)
@tuple_array << t.tuple
@en,@en_ast,@en_pls=[],[],[]
@col[:en_a]=@col[:en_z]=nil
- @col[:lev]=@col[:plaintext]=@col[:body]=''
+ @col[:lev]=@col[:plaintext]=@col[:body]=@col[:words]=''
end
if notedata =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ #% import into database endnotes tables
endnote_array=notedata.scan(/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/)
@@ -406,8 +411,6 @@ module SiSU_DB_import
@id_n+=1
special_character_escape(txt)
body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt)
- #special_character_escape(body)
- #special_character_escape(txt)
strip_markup(txt)
if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)
puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
@@ -418,16 +421,16 @@ module SiSU_DB_import
end
if txt
en={ :type => 'endnotes',
- :id => @id_n,
- :lid => @col[:lid],
- :nr => nr,
- :txt => txt,
- :body => body,
- :ocn => @col[:ocn],
- :ocnd => @col[:ocnd],
- :ocns => @col[:ocns],
- :id_t => @@id_t,
- :hash => digest_clean
+ :id => @id_n,
+ :lid => @col[:lid],
+ :nr => nr,
+ :txt => txt,
+ :body => body,
+ :ocn => @col[:ocn],
+ :ocnd => @col[:ocnd],
+ :ocns => @col[:ocns],
+ :id_t => @@id_t,
+ :hash => digest_clean
}
t=SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file)
@tuple_array << t.tuple
@@ -456,16 +459,16 @@ module SiSU_DB_import
end
if txt
en={ :type => 'endnotes_asterisk',
- :id => @id_n,
- :lid => @col[:lid],
- :nr => nr,
- :txt => txt,
- :body => body,
- :ocn => @col[:ocn],
- :ocnd => @col[:ocnd],
- :ocns => @col[:ocns],
- :id_t => @@id_t,
- :hash => digest_clean
+ :id => @id_n,
+ :lid => @col[:lid],
+ :nr => nr,
+ :txt => txt,
+ :body => body,
+ :ocn => @col[:ocn],
+ :ocnd => @col[:ocnd],
+ :ocns => @col[:ocns],
+ :id_t => @@id_t,
+ :hash => digest_clean
}
t=SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file)
@tuple_array << t.tuple
@@ -494,16 +497,16 @@ module SiSU_DB_import
end
if txt
en={ :type => 'endnotes_plus',
- :id => @id_n,
- :lid => @col[:lid],
- :nr => nr,
- :txt => txt,
- :body => body,
- :ocn => @col[:ocn],
- :ocnd => @col[:ocnd],
- :ocns => @col[:ocns],
- :id_t => @@id_t,
- :hash => digest_clean
+ :id => @id_n,
+ :lid => @col[:lid],
+ :nr => nr,
+ :txt => txt,
+ :body => body,
+ :ocn => @col[:ocn],
+ :ocnd => @col[:ocnd],
+ :ocns => @col[:ocns],
+ :id_t => @@id_t,
+ :hash => digest_clean
}
t=SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file)
@tuple_array << t.tuple
@@ -526,25 +529,25 @@ module SiSU_DB_import
endnotes(@txt).range
@en << endnotes(@txt).standard if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/
@en_ast << endnotes(@txt).asterisk if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/
- @en_pls << endnotes(@txt).plus if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_o]}/
+ @en_pls << endnotes(@txt).plus if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/
@txt=endnotes(@txt).clean_text
end
@txt
end
def standard
- x=if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/; @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/)
- else nil
- end
+ x=(@txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/) \
+ ? @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/) \
+ : nil
end
def asterisk
- x=if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/; @txt.scan(/#{Mx[:en_b_o]}[*](\d+).+?#{Mx[:en_b_c]}/)
- else nil
- end
+ x=(@txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/) \
+ ? @txt.scan(/#{Mx[:en_b_o]}[*](\d+).+?#{Mx[:en_b_c]}/) \
+ : nil
end
def plus
- x=if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/; @txt.scan(/#{Mx[:en_b_o]}[+](\d+).+?#{Mx[:en_b_c]}/)
- else nil
- end
+ x=(@txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/) \
+ ? @txt.scan(/#{Mx[:en_b_o]}[+](\d+).+?#{Mx[:en_b_c]}/) \
+ : nil
end
def clean_text(base_url=nil)
if base_url