From 804a103722aa7731ca7f2062ee2ebf533607e6aa Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Wed, 3 Oct 2012 00:11:08 -0400 Subject: v4: 4.0.0 new branch & version & changelog "opened" --- lib/sisu/v2/db_import.rb | 649 ----------------------------------------------- 1 file changed, 649 deletions(-) delete mode 100644 lib/sisu/v2/db_import.rb (limited to 'lib/sisu/v2/db_import.rb') diff --git a/lib/sisu/v2/db_import.rb b/lib/sisu/v2/db_import.rb deleted file mode 100644 index 4237757f..00000000 --- a/lib/sisu/v2/db_import.rb +++ /dev/null @@ -1,649 +0,0 @@ -# coding: utf-8 -=begin - - * Name: SiSU - - * Description: a framework for document structuring, publishing and search - - * Author: Ralph Amissah - - * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. - - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see . 
=begin
  (file header comment, continued)

  If you have Internet connection, the latest version of the GPL should be
  available at these locations:
  (the URLs are missing from this copy of the file)

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:

 * Download:

 * Ralph Amissah

 ** Description: modules shared by the different db types, dbi, postgresql,
 sqlite

=end
module SiSU_DB_import
  require "#{SiSU_lib}/db_columns"                         # db_columns.rb
  require "#{SiSU_lib}/db_load_tuple"                      # db_load_tuple.rb
  require "#{SiSU_lib}/db_sqltxt"                          # db_sqltxt.rb
  require "#{SiSU_lib}/shared_html_lite"                   # shared_html_lite.rb
  require 'sqlite3'
  # Collects the statements for one document (metadata, document objects,
  # endnotes, output urls) from the SiSU_DB_tuple loaders and executes them
  # against the connection inside a single transaction (see marshal_load).
  class Import < SiSU_DB_text::Prepare
    include SiSU_Param
    include SiSU_Screen
    @@dl=nil
    @@hname=nil
    attr_accessor :tp
    # opt::      options object; this class reads opt.cmd and opt.fns
    # conn::     open database handle (sqlite3 handle or DBI handle)
    # file::     passed through unchanged to the SiSU_DB_tuple loaders
    # sql_type:: 'pg' (default) or 'sqlite'
    def initialize(opt,conn,file,sql_type='pg')
      @opt,@conn,@file,@sql_type=opt,conn,file,sql_type
      @cX=SiSU_Screen::Ansi.new(@opt.cmd).cX
      @env=SiSU_Env::Info_env.new(@opt.fns)
      @dal="#{@env.path.dal}"
      if @opt.fns.empty? or @opt.cmd.empty?; @fnb=''
      else
        @md=SiSU_Param::Parameters.new(@opt).get
        @fnb=@md.fnb
      end
      # NOTE(review): the pattern has no capture group 1, so this looks like it
      # always yields nil - confirm what @suffix was meant to hold
      @suffix=@opt.fns[/(?:.+?)(?:\.ssm\.sst|\.-?sst)/,1]
      @fnc="#{@dal}/#{@opt.fns}.content.rbm"
      @@seg,@@seg_full='','' #create? consider placing field just before clean text as opposed to seg which contains seg(.html) name info seg_full would contain seg info for levels 5 & 6 where available eg seg_full may be 7.3 (level 5) and 7.3.1 (level 6) where seg is 7
      @col=Hash.new('')
      @col[:ocn]=''
      @counter={}
      @db=SiSU_Env::Info_db.new
      if @sql_type=='sqlite'
        # the handle counts as a sqlite3 one when the first 10 characters of its
        # inspect string equal those of the handle Info_db reports
        @driver_sqlite3=(@conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1])
      end
      # numbering continues from the highest ids already stored; 0 when the
      # query fails
      @col[:lid]=db_max_id('SELECT MAX(lid) FROM doc_objects') || 0
      @id_n=db_max_id('SELECT MAX(nid) FROM endnotes') || 0
      @col[:lv1]=@col[:lv2]=@col[:lv3]=@col[:lv4]=@col[:lv5]=@col[:lv6]=0
      @@dl ||=SiSU_Env::Info_env.new.digest.length
    end
    # Imports the document named by @opt.fns unless a row with that
    # src_filename already exists in metadata_and_text. When a statement fails
    # the error is reported and the whole batch is written to
    # "#{@env.path.sql}/#{@md.fnb}.sql" (see report_failed_import).
    def marshal_load
      require "#{SiSU_lib}/dal"                            # dal.rb
      @dal_array=SiSU_DAL::Source.new(@opt).get            # dal file drawn here
      SiSU_Screen::Ansi.new(@opt.cmd,"#{@db.psql.db}::#{@opt.fns}").puts_blue if @opt.cmd =~/[vVM]/
      SiSU_Screen::Ansi.new(@opt.cmd,'Marshal Load',@fnc).puts_grey if @opt.cmd =~/v/
      # bind parameter rather than interpolation, so a quote character in the
      # file name cannot break the statement
      select_first_match=%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.src_filename = ?; }
      file_exist=@sql_type=~/sqlite/ \
      ? @conn.get_first_value(select_first_match,@opt.fns) \
      : @conn.select_one(select_first_match,@opt.fns)
      if file_exist
        @db=SiSU_Env::Info_db.new
        puts "\n#{@cX.grey}file #{@cX.off} #{@cX.blue}#{@opt.fns}#{@cX.off} #{@cX.grey}already exists in database#{@cX.off} #{@cX.blue}#{@db.psql.db}#{@cX.off} #{@cX.brown}update instead?#{@cX.off}"
      else
        t_d=[]                                             # transaction_data
        t_d << db_import_metadata
        t_d << db_import_documents(@dal_array)
        t_d << db_import_urls(@dal_array,@fnc)             #import OID on/off
        t_d=t_d.flatten
        if @opt.cmd =~/[MV]/
          puts @conn.class if defined? @conn.class
          puts @conn.driver_name if defined? @conn.driver_name
          puts @conn.driver if defined? @conn.driver
        end
        begin
          if @sql_type=~/sqlite/
            @conn.transaction do |conn|
              t_d.each { |stmt| conn.execute(stmt) }
            end
          else
            #'do' works for postgresql
            # NOTE(review): no ROLLBACK is sent when a statement fails - confirm
            # whether the caller discards the connection afterwards
            @conn.do("BEGIN")
            t_d.each { |stmt| @conn.do(stmt) }
            @conn.do("COMMIT")
          end
        rescue DBI::DatabaseError => e
          puts "Error code: #{e.err}"
          puts "Error message: #{e.errstr}"
          puts "Error SQLSTATE: #{e.state}"
          report_failed_import(t_d)
        rescue
          report_failed_import(t_d)
        end
      end
    end
    def pf_db_import_transaction_open
    end
    def pf_db_import_transaction_close
    end
    # Fills @tp with the escaped source text and the searchable full text (when
    # the source file exists), advances the class-wide document id @@id_t (only
    # when the document has a title) and returns the metadata tuple.
    def db_import_metadata                                 #% import documents - populate database
      print %{ #{@cX.grey}import documents dbi_unit #{@cX.off} } if @opt.cmd =~/[vVM]/
      @tp={}
      @md=SiSU_Param::Parameters.new(@opt).get
#% sisutxt & fulltxt
      if FileTest.exist?(@md.fns)
        txt_arr=IO.readlines(@md.fns,'')
        src=txt_arr.join("\n")
        src=special_character_escape(src)
        @tp[:sisutxt_f],@tp[:sisutxt_i]='sisutxt, ',"'#{src}', "
        txt=clean_searchable_text(txt_arr)
        @tp[:fulltxt_f],@tp[:fulltxt_i]='fulltxt, ',"'#{txt}', "
      end
#% title
      if defined? @md.title.full \
      and @md.title.full=~/\S+/                            # DublinCore 1 - title
        @@id_t ||=0
        max_tid=db_max_id('SELECT MAX(tid) FROM metadata_and_text')
        @@id_t=max_tid if max_tid
        @@id_t+=1 #bug related, needs to be performed once at start of file, but consider moving, as, placed here it means program will fail if document header lacks @title:
        puts %{\n#{@cX.grey}Processing file number#{@cX.off}: #{@cX.green}#{@@id_t}#{@cX.off}} if @opt.cmd =~/[vVM]/
      end
      SiSU_DB_DBI::Test.new(self,@opt).verify              #% import title names, filenames (tuple)
      SiSU_DB_tuple::Load_metadata.new(@conn,@@id_t,@md,@file).tuple
    end
    # Walks the document objects in dal_array and returns @tuple_array: one
    # document tuple per object whose data.of matches para|heading|group, plus
    # one endnote tuple per numbered endnote found in the object. Any exception
    # is handed to SiSU_Errors and the tuples gathered so far are returned.
    def db_import_documents(dal_array)                     #% import documents - populate main database table, import into substantive database tables (tuple)
      begin
        @col[:tid]=@@id_t
        @en,@en_ast,@en_pls,@tuple_array=[],[],[],[]
        @col[:en_a],@col[:en_z]=nil,nil
        dal_array.each do |data|
          strip_inline_markup(data)
          @col[:seg]=@@seg
          next unless data.of =~/para|heading|group/       # regular text what of code-blocks grouped text etc.
          notedata=data.obj.dup                            # copy taken before extract_any removes the endnote text from data.obj
          if data.is=='heading' \
          and data.ln.inspect=~/[123]/
            import_heading_upper(data)
          elsif data.is=='heading' \
          and (level=[4,5,6].find { |l| data.ln==l })
            import_heading_segment(data,level)
          else                                             #% regular text
            import_text_object(data)
          end
          import_endnotes(notedata,data)                   #% import into database endnotes tables
        end
      rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error
      end
      @tuple_array
    end
    # Stores txt as the text the endnote helpers below work on and returns
    # self, so calls read endnotes(txt).extract_any, endnotes(txt).range, ...
    def endnotes(txt)
      @txt=txt
      self
    end
    # When @txt holds a numbered endnote: records the endnote numbers in @en,
    # @en_ast and @en_pls and reduces each endnote in @txt (in place) to its
    # number. Returns @txt.
    def extract_any
      if @txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/
        range
        @en << standard if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/
        @en_ast << asterisk if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/
        @en_pls << plus if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/
        @txt=clean_text
      end
      @txt
    end
    # Numbers of the plain endnotes in @txt (scan result), nil when none.
    def standard
      (@txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/) \
      ? @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/) \
      : nil
    end
    # Numbers of the asterisk endnotes in @txt (scan result), nil when none.
    def asterisk
      (@txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/) \
      ? @txt.scan(/#{Mx[:en_b_o]}[*](\d+).+?#{Mx[:en_b_c]}/) \
      : nil
    end
    # Numbers of the plus endnotes in @txt (scan result), nil when none.
    def plus
      (@txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/) \
      ? @txt.scan(/#{Mx[:en_b_o]}[+](\d+).+?#{Mx[:en_b_c]}/) \
      : nil
    end
    # Replaces every endnote in @txt (in place) by its number, keeping the
    # '*' or '+' marker. base_url is accepted for compatibility; the
    # substitution is the same with and without it.
    def clean_text(base_url=nil)
      @txt.gsub!(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/,'\1')
      @txt.gsub!(/#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}/,'\1')
      @txt.gsub!(/#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/,'\1')
      @txt
    end
    # Resets @col[:en_a]/@col[:en_z] and sets them to the first and last
    # endnote number matched word by word in @txt. Returns @col.
    # NOTE(review): the words come from scan(/\S+/) while the pattern requires
    # whitespace inside the endnote, so it looks like nothing can match - the
    # original carries the remark "not tested since change 2003w31"
    def range
      @col[:en_a]=@col[:en_z]=nil
      if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}|#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/
        @txt.scan(/\S+/).each do |w|
          if w[/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/] # not tested since change 2003w31
            @col[:en_a] ||=$1
            @col[:en_z]=$1
          end
        end
      end
      @col
    end
    # Returns the urls tuple: one column/url pair for every output file of the
    # document that exists under "#{@env.path.output}/#{@fnb}". Returns nil
    # when an exception was raised (it is reported through SiSU_Errors).
    # dbi_unit is not used; content replaces @fnc.
    def db_import_urls(dbi_unit,content)                   #% import documents OID - populate database
      tuple=nil
      begin
        @fnc=content
        @env=SiSU_Env::Info_env.new(@opt.fns)
        base=@env.url.root
        out=@env.path.output
        f,u={},{}
        if @fnb.nil? \
        or @fnb.empty?
          p 'file output path error' #remove
        end
        # [key, column name, output file name]; order kept from the original
        # sequence of checks
        [ [:txt,         'plaintext',   @md.fn[:plain]],
          [:html_toc,    'html_toc',    @md.fn[:toc]],
          [:html_doc,    'html_doc',    @md.fn[:doc]],
          [:xhtml,       'xhtml',       @md.fn[:xhtml]],
          [:xml_sax,     'xml_sax',     @md.fn[:sax]],
          [:xml_dom,     'xml_dom',     @md.fn[:dom]],
          [:odf,         'odf',         @md.fn[:odf]],
          [:pdf_p,       'pdf_p',       @md.fn[:pdf_p]],
          [:pdf_l,       'pdf_l',       @md.fn[:pdf_l]],
          [:concordance, 'concordance', @md.fn[:concordance]],
          [:latex_p,     'latex_p',     "#{@opt.fns}.tex"],
          [:latex_l,     'latex_l',     "#{@opt.fns}.landscape.tex"],
          [:digest,      'digest',      @md.fn[:digest]],
          [:manifest,    'manifest',    @md.fn[:manifest]], #revisit, was to be text, this is html
          [:markup,      'markup',      "#{@opt.fns}.meta"],
          [:sisupod,     'sisupod',     "#{@opt.fns}.tgz"],
        ].each do |key,column,name|
          next unless FileTest.file?("#{out}/#{@fnb}/#{name}")
          f[key],u[key]="#{column},","'#{base}/#{@fnb}/#{name}',"
        end
        tuple=SiSU_DB_tuple::Load_urls.new(@conn,f,u,@@id_t,@opt,@file).tuple
      rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error
      end
      tuple
    end
    private
    # Runs a "SELECT MAX(...)" query and returns the value as an integer, or
    # nil when the query raised. The non-sqlite3 path reads the first column
    # of the row; the former fetch_all.to_s.to_i is 0 wherever Array#to_s
    # returns the inspect form.
    def db_max_id(sql)
      if @driver_sqlite3
        @conn.execute(sql).join.to_i
      else
        row=@conn.select_one(sql)
        row ? row[0].to_i : 0
      end
    rescue
      puts "#{__FILE__}:#{__LINE__}" if @opt.cmd =~/M/
      nil
    end
    # Called from the rescue clauses of marshal_load ($! and $@ still describe
    # the failure): reports the error and writes the statements to a .sql file,
    # which is closed before returning.
    def report_failed_import(t_d)
      SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error
      sqlfn="#{@env.path.sql}/#{@md.fnb}.sql"
      File.open(sqlfn,'w') { |dump| t_d.each { |stmt| dump.puts stmt } }
      p sqlfn
      if @opt.cmd =~/M/
        puts t_d
        p @conn.methods.sort
        puts "#{__FILE__}:#{__LINE__}"
      end
    end
    # Removes font-face, glyph and tag markers from data.obj in place.
    def strip_inline_markup(data)
      %w[bold italics underscore superscript subscript insert cite strike monospace].each do |face|
        data.obj.gsub!(/#{Mx[:"fa_#{face}_o"]}(.+?)#{Mx[:"fa_#{face}_c"]}/,'\1')
      end
      data.obj.gsub!(/#{Mx[:gl_o]}(●)#{Mx[:gl_c]}\s*/,'\1 ')
      data.obj.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check
    end
    # Copies the per-object fields of a heading into @col.
    def load_heading_columns(data)
      @col[:ocn],@col[:lev_an],@col[:ocnd],@col[:ocns]=data.ocn,data.lv,data.odv,data.osp
      @col[:t_of],@col[:t_is],@col[:node],@col[:parent]=data.of,data.is,data.node,data.parent
      @col[:digest_clean],@col[:digest_all]='',''
    end
    # Sets @hname (the current segment name, falling back to the last one seen
    # by any instance), refreshes @env and builds @base_url from them.
    def segment_base_url
      @hname=if @col[:seg] \
      and not @col[:seg].to_s.empty?
        @@hname=@col[:seg].to_s
      else @@hname
      end
      @env=SiSU_Env::Info_env.new(@md.fns)
      @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
    end
    # Escapes @col[:body] and derives @col[:plaintext] from a copy of it.
    def body_to_plaintext
      special_character_escape(@col[:body])
      @col[:plaintext]=@col[:body].dup
      @col[:plaintext]=strip_markup(@col[:plaintext])
      @col[:plaintext]=clean_searchable_text(@col[:plaintext])
    end
    # Remembers the first and last endnote number of each endnote kind.
    def note_endnote_ranges
      @en_a,@en_z=@en[0].first,@en[0].last if @en[0]
      @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
      @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
    end
    # Appends the document tuple for the current state of @col.
    def queue_document_tuple
      @tuple_array << SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file).tuple
    end
    # Heading whose level (data.ln) is 1, 2 or 3.
    def import_heading_upper(data)
      @col[:lev]=data.ln
      load_heading_columns(data)
      @col[:lid]+=1
      endnotes(data.obj).extract_any
      @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_minus
      body_to_plaintext
      note_endnote_ranges
      queue_document_tuple
      # to_s: a Regexp in a when clause never matches an Integer level
      case @col[:lev].to_s
      when /1/; @col[:lv1]+=1
      when /2/; @col[:lv2]+=1
      when /3/; @col[:lv3]+=1
      end
      @col[:lev]=@col[:plaintext]=@col[:body]=''
    end
    # Heading of level 4, 5 or 6. A level 4 heading names the current segment
    # (@@seg); levels 5 and 6 record their own name in @@seg_full and stay in
    # the current segment.
    def import_heading_segment(data,level)
      load_heading_columns(data)
      if level==4
        @@seg=data.name
      else
        @@seg_full=data.name if data.name                  #check data.name
        @@seg ||=''                                        #nil # watch
      end
      @col[:seg]=@@seg
      @col[:"lv#{level}"]+=1
      @col[:lid]+=1
      @col[:lev]=level
      segment_base_url
      endnotes(data.obj).extract_any
      @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus
      body_to_plaintext
      note_endnote_ranges
      queue_document_tuple
      @col[:lev]=@col[:plaintext]=@col[:body]=''
    end
    # Any other object (paragraph, table, code block, ...). Afterwards the
    # endnote number lists gathered since the last such object are reset.
    def import_text_object(data)
      @col[:lid]+=1
      @col[:ocn],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent]=data.ocn,data.odv,data.osp,data.of,data.is,'',data.parent
      @col[:digest_clean],@col[:digest_all]='',''
      segment_base_url
      txt=endnotes(data.obj).extract_any
      if @sql_type=~/pg/ \
      and txt.size > (SiSU_DB_columns::Column_size.new.document_clean - 1) #% examine pg build & remove limitation
        # NOTE(review): only a warning is logged; the body below is still built
        # from data, as before - confirm whether the object should be dropped
        puts "\n\nTOO LARGE (TXT - see error log)\n\n"
        File.open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
          error.puts("\n#{@opt.fns}\nTEXT BODY\n#{txt.size} object #{@col[:ocn]} -> #{txt.slice(0..500)}")
        end
      end
      note_endnote_ranges
      css=SiSU_Format_Shared::CSS_Format.new(@md,data)
      @col[:body]=if data.is=='table'
        css.html_table
      elsif data.is=='code'
        css.code
      elsif defined?(data.indent) and data.indent =~/[1-9]/
        css.indent(data.indent)
      else
        css.norm
      end
      body_to_plaintext
      queue_document_tuple
      @en,@en_ast,@en_pls=[],[],[]
      @col[:en_a]=@col[:en_z]=nil
      @col[:lev]=@col[:plaintext]=@col[:body]=@col[:words]=''
    end
    # Appends one endnote tuple for every numbered endnote in notedata: first
    # the plain ones, then the asterisk ones, then the plus ones. @id_n is
    # advanced once per endnote.
    def import_endnotes(notedata,data)
      [ ['endnotes',          Mx[:en_a_o], '',   Mx[:en_a_c]],
        ['endnotes_asterisk', Mx[:en_b_o], '\*', Mx[:en_b_c]], # dal new endnotes 2003w31/1
        ['endnotes_plus',     Mx[:en_b_o], '\+', Mx[:en_b_c]], # dal new endnotes 2003w31/1
      ].each do |type,mark_o,sign,mark_c|
        notedata.scan(/#{mark_o}#{sign}.+?#{mark_c}/).each do |inf|
          next unless inf =~/#{mark_o}#{sign}(\d+)(.+?)#{mark_c}/
          nr,txt,digest_clean=$1,$2.strip,0
          @id_n+=1
          special_character_escape(txt)
          body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt)
          strip_markup(txt)
          if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)
            puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
            File.open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
              error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}")
            end
            txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n}
          end
          en={ :type => type,
            :id => @id_n,
            :lid => @col[:lid],
            :nr => nr,
            :txt => txt,
            :body => body,
            :ocn => @col[:ocn],
            :ocnd => @col[:ocnd],
            :ocns => @col[:ocns],
            :id_t => @@id_t,
            :hash => digest_clean
          }
          @tuple_array << SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file).tuple
        end
      end
    end
  end
end
__END__
-- cgit v1.2.3