From 26767cc88c0548ad7978021796d0ccc4c9f7ffed Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 2 Jun 2007 11:27:06 +0100 Subject: 0.53.0, pre-build, see changelog, library naming changed for scm, placed under v0 (instead of 0.53) --- lib/sisu/0.52/dal.rb | 1089 -------------------------------------------------- 1 file changed, 1089 deletions(-) delete mode 100644 lib/sisu/0.52/dal.rb (limited to 'lib/sisu/0.52/dal.rb') diff --git a/lib/sisu/0.52/dal.rb b/lib/sisu/0.52/dal.rb deleted file mode 100644 index 130dbf87..00000000 --- a/lib/sisu/0.52/dal.rb +++ /dev/null @@ -1,1089 +0,0 @@ -=begin - * Name: SiSU information Structuring Universe - Structured information, Serialized Units - * Author: Ralph Amissah - * http://www.jus.uio.no/sisu - * http://www.jus.uio.no/sisu/SiSU/download.html - - * Description: preprocessing, (document abstraction), data abstraction used in subsequent processing - - * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 Ralph Amissah - - * License: GPL 2 or later - - Summary of GPL 2 - - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - - If you have Internet connection, the latest version of the GPL should be - available at these locations: - http://www.fsf.org/licenses/gpl.html - http://www.gnu.org/copyleft/gpl.html - http://www.jus.uio.no/sisu/gpl2.fsf - - SiSU was first released to the public on January 4th 2005 - - SiSU uses: - - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - © Ralph Amissah 1997, current 2007. - All Rights Reserved. - - * Ralph Amissah: ralph@amissah.com - ralph.amissah@gmail.com -=end -module SiSU_DAL - require SiSU_lib + '/defaults' - require SiSU_lib + '/sysenv' - require SiSU_lib + '/param' - require SiSU_lib + '/dal_syntax' - require SiSU_lib + '/dal_doc_str' - require SiSU_lib + '/i18n' - include SiSU_Env - include SiSU_Param - include SiSU_Viz - include Syntax - class Instantiate < SiSU_Param::Parameters::Instructions - def initialize - @@flag_vocab=0 - @@endnote={} - @@endnote_array=@@word_mode=[] - @@endnote_counter,@@endnote_counter_asterisk,@@endnote_counter_dag=1,1,1 #added - @@line_mode='' - end - end - class Source #{@my_make_fns.meta}") if @md.cmd =~/M/ - tell.txt_grey unless @md.cmd =~/q/ - dal.each{|s| dal_array << "#{s.strip}\n\n" unless s.strip.empty?} - dal_array - end - def read_fnm - dal=[] - dal=if FileTest.file?(@fnm); File.open(@fnm){ |f| dal=Marshal.load(f)} - else SiSU_DAL::Source.new(@opt).create_dal - end - end - end - class Output - def initialize(md,data) - @md,@data=md,data - @my_make=SiSU_Env::Create_file.new(@md.cmd,@md.fns) - dir=SiSU_Env::Info_env.new(@md.fns) - @hard="#{dir.path.dal}/#{@md.fns}.meta" - end - def hard_output - if @md.cmd =~/M/ - filename_meta=@my_make.file_meta - @data.each {|s| filename_meta.puts s.strip + "\n\n" unless s.strip.empty?} - else File.unlink(@hard) if FileTest.file?(@hard) - end - end - def marshal - marshal_meta=@my_make.marshal_meta - File.open(marshal_meta,'w'){|f| Marshal.dump(@data.to_a,f)} - end - end - class Make - @@endnote={} - @@endnote_array=@@word_mode=[] - @@endnote_counter,@@endnote_counter_asterisk,@@endnote_counter_dag=1,1,1 - @@comment='%' - @@dp=nil - def initialize(md,data) - @md,@data=md,data - @@word_mode=[] - @env=SiSU_Env::Info_env.new(@md.fns) - @skin=SiSU_Env::Info_skin.new(@md) - @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern - l=SiSU_Env::Standardise_language.new.file_to_language(@md.fns) - @language=l[:l] - @tr=SiSU_Translate::Source.new(@md,@language) - end - def reset - @@flag_vocab=0 - @@endnote={} - @@endnote_array=@@word_mode=[] - @@endnote_counter,@@endnote_counter_asterisk,@@endnote_counter_dag=1,1,1 - @@line_mode='' - end - def song - reset - data=@data - @metafile="#{@env.path.dal}/#{@md.fns}.meta" - my_make_source_file=SiSU_Env::Create_file.new(@md.cmd,@md.fns) - data=data.join.split("\n\n") - data=SiSU_document_structure::Code.new(@md,data).code - data_new=[] - data.each do |x| - data_new << if x =~ /\n\n/m; x.split(/\n\n+/) - else x - end - end - data=data_new.flatten - data=SiSU_DAL::Make.new(@md,data).substitutions_and_insertions? - data=Syntax::Markup.new(@md,data).songsheet - data=SiSU_DAL::Make.new(@md,data).character_check - data=SiSU_DAL::Make.new(@md,data).images - data=SiSU_document_structure::Tables.new(@md,data).tables - data=SiSU_DAL::Make.new(@md,data).numbering_song - data=SiSU_DAL::Make.new(@md,data).endnotes - data=SiSU_DAL::Make.new(@md,data).object_digest - meta=SiSU_DAL::Make.new(@md,data).metadata - outputdata=data + meta - if @md.cmd =~/[mM]/ - SiSU_DAL::Output.new(@md,outputdata).hard_output - SiSU_DAL::Output.new(@md,outputdata).marshal - end - reset - outputdata - end - protected - def vocabulary - data=@data - vocab_insert,tuned_file=[],[] - data.each do |para| - if para =~/^1~/ and @@flag_vocab == 0 - vocab_insert << '0~vocabulary lex' << "\n\n" << para #watch consider - tuned_file << vocab_insert unless para.nil? - @@flag_vocab=1 - else tuned_file << para unless para.nil? - end - end - tuned_file - end - def character_check - require 'iconv' - reset - data=@data - @tuned_file=[] - endnote_no=1 - data.each do |para| - para.strip! - para.gsub!(/^([12])~\?\s+/,'\1~ ') #conditional header for incorporated document 2004w12 - para.gsub!(/^[{~}]\s*$/,'') - para.gsub!(/^#{@@comment}.*/,'') #remove comment and divider #% - para.gsub!(/<~#>|~#\s*/,'<~#>') - para.gsub!(/-#\s*/,'<-#><~#>') - #para.gsub!(/(#\{{3} arch-tag:|0\{{3}~cvs)\s+/, "0{{~rcs ") #KEEP ... ENABLE WIDER USE OF REVISION CONTROL - para.gsub!(/(~\{ )\s+/,'\1') - para.gsub!(/ \/\//,'
') #added 2004w29 - para.gsub!(/
/,'
') #needed by xml, xhtml etc. - #para.gsub!(/

/,'

') #consider - para.gsub!(/`/,"'") - para.gsub!(/\342\200\231/,"'") #if para =~/’/ #Avoid #‘ ’ #“ ” - para.gsub!(/\t/,' ') - para.gsub!(/�/,' ') #watch, replace with char code - para.gsub!(/[“”]/,'""') - para.gsub!(/[­–—]/,'-') #— – chk - para.gsub!(/·/,'*') - para.gsub!(/\\copy(?:right)?\b/,'©') - para.gsub!(/\\trademark\b|\\tm\b/,'®') - #non_utf8(para) - para=para + "\n" - case para - when /\^~/ # endnotes - #% Note must do this first (earlier loop) and then enter gathered data into ~^\d+ - sub_para=para.dup - @@endnote_array << sub_para.gsub!(/\n/,'').gsub!(/\^~\s+(.+)\s*/, %{~\{#{endnote_no} \\1 \}~}).strip - endnote_no+=1 - para=nil if para =~/\^~ .+/ #removes 'binary' endnote now in endnote array for later insertion - end - @tuned_file << para unless para.nil? - end - @tuned_file - end - def images - data=@data - tuned_file=[] - @rmgk=false - if SiSU_Env::Info_settings.new.program?('rmagick'); @rmgk=SiSU_Env::Load.new('RMagick').prog - else tell=SiSU_Screen::Ansi.new(@md.cmd,'use of RMagick is not enabled in sisurc.yml') - tell.warn if @md.cmd =~/[vVM]/ - end - data.each do |para| - para.strip! - if para =~/\{\s*\S+\.(?:png|jpg|gif)(?:\s*|\s+.+)?\}(?:(?:https?|ftp):\S+|image)/ - if para !~/\{\s*\S+\.(?:png|jpg|gif)\s+\d+x\d+\s+/ - m=/\{\s*(\S+\.(?:png|jpg|gif))/ - if @rmgk - imgs=para.scan(m).flatten - images=imgs.each do |image| - dir=SiSU_Env::Info_env.new(@md.fns) - path_image=[dir.path.image_source_local_tex,dir.path.image_source_remote_tex,dir.path.image_source_tex] - image_path=nil - path_image.each do |image_path| - break if FileTest.exist?("#{image_path}/#{image}") - end - if FileTest.exist?("#{image_path}/#{image}") - img=Magick::ImageList.new("#{image_path}/#{image}") - img_col,img_row=img.columns,img.rows - if img_col > img_row #landscape - if img_col> 640 #480 - img_col=640 #480 - img_row=((1.00*img_col/img.columns)*img.rows).round - end - else #portrait - if img_col> 640 #480 - img_col=640 #480 - img_row=((1.00*img_col/img.columns)*img.rows).round - end - if img_row > 640 - img_row=640 - img_col=((1.00*img_row/img.rows)*img.columns).round - end - end - para.gsub!(/(#{image})/,"#{image} #{img_col}x#{img_row}") - else para.gsub!(/\{\s*(\S+)\.(png|jpg|gif).+?\}((?:https?|ftp):\S+|image)/,'[ \1 (\2 missing) ]') - end - end - else - images=para.scan(m) do |image| - tell=SiSU_Screen::Ansi.new(@md.cmd,'where image dimensions have not been provided RMagick is required',image) - tell.warn #unless @opt.cmd =~/q/ - end - end - end - end - para.gsub!(/\{\s+(\S+\.(?:png|jpg|gif))\s+/i,'{\1 ') if para =~/\{\s+\S+\.(?:png|jpg|gif).+?\}(?:(?:https?|ftp):\S+|image)/ - tuned_file << para unless para.nil? - end - tuned_file - end - def output_filetypes_in_cmd(cmd_shortcut,source=nil) - #make list of file types in shortcut command (as configured), e.g. when sisu -3 is used - cf_defaults=SiSU_Env::Info_processing_flag.new - cmd_list=case cmd_shortcut.to_s - when /0/; cf_defaults.cf_0 - when /1/; cf_defaults.cf_1 - when /2/; cf_defaults.cf_2 - when /3/; cf_defaults.cf_3 - when /4/; cf_defaults.cf_4 - when /5/; cf_defaults.cf_5 - end - file_type_names=[] - file_type_names <<= if cmd_list =~ /y/; 'sisu_manifest.html' - end - file_type_names <<= if cmd_list =~ /h/; ['toc.html', 'doc.html'] - end - file_type_names <<= if cmd_list =~ /p/; ['landscape.pdf', 'portrait.pdf'] - end - file_type_names <<= if cmd_list =~ /o/; 'opendocument.odt' - end - file_type_names <<= if cmd_list =~ /b/; 'scroll.xhtml' - end - file_type_names <<= if cmd_list =~ /x/; 'sax.xml' - end - file_type_names <<= if cmd_list =~ /X/; 'dom.xml' - end - file_type_names <<= if cmd_list =~ /a/; 'plain.txt' - end - file_type_names <<= if cmd_list =~ /g/; 'wiki.txt' - end - file_type_names <<= if cmd_list =~ /w/; 'concordance.html' - end - file_type_names <<= if cmd_list =~ /N/; 'digest.txt' - end - file_type_names <<= if source and cmd_shortcut =~ /s/; source - end - file_type_names <<= if cmd_shortcut =~ /S/; 'sisupod.zip' - end - file_type_names=file_type_names.flatten - end - def substitutions_and_insertions? - data=@data - tuned_file=[] - if data[0] =~ /^#!\s*(?:\/usr\/bin\/env sisu|\/usr\/bin\/sisu)/ # remove bang from top #! (however file is stripped, so will be removed provided no content preceeds it) - data[0].gsub!(/^#!\s*\/usr\/bin\/sisu/,'') - data[0].gsub!(/^#!\s*\/usr\/bin\/env sisu/,'') - end - if data[0] =~ /^(SiSU\s+[\d.]*|sisu-[\d.]+)$/ # SiSU identifier - data[0].gsub!(/^(SiSU\s*[\d.]*)$/,'% \1') - data[0].gsub!(/^(sisu-[\d.]+)$/,'% \1') - end - data.each do |para| - para=if @md.markup_version.to_f >= 0.38 - SiSU_document_structure::Structure.new(@md,para).structure_markup_normalize - else para - end - #para.gsub!(//,'\1') #consider, would permit use of text hyperlinks if desired, dal_syntax more appropriate? - para.gsub!(/^((?:[1-9]|:?[A-C])~\S*)\s*$/,'\1~ [Note: heading marker::required title missing]~#') #conditional header for incorporated document 2004w12 - if para =~/^@\S+?:/ - para.gsub!(/^@(\S+?):\s+/,'0~\1 ') - para.gsub!(/^@(\S+?):([+-])\s+/,'0~\1\2 ') - end - if para !~/^%+\s/ and - para =~/^(?:_\*\s+)?\{(?:~\^\s+)?(.+?)\s\[(?:\d(?:[sS]+))\]\}(?:\.\.\/\S+?\/|\S+?\.(?:sst|ssm)\b)(?:\s+~\{.+?\}~)?(?:\s+\*~\S+)*\s*$/ - txt,cmd,source,url_dir,note,manifest=nil,nil,nil,nil,nil,nil - url_and_stub=SiSU_Env::Info_env.new.url - if defined? url_and_stub.remote - @output_url="#{url_and_stub.remote}" - if para =~/\{(.+?)\s\[(\d[sS]*)\]\}((\S+?)\.ss[tm])(\s+~\{.+?\}~)?/ - #syntax e.g.: { "Sphinx or Robot", Leena Krohn [3sS]}sphinx_or_robot.leena_krohn.1996.sst - txt,cmd,source,url_dir,note=$1,$2,$3,$4,$5 - elsif para =~/\{(.+?)\s\[(\d[sS]*)\]\}\.\.\/(\S+?)\/(\s+~\{.+?\}~)?/ - #syntax e.g.: { "Sphinx or Robot", Leena Krohn [3sS]}../sphinx_or_robot.leena_krohn.1996/ - txt,cmd,url_dir,note=$1,$2,$3,$4 - end - manifest="{#{txt} }#@output_url/#{url_dir}/toc.html#{note}\n\n" - else - puts "error, does currently support relative paths (reltive paths were removed, as had problems for citation, and was not suited to all output types should possibly reconsider) #{__FILE__} #{__LINE__}" - if para =~/\{(?:~\^\s+)?(.+?)\s\[(\d[sS]*)\]\}\.\.\/(\S+?)\/(\s+~\{.+?\}~)?/ - txt,cmd,url_dir,note=$1,$2,$3,$4 - manifest="{ #{txt} }../#{url_dir}/toc.html#{note}\n\n" - end - end - tuned_file << manifest - output_filetypes_in_cmd(cmd,source).each do |o_f| - describe = case o_f - when /sisu_manifest.html/; '~^ document manifest' - when /toc.html/; ' html, segmented text' - when /doc.html/; ' html, scroll, document in one' - when /landscape.pdf/; ' pdf, landscape' - when /portrait.pdf/; ' pdf, portrait' - when /opendocument.odt/; ' open document' - when /scroll.xhtml/; ' xhtml scroll' - when /sax.xml/; ' xml, sax' - when /dom.xml/; ' xml, dom' - when /plain.txt/; ' plain text utf-8' - when /wiki.txt/; ' wiki text' - when /concordance.html/; ' concordance' - when /digest.txt/; ' dcc, document content certificate (digests)' - when /#{source}/; ' markup source text' - when /sisupod.zip/; ' zipped markup source pod' - else nil - end - if describe - if @output_url - tuned_file << "_1 {#{describe} }#@output_url/#{url_dir}/#{o_f}\n\n" if describe - else - tuned_file << "_1 { #{describe} }../#{url_dir}/#{o_f}\n\n" - end - end - end - elsif para =~/<:insert\d+!?>/ and para !~/^%\s+/ - @skin.select - ins=SiSU_Viz::Inserts.new - case para - when /^\s*<:insert1>\s*$/ - para=[] - ins.insert1.split(/\n\n/).each{|x| para << x } - when /^\s*<:insert2>\s*$/ - para=[] - ins.insert2.split(/\n\n/).each{|x| para << x } - when /^\s*<:insert3>\s*$/ - para=[] - ins.insert3.split(/\n\n/).each{|x| para << x << "\n"} - para=ins.insert3 - when /^\s*<:insert4>\s*$/ - para=[] - ins.insert4.split(/\n\n/).each{|x| para << x << "\n"} - para=ins.insert4 - when /^\s*<:insert5>\s*$/ - para=[] - ins.insert5.split(/\n\n/).each{|x| para << x << "\n"} - when /^\s*<:insert6>\s*$/ - para=[] - ins.insert6.split(/\n\n/).each{|x| para << x << "\n"} - when /^\s*<:insert7>\s*$/ - para=[] - ins.insert7.split(/\n\n/).each{|x| para << x << "\n"} - end - para.each{|x| tuned_file << x } - else tuned_file << para - end - tuned_file.flatten! - tuned_file.compact! - end - tuned_file - end - def numbering_song - data=@data - data=SiSU_DAL::Make.new(@md,data).number_plaintext_para - data=SiSU_DAL::Make.new(@md,data).name_endnote_seg - data=SiSU_DAL::Make.new(@md,data).auto_number_heading_ie_title - data=SiSU_DAL::Make.new(@md,data).ocn unless @md.markup =~/not_to/ - data=SiSU_DAL::Make.new(@md,data).minor_numbering #unless @md.markup =~/not_to/ - data=SiSU_DAL::Make.new(@md,data).name_para_seg_filename - data=SiSU_DAL::Make.new(@md,data).set_heading_seg unless @md.set_heading_seg - data=SiSU_DAL::Make.new(@md,data).set_heading_top unless @md.set_heading_top - data=SiSU_DAL::Make.new(@md,data).set_header_title unless @md.set_header_title - data - end - def number_plaintext_para - data=@data - @tuned_file=[] - data.each do |para| - para.gsub!(/(^|[^<][^v][^>])\n/,'\1 ') #messy, but idea is that tables should retain breaks - para.gsub!(/^/,"\n") unless para =~/¡/ - para.gsub!(/^\s+|\s$/,"\n") - @tuned_file << para - end - @tuned_file - end - def name_endnote_seg - data=@data - @tuned_file=[] - data.each do |para| - para.gsub!(/<:3>\s*<:ee>/, - "#{@@endnote['special_align']}


\r " + - "#{@@endnote['seg_name_3']}

" + - "#{@@endnote['special_align_close']}") - para.gsub!(/<:2>\s*<:ee>/, - "#{@@endnote['special_align']}


\r " + - "#{@@endnote['seg_name_2']}

" + - "#{@@endnote['special_align_close']}") - para.gsub!(/<:1>\s*<:ee>/, - "#{@@endnote['special_align']}


\r " + - "#{@@endnote['seg_name_1']}

" + - "#{@@endnote['special_align_close']}") - @tuned_file << para - end - # debug 2003w46 adding revision control info - if @md.flag_auto_endnotes and @md.flag_separate_endnotes_make - @tuned_file << "\n4~endnotes Endnotes <~0;0:0;u0>" #prob numbering, revisit - end - @tuned_file << "\n" - @tuned_file - end - def owner_details_seg - data << '4~owner.details Owner Details' - end - def number_sub_heading(para,num,title_no) - case para - when /#{num}~- /; para.gsub!(/#{num}~- /,"#{title_no} ") - when /^#{num}~#\s*/; para.gsub!(/^#{num}~#\s*/,"#{title_no} ") - when /^#{num}~[a-z_\.]+ / - para.gsub!(/^#{num}~([a-z_\.]+)\s+(.+)/i,%{#{num}~\\1 #{title_no} \\2 <:name##{title_no}>}) - else para.gsub!(/^#{num}~ /,"#{num}~#{title_no} #{title_no} ") #main - end - if @md.toc_lev_limit and @md.toc_lev_limit < num - para.gsub!(/^[5-8]~(?:~\S+)?\s*/,'!_ ') - end - para - end - def auto_number_heading_ie_title #also does some segment naming - data=@data - @tuned_file=[] - if @md.markup =~/num_top/ or @md.num_top # watch, 2003w23 - input="#{@md.markup}"[/num_top\=([1-6])/,1] if @md.markup - input||=@md.num_top if @md.num_top !~/^$/ - end - num_top=input.to_i - t_no1=t_no2=t_no3=t_no4=0 - no1=num_top; no2=(num_top + 1); no3=(num_top + 2); no4=(num_top + 3) - t_not=0 - data.each do |para| #@md.seg_names << [additions to segment names] - if (@md.markup =~/num_top/ or (@md.num_top and @md.num_top !~/^$/)) and para !~/^0~/ - if (para =~/^(?:#{no1}|^#{no2}|^#{no3}#{no4})~#/ and para !~/^4~endnotes?/) - t_not+=1 #; t_no2=0; t_no3=0 - para.gsub!(/^(#{no1})~#\s*/,"\\1~ps#{t_not} ") - para.gsub!(/^(#{no2})~#\s*/,"\\1~ps#{t_not} ") - para.gsub!(/^(#{no3})~#\s*/,"\\1~ps#{t_not} ") - para.gsub!(/^(#{no4})~#\s*/,"\\1~ps#{t_not} ") - end - if para =~/#{no1}~/ - @subnumber=1 - @subnumber=0 if para =~/#{no1}~/ - end - if para =~/^[0-6]~[ \w-]/ and para !~ /(?:[0-6]~[\w-]+-|4~endnotes|^[0-6]~([a-z_\.]+)\s+[\d.]+)\s/ and para !~/<~#>|<-#>/ - if para =~/^#{no1}~/ - t_no1+=1; t_no2=0; t_no3=0 - title_no="#{t_no1}" - if not @md.seg_names.nil? and not @md.seg_names.include?(title_no) - para.gsub!(/^#{no1}~\s+(\S+)#/,"#{no1}~#{title_no} \\1 #{title_no} ") #shift placement of auto-number to after first word, e.g. Article # not # Article, added on occasion of ABF (20040329) - para.gsub!(/^#{no1}\{\s+(Article|Clause|Section)\s+#/i,%{#{no1}~#{title_no} \\1 #{title_no}. }) - unless para =~/^#{no1}~\s+[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review - para.gsub!(/^#{no1}~\s+/,"#{no1}~#{title_no} #{title_no}. ") - end - @md.seg_names << title_no - #else puts "warning segment name #{title_no} already exists" - end - unless para =~/^#{no1}~([a-z_\.]+)\s+[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required - para.gsub!(/^#{no1}~([a-z_\.]+)\s+(.+)/i,%{#{no1}~\\1 #{title_no}. \\2 <:name##{title_no}>}) - end - para.gsub!(/^#{no1}~#\s*/,"#{title_no}. ") - end - if para =~/^#{no2}~/ - t_no2+=1; t_no3=0 - title_no="#{t_no1}.#{t_no2}" - para=number_sub_heading(para,no2,title_no) - end - if para =~/^#{no3}~/ - t_no3+=1 - title_no="#{t_no1}.#{t_no2}.#{t_no3}" - para=number_sub_heading(para,no3,title_no) - end - elsif para =~ /^[0-6]~[\w-]+-/ # endnotes, watch2005 - para.gsub!(/^#{no1}~([a-z_\.]+)- /,"#{no1}~\\1 ") - para.gsub!(/^#{no2}~([a-z_\.]+)- /,"#{no2}~\\1 ") - para.gsub!(/^#{no3}~([a-z_\.]+)- /,"#{no3}~\\1 ") - end - elsif @md.markup =~/num_extract/ #AS DANGEROUS force enable with document, note already does this type of numbering for cisg, locate and coordinate logic, is currently misplaced in code, chengwei inspired 2004w23/4 - unless para =~ /^[0-6]~\S+/ #endnotes watch? - if para =~/^[1-6]~\s+([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d - name_num=$1 - para.gsub!(/^([1-6]~)\s+/,"\\1#{name_num} ") - end - end - if @md.toc_lev_limit - end - end - @tuned_file << para - end - @tuned_file - end - def ocn #and auto segment numbering increment - data=@data - @tuned_file=[] - object_array=SiSU_document_structure::OCN.new(@md,data).ocn - object_array.each do |o| - @tuned_file <<= if o.ocn; "#{o.txt} <~#{o.ocn};#{o.lv};#{o.type}>" - else o.txt - end - end - @tuned_file - end - def minor_numbering #and auto segment numbering increment - data=@data - @tuned_file=[] - number_small,letter_small=0,0 - letter=%w( a b c d e f g h i j k l m n o p q r s t u v w x y z ) - data.each do |para| - if para =~/\w|\S|<|\(/ - if para !~/^%% |^0~|^4~endnotes|^<\/center>|<:ee>|<:e[:_]>|^\^~ |<:e[:_]\d+?>|^<:p[bn]>|^<:\#|<:- |<[:!]!4|^(?:alt|code|group|poem|table)\{|^\}(?:alt|code|group|poem|table)|^\}table$|||||<\/tr>|


|\[endnotes\]|<:zz>|<:isbn-|<:journal-|<:conference-|/i #ocn here #  added with Tune.code #¡ - if para=~/^[1-8]~/; number_small,letter_small=0,0 #% sub-number system, (baby numbering) reset with any change of major number (more obviously should be placed in number titles, but that is conditionally executed, check and move later) - end - if para =~/^#[ 1]/ - letter_small=0 - number_small=0 if para =~ /^#1/ - number_small+=1 - para.gsub!(/^#[ 1]/,"#{number_small}. ") #change 2004 - end - if para =~/^_# / - para.gsub!(/^_# /,"<:i1> #{letter[letter_small]}. ") #change 2004 - letter_small+=1 - end - end - end - @tuned_file << para - end - @tuned_file - end - def name_para_seg_filename - # paragraph name/numbering rules - # manual naming overrides, manual naming may be - # alpha-numeric characters mixed, - # numeric only (a number), if - # all segments have been named, - # the numbers used are over 1000 or - # it is not minded that auto-numbering uses a funny scheme for naming segments (not yet implemented) - # [for now a warning is printed for such documents on use of maintenance or very-verbose flag] - # auto-naming takes the form of giving numbers to segments - # the rules for which are as follows - # if the title/heading text starts with a numeric, then that is used (1 3.1 3rd etc.) - # otherwise the level 4 segment number from the embedded document structure info is used - # if there is none a sequential number is designated, preceded by an underscore - data=@data - @tuned_file=[] - art_filename_auto=1 - @counter=1 - @unique_auto_name=[] - puts 'manual segment names, numbers used as names, risk warning (segmented html)' if not @md.seg_autoname_safe and @md.cmd =~/[MV]/ - data.each do |para| - para=SiSU_document_structure::Structure.new(@md,para).structure_markup - if para !~/^0~/ - if para =~/^[456]~ / - if para=~/^4/ and not @md.set_heading_seg - @md.set_heading_seg=true - end - if para =~/^[456]~(?:\s\S+)?\s+([\d.,:-]+)/m #heading starts with a recognised numeric or word followed by a recognised numerical construct, use that as name - pattern=$1 - pattern.gsub!(/(?:[:,-]|\W)/,'.') - pattern.gsub!(/\.$/,'') - if not @md.seg_names.nil? and not @md.seg_names.include?(pattern) - para.gsub!(/^([456])~\s*/,"\\1~#{pattern} ") - @md.seg_names << pattern - else puts 'warn, there may be a conflicting numbering scheme' if @md.cmd =~/[VM]/ - end - end - if para =~/^4~\s.+?;4:(\d+);/m #extract segment name from embedded document structure info - pattern=$1 - pattern.gsub!(/(?:[:,-]|\W)/,'.') - pattern.gsub!(/\.$/,'') - if not @md.seg_names.nil? and not @md.seg_names.include?(pattern) - para.gsub!(/^(4)~\s*/,"\\1~#{pattern} ") - @md.seg_names << pattern - else - para.gsub!(/^(4)~\s*/,"\\1~~#{pattern} ") - @md.seg_names << "~#{pattern}" - end - end - if para =~/^4~\s+/ #if still not segment name, provide a numerical one - if not @md.seg_names.nil? and not @md.seg_names.include?(art_filename_auto) - para.gsub!(/^4~\s+/,%{4~_#{art_filename_auto} }) - @md.seg_names << art_filename_auto - else puts 'segment name (numbering) error' - end - art_filename_auto+=1 - end - end - end - @tuned_file << if para =~/^([1-6])~/m and (@md.pagenew or @md.pagebreak); m=$1 #watch ref~ - para_tmp=[] - if @md.pagenew.to_s =~/#{m}/; para_tmp << "<:pn>\n" << para - end - if @md.pagebreak.to_s =~/#{m}/; para_tmp << "<:pb>\n" << para - end - para_result=unless para_tmp.length > 0; para - else para_tmp - end - else para - end - end - if @md.seg_names.length > 0 - @md.set_heading_seg=true - end - @tuned_file=@tuned_file.flatten - end - def set_heading_top #% make sure no false positives - unless @md.set_heading_top - puts "\tdocument contains no top level heading, (will have to manufacture one)" if @md.cmd =~/[MV]/ - data=@data - @tuned_file=[] - data.each do |para| - unless @md.set_heading_top - if para !~/^(?:@\S+:|0~\S+)\s/m and para !~/\A\s*\Z/m - @md.set_heading_top=true - head=if @md.title ; "1~ #{@md.title}" - else '1~ [no title provided]' - end - @tuned_file << head - end - end - @tuned_file << para - end - @tuned_file - end - end - def set_heading_seg #% make sure no false positives - unless @md.set_heading_seg - puts "\tdocument contains no segment level, (will have to manufacture one)" if @md.cmd =~/[MV]/ - data=@data - @tuned_file=[] - data.each do |para| - unless @md.set_heading_seg - if para !~/^(?:@\S+:|0~\S+|[123]~)/m and para !~/\A\s*\Z/m and para !~/<:p[bn]>/ - @md.set_heading_seg=true - head=if @md.title ; "4~seg [#{@md.title}]" - else '4~seg [segment]' - end - @tuned_file << head - end - end - @tuned_file << para - end - @tuned_file - end - end - def set_header_title #% make sure no false positives - unless @md.set_header_title - puts "\t no document title provided, (will have to manufacture one)" if @md.cmd =~/[MV]/ - data=@data - @tuned_file=[] - data.each do |para| - unless @md.set_header_title - if para !~/^%{1,2}\s/m and para !~/\A\s*\Z/m - @tuned_file << "0~title #{@md.heading_seg_first}" - @md.title=@md.heading_seg_first - @md.set_header_title=true - end - end - @tuned_file << para - end - @tuned_file - end - end - def endnotes - data=@data - @tuned_file=[] - endnote_no,endnote_ref=1,1 - #% endnote work zone - data.each do |para| - # manually numbered endnotes --> - if @md.mod.inspect =~/--no-asterisk|--no-annotate/ - para.gsub!(/~\[[*]\s.+?\]~/,'') - end - if @md.mod.inspect =~/--no-dagger|--no-annotate/ - para.gsub!(/~\[[+]\s.+?\]~/,'') - end - case para - # auto-numbered endnotes --> - when /~\{\s+.+?\}~|~\[[*+]\s+.+?\]~/ - para.gsub!(/\s*(\}~|\]~)/,' \1') # required 2003w31 - @word_mode=para.scan(/\S+/) - word_mode=SiSU_DAL::Make.new(@md,@word_mode).endnote_call_number - para=word_mode.join(' ') - endnote_ref+=1 - when /~\^(?:\s|$)|<:e>/ - #%Note inserts endnotes previously gathered from /^(|[-~]\{{3})/ (in earlier loop) - word_mode=para.scan(/\S+/) - word_mode=SiSU_DAL::Make.new(@md,word_mode).endnote_call_number - para=word_mode.join(' ') - endnote_ref+=1 - end - @tuned_file << para - end - @tuned_file - end - def endnote_call_number - data=@data - data.each do |word| - case word - when /~\{/ - unless word =~/~\{[*+]+/ - word.gsub!(/~\{/,"~\{#{@@endnote_counter} ") - @@endnote_counter+=1 - end - when /~\[/ - if word =~/~\[[+]/ - word.gsub!(/~\[[+]/,"~\[\+#{@@endnote_counter_dag} ") - @@endnote_counter_dag+=1 - else - word.gsub!(/~\[[*]?/,"~\[\*#{@@endnote_counter_asterisk} ") - @@endnote_counter_asterisk+=1 - end - when /~\^|<:e>/ - word.gsub!(/~\^|<:e>/,"#{@@endnote_array[@@endnote_counter-1]}") - @@endnote_counter+=1 - end - end - end - def metadata - data=@data - meta,@dc,@rc,@cvs,dctitle,add=Array.new(6){[]} - dir=SiSU_Env::Info_env.new(@md.fns) - base_html="#{dir.url.root}/#{@md.fnb}" - ocnm=ocnd=ocnv=0 - ocnm+=1 - header0='<:pn>' - header1="\n1~ Document Information <~0;0:0;m#{ocnm}>" - ocnm+=1 - header4="\n4~metadata MetaData <~0;m#{ocnm};m#{ocnm}>" - ocnm+=1; ocnd+=1 - head_no_dc="<~0;m#{ocnm};d#{ocnd}>" - ocnm+=1; ocnd+=1 - head_no_dc_tag="<~0;m#{ocnm};d#{ocnd}>" - data.each do |para| - case para - when /^0~(title|creator|author|translator|translated_by|illustrator|illustrated_by|prepared_by|digitized_by|description|publisher|contributor|date\.created|date\.issued|date\.available|date\.valid|date\.modified|date|type|format|rights|identifier|source|language)/i - m=$1 - ocnm+=1; ocnd+=1 - @dc << case para - when /^0~title/ - "\n#{@tr.dc_title}: #{@md.dc_title} <~0;m#{ocnm};d#{ocnd}>" - when /^0~(?:creator|author)/ - "\n#{@tr.creator}: #{@md.dc_creator} <~0;m#{ocnm};d#{ocnd}>" - when /0~(?:translator|translated_by)/ - "\n#{@tr.translator}: #{@md.translator} <~0;m#{ocnm};d#{ocnd}>" - when /^0~(?:illustrator|illustrated_by)/ - "\n#{@tr.illustrator}: #{@md.illustrator} <~0;m#{ocnm};d#{ocnd}>" - when /^0~prepared_by/ - "\n#{@tr.prepared_by}: #{@md.prepared_by} <~0;m#{ocnm};d#{ocnd}>" - when /^0~digitized_by/ - "\n#{@tr.digitized_by}: #{@md.digitized_by} <~0;m#{ocnm};d#{ocnd}>" - when /^0~description/ - "\n#{@tr.description}: #{@md.dc_description} <~0;m#{ocnm};d#{ocnd}>" - when /^0~subject/ - "\n#{@tr.subject}: #{@md.dc_subject} <~0;m#{ocnm};d#{ocnd}>" - when /^0~abstract/ - "\n#{@tr.abstract}: #{@md.dc_abstract} <~0;m#{ocnm};d#{ocnd}>" - when /^0~publisher/ - "\n#{@tr.publisher}: #{@md.dc_publisher} <~0;m#{ocnm};d#{ocnd}>" - when /^0~contributor/ - "\n#{@tr.contributor}: #{@md.dc_contributor} <~0;m#{ocnm};d#{ocnd}>" - when /^0~date.created/ - "\n#{@tr.date_created}: #{@md.dc_date_created} <~0;m#{ocnm};d#{ocnd}>" - when /^0~date.issued/ - "\n#{@tr.date_issued}: #{@md.dc_date_issued} <~0;m#{ocnm};d#{ocnd}>" - when /^0~date.available/ - "\n#{@tr.date_available}: #{@md.dc_date_available} <~0;m#{ocnm};d#{ocnd}>" - when /^0~date.modified/ - "\n#{@tr.date_modified}: #{@md.dc_date_modified} <~0;m#{ocnm};d#{ocnd}>" - when /^0~date.valid/ - "\n#{@tr.date_valid}: #{@md.dc_date_valid} <~0;m#{ocnm};d#{ocnd}>" - when /^0~date/ - "\n#{@tr.date}: #{@md.dc_date} <~0;m#{ocnm};d#{ocnd}>" - when /^0~type/ - "\n#{@tr.type}: #{@md.dc_type} <~0;m#{ocnm};d#{ocnd}>" - when /^0~format/ - "\n#{@tr.format}: #{@md.dc_format} <~0;m#{ocnm};d#{ocnd}>" - when /^0~rights/ - "\n#{@tr.rights}: #{@md.dc_rights} <~0;m#{ocnm};d#{ocnd}>" - when /^0~identifier/ - "\n#{@tr.identifier}: #{@md.dc_identifier} <~0;m#{ocnm};d#{ocnd}>" - when /^0~source/ - "\n#{@tr.source}: #{@md.dc_source} <~0;m#{ocnm};d#{ocnd}>" - when /^0~language/ - "\n#{@tr.language}: #{@md.dc_language} <~0;m#{ocnm};d#{ocnd}>" - when /^0~language.original/ - "\n#{@tr.language_original}: #{@md.language_original} <~0;m#{ocnm};d#{ocnd}>" - when /^0~relation/ - "\n#{@tr.relation}: #{@md.dc_relation} <~0;m#{ocnm};d#{ocnd}>" - when /^0~coverage/ - "\n#{@tr.coverage}: #{@md.dc_coverage} <~0;m#{ocnm};d#{ocnd}>" - when /^0~keywords/ - "\n#{@tr.keywords}: #{@md.keywords} <~0;m#{ocnm};d#{ocnd}>" - when /^0~comments/ - "\n#{@tr.comments}: #{@md.comments} <~0;m#{ocnm};d#{ocnd}>" - when /^0~cls_loc/ - "\n#{@cls_dewey}: #{@md.cls_dewey} <~0;m#{ocnm};d#{ocnd}>" - when /^0~cls_dewey/ - "\n#{@tr.cls_dewey}: #{@md.cls_dewey} <~0;m#{ocnm};d#{ocnd}>" - when /^0~cls_gutenberg|0~cls_pg/ - "\n#{@tr.cls_gutenberg}: #{@md.cls_gutenberg} <~0;m#{ocnm};d#{ocnd}>" - #"\n#{@tr.cls_gutenberg}: #{@md.cls_pg} <~0;m#{ocnm};d#{ocnd}>" - when /^0~cls_isbn/ - "\n#{@tr.cls_isbn}: #{@md.cls_isbn} <~0;m#{ocnm};d#{ocnd}>" - when /^0~prefix(?:_a)?/ - "\n#{@tr.prefix_a}: #{@md.prefix_a} <~0;m#{ocnm};d#{ocnd}>" - when /^0~prefix_b/ - "\n#{@tr.prefix_b}: #{@md.prefix_b} <~0;m#{ocnm};d#{ocnd}>" - else para.gsub(/^0~(#{m})\s+(.+)/m,"\n#{m.capitalize}: \\2 <~0;m#{ocnm};d#{ocnd}>") - end - end - end - ocnm+=1; ocnv+=1 - head_no_rc="<~0;m#{ocnm};v#{ocnv}>" - ocnm+=1; ocnv+=1 - head_no_rc_tag="<~0;m#{ocnm};v#{ocnv}>" - data.each do |para| - case para - when /^0~(?:cvs|rcs)\+\s+/ #note the + sign to turn on use of cvs id - ocnm+=1; ocnv+=1 - @cvs << "#{@tr.sc_number}: #{@md.sc_number} <~0;m#{ocnm};v#{ocnv}>" - ocnm+=1; ocnv+=1 - @cvs << "#{@tr.sc_date}: #{@md.sc_date} <~0;m#{ocnm};v#{ocnv}>" - ocnm+=1; ocnv+=1 - @cvs << "CVS/RCS time: #{@md.sc_time} <~0;m#{ocnm};v#{ocnv}>" - ocnm+=1; ocnv+=1 - when /^0~cvs[+\s]/ #enable pattern above instead if you wish the default to be to include cvs tags from all documents KEEP - when /^0~cvs\s+/ #enable pattern above instead if you wish the default to be to include cvs tags from all documents KEEP - end - end - if true #default version information - ocnm+=1; ocnv+=1 - if @md.sc_filename and @md.sc_filename.length > 3 - @rc << "#{@tr.sourcefile}: #{@md.sc_filename} <~0;m#{ocnm};v#{ocnv}>" - else @rc << "#{@tr.sourcefile}: #{@md.fns} <~0;m#{ocnm};v#{ocnv}>" - end - ocnm+=1; ocnv+=1 - if @md.file_encoding and @md.file_encoding.length > 3 #translate - @rc << "Filetype: #{@md.file_encoding} <~0;m#{ocnm};v#{ocnv}>" - end - ocnm+=1; ocnv+=1 - if @md.dgst #change. enable by default - @rc << "#{@tr.sourcefile_digest}, #{@md.dgst[0]} #{@md.dgst[1]} <~0;m#{ocnm};v#{ocnv}>" - ocnm+=1; ocnv+=1 - end - if @md.dgst_skin #change. enable by default - @rc << "Skin_Digest: #{@md.dgst_skin[0]} #{@md.dgst_skin[1]} <~0;m#{ocnm};v#{ocnv}>" - ocnm+=1; ocnv+=1 - end - @rc << "Generated #{head_no_rc}" if @rc.length > 0 - @rc << "#{@tr.last_generated}: #{Time.now} <~0;m#{ocnm};v#{ocnv}>" - ocnm+=1; ocnv+=1 - if @md.sisu_version[:version] - @rc << "#{@tr.sisu_version}: #{@md.sisu_version[:project]} #{@md.sisu_version[:version]} of #{@md.sisu_version[:date_stamp]} (#{@md.sisu_version[:date]}) <~0;m#{ocnm};v#{ocnv}>" - ocnm+=1; ocnv+=1 - end - @rc << "#{@tr.ruby_version}: #{@md.ruby_version} <~0;m#{ocnm};v#{ocnv}>" - end - meta << header0 - meta << header1 - meta << header4 - meta << "Document Manifest @\n #{base_html}/#{@md.fn[:manifest]} <~0;m#{ocnm};m#{ocnm}>" - meta << "Dublin Core (DC) #{head_no_dc}" if @dc.length > 0 - meta << "DC tags included with this document are provided here. #{head_no_dc_tag}" if @dc.length > 0 - @dc.each { |x| meta << x } - meta << "Version Information #{head_no_rc}" if @rc.length > 0 - if @cvs.length > 0 - meta << "Note the version information provided here, is specific to the host site. #{head_no_rc_tag}" - @cvs.each { |x| meta << x } - end - @rc.each { |x| meta << x } - ## ENDNOTE RELATED endnote related - meta << "\n" - meta=SiSU_DAL::Make.new(@md,meta).object_digest - end - def stamped(para,hash_class) - @tuned=[] - para=strip_clean_extra_spaces(para) - digest_all=hash_class.hexdigest(para) # print "#{hash_class.name}: "; puts digest_all #length==32 or 64 - stripped=strip_clean_of_markup(para) - digest_strip=hash_class.hexdigest(stripped) - case para - when /~\{[\d*+]+\s+.+?\}~|~\[[*+]\d+\s+.+?\]~/ - en_and_para,en_and_para_digest=[],[] - para.gsub!(/\s*(\}~|\]~)/,' \1') #watch - para_plus_en=para.scan(/.*?~\{.+?\}~|.*?~\[.+?\]~/) - para_tail=if para =~/(?:.*?~\{.+?\}~|.*?~\[.+?\]~)+([\s\S]+)/ - /(?:.*?~\{.+?\}~|.*?~\[.+?\]~)+(.+?<~\d+;(?:\w|[0-6]:)\d+;\w\d+>)/.match(para)[1] - else '' - end - para_plus_en << para_tail - en_and_para_digest << SiSU_DAL::Make.new(@md,para_plus_en).endnote_digest - para_new=en_and_para_digest.join(' ') - @tuned << para_new + '<' + digest_strip + ':' + digest_all + '>' unless para.nil? - else @tuned << para + '<' + digest_strip + ':' + digest_all + '>' unless para.nil? - end - @tuned.join - end - def object_digest - # 1. clean/stripped text without any markup, paragraph, headings etc. without endnotes - # 2. endnotes clean/stripped text digest only (there may be several endnotes within a paragraph) - # 3. whole object, text with markup and any endnotes, (question: with or without the endnote digests??? presumption better without, [however may be easier to check with?]) - # [digests should not include other digests] - # vim==/<[0-9a-f]\{#{@@dl}\}\(:[0-9a-f]\{#{@@dl}\}\)\?>/ - require 'digest/md5' - require 'digest/sha2' - data=@data - @tuned_file=[] - data.each do |para| - if para=~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+>/ - if @env.digest.type =~/sha256/ - for hash_class in [ Digest::SHA256 ] - @tuned_file << stamped(para,hash_class) - end - else - for hash_class in [ Digest::MD5 ] - @tuned_file << stamped(para,hash_class) - end - end - else @tuned_file << para unless para.nil? - end - end - @tuned_file - #use md5 or to create hash of each dal object including ocn, & add into to each dal object - end - def endnote_digest - data=@data - para_bit=[] - data.each do |en_plus| - para_bit <<= case en_plus - when /~\{|~\[/ - if en_plus =~/~\{.+?\}~|~\[.+?\]~/ - para_txt,en_open,en_txt,en_close=/(.*?)(~\{|~\[)(.+?)(\}~|\]~)/m.match(en_plus)[1..4] - stripped_en=strip_clean_of_markup(en_txt) - if @env.digest.type =~/sha256/ - digest_en_strip=Digest::SHA256.hexdigest(stripped_en) - else - digest_en_strip=Digest::MD5.hexdigest(stripped_en) - end - para_txt + en_open + en_txt + '<' + digest_en_strip + '>' + en_close - else puts "Error Exception - problem encountered with:\n#{en_plus}" #arbitrary exception, tidy up - end - else en_plus - end - end - para_bit.join - end - def strip_clean_extra_spaces(s) # dal output tuned - s=s.dup - s=s.gsub(/[ ]+([,.;:?](?:$|\s))/,'\1') - s=s.gsub(/ [ ]+/,' ') - s=s.gsub(/^ [ ]+/,'') - s=s.gsub(/ [ ]+$/,'') - s=s.gsub(/(<\/[bi]>')[ ]+(s )/,'\1\2') - end - def strip_clean_of_markup(s) # used for digest, define rules, make same as in db clean - #consider: <\/?[ib]>|<(?:\/ )?br>|(.+?)<\/del> - s=s.dup - s=s.gsub(/(?:<\/?[ib]>|<~\d+;(?:\w|[0-6]:)\d+;\w\d+>|<#@dp:#@dp>|^[1-6]~\S+|~\{\d+\s.+?\}~)/,'') # markup and endnotes removed - #% same as db clean --> - s=s.gsub(/(.+?)<\/del>/,'DELETED(\1)') # deletions - s=s.gsub(/(\d+)<\/sup>/,'[\1]') - s=s.gsub(/(?: \\;)+/,' ') - #s=s.gsub(//,"[TABLE]\n") # tables - #s=s.gsub(//,'\1') # tables - #s=s.gsub(/¡¡\d+¡/,' ') # tables - #s=s.gsub(/¡/,' ') # tables tidy later - #s=s.gsub(/<.+?>/,'') - s=s.gsub(/\{.+?\.(?:png|jpg|gif).+?\}(?:https?|ftp)\\\:\S+ /,' [image] ') # else image names found in search - s=s.gsub(/\s\s+/,' ') - s=s.strip - end - end -end -__END__ -dal output, rules to simplify parsing -nodes === objects === paragraphs === text blocks separated by \n\n - -dal output: -:verse :group and :code have -end -:table is not used -- cgit v1.2.3