about | summary | refs | log | tree | commit | diff | homepage
path: root/lib/sisu/v0/dal.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sisu/v0/dal.rb')
-rw-r--r-- lib/sisu/v0/dal.rb 335
1 files changed, 6 insertions, 329 deletions
diff --git a/lib/sisu/v0/dal.rb b/lib/sisu/v0/dal.rb
index 25b7528e..aa4758b7 100644
--- a/lib/sisu/v0/dal.rb
+++ b/lib/sisu/v0/dal.rb
@@ -65,6 +65,8 @@ module SiSU_DAL
require "#{SiSU_lib}/param"
require "#{SiSU_lib}/dal_syntax"
require "#{SiSU_lib}/dal_doc_str"
+ require "#{SiSU_lib}/dal_idx"
+ require "#{SiSU_lib}/dal_numbering"
require "#{SiSU_lib}/i18n"
require "#{SiSU_lib}/shared_sem"
include SiSU_Env
@@ -211,7 +213,8 @@ module SiSU_DAL
data=character_check(data)
data=images(data)
data=SiSU_document_structure::Tables.new(@md,data).tables
- data=numbering_song(data) #tr issue
+ data=SiSU_numbering::Numbering.new(@md,data).numbering_song
+ data=SiSU_book_index::Book_index.new(data).indexing_song if @md.book_index
data=endnotes(data)
data=object_digest(data)
meta=metadata(data)
@@ -449,7 +452,7 @@ module SiSU_DAL
end
def substitutions_and_insertions?(data)
data_expand=[]
- if data[0] =~ /^#!\s*(?:\/usr\/bin\/env sisu|\/usr\/bin\/sisu)/ # remove bang from top #! (however file is stripped, so will be removed provided no content preceeds it)
+ if data[0] =~ /^#!\s*(?:\/usr\/bin\/env sisu|\/usr\/bin\/sisu)/ # remove bang from top #! (however file is stripped, so will be removed provided no content precedes it)
data[0].gsub!(/^#!\s*\/usr\/bin\/sisu/,'')
data[0].gsub!(/^#!\s*\/usr\/bin\/env sisu/,'')
end
@@ -507,332 +510,6 @@ module SiSU_DAL
end
end
end
- def numbering_song(data)
- data=number_plaintext_para(data)
- data=name_endnote_seg(data) #tr issue
- data=auto_number_heading_ie_title(data) #tr issue
- data=ocn(data) #watch
- data=minor_numbering(data)
- data=name_para_seg_filename(data)
- data=set_heading_seg(data) unless @md.set_heading_seg
- data=set_heading_top(data) unless @md.set_heading_top
- data=set_header_title(data) unless @md.set_header_title
- data
- end
- def number_plaintext_para(data)
- @tuned_file=[]
- data.each do |para|
- if para !~/#{Mx[:gr_o]}(?:code|group|alt|poem|verse)#{Mx[:gr_c]}|#{Mx[:gr_o]}Th|#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}/
- para.gsub!(/(.+)\n/,'\1 ') #messy, but idea is that tables should retain breaks
- end
- para.gsub!(/^/,"\n") unless para =~/#{Mx[:tc_p]}/u
- para.gsub!(/^\s+|\s$/,"\n")
- @tuned_file << para
- end
- @tuned_file=@tuned_file.flatten
- end
- def name_endnote_seg(data)
- @tuned_file=[]
- data.each do |para|
- para.gsub!(/<:3>\s*<:ee>/, <<-WOK
-#{@@endnote['special_align']} <p /><br />\r
-#{@@endnote['seg_name_3']} <p />
-#{@@endnote['special_align_close']}
- WOK
- )
- para.gsub!(/<:2>\s*<:ee>/, <<-WOK
-#{@@endnote['special_align']} <p /><br />\r
-#{@@endnote['seg_name_2']} <p />
-#{@@endnote['special_align_close']}
- WOK
- )
- para.gsub!(/<:1>\s*<:ee>/, <<-WOK
-#{@@endnote['special_align']} <p /><br />\r
-#{@@endnote['seg_name_1']} <p />
-#{@@endnote['special_align_close']}
- WOK
- )
- @tuned_file << para
- end
- # debug 2003w46 adding revision control info
- if @md.flag_auto_endnotes \
- and @md.flag_separate_endnotes_make
- @tuned_file << "\n#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}Endnotes #{Mx[:gl_o]}-##{Mx[:gl_c]} #{Mx[:id_o]}~0;0:0;u0#{Mx[:id_c]}"
- end
- @tuned_file << "\n#{Mx[:br_endnotes]}" #DECIDE ON
- @tuned_file=@tuned_file.flatten
- end
- def owner_details_seg
- data << "#{Mx[:lv_o]}4:owner.details#{Mx[:lv_c]}Owner Details"
- end
- def number_sub_heading(para,num,title_no)
- case para
- when /#{Mx[:lv_o]}#{num}:-#{Mx[:lv_c]}/; para.gsub!(/#{Mx[:lv_o]}#{num}:-#{Mx[:lv_c]}/,"#{title_no} ")
- when /^#{Mx[:lv_o]}#{num}:##{Mx[:lv_c]}/; para.gsub!(/^#{Mx[:lv_o]}#{num}:##{Mx[:lv_c]}/,"#{title_no} ")
- when /^#{Mx[:lv_o]}#{num}:[a-z_\.]+#{Mx[:lv_c]}/
- para.gsub!(/^#{Mx[:lv_o]}#{num}:([a-z_\.]+)#{Mx[:lv_c]}\s*(.+)/i,%{#{Mx[:lv_o]}#{num}:\\1#{Mx[:lv_c]} #{title_no} \\2 #{Mx[:fa_o]}:name##{title_no}#{Mx[:fa_c]}})
- when /^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}\s*#{title_no}/
- para.gsub!(/^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{num}:#{title_no}#{Mx[:lv_c]}") #where title contains title number
- else para.gsub!(/^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{num}:#{title_no}#{Mx[:lv_c]} #{title_no} ") #main, where title number is to be provided
- end
- if @md.toc_lev_limit \
- and @md.toc_lev_limit < num
- para.gsub!(/^#{Mx[:lv_o]}[5-9]:\S*?#{Mx[:lv_c]}/,'!_ ') #bold line, watch
- end
- para
- end
- def auto_number_heading_ie_title(data) #also does some segment naming
- @tuned_file=[]
- if @md.markup =~/num_top/ \
- or @md.num_top # watch, 2003w23
- input="#{@md.markup}"[/num_top\=([1-6])/,1] if @md.markup
- input||=@md.num_top if @md.num_top !~/^$/
- end
- num_top=input.to_i
- t_no1=t_no2=t_no3=t_no4=0
- no1=num_top; no2=(num_top + 1); no3=(num_top + 2); no4=(num_top + 3)
- t_not=0
- data.each do |para| #@md.seg_names << [additions to segment names]
- if (@md.markup =~/num_top/ \
- or (@md.num_top \
- and @md.num_top !~/^$/)) \
- and para !~/^#{Rx[:meta]}/
- if (para =~/^(?:#{no1}|^#{no2}|^#{no3}#{no4})~#/ \
- and para !~/^#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}/)
- t_not+=1 #; t_no2=0; t_no3=0
- para.gsub!(/^(#{Mx[:lv_o]}#{no1}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2")
- para.gsub!(/^(#{Mx[:lv_o]}#{no2}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2")
- para.gsub!(/^(#{Mx[:lv_o]}#{no3}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2")
- para.gsub!(/^(#{Mx[:lv_o]}#{no4}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2")
- end
- if para =~/#{Mx[:lv_o]}#{no1}:/
- @subnumber=1
- @subnumber=0 if para =~/#{Mx[:lv_o]}#{no1}:/
- end
- if para =~/^#{Mx[:lv_o]}[1-6]:[\w-]*#{Mx[:lv_c]}/ \
- and para !~ /(?:#{Mx[:lv_o]}[1-6]:[\w-]+-#{Mx[:lv_c]}|#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|^#{Mx[:lv_o]}[1-6]:[a-z_\.]+#{Mx[:lv_c]}\s*[\d.]+)\s/ \
- and para !~/#{Mx[:fa_o]}(?:~#|-#)#{Mx[:fa_c]}/
- if para =~/^#{Mx[:lv_o]}#{no1}:/
- t_no1+=1; t_no2=0; t_no3=0
- title_no="#{t_no1}"
- if not @md.seg_names.nil? \
- and not @md.seg_names.include?(title_no)
- para.gsub!(/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}\s*(\S+)#/,"#{Mx[:lv_o]}#{no1}:#{title_no}#{Mx[:lv_c]} \\1 #{title_no} ") #shift placement of auto-number to after first word, e.g. Article # not # Article, added on occasion of ABF (20040329)
- para.gsub!(/^#{no1}\{\s+(Article|Clause|Section)\s+#/i,%{#{no1}~#{title_no} \\1 #{title_no}. })
- unless para =~/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}\s*[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review
- para.gsub!(/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no1}:#{title_no}#{Mx[:lv_c]}#{title_no}. ")
- end
- @md.seg_names << title_no
- #else puts "warning segment name #{title_no} already exists"
- end
- unless para =~/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)#{Mx[:lv_c]}\s*[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required
- para.gsub!(/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)#{Mx[:lv_c]}\s*(.+)/i,
- %{#{Mx[:lv_o]}#{no1}:\\1#{Mx[:lv_c]}#{title_no}. \\2 #{Mx[:fa_o]}:name##{title_no}#{Mx[:fa_c]}})
- end
- para.gsub!(/^#{Mx[:lv_o]}#{no1}:##{Mx[:lv_c]}/,"#{title_no}. ") #watch
- para.gsub!(/^#{no1}~#\s*/,"#{title_no}. ")
- end
- if para =~/^#{Mx[:lv_o]}#{no2}:\S*?#{Mx[:lv_c]}/
- t_no2+=1; t_no3=0
- title_no="#{t_no1}.#{t_no2}"
- para=number_sub_heading(para,no2,title_no)
- end
- if para =~/^#{Mx[:lv_o]}#{no3}:\S*?#{Mx[:lv_c]}/
- t_no3+=1
- title_no="#{t_no1}.#{t_no2}.#{t_no3}"
- para=number_sub_heading(para,no3,title_no)
- end
- elsif para =~/^#{Mx[:lv_o]}[1-6]:[\w-]+-#{Mx[:lv_c]}/ # endnotes, watch2005
- para.gsub!(/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no1}:\\1#{Mx[:lv_c]}") #"#{no1}~\\1 ")
- para.gsub!(/^#{Mx[:lv_o]}#{no2}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no2}:\\1#{Mx[:lv_c]}")
- para.gsub!(/^#{Mx[:lv_o]}#{no3}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no3}:\\1#{Mx[:lv_c]}")
- end
- elsif @md.markup =~/num_extract/ #AS DANGEROUS force enable with document, note already does this type of numbering for cisg, locate and coordinate logic, is currently misplaced in code, chengwei inspired 2004w23/4
- if para =~/^#{Mx[:lv_o]}[1-9]:#{Mx[:lv_c]}([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d
- name_num=$1
- para.gsub!(/^#{Mx[:lv_o]}([1-9]:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1#{name_num}#{Mx[:lv_c]}")
- end
- if @md.toc_lev_limit
- end
- end
- @tuned_file << para
- end
- @tuned_file=@tuned_file.flatten
- end
- def ocn(data) #and auto segment numbering increment
- @tuned_file=[]
- object_array=SiSU_document_structure::OCN.new(@md,data).ocn
- object_array.each do |o|
- @tuned_file <<= if o.ocn; "#{o.txt} #{Mx[:id_o]}~#{o.ocn};#{o.lv};#{o.type}#{Mx[:id_c]}" #main ocn descriptor
- else o.txt
- end
- end
- @tuned_file=@tuned_file.flatten
- end
- def minor_numbering(data) #and auto segment numbering increment
- @tuned_file=[]
- number_small,letter_small=0,0
- letter=%w( a b c d e f g h i j k l m n o p q r s t u v w x y z )
- data.each do |para|
- if para =~/\w|\S|<|\(/
- if para !~/^%% |#{Rx[:meta]}|^0~|^#{Mx[:lv_o]}endnotes:|^4~endnotes|^<\/center>|<:ee>|<:e[:_]>|^\^~ |<:e[:_]\d+?>|^#{Mx[:fa_o]}:p[bn]#{Mx[:fa_c]}|^<:\#|<:- |<[:!]!4|#{Mx[:gr_o]}(?:alt|code|group|poem|table)#{Mx[:gr_c]}|^(?:alt|code|group|poem|table)\{|^\}(?:alt|code|group|poem|table)|^\}table$|<table|<\/table>|<td|<\/td>|<th|<\/th>|<tr>|<\/tr>|<hr width|<:4-endnotes>|\[endnotes\]|<:zz>|<:isbn-|<:journal-|<:conference-|#{Mx[:br_endnotes]}/i #ocn here #&nbsp; added with Tune.code #ยก
- if para=~/^#{Mx[:lv_o]}[1-9]:\S*?#{Mx[:lv_c]}/; number_small,letter_small=0,0 #% sub-number system, (baby numbering) reset with any change of major number (more obviously should be placed in number titles, but that is conditionally executed, check and move later)
- end
- if para =~/^#[ 1]/
- letter_small=0
- number_small=0 if para =~ /^#1/
- number_small+=1
- para.gsub!(/^#[ 1]/,"#{number_small}. ") #change 2004
- end
- if para =~/^_# /
- para.gsub!(/^_# /,"#{Mx[:fa_o]}:i1#{Mx[:fa_c]} #{letter[letter_small]}. ") #change 2004
- letter_small+=1
- end
- end
- end
- @tuned_file << para
- end
- @tuned_file=@tuned_file.flatten
- end
- def name_para_seg_filename(data)
- # paragraph name/numbering rules
- # manual naming overrides, manual naming may be
- # alpha-numeric characters mixed,
- # numeric only (a number), if
- # all segments have been named,
- # the numbers used are over 1000 or
- # it is not minded that auto-numbering uses a funny scheme for naming segments (not yet implemented)
- # [for now a warning is printed for such documents on use of maintenance or very-verbose flag]
- # auto-naming takes the form of giving numbers to segments
- # the rules for which are as follows
- # if the title/heading text starts with a numeric, then that is used (1 3.1 3rd etc.)
- # otherwise the level 4 segment number from the embedded document structure info is used
- # if there is none a sequential number is designated, preceded by an underscore
- @tuned_file=[]
- art_filename_auto=1
- @counter=1
- @unique_auto_name=[]
- if not @md.seg_autoname_safe and @md.cmd =~/[MV]/
- puts 'manual segment names, numbers used as names, risk warning (segmented html)'
- end
- data.each do |para|
- para=SiSU_document_structure::Structure.new(@md,para).structure_markup
- if para =~/^#{Mx[:lv_o]}[456]:#{Mx[:lv_c]}/
- if para=~/^#{Mx[:lv_o]}[4]:#{Mx[:lv_c]}/ \
- and not @md.set_heading_seg
- @md.set_heading_seg=true
- end
- if para =~/^#{Mx[:lv_o]}[456]:#{Mx[:lv_c]}(?:\s*\S+)?\s+([\d.,:-]+)/m #heading starts with a recognised numeric or word followed by a recognised numerical construct, use that as name
- pattern=$1
- pattern.gsub!(/(?:[:,-]|\W)/,'.')
- pattern.gsub!(/\.$/,'')
- if not @md.seg_names.nil? \
- and not @md.seg_names.include?(pattern)
- para.gsub!(/^#{Mx[:lv_o]}([456]):#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1:#{pattern}#{Mx[:lv_c]}")
- @md.seg_names << pattern
- else puts 'warn, there may be a conflicting numbering scheme' if @md.cmd =~/[VM]/
- end
- end
- if para =~/^#{Mx[:lv_o]}4:#{Mx[:lv_c]}.+?;4:(\d+);/m #extract segment name from embedded document structure info
- pattern=$1
- pattern.gsub!(/(?:[:,-]|\W)/,'.')
- pattern.gsub!(/\.$/,'')
- if not @md.seg_names.nil? \
- and not @md.seg_names.include?(pattern)
- para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1#{pattern}#{Mx[:lv_c]}")
- @md.seg_names << pattern
- else
- para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1~#{pattern}#{Mx[:lv_c]}")
- @md.seg_names << "~#{pattern}"
- end
- end
- if para =~/^#{Mx[:lv_o]}4:#{Mx[:lv_c]}/ #if still not segment name, provide a numerical one
- if not @md.seg_names.nil? \
- and not @md.seg_names.include?(art_filename_auto)
- para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,%{#{Mx[:lv_o]}\\1~#{art_filename_auto}#{Mx[:lv_c]}})
- @md.seg_names << art_filename_auto
- else puts 'segment name (numbering) error'
- end
- art_filename_auto+=1
- end
- end
- @tuned_file << if para =~/^#{Mx[:lv_o]}([1-6]):\S*?#{Mx[:lv_c]}/m \
- and (@md.pagenew or @md.pagebreak)
- m=$1 #watch ref~
- para_tmp=[]
- if @md.pagenew.inspect =~/#{m}/; para_tmp << "#{Mx[:br_page_new]}\n" << para
- elsif @md.pagebreak.inspect =~/#{m}/; para_tmp << "#{Mx[:br_page]}\n" << para
- end
- para_result=unless para_tmp.length > 0; para
- else para_tmp
- end
- else para
- end
- end
- if @md.seg_names.length > 0
- @md.set_heading_seg=true
- end
- @tuned_file=@tuned_file.flatten
- end
- def set_heading_top(data) #% make sure no false positives
- unless @md.set_heading_top
- puts "\tdocument contains no top level heading, (will have to manufacture one)" if @md.cmd =~/[MV]/
- @tuned_file=[]
- data.each do |para|
- unless @md.set_heading_top
- if para !~/^(?:#{Rx[:meta]}|@\S+:|0~\S+)\s/m \
- and para !~/\A\s*\Z/m
- @md.set_heading_top=true
- head=if @md.title ; "#{Mx[:lv_o]}1:#{Mx[:lv_c]} #{@md.title}"
- else "#{Mx[:lv_o]}1:#{Mx[:lv_c]} [no title provided]"
- end
- @tuned_file << head
- end
- end
- @tuned_file << para
- end
- @tuned_file=@tuned_file.flatten
- end
- end
- def set_heading_seg(data) #% make sure no false positives
- unless @md.set_heading_seg
- puts "\tdocument contains no segment level, (will have to manufacture one)" if @md.cmd =~/[MV]/
- @tuned_file=[]
- data.each do |para|
- unless @md.set_heading_seg
- if para !~/^(?:#{Rx[:meta]}|#{Mx[:lv_o]}[123]:\S*?#{Mx[:lv_c]})/m \
- and para !~/\A\s*\Z/m \
- and para !~/#{Mx[:br_page]}|#{Mx[:br_page_new]}/
- @md.set_heading_seg=true
- head=if @md.title ; "#{Mx[:lv_o]}4:seg#{Mx[:lv_c]} [#{@md.title}]"
- else "#{Mx[:lv_o]}4:seg#{Mx[:lv_c]} [segment]"
- end
- @tuned_file << head
- end
- end
- @tuned_file << para
- end
- @tuned_file=@tuned_file.flatten
- end
- end
- def set_header_title(data) #% make sure no false positives
- unless @md.set_header_title
- puts "\t no document title provided, (will have to manufacture one)" if @md.cmd =~/[MV]/
- @tuned_file=[]
- data.each do |para|
- unless @md.set_header_title
- if para !~/^%{1,2}\s/m \
- and para !~/\A\s*\Z/m
- @tuned_file << "#{Mx[:meta_o]}title#{Mx[:meta_c]} #{@md.heading_seg_first}"
- @md.title=@md.heading_seg_first
- @md.set_header_title=true
- end
- end
- @tuned_file << para
- end
- @tuned_file=@tuned_file.flatten
- end
- end
def endnotes(data)
@tuned_file=[]
endnote_no,endnote_ref=1,1
@@ -1058,7 +735,7 @@ module SiSU_DAL
para.gsub!(/\s*(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/m,' \1') #watch
para_plus_en=para.scan(/.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m)
para_tail=if para =~/(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+([\s\S]+)/m
- /(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+(.+?#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]})/m.match(para)[1]
+ /(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+(.*?#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]})/m.match(para)[1]
else ''
end
para_plus_en << para_tail