# coding: utf-8
=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search

 * Author: Ralph Amissah

 * Copyright: (C) 1997 - 2009 Ralph Amissah All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:

 * Download:

 * Ralph Amissah

 ** Description: heading auto-numbering, object citation numbers and
    segment naming for marked-up paragraphs

=end
module SiSU_numbering
  # Runs a fixed sequence of numbering and naming passes over an array of
  # paragraph strings.  Markup delimiters come from the project-wide +Mx+
  # table and header patterns from +Rx+; document settings (and a few flags
  # that the passes set) live on the +md+ object handed to the constructor.
  class Numbering
    # md::   document metadata/settings object (read and updated by the passes)
    # data:: array of paragraph strings to process
    def initialize(md,data)
      @md,@data=md,data
    end

    # Entry point: applies every pass in order and returns the resulting array.
    # The three "set_*" passes only run while the matching flag on +@md+ is
    # still unset.
    def numbering_song
      data=@data
      data=number_plaintext_para(data)
      data=name_endnote_seg(data) #tr issue
      data=auto_number_heading_ie_title(data) #tr issue
      data=ocn(data) #watch
      data=minor_numbering(data)
      data=name_para_seg_filename(data)
      data=set_heading_seg(data) unless @md.set_heading_seg
      data=set_heading_top(data) unless @md.set_heading_top
      data=set_header_title(data) unless @md.set_header_title
      data
    end

    # Normalises line breaks inside each paragraph (strings are modified in
    # place).  Paragraphs carrying one of the listed group markers or table
    # markers keep their internal line breaks.
    # Returns the flattened array of paragraphs.
    def number_plaintext_para(data)
      @tuned_file=[]
      data.each do |para|
        if para !~/#{Mx[:gr_o]}(?:code|group|alt|poem|verse)#{Mx[:gr_c]}|#{Mx[:gr_o]}Th|#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}/
          para.gsub!(/(.+)\n/,'\1 ') #messy, but idea is that tables should retain breaks
        end
        para.gsub!(/^/,"\n") unless para =~/#{Mx[:tc_p]}/u
        para.gsub!(/^\s+|\s$/,"\n")
        @tuned_file << para
      end
      @tuned_file=@tuned_file.flatten
    end

    # Appends an "Endnotes" level-4 heading (only when both endnote flags on
    # +@md+ are set) followed by the endnotes break marker.
    # Note: appends to the array that was passed in.
    def name_endnote_seg(data)
      tuned_file=data
      if @md.flag_auto_endnotes && @md.flag_separate_endnotes_make
        tuned_file << "\n#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}Endnotes #{Mx[:gl_o]}-##{Mx[:gl_c]} #{Mx[:id_o]}~0;0:0;u0#{Mx[:id_c]}"
      end
      tuned_file << "\n#{Mx[:br_endnotes]}" #DECIDE ON
      tuned_file.flatten
    end

    # Appends an "Owner Details" level-4 heading to the instance's data array
    # and returns that array.  (The method takes no argument, so it works on
    # +@data+; it previously referenced an undefined local.)
    def owner_details_seg
      @data << "#{Mx[:lv_o]}4:owner.details#{Mx[:lv_c]}Owner Details"
    end

    # Inserts +title_no+ into a heading paragraph of level +num+ (modified in
    # place) and returns the paragraph.
    #   level marker "-"    -> marker replaced by the number
    #   level marker "#"    -> marker replaced by the number
    #   named heading       -> number inserted after the marker, name anchor appended
    #   unnamed, title already starts with the number -> number used as name only
    #   unnamed otherwise   -> number used as name and prefixed to the title
    # Headings deeper than +@md.toc_lev_limit+ (when set) have a level 5-9
    # marker replaced by a bold-line marker.
    def number_sub_heading(para,num,title_no)
      lv_o,lv_c=Mx[:lv_o],Mx[:lv_c]
      case para
      when /#{lv_o}#{num}:-#{lv_c}/
        para.gsub!(/#{lv_o}#{num}:-#{lv_c}/,"#{title_no} ")
      when /^#{lv_o}#{num}:##{lv_c}/
        para.gsub!(/^#{lv_o}#{num}:##{lv_c}/,"#{title_no} ")
      when /^#{lv_o}#{num}:[a-z_\.]+#{lv_c}/
        para.gsub!(/^#{lv_o}#{num}:([a-z_\.]+)#{lv_c}\s*(.+)/i,
          %{#{lv_o}#{num}:\\1#{lv_c} #{title_no} \\2 #{Mx[:fa_o]}:name##{title_no}#{Mx[:fa_c]}})
      when /^#{lv_o}#{num}:#{lv_c}\s*#{Regexp.escape(title_no)}/
        # where title contains title number; the number is escaped so that the
        # dots in e.g. "1.2" only match literal dots
        para.gsub!(/^#{lv_o}#{num}:#{lv_c}/,"#{lv_o}#{num}:#{title_no}#{lv_c}")
      else
        # main, where title number is to be provided
        para.gsub!(/^#{lv_o}#{num}:#{lv_c}/,"#{lv_o}#{num}:#{title_no}#{lv_c} #{title_no} ")
      end
      if @md.toc_lev_limit && @md.toc_lev_limit < num
        para.gsub!(/^#{lv_o}[5-9]:\S*?#{lv_c}/,'!_ ') #bold line, watch
      end
      para
    end

    # Auto-numbers headings (also does some segment naming).
    #
    # Active when +@md.markup+ mentions "num_top" or +@md.num_top+ is set and
    # non-empty.  The top numbered level is taken from "num_top=N" in the
    # markup string, falling back to +@md.num_top+; the three levels from
    # there on receive numbers of the form "1", "1.1", "1.1.1".
    # Numbers used for the top numbered level are recorded in +@md.seg_names+.
    # When +@md.markup+ mentions "num_extract" (and numbering is not applied
    # to the paragraph) a leading number in an unnamed heading's title is
    # copied into the heading's name slot.
    # Paragraph strings are modified in place; returns the flattened array.
    def auto_number_heading_ie_title(data)
      @tuned_file=[]
      lv_o,lv_c=Mx[:lv_o],Mx[:lv_c]
      if @md.markup =~/num_top/ || @md.num_top # watch, 2003w23
        input=@md.markup.to_s[/num_top\=([1-6])/,1] if @md.markup
        input||=@md.num_top if @md.num_top !~/^$/
      end
      num_top=input.to_i # nil.to_i is 0 when no level could be determined
      t_no1=t_no2=t_no3=0
      no1=num_top; no2=(num_top + 1); no3=(num_top + 2); no4=(num_top + 3)
      t_not=0
      # settings do not change while iterating, evaluate them once
      numbering_on=(@md.markup =~/num_top/ || (@md.num_top && @md.num_top !~/^$/))
      extract_on=(@md.markup =~/num_extract/)
      data.each do |para| #@md.seg_names << [additions to segment names]
        if numbering_on && para !~/^#{Rx[:meta]}/
          # all four levels are alternatives here, as in the substitution below
          if para =~/^(?:#{no1}|#{no2}|#{no3}|#{no4})~#/ \
          && para !~/^#{lv_o}4:endnotes#{lv_c}/
            t_not+=1
            para.gsub!(/^(#{lv_o}(?:#{no1}|#{no2}|#{no3}|#{no4})):#(#{lv_c})/,"\\1:ps#{t_not}\\2")
          end
          @subnumber=0 if para =~/#{lv_o}#{no1}:/ # not read anywhere in this file
          if para =~/^#{lv_o}[1-6]:[\w-]*#{lv_c}/ \
          && para !~ /(?:#{lv_o}[1-6]:[\w-]+-#{lv_c}|#{lv_o}4:endnotes#{lv_c}|^#{lv_o}[1-6]:[a-z_\.]+#{lv_c}\s*[\d.]+)\s/ \
          && para !~/#{Mx[:fa_o]}(?:~#|-#)#{Mx[:fa_c]}/
            if para =~/^#{lv_o}#{no1}:/
              t_no1+=1; t_no2=0; t_no3=0
              title_no="#{t_no1}"
              if @md.seg_names && !@md.seg_names.include?(title_no)
                # shift placement of auto-number to after first word, e.g.
                # Article # not # Article, added on occasion of ABF (20040329)
                para.gsub!(/^#{lv_o}#{no1}:#{lv_c}\s*(\S+)#/,"#{lv_o}#{no1}:#{title_no}#{lv_c} \\1 #{title_no} ")
                para.gsub!(/^#{no1}\{\s+(Article|Clause|Section)\s+#/i,%{#{no1}~#{title_no} \\1 #{title_no}. })
                # fix -> if the title starts with a numbering scheme, do not auto-number, review
                unless para =~/^#{lv_o}#{no1}:#{lv_c}\s*[\d.]+\s/
                  para.gsub!(/^#{lv_o}#{no1}:#{lv_c}/,"#{lv_o}#{no1}:#{title_no}#{lv_c}#{title_no}. ")
                end
                @md.seg_names << title_no
              #else puts "warning segment name #{title_no} already exists"
              end
              # bug -> tmp fix, excludes A. B. C. lettering, but not roman
              # numerals, is arbitrary, review required
              unless para =~/^#{lv_o}#{no1}:([a-z_\.]+)#{lv_c}\s*[A-Z]\.?\s/
                para.gsub!(/^#{lv_o}#{no1}:([a-z_\.]+)#{lv_c}\s*(.+)/i,
                  %{#{lv_o}#{no1}:\\1#{lv_c}#{title_no}. \\2 #{Mx[:fa_o]}:name##{title_no}#{Mx[:fa_c]}})
              end
              para.gsub!(/^#{lv_o}#{no1}:##{lv_c}/,"#{title_no}. ") #watch
              para.gsub!(/^#{no1}~#\s*/,"#{title_no}. ")
            end
            if para =~/^#{lv_o}#{no2}:\S*?#{lv_c}/
              t_no2+=1; t_no3=0
              title_no="#{t_no1}.#{t_no2}"
              para=number_sub_heading(para,no2,title_no)
            end
            if para =~/^#{lv_o}#{no3}:\S*?#{lv_c}/
              t_no3+=1
              title_no="#{t_no1}.#{t_no2}.#{t_no3}"
              para=number_sub_heading(para,no3,title_no)
            end
          elsif para =~/^#{lv_o}[1-6]:[\w-]+-#{lv_c}/ # endnotes, watch2005
            # strip the trailing "-" from the heading name
            para.gsub!(/^#{lv_o}#{no1}:([a-z_\.]+)-#{lv_c}/,"#{lv_o}#{no1}:\\1#{lv_c}")
            para.gsub!(/^#{lv_o}#{no2}:([a-z_\.]+)-#{lv_c}/,"#{lv_o}#{no2}:\\1#{lv_c}")
            para.gsub!(/^#{lv_o}#{no3}:([a-z_\.]+)-#{lv_c}/,"#{lv_o}#{no3}:\\1#{lv_c}")
          end
        elsif extract_on
          # AS DANGEROUS force enable with document, note already does this
          # type of numbering for cisg, locate and coordinate logic, is
          # currently misplaced in code, chengwei inspired 2004w23/4
          if para =~/^#{lv_o}[1-9]:#{lv_c}([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d
            name_num=$1
            para.gsub!(/^#{lv_o}([1-9]:)#{lv_c}/,"#{lv_o}\\1#{name_num}#{lv_c}")
          end
        end
        @tuned_file << para
      end
      @tuned_file=@tuned_file.flatten
    end

    # Appends the object citation number descriptor ("~ocn;lv;type" between
    # the id markers) to the text of every object that has an ocn; objects
    # without one contribute their text unchanged.  The objects come from
    # SiSU_document_structure::OCN.  Returns the flattened array.
    def ocn(data) #and auto segment numbering increment
      object_array=SiSU_document_structure::OCN.new(@md,data).ocn
      @tuned_file=object_array.map do |o|
        if o.ocn
          "#{o.txt} #{Mx[:id_o]}~#{o.ocn};#{o.lv};#{o.type}#{Mx[:id_c]}" #main ocn descriptor
        else
          o.txt
        end
      end.flatten
    end

    # Minor ("baby") numbering of list-like paragraphs:
    #   "# text"  -> "N. text"   (running number; "#1" restarts it at 1)
    #   "_# text" -> indent marker plus "a. text", "b. text", ...
    # Both counters are reset by any heading paragraph.  Paragraphs matching
    # the exclusion pattern (comments, headers, endnote and group markup ...)
    # are passed through untouched.  Strings are modified in place; returns
    # the flattened array.
    def minor_numbering(data) #and auto segment numbering increment
      @tuned_file=[]
      number_small,letter_small=0,0
      letter=%w( a b c d e f g h i j k l m n o p q r s t u v w x y z )
      data.each do |para|
        if para =~/\w|\S|<|\(/
          # NOTE(review): the pattern below reached review with several EMPTY
          # alternation branches after "^\}table$" and after "<\/tr>" -- it
          # looks as though HTML-like tag alternatives were stripped from the
          # source text.  An empty branch matches every paragraph, which
          # switched this numbering off entirely, so the empty branches have
          # been removed.  TODO: restore the lost alternatives from the
          # upstream source.
          if para !~/^%% |#{Rx[:meta]}|^0~|^#{Mx[:lv_o]}endnotes:|^4~endnotes|^<\/center>|<:ee>|<:e[:_]>|^\^~ |<:e[:_]\d+?>|^#{Mx[:fa_o]}:p[bn]#{Mx[:fa_c]}|^<:\#|<:- |<[:!]!4|#{Mx[:gr_o]}(?:alt|code|group|poem|table)#{Mx[:gr_c]}|^(?:alt|code|group|poem|table)\{|^\}(?:alt|code|group|poem|table)|^\}table$|<\/tr>|\[endnotes\]|<:zz>|<:isbn-|<:journal-|<:conference-|#{Mx[:br_endnotes]}/i #ocn here
            # sub-number system, (baby numbering) reset with any change of
            # major number (more obviously should be placed in number titles,
            # but that is conditionally executed, check and move later)
            if para =~/^#{Mx[:lv_o]}[1-9]:\S*?#{Mx[:lv_c]}/
              number_small,letter_small=0,0
            end
            if para =~/^#[ 1]/
              letter_small=0
              number_small=0 if para =~ /^#1/
              number_small+=1
              para.gsub!(/^#[ 1]/,"#{number_small}. ") #change 2004
            end
            if para =~/^_# /
              # letter[] yields nil (an empty label) beyond the 26th item
              para.gsub!(/^_# /,"#{Mx[:fa_o]}:i1#{Mx[:fa_c]} #{letter[letter_small]}. ") #change 2004
              letter_small+=1
            end
          end
        end
        @tuned_file << para
      end
      @tuned_file=@tuned_file.flatten
    end

    # Gives every still-unnamed level 4-6 heading a segment name, and inserts
    # page-break markers in front of headings whose level is listed in
    # +@md.pagenew+ / +@md.pagebreak+.
    #
    # paragraph name/numbering rules
    #   manual naming overrides, manual naming may be
    #     alpha-numeric characters mixed,
    #     numeric only (a number), if
    #       all segments have been named,
    #       the numbers used are over 1000 or
    #       it is not minded that auto-numbering uses a funny scheme for
    #       naming segments (not yet implemented)
    #       [for now a warning is printed for such documents on use of
    #       maintenance or very-verbose flag]
    #   auto-naming takes the form of giving numbers to segments
    #   the rules for which are as follows
    #     if the title/heading text starts with a numeric, then that is used
    #     (1 3.1 3rd etc.)
    #     otherwise the level 4 segment number from the embedded document
    #     structure info is used
    #     if there is none a sequential number is designated, preceded by a
    #     tilde ("~")
    #
    # Names handed out are appended to +@md.seg_names+; +@md.set_heading_seg+
    # is set once a level-4 heading is seen or any name exists.
    # Returns the flattened array.
    def name_para_seg_filename(data)
      @tuned_file=[]
      lv_o,lv_c=Mx[:lv_o],Mx[:lv_c]
      art_filename_auto=1
      @counter=1            # not read anywhere in this file
      @unique_auto_name=[]  # not read anywhere in this file
      if !@md.seg_autoname_safe && @md.cmd =~/[MV]/
        puts 'manual segment names, numbers used as names, risk warning (segmented html)'
      end
      data.each do |para|
        para=SiSU_document_structure::Structure.new(@md,para).structure_markup
        if para =~/^#{lv_o}[456]:#{lv_c}/
          if para =~/^#{lv_o}[4]:#{lv_c}/ && !@md.set_heading_seg
            @md.set_heading_seg=true
          end
          # heading starts with a recognised numeric or word followed by a
          # recognised numerical construct, use that as name
          if para =~/^#{lv_o}[456]:#{lv_c}(?:\s*\S+)?\s+([\d.,:-]+)/m
            pattern=$1
            pattern.gsub!(/(?:[:,-]|\W)/,'.')
            pattern.gsub!(/\.$/,'')
            if @md.seg_names && !@md.seg_names.include?(pattern)
              para.gsub!(/^#{lv_o}([456]):#{lv_c}/,"#{lv_o}\\1:#{pattern}#{lv_c}")
              @md.seg_names << pattern
            elsif @md.cmd =~/[VM]/
              puts 'warn, there may be a conflicting numbering scheme'
            end
          end
          # extract segment name from embedded document structure info
          if para =~/^#{lv_o}4:#{lv_c}.+?;4:(\d+);/m
            pattern=$1
            pattern.gsub!(/(?:[:,-]|\W)/,'.')
            pattern.gsub!(/\.$/,'')
            if @md.seg_names && !@md.seg_names.include?(pattern)
              para.gsub!(/^#{lv_o}(4:)#{lv_c}/,"#{lv_o}\\1#{pattern}#{lv_c}")
              @md.seg_names << pattern
            else
              # name already taken (or no name list): prefix with "~"
              para.gsub!(/^#{lv_o}(4:)#{lv_c}/,"#{lv_o}\\1~#{pattern}#{lv_c}")
              @md.seg_names << "~#{pattern}" if @md.seg_names
            end
          end
          # if still not segment name, provide a numerical one
          if para =~/^#{lv_o}4:#{lv_c}/
            # NOTE(review): the name written is "~N" but the bare Integer N is
            # what gets checked and recorded here, unlike the branch above
            # which records the "~"-prefixed string -- confirm which form
            # readers of seg_names expect before changing.
            if @md.seg_names && !@md.seg_names.include?(art_filename_auto)
              para.gsub!(/^#{lv_o}(4:)#{lv_c}/,%{#{lv_o}\\1~#{art_filename_auto}#{lv_c}})
              @md.seg_names << art_filename_auto
            else
              puts 'segment name (numbering) error'
            end
            art_filename_auto+=1
          end
        end
        @tuned_file << if para =~/^#{lv_o}([1-6]):\S*?#{lv_c}/m \
        && (@md.pagenew || @md.pagebreak)
          m=$1 #watch ref~
          para_tmp=[]
          if @md.pagenew.inspect =~/#{m}/
            para_tmp << "#{Mx[:br_page_new]}\n" << para
          elsif @md.pagebreak.inspect =~/#{m}/
            para_tmp << "#{Mx[:br_page]}\n" << para
          end
          para_tmp.empty? ? para : para_tmp
        else
          para
        end
      end
      @md.set_heading_seg=true if @md.seg_names && @md.seg_names.length > 0
      @tuned_file=@tuned_file.flatten
    end

    # Manufactures a level-1 heading when the document has none: the heading
    # (built from +@md.title+, or a placeholder) is inserted before the first
    # paragraph that is neither a header line nor blank, and
    # +@md.set_heading_top+ is set.  Returns +data+ unchanged when the flag is
    # already set.
    def set_heading_top(data) #% make sure no false positives
      return data if @md.set_heading_top
      puts "\tdocument contains no top level heading, (will have to manufacture one)" if @md.cmd =~/[MV]/
      @tuned_file=[]
      data.each do |para|
        unless @md.set_heading_top
          if para !~/^(?:#{Rx[:meta]}|@\S+:|0~\S+)\s/m \
          && para !~/\A\s*\Z/m
            @md.set_heading_top=true
            head=if @md.title
              "#{Mx[:lv_o]}1:#{Mx[:lv_c]} #{@md.title}"
            else
              "#{Mx[:lv_o]}1:#{Mx[:lv_c]} [no title provided]"
            end
            @tuned_file << head
          end
        end
        @tuned_file << para
      end
      @tuned_file=@tuned_file.flatten
    end

    # Manufactures a level-4 "seg" heading when the document has no segment
    # level: it is inserted before the first paragraph that is not a header
    # line, not a level 1-3 heading, not blank and not a page-break marker,
    # and +@md.set_heading_seg+ is set.  Returns +data+ unchanged when the
    # flag is already set.
    def set_heading_seg(data) #% make sure no false positives
      return data if @md.set_heading_seg
      puts "\tdocument contains no segment level, (will have to manufacture one)" if @md.cmd =~/[MV]/
      @tuned_file=[]
      data.each do |para|
        unless @md.set_heading_seg
          if para !~/^(?:#{Rx[:meta]}|#{Mx[:lv_o]}[123]:\S*?#{Mx[:lv_c]})/m \
          && para !~/\A\s*\Z/m \
          && para !~/#{Mx[:br_page]}|#{Mx[:br_page_new]}/
            @md.set_heading_seg=true
            head=if @md.title
              "#{Mx[:lv_o]}4:seg#{Mx[:lv_c]} [#{@md.title}]"
            else
              "#{Mx[:lv_o]}4:seg#{Mx[:lv_c]} [segment]"
            end
            @tuned_file << head
          end
        end
        @tuned_file << para
      end
      @tuned_file=@tuned_file.flatten
    end

    # Manufactures a title header when the document provides none: a title
    # line built from +@md.heading_seg_first+ is inserted before the first
    # paragraph that is neither a "%"/"%%" comment line nor blank;
    # +@md.title+ and +@md.set_header_title+ are updated.  Returns +data+
    # unchanged when the flag is already set.
    def set_header_title(data) #% make sure no false positives
      return data if @md.set_header_title
      puts "\t no document title provided, (will have to manufacture one)" if @md.cmd =~/[MV]/
      @tuned_file=[]
      data.each do |para|
        unless @md.set_header_title
          if para !~/^%{1,2}\s/m \
          && para !~/\A\s*\Z/m
            @tuned_file << "#{Mx[:meta_o]}title#{Mx[:meta_c]} #{@md.heading_seg_first}"
            @md.title=@md.heading_seg_first
            @md.set_header_title=true
          end
        end
        @tuned_file << para
      end
      @tuned_file=@tuned_file.flatten
    end
  end
end