diff options
author | Ralph Amissah <ralph@amissah.com> | 2012-10-03 00:11:08 -0400 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2012-10-03 00:11:08 -0400 |
commit | 804a103722aa7731ca7f2062ee2ebf533607e6aa (patch) | |
tree | a480caebb78925848807692c57c017b3ae5e6839 /lib/sisu/v4/dal_numbering.rb | |
parent | v3: 3.3.3 version & changelog, dates touched (diff) |
v4: 4.0.0 new branch & version & changelog "opened"
Diffstat (limited to 'lib/sisu/v4/dal_numbering.rb')
-rw-r--r-- | lib/sisu/v4/dal_numbering.rb | 469 |
1 files changed, 469 insertions, 0 deletions
diff --git a/lib/sisu/v4/dal_numbering.rb b/lib/sisu/v4/dal_numbering.rb new file mode 100644 index 00000000..f81563f3 --- /dev/null +++ b/lib/sisu/v4/dal_numbering.rb @@ -0,0 +1,469 @@ +# encoding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007, 2008, 2009, 2010, 2011, 2012 Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see <http://www.gnu.org/licenses/>. + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + <http://www.fsf.org/licensing/licenses/gpl.html> + <http://www.gnu.org/licenses/gpl.html> + + <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html> + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + <http://www.jus.uio.no/sisu> + <http://www.sisudoc.org> + + * Download: + <http://www.sisudoc.org/sisu/en/SiSU/download.html> + + * Ralph Amissah + <ralph@amissah.com> + <ralph.amissah@gmail.com> + + ** Description: system environment, resource control and configuration details + +=end +module SiSU_DAL_Numbering + class Numbering + attr_accessor :obj,:osp,:ocn,:lv,:name,:index,:comment + def initialize(md,data) + @md,@data=md,data + @obj=@type=@ocn=@lv=@name=@index=@comment=nil + end + def numbering_song + data=@data + data=number_plaintext_para(data) + data=auto_number_heading_ie_title(data.compact) #tr issue + data=ocn(data.compact) #watch + data=xml(data.compact) + data=minor_numbering(data.compact) + data,tags_map,ocn_html_seg_map=name_para_seg_filename(data) + data=set_heading_top(data) unless @md.set_heading_top + [data,tags_map,ocn_html_seg_map] + end + def number_plaintext_para(data) + @tuned_file=[] + data.each do |dob| + if (dob.of !=:block \ + && dob.of !=:comment \ + && dob.of !=:layout) \ + && dob.ocn_ #and dob.obj !~ /#{Mx[:gr_o]}Th|#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}/ #FIX + dob.obj=dob.obj.gsub(/(.+)\n/,'\1 ') #messy, but idea is that tables should retain breaks + end + unless dob.obj.is_a?(Array) + dob.obj=dob.obj.gsub(/^\s+/,''). + gsub(/\s$/,"\n") + end + @tuned_file << dob + end + @tuned_file=@tuned_file.flatten + end + def number_sub_heading(dob,num,title_no) + unless dob.obj =~/\d+\.|(?:chapter|article|section|clause)\s+\d+/i #name selection arbitrary, fix + dob.obj=case dob.name + when /-/; dob.obj.gsub(/^/,"#{title_no} ") + when /^#/; dob.obj.gsub(/^/,"#{title_no} ") + when /^[a-z_\.]+/; dob.obj.gsub(/^/,"#{title_no} ") + else + dob.name=title_no if dob.name=~/^$/ #where title contains title number + dob.obj.gsub(/^/,"#{title_no} ") if title_no =~/\d+/ #main, where title number is to be provided #watch changed placement + end + if @md.toc_lev_limit \ + and @md.toc_lev_limit < num + dob.obj=dob.obj.gsub(/^/,'!_ ') #bold line, watch + end + end + dob + end + def heading_tag_clean(heading_tag) + heading_tag=heading_tag.gsub(/[ ]+/,'_'). + gsub(/["']/,''). + gsub(/[\/]/,'-'). + gsub(/#{Mx[:fa_bold_o]}|#{Mx[:fa_bold_c]}/,''). + gsub(/#{Mx[:fa_italics_o]}|#{Mx[:fa_italics_c]}/,''). + gsub(/#{Mx[:fa_underscore_o]}|#{Mx[:fa_underscore_c]}/,''). + gsub(/#{Mx[:fa_cite_o]}|#{Mx[:fa_cite_c]}/,''). + gsub(/#{Mx[:fa_insert_o]}|#{Mx[:fa_insert_c]}/,''). + gsub(/#{Mx[:fa_strike_o]}|#{Mx[:fa_strike_c]}/,''). + gsub(/#{Mx[:fa_superscript_o]}|#{Mx[:fa_superscript_c]}/,''). + gsub(/#{Mx[:fa_subscript_o]}|#{Mx[:fa_subscript_c]}/,''). + gsub(/#{Mx[:fa_hilite_o]}|#{Mx[:fa_hilite_c]}/,''). + gsub(/#{Mx[:gl_bullet]}/,'') + end + def auto_number_heading_ie_title(data) #also does some segment naming + @tuned_file=[] + if defined? @md.make.num_top \ + and @md.make.num_top \ + and @md.make.num_top !~/^$/ + input||=@md.make.num_top + end + num_top=(input ? input.to_i : nil) + t_no1=t_no2=t_no3=t_no4=0 + if num_top + no1=num_top; no2=(num_top + 1); no3=(num_top + 2); no4=(num_top + 3) + end + t_not=0 + chapter_number_counter=0 + data=data.compact + data.each do |dob| #@md.seg_names << [additions to segment names] + title_no=nil + dob=SiSU_DAL_DocumentStructureExtract::Structure.new(@md,dob).structure_markup #must happen earlier, node info etc. require + if dob.is ==:heading \ + && dob.autonum_ \ + and defined? @md.make.num_top \ + and @md.make.num_top !~/^$/ + if dob.lv=='1' \ + and dob.obj =~/^#\s|\s#(?:\s|$)/ + chapter_number_counter +=1 + dob.obj=dob.obj.gsub(/^#\s/,"#{chapter_number_counter} "). + gsub(/#([:,]?\s|[.]?$)/,"#{chapter_number_counter}\\1") + end + if dob.ln==no1 + @subnumber=1 + @subnumber=0 if dob.ln==no1 + end + if dob.ln.to_s =~/^[1-6]/ \ + and not dob.toc_ \ + and dob.obj !~/#{Mx[:fa_o]}(?:~#|-#)#{Mx[:fa_c]}/ # <-- fix + if dob.ln==no1 + t_no1+=1; t_no2=0; t_no3=0 + title_no="#{t_no1}" + if @md.seg_names.is_a?(Array) \ + and not @md.seg_names.include?(title_no) + if dob.ln==no1 + dob.name="#{title_no}" if not dob.name + dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs + tag=dob.obj.gsub(/(Article|Clause|Section|Chapter)\s+/,"\\1_#{title_no}").downcase + tag=heading_tag_clean(tag) + dob.tags=[tag,dob.tags].flatten if tag !~/^\d+$/ #check whether will work across file types with stop signs + dob.obj=(dob.obj =~/(Article|Clause|Section)\s+/) \ + ? (dob.obj.gsub(/(Article|Clause|Section)\s+/,"\\1 #{title_no} ")) + : (dob.obj.gsub(/^/,"#{title_no}. ")) #fix stop later + end + if dob.ln !=no1 \ + and dob.obj =~/^[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review + dob.name ="#{title_no}" if not dob.name + dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs + dob.obj=dob.obj.gsub(/^/,"#{title_no}. ") + end + @md.seg_names << title_no + end + if dob.ln!=no1 \ + and dob.name!~/^[a-z_\.]+$/ \ + and dob.obj !~/[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required # not fixed, work on + dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs + dob.obj=dob.obj.gsub(/^/i,"#{title_no}. ") + end + end + if dob.ln==no1 #watch because here you change dob.name + dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs + end + if dob.ln==no2 #watch because here you change dob.name + t_no2+=1; t_no3=0 + title_no="#{t_no1}.#{t_no2}" + dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs + dob=number_sub_heading(dob,no2,title_no) + end + if dob.ln==no3 #watch because here you change dob.name + t_no3+=1 + title_no="#{t_no1}.#{t_no2}.#{t_no3}" + dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs + dob=number_sub_heading(dob,no3,title_no) + end + elsif dob.ln.to_s =~/^[1-6]/ \ + and dob.name =~ /^[\w-]+-/ # endnotes, watch2005# endnotes, watch2005 + dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs + dob.name.gsub(/^([a-z_\.]+)-$/,'\1') + end + elsif dob.is ==:heading \ + and dob.autonum_ \ + and @md.markup =~/num_extract/ #AS DANGEROUS force enable with document, note already does this type of numbering for cisg, locate and coordinate logic, is currently misplaced in code, chengwei inspired 2004w23/4 + #here lies a bug, as is nil when run from -Dv --update, FIX + if (dob.name.nil? or dob.name.empty?) \ + and dob.ln.to_s =~/^[1-9]/ \ + and dob.obj =~ /^([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d + dob.name=$1 + dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs + end + if @md.toc_lev_limit + end + elsif defined? dob.name \ + and dob.name + dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs + end + dob.tags=dob.tags.uniq if defined? dob.tags + @tuned_file << dob + end + @tuned_file=@tuned_file.flatten + end + def ocn(data) #and auto segment numbering increment + @tuned_file=SiSU_DAL_DocumentStructureExtract::OCN.new(@md,data).ocn + @tuned_file + end + def xml(data) + @tuned_file=SiSU_DAL_DocumentStructureExtract::XML.new(@md,data).dom + @tuned_file + end + def minor_numbering(data) #and auto segment numbering increment + @tuned_file=[] + number_small,letter_small=0,0 + letter=%w( a b c d e f g h i j k l m n o p q r s t u v w x y z ) + data.each do |dob| + if dob.of ==:heading \ + || dob.of ==:heading_insert \ + || dob.of ==:para \ + || dob.of ==:block + if dob.is ==:heading \ + and dob.ln.to_s=~/^[1-9]/ #% sub-number system, (baby numbering) reset with any change of major number (more obviously should be placed in number titles, but that is conditionally executed, check and move later) + number_small,letter_small=0,0 + elsif dob.is ==:para + if dob.obj =~/^#[ 1]/ \ + and dob.obj !~/^#\s+(?:~#)?$/ + letter_small=0 + number_small=0 if dob.obj =~ /^#1/ + number_small+=1 + dob.obj=dob.obj.gsub(/^#[ 1]/,"#{number_small}. ") + end + if dob.obj =~/^_# / + dob.obj=dob.obj.gsub(/^_# /,"#{letter[letter_small]}. ") + dob.indent='1' + letter_small+=1 + end + end + end + @tuned_file << dob + end + @tuned_file=@tuned_file.flatten + end + def name_para_seg_filename(data) #segment naming, remaining + # paragraph name/numbering rules + # manual naming overrides, manual naming may be + # alpha-numeric characters mixed, + # numeric only (a number), if + # all segments have been named, + # the numbers used are over 1000 or + # it is not minded that auto-numbering uses a funny scheme for naming segments (not yet implemented) + # [for now a warning is printed for such documents on use of maintenance or very-verbose flag] + # auto-naming takes the form of giving numbers to segments + # the rules for which are as follows + # if the title/heading text starts with a numeric, then that is used (1 3.1 3rd etc.) + # otherwise the level 4 segment number from the embedded document structure info is used + # if there is none a sequential number is designated, preceded by an underscore + @tuned_file,@unique_auto_name=[],[] + tags={} + art_filename_auto=1 + @counter=1 + if not @md.seg_autoname_safe and @md.opt.cmd =~/[MV]/ + puts 'manual segment names, numbers used as names, risk warning (segmented html)' + end + ocn_html_seg=[] + data.each do |dob| + if dob.is==:heading \ + && dob.ln \ + and dob.ln.to_s =~/^[456]/ + if dob.ln==4 \ + and not dob.name \ + and not @md.set_heading_seg + @md.set_heading_seg=true + end + if dob.name !~/^\S+/ \ + and dob.obj =~/^\s*(?:\S+\s+)?([\d.,:-]+)/m #heading starts with a recognised numeric or word followed by a recognised numerical construct, use that as name + possible_seg_name=$1 + possible_seg_name=possible_seg_name.gsub(/(?:[:,-]|\W)/,'.'). + gsub(/\.$/,'') + if @md.seg_names.is_a?(Array) \ + and not @md.seg_names.include?(possible_seg_name) + dob.name=possible_seg_name + dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ + @md.seg_names << possible_seg_name + else puts 'warn, there may be a conflicting numbering scheme' if @md.opt.cmd =~/[VM]/ + end + end + if dob.ln==4 \ + and dob.name #extract segment name from embedded document structure info + if @md.seg_names.is_a?(Array) \ + and not @md.seg_names.include?(dob.name) + dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ + @md.seg_names << dob.name + end + end + if dob.ln==4 \ + and not dob.name #if still no segment name, provide a numerical one + pf='_' #pg='' #may use e.g. '' or '~' or '_' + segn_auto="#{pf}#{art_filename_auto.to_s}" + if @md.seg_names.is_a?(Array) \ + and not @md.seg_names.include?(segn_auto) + dob.name=segn_auto + dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs + @md.seg_names << segn_auto + else puts 'segment name (numbering) error' + end + art_filename_auto+=1 + end + if dob.ln==4 \ + and not dob.name #should not occur + puts "e r r o r -\t#{__FILE__}::#{__LINE__}\n#{dob.inspect}" + end + end + if (dob.is ==:heading \ + || dob.is ==:heading_insert) \ + && dob.ln==4 + @seg=dob.name + end + @tuned_file << if dob.is==:heading \ + && (@md.pagenew || @md.pagebreak) + m=dob.ln.to_s + dob_tmp=[] + if @md.pagenew.inspect =~/#{m}/ + dob_tmp << SiSU_DAL_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_new]) << dob + elsif @md.pagebreak.inspect =~/#{m}/ + dob_tmp << SiSU_DAL_DocumentStructure::ObjectLayout.new.break(Hx[:br_page]) << dob + end + para_result=unless dob_tmp.length > 0; dob + else dob_tmp + end + else dob + end + if defined? dob.ocn \ + and dob.ocn + @segname=((dob.is==:heading || dob.is==:heading_insert) && dob.ln==4 && (defined? dob.name)) \ + ? (dob.name) + : @segname + tags["#{dob.ocn}"]={ segname: @segname } + ocn_html_seg[dob.ocn]=if (dob.is==:heading || dob.is==:heading_insert) + x=if dob.ln =~/[1-3]/ + { seg: nil, level: dob.ln } + else #elsif dob.ln =~/[4-6]/ + { seg: @seg, level: dob.ln } + end + else + { seg: @seg, level: nil } + end + end + dob.tags=dob.tags.uniq if defined? dob.tags + if defined? dob.tags \ + and dob.tags.length > 0 + #@segname=((dob.is=='heading'|| dob.is=='heading_insert') && dob.ln==4 && (defined? dob.name)) \ + #? (dob.name) \ + #: @segname + dob.tags.each do |x| + tags[x]={ ocn: dob.ocn.to_s, segname: @segname } + end + end + dob + end + ocn_html_seg.each_with_index do |ocn,i| + if ocn \ + and ocn[:level].to_s=~/[1-3]/ + ocn_seg=nil + (1..4).each do |x| + if ocn_html_seg[i+x] and ocn_html_seg[i+x][:level]==4 + ocn[:seg]=ocn_html_seg[i+x][:seg] + end + end + end + end + if @md.seg_names.length > 0 + @md.set_heading_seg=true + end + tuned_file=@tuned_file.flatten + [tuned_file,tags,ocn_html_seg] + end + def set_heading_top(data) #% make sure no false positives + unless @md.set_heading_top + puts "\tdocument contains no top level heading, (will have to manufacture one)" if @md.opt.cmd =~/[MV]/ + @tuned_file=[] + data.each do |t_o| + unless @md.set_heading_top + if t_o !~/^(?:#{Rx[:meta]}|@\S+:)\s/m \ + and t_o !~/\A\s*\Z/m + @md.set_heading_top=true + if defined? @md.title \ + and @md.title \ + and defined? @md.title.full \ + and defined? @md.creator \ + and @md.creator + head=@md.title.main ? ([@lv='1',@obj=@md.title.main]) : ([@lv='1',@obj='[no title provided]']) + @tuned_file << head + end + end + end + @tuned_file << t_o + end + @tuned_file=@tuned_file.flatten + end + end + def set_heading_seg(data) #% make sure no false positives + unless @md.set_heading_seg + puts "\tdocument contains no segment level, (will have to manufacture one)" if @md.opt.cmd =~/[MV]/ + @tuned_file=[] + data.each do |dob| + unless @md.set_heading_seg + if defined? dob.ln and dob.ln.to_s !~/^[123]/m \ + and dob.obj !~/\A\s*\Z/m \ + and dob.is !=:layout + @md.set_heading_seg=true + head=@md.title.main \ + ? (dob.ln,dob.name,dob.obj=4,'seg',@md.title.main) + : (dob.ln,dob.name,dob.obj=4,'seg','[segment]') + @tuned_file << head + end + end + @tuned_file << dob + end + @tuned_file=@tuned_file.flatten + end + end + def set_header_title(data) #% make sure no false positives + unless @md.set_header_title + puts "\t no document title provided, (will have to manufacture one)" if @md.opt.cmd =~/[MV]/ + @tuned_file=[] + data.each do |t_o| + unless @md.set_header_title + if t_o !~/^%{1,2}\s/m \ + and t_o !~/\A\s*\Z/m + @tuned_file << "#{Mx[:meta_o]}title#{Mx[:meta_c]} #{@md.heading_seg_first}" + @md.title.main=@md.heading_seg_first + @md.set_header_title=true + end + end + @tuned_file << t_o + end + @tuned_file=@tuned_file.flatten + end + end + end +end +__END__ |