aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/sisu/v3dv/dal_numbering.rb
diff options
context:
space:
mode:
author: Ralph Amissah <ralph@amissah.com> 2012-01-10 22:37:26 -0500
committer: Ralph Amissah <ralph@amissah.com> 2012-01-10 22:42:20 -0500
commit: 75e3bf86382edf99275a25895b362647158e25c1 (patch)
tree: 7ec458f15d0bf981c7e044244a8cbf55205141b7 /lib/sisu/v3dv/dal_numbering.rb
parent: v3: date, year 2012, update (diff)
v3dv, add dev branch (use to make some changes to module & class names & test)
* (intended as) short term branch, merge back into v3 once tested * sisu --dev (to invoke)
Diffstat (limited to 'lib/sisu/v3dv/dal_numbering.rb')
-rw-r--r--lib/sisu/v3dv/dal_numbering.rb465
1 files changed, 465 insertions, 0 deletions
diff --git a/lib/sisu/v3dv/dal_numbering.rb b/lib/sisu/v3dv/dal_numbering.rb
new file mode 100644
index 00000000..4b12793f
--- /dev/null
+++ b/lib/sisu/v3dv/dal_numbering.rb
@@ -0,0 +1,465 @@
+# encoding: utf-8
+=begin
+
+ * Name: SiSU
+
+ * Description: a framework for document structuring, publishing and search
+
+ * Author: Ralph Amissah
+
+ * Copyright: (C) 1997 - 2012, Ralph Amissah, All Rights Reserved.
+
+ * License: GPL 3 or later:
+
+ SiSU, a framework for document structuring, publishing and search
+
+ Copyright (C) Ralph Amissah
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program. If not, see <http://www.gnu.org/licenses/>.
+
+ If you have Internet connection, the latest version of the GPL should be
+ available at these locations:
+ <http://www.fsf.org/licensing/licenses/gpl.html>
+ <http://www.gnu.org/copyleft/gpl.html>
+
+ <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
+ <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
+ <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>
+
+ * SiSU uses:
+ * Standard SiSU markup syntax,
+ * Standard SiSU meta-markup syntax, and the
+ * Standard SiSU object citation numbering and system
+
+ * Homepages:
+ <http://www.jus.uio.no/sisu>
+ <http://www.sisudoc.org>
+
+ * Download:
+ <http://www.jus.uio.no/sisu/SiSU/download.html>
+
+ * Ralph Amissah
+ <ralph@amissah.com>
+ <ralph.amissah@gmail.com>
+
+ ** Description: system environment, resource control and configuration details
+
+=end
+module SiSU_numbering
+ class Numbering
+ attr_accessor :obj,:osp,:ocn,:lv,:name,:index,:comment
+ def initialize(md,data)
+ @md,@data=md,data
+ @obj=@type=@ocn=@lv=@name=@index=@comment=nil
+ end
+ def numbering_song
+ data=@data
+ data=number_plaintext_para(data)
+ data=auto_number_heading_ie_title(data.compact) #tr issue
+ data=ocn(data.compact) #watch
+ data=xml(data.compact)
+ data=minor_numbering(data.compact)
+ data,tags_map,ocn_html_seg_map=name_para_seg_filename(data)
+ data=set_heading_top(data) unless @md.set_heading_top
+ [data,tags_map,ocn_html_seg_map]
+ end
+ def number_plaintext_para(data)
+ @tuned_file=[]
+ data.each do |dob|
+ if dob.of !~/(?:block|comment|layout)/ and dob.ocn_ #and dob.obj !~ /#{Mx[:gr_o]}Th|#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}/ #FIX
+ dob.obj.gsub!(/(.+)\n/,'\1 ') #messy, but idea is that tables should retain breaks
+ end
+ unless dob.obj.class==Array
+ dob.obj.gsub!(/^\s+/,'')
+ dob.obj.gsub!(/\s$/,"\n")
+ end
+ @tuned_file << dob
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ def number_sub_heading(dob,num,title_no)
+ unless dob.obj =~/\d+\.|(?:chapter|article|section|clause)\s+\d+/i #name selection arbitrary, fix
+ case dob.name
+ when /-/; dob.obj.gsub!(/^/,"#{title_no} ")
+ when /^#/; dob.obj.gsub!(/^/,"#{title_no} ")
+ when /^[a-z_\.]+/
+ dob.obj.gsub!(/^/,"#{title_no} ")
+ else
+ dob.name=title_no if dob.name=~/^$/ #where title contains title number
+ dob.obj.gsub!(/^/,"#{title_no} ") if title_no =~/\d+/ #main, where title number is to be provided #watch changed placement
+ end
+ if @md.toc_lev_limit \
+ and @md.toc_lev_limit < num
+ dob.obj.gsub!(/^/,'!_ ') #bold line, watch
+ end
+ end
+ dob
+ end
+ def heading_tag_clean(heading_tag)
+ heading_tag.gsub!(/[ ]+/,'_')
+ heading_tag.gsub!(/["']/,'')
+ heading_tag.gsub!(/[\/]/,'-')
+ heading_tag.gsub!(/#{Mx[:fa_bold_o]}|#{Mx[:fa_bold_c]}/,'')
+ heading_tag.gsub!(/#{Mx[:fa_italics_o]}|#{Mx[:fa_italics_c]}/,'')
+ heading_tag.gsub!(/#{Mx[:fa_underscore_o]}|#{Mx[:fa_underscore_c]}/,'')
+ heading_tag.gsub!(/#{Mx[:fa_cite_o]}|#{Mx[:fa_cite_c]}/,'')
+ heading_tag.gsub!(/#{Mx[:fa_insert_o]}|#{Mx[:fa_insert_c]}/,'')
+ heading_tag.gsub!(/#{Mx[:fa_strike_o]}|#{Mx[:fa_strike_c]}/,'')
+ heading_tag.gsub!(/#{Mx[:fa_superscript_o]}|#{Mx[:fa_superscript_c]}/,'')
+ heading_tag.gsub!(/#{Mx[:fa_subscript_o]}|#{Mx[:fa_subscript_c]}/,'')
+ heading_tag.gsub!(/#{Mx[:fa_hilite_o]}|#{Mx[:fa_hilite_c]}/,'')
+ heading_tag.gsub!(/#{Mx[:gl_bullet]}/,'')
+ heading_tag
+ end
+ # Auto-numbers headings and collects heading tags / segment names.
+ # Each element of data is first replaced by the result of
+ # SiSU_document_structure_extract::Structure#structure_markup; headings
+ # flagged autonum_ are then numbered relative to @md.make.num_top, and
+ # names/tags are added to the object and to @md.seg_names.
+ # Returns the flattened list of processed objects (also kept in @tuned_file).
+ def auto_number_heading_ie_title(data) #also does some segment naming
+ @tuned_file=[]
+ # num_top is the integer value of @md.make.num_top when that is set and non-empty
+ if defined? @md.make.num_top \
+ and @md.make.num_top \
+ and @md.make.num_top !~/^$/
+ input||=@md.make.num_top
+ end
+ num_top=(input ? input.to_i : nil)
+ t_no1=t_no2=t_no3=t_no4=0
+ # no1..no4: the heading levels num_top, num_top+1, num_top+2, num_top+3 (nil when num_top is unset)
+ if num_top
+ no1=num_top; no2=(num_top + 1); no3=(num_top + 2); no4=(num_top + 3)
+ end
+ t_not=0
+ data.compact!
+ chapter_number_counter=0
+ data.each do |dob| #@md.seg_names << [additions to segment names]
+ title_no=nil
+ dob=SiSU_document_structure_extract::Structure.new(@md,dob).structure_markup #must happen earlier, node info etc. require
+ if dob.is =='heading' \
+ and dob.autonum_ \
+ and defined? @md.make.num_top \
+ and @md.make.num_top !~/^$/
+ # lv '1' headings carrying a '#' placeholder get the running chapter counter substituted for it
+ if dob.lv=='1' \
+ and dob.obj =~/^#\s|\s#(?:\s|$)/
+ chapter_number_counter +=1
+ dob.obj.gsub!(/^#\s/,"#{chapter_number_counter} ")
+ dob.obj.gsub!(/#([:,]?\s|[.]?$)/,"#{chapter_number_counter}\\1")
+ end
+ # NOTE(review): @subnumber is set to 1 and then immediately to 0 under the same condition; it is not read anywhere in this method
+ if dob.ln==no1
+ @subnumber=1
+ @subnumber=0 if dob.ln==no1
+ end
+ if dob.ln.to_s =~/^[1-6]/ \
+ and not dob.toc_ \
+ and dob.obj !~/#{Mx[:fa_o]}(?:~#|-#)#{Mx[:fa_c]}/ # <-- fix
+ # first numbered level: bump t_no1, reset the two sub-counters
+ if dob.ln==no1
+ t_no1+=1; t_no2=0; t_no3=0
+ title_no="#{t_no1}"
+ if not @md.seg_names.nil? \
+ and not @md.seg_names.include?(title_no)
+ # NOTE(review): already inside `if dob.ln==no1`, so this test is always true here
+ if dob.ln==no1
+ dob.name="#{title_no}" if not dob.name
+ dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs
+ tag=dob.obj.gsub(/(Article|Clause|Section|Chapter)\s+/,"\\1_#{title_no}").downcase
+ tag=heading_tag_clean(tag)
+ dob.tags=[tag,dob.tags].flatten if tag !~/^\d+$/ #check whether will work across file types with stop signs
+ (dob.obj =~/(Article|Clause|Section)\s+/) \
+ ? (dob.obj.gsub!(/(Article|Clause|Section)\s+/,"\\1 #{title_no} "))
+ : (dob.obj.gsub!(/^/,"#{title_no}. ")) #fix stop later
+ end
+ # NOTE(review): inside `if dob.ln==no1`, so `dob.ln !=no1` cannot hold and this branch looks unreachable
+ if dob.ln !=no1 \
+ and dob.obj =~/^[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review
+ dob.name ="#{title_no}" if not dob.name
+ dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs
+ dob.obj.gsub!(/^/,"#{title_no}. ")
+ end
+ @md.seg_names << title_no
+ end
+ # NOTE(review): same enclosing `if dob.ln==no1`, so this branch also looks unreachable
+ if dob.ln!=no1 \
+ and dob.name!~/^[a-z_\.]+$/ \
+ and dob.obj !~/[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required # not fixed, work on
+ dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs
+ dob.obj.gsub!(/^/i,"#{title_no}. ")
+ end
+ end
+ # an "h<number>" tag is added for each of the three numbered levels
+ if dob.ln==no1 #watch because here you change dob.name
+ dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs
+ end
+ if dob.ln==no2 #watch because here you change dob.name
+ t_no2+=1; t_no3=0
+ title_no="#{t_no1}.#{t_no2}"
+ dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs
+ dob=number_sub_heading(dob,no2,title_no)
+ end
+ if dob.ln==no3 #watch because here you change dob.name
+ t_no3+=1
+ title_no="#{t_no1}.#{t_no2}.#{t_no3}"
+ dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs
+ dob=number_sub_heading(dob,no3,title_no)
+ end
+ elsif dob.ln.to_s =~/^[1-6]/ \
+ and dob.name =~ /^[\w-]+-/ # endnotes, watch2005# endnotes, watch2005
+ dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
+ # NOTE(review): non-bang gsub whose result is discarded, so dob.name is left unchanged by this line
+ dob.name.gsub(/^([a-z_\.]+)-$/,'\1')
+ end
+ elsif dob.is =='heading' \
+ and dob.autonum_ \
+ and @md.markup =~/num_extract/ #AS DANGEROUS force enable with document, note already does this type of numbering for cisg, locate and coordinate logic, is currently misplaced in code, chengwei inspired 2004w23/4
+ #here lies a bug, as is nil when run from -Dv --update, FIX
+ # an unnamed heading takes the leading digits/dots of its own text as its name
+ if (dob.name.nil? or dob.name.empty?) \
+ and dob.ln.to_s =~/^[1-9]/ \
+ and dob.obj =~ /^([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d
+ dob.name=$1
+ dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
+ end
+ # NOTE(review): empty conditional, no effect
+ if @md.toc_lev_limit
+ end
+ elsif defined? dob.name \
+ and dob.name
+ dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
+ end
+ dob.tags=dob.tags.uniq if defined? dob.tags
+ @tuned_file << dob
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ def ocn(data) #and auto segment numbering increment
+ @tuned_file=SiSU_document_structure_extract::OCN.new(@md,data).ocn
+ @tuned_file
+ end
+ def xml(data)
+ @tuned_file=SiSU_document_structure_extract::XML.new(@md,data).dom
+ @tuned_file
+ end
+ def minor_numbering(data) #and auto segment numbering increment
+ @tuned_file=[]
+ number_small,letter_small=0,0
+ letter=%w( a b c d e f g h i j k l m n o p q r s t u v w x y z )
+ data.each do |dob|
+ if dob.of =~/heading|para|block/
+ if dob.is =='heading' \
+ and dob.ln.to_s=~/^[1-9]/ #% sub-number system, (baby numbering) reset with any change of major number (more obviously should be placed in number titles, but that is conditionally executed, check and move later)
+ number_small,letter_small=0,0
+ elsif dob.is =~/para/
+ if dob.obj =~/^#[ 1]/ \
+ and dob.obj !~/^#\s+(?:~#)?$/
+ letter_small=0
+ number_small=0 if dob.obj =~ /^#1/
+ number_small+=1
+ dob.obj.gsub!(/^#[ 1]/,"#{number_small}. ") #change 2004
+ end
+ if dob.obj =~/^_# /
+ dob.obj.gsub!(/^_# /,"#{letter[letter_small]}. ") #change 2004
+ dob.indent='1'
+ letter_small+=1
+ end
+ end
+ end
+ @tuned_file << dob
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ # Names segments (headings at levels 4-6) and builds the tag and ocn lookup maps.
+ # Returns [objects, tags, ocn_html_seg] where
+ #   objects      - the flattened object list, with page-break layout objects
+ #                  inserted in front of headings selected by @md.pagenew/@md.pagebreak
+ #   tags         - hash: ocn (as string) => { segname: }, and tag => { ocn:, segname: }
+ #   ocn_html_seg - array indexed by ocn holding { seg:, level: }
+ # Side effects: appends to @md.seg_names and may set @md.set_heading_seg.
+ def name_para_seg_filename(data) #segment naming, remaining
+ # paragraph name/numbering rules
+ # manual naming overrides, manual naming may be
+ # alpha-numeric characters mixed,
+ # numeric only (a number), if
+ # all segments have been named,
+ # the numbers used are over 1000 or
+ # it is not minded that auto-numbering uses a funny scheme for naming segments (not yet implemented)
+ # [for now a warning is printed for such documents on use of maintenance or very-verbose flag]
+ # auto-naming takes the form of giving numbers to segments
+ # the rules for which are as follows
+ # if the title/heading text starts with a numeric, then that is used (1 3.1 3rd etc.)
+ # otherwise the level 4 segment number from the embedded document structure info is used
+ # if there is none a sequential number is designated, preceded by an underscore
+ @tuned_file,@unique_auto_name=[],[]
+ tags={}
+ art_filename_auto=1
+ @counter=1
+ if not @md.seg_autoname_safe and @md.opt.cmd =~/[MV]/
+ puts 'manual segment names, numbers used as names, risk warning (segmented html)'
+ end
+ ocn_html_seg=[]
+ data.each do |dob|
+ # --- naming: only headings whose level starts with 4, 5 or 6 ---
+ if dob.is=='heading' \
+ and dob.ln \
+ and dob.ln.to_s =~/^[456]/
+ if dob.ln==4 \
+ and not dob.name \
+ and not @md.set_heading_seg
+ @md.set_heading_seg=true
+ end
+ if dob.name !~/^\S+/ \
+ and dob.obj =~/^\s*(?:\S+\s+)?([\d.,:-]+)/m #heading starts with a recognised numeric or word followed by a recognised numerical construct, use that as name
+ possible_seg_name=$1
+ # separators are normalised to '.', and a trailing '.' is dropped
+ possible_seg_name.gsub!(/(?:[:,-]|\W)/,'.')
+ possible_seg_name.gsub!(/\.$/,'')
+ if not @md.seg_names.nil? \
+ and not @md.seg_names.include?(possible_seg_name)
+ dob.name=possible_seg_name
+ dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/
+ @md.seg_names << possible_seg_name
+ else puts 'warn, there may be a conflicting numbering scheme' if @md.opt.cmd =~/[VM]/
+ end
+ end
+ if dob.ln==4 \
+ and dob.name #extract segment name from embedded document structure info
+ if not @md.seg_names.nil? \
+ and not @md.seg_names.include?(dob.name)
+ dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/
+ @md.seg_names << dob.name
+ end
+ end
+ if dob.ln==4 \
+ and not dob.name #if still no segment name, provide a numerical one
+ pf='_' #pg='' #may use e.g. '' or '~' or '_'
+ segn_auto="#{pf}#{art_filename_auto.to_s}"
+ if not @md.seg_names.nil? \
+ and not @md.seg_names.include?(segn_auto)
+ dob.name=segn_auto
+ dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
+ @md.seg_names << segn_auto
+ else puts 'segment name (numbering) error'
+ end
+ art_filename_auto+=1
+ end
+ if dob.ln==4 \
+ and not dob.name #should not occur
+ puts "e r r o r -\t#{__FILE__}::#{__LINE__}\n#{dob.inspect}"
+ end
+ end
+ # @seg tracks the name of the most recent level 4 heading
+ if dob.is =~/heading/ \
+ and dob.ln==4
+ @seg=dob.name
+ end
+ # --- output: a heading whose level digit occurs in @md.pagenew / @md.pagebreak (matched against
+ # their inspect strings) is preceded by a page-break layout object; otherwise dob is appended as is ---
+ # NOTE(review): para_result below is assigned but never read; only the expression's value is used
+ @tuned_file << if dob.is=='heading' \
+ and (@md.pagenew or @md.pagebreak)
+ m=dob.ln.to_s
+ dob_tmp=[]
+ if @md.pagenew.inspect =~/#{m}/
+ dob_tmp << SiSU_document_structure::Object_layout.new.break(Hx[:br_page_new]) << dob
+ elsif @md.pagebreak.inspect =~/#{m}/
+ dob_tmp << SiSU_document_structure::Object_layout.new.break(Hx[:br_page]) << dob
+ end
+ para_result=unless dob_tmp.length > 0; dob
+ else dob_tmp
+ end
+ else dob
+ end
+ # --- lookup maps: only for objects that have an ocn ---
+ if defined? dob.ocn \
+ and dob.ocn
+ @segname=((dob.is=='heading'|| dob.is=='heading_insert') && dob.ln==4 && (defined? dob.name)) \
+ ? (dob.name)
+ : @segname
+ tags["#{dob.ocn}"]={ segname: @segname }
+ # NOTE(review): dob.ln is compared with integers elsewhere in this method (dob.ln==4); if it is
+ # an Integer, `dob.ln =~/[1-3]/` never matches and the else arm is always taken - confirm
+ # whether dob.ln.to_s was intended
+ ocn_html_seg[dob.ocn]=if dob.is =~/heading/
+ x=if dob.ln =~/[1-3]/
+ { seg: nil, level: dob.ln }
+ else #elsif dob.ln =~/[4-6]/
+ { seg: @seg, level: dob.ln }
+ end
+ else
+ { seg: @seg, level: nil }
+ end
+ end
+ dob.tags=dob.tags.uniq if defined? dob.tags
+ if defined? dob.tags \
+ and dob.tags.length > 0
+ #@segname=((dob.is=='heading'|| dob.is=='heading_insert') && dob.ln==4 && (defined? dob.name)) \
+ #? (dob.name) \
+ #: @segname
+ dob.tags.each do |x|
+ tags[x]={ ocn: dob.ocn.to_s, segname: @segname }
+ end
+ end
+ dob
+ end
+ # back-fill: a level 1-3 entry takes the seg of a level 4 entry found in the next four ocn slots;
+ # the loop does not stop at the first hit, so the last matching slot wins
+ # NOTE(review): ocn_seg is assigned but never used
+ ocn_html_seg.each_with_index do |ocn,i|
+ if ocn \
+ and ocn[:level].to_s=~/[1-3]/
+ ocn_seg=nil
+ (1..4).each do |x|
+ if ocn_html_seg[i+x] and ocn_html_seg[i+x][:level]==4
+ ocn[:seg]=ocn_html_seg[i+x][:seg]
+ end
+ end
+ end
+ end
+ if @md.seg_names.length > 0
+ @md.set_heading_seg=true
+ end
+ tuned_file=@tuned_file.flatten
+ [tuned_file,tags,ocn_html_seg]
+ end
+ def set_heading_top(data) #% make sure no false positives
+ unless @md.set_heading_top
+ puts "\tdocument contains no top level heading, (will have to manufacture one)" if @md.opt.cmd =~/[MV]/
+ @tuned_file=[]
+ data.each do |t_o|
+ unless @md.set_heading_top
+ if t_o !~/^(?:#{Rx[:meta]}|@\S+:)\s/m \
+ and t_o !~/\A\s*\Z/m
+ @md.set_heading_top=true
+ if defined? @md.title \
+ and @md.title \
+ and defined? @md.title.full \
+ and defined? @md.creator \
+ and @md.creator
+ head=@md.title.main ? ([@lv='1',@obj=@md.title.main]) : ([@lv='1',@obj='[no title provided]'])
+ @tuned_file << head
+ end
+ end
+ end
+ @tuned_file << t_o
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ end
+ def set_heading_seg(data) #% make sure no false positives
+ unless @md.set_heading_seg
+ puts "\tdocument contains no segment level, (will have to manufacture one)" if @md.opt.cmd =~/[MV]/
+ @tuned_file=[]
+ data.each do |dob|
+ unless @md.set_heading_seg
+ if defined? dob.ln and dob.ln.to_s !~/^[123]/m \
+ and dob.obj !~/\A\s*\Z/m \
+ and dob.is !='layout'
+ @md.set_heading_seg=true
+ head=if @md.title.main ; dob.ln,dob.name,dob.obj=4,'seg',@md.title.main
+ else dob.ln,dob.name,dob.obj=4,'seg','[segment]'
+ end
+ @tuned_file << head
+ end
+ end
+ @tuned_file << dob
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ end
+ def set_header_title(data) #% make sure no false positives
+ unless @md.set_header_title
+ puts "\t no document title provided, (will have to manufacture one)" if @md.opt.cmd =~/[MV]/
+ @tuned_file=[]
+ data.each do |t_o|
+ unless @md.set_header_title
+ if t_o !~/^%{1,2}\s/m \
+ and t_o !~/\A\s*\Z/m
+ @tuned_file << "#{Mx[:meta_o]}title#{Mx[:meta_c]} #{@md.heading_seg_first}"
+ @md.title.main=@md.heading_seg_first
+ @md.set_header_title=true
+ end
+ end
+ @tuned_file << t_o
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ end
+ end
+end
+__END__