From 16c30474f06ed3774ad524a38b55b7840de057d0 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sun, 25 May 2014 10:01:43 -0400 Subject: v5: merge v6 to v5, version bump to 5.4.*, reason adds structure check * ao, document structure check, stop processing on major error (with error message & text at location of failure) [reason for version bump the addition of structure check] * utils, add ok code marker * ao, heading with no ocn, distinguish ~# from -# * ~# is general & means no ocn (for any object to which it is applied) * -# is relevant only for 1~ dummy headings & instructs that they should be removed from output where possible * applied so far to pdf, odt & plaintext * ao_images, reduce warnings when ruby RMagic absent as program used directly * reduced dependency on ruby RMagic library (as some time way back had issues) * ao, document markup structure check, skip processing file on major error * with error message & text at location of failure * texpdf, urls in creator cause breakage * texpdf, mailto markup links set for normal text objects * texpdf, '&' in heading breaks toc (now removed from toc (not heading)), bug * revisit, bug * texpdf, for urls switch to sans serif (small fontsize) * instead of typewriter, latex default * texpdf, pdf colored hyperlinks configurable * --pdf-hyperlinks-color --pdf-hyperlinks-no-color or --pdf-hyperlinks-monochrome * ['default']['pdf_hyperlinks']='color' (other options switch hyperlink color off 'no-color' 'color-off' 'monochrome') * texpdf, pdf default font size configurable (cli & sisurc.yml) (no fractions) * --pdf-fontsize-12 --pdf-fontsize-8 * default: texpdf_fontsize: 12 * texpdf, headings and table of contents representation * fixes 1~ and 2~ result in the same formatting 1~ 2~ & 3~ now differentiated, see discussion in sisu.org under #744383 * fixes :A smaller formatted than :B A~ B~ C~ now the same size, see discussion in sisu.org under #744383 * :B and :C result in the same formatting, issue explained see sisu.org * texpdf, (internal coding) fontface, rename texpdf_font texpdf_fontface * texpdf, (internal coding) use symbols to identify page orientation * param, metadata rights, line-breaks instead of semicolons separating rights * fixes remove trailing semicolon after :copyright: * digests sha512 option implemented * options sha512 sha256 md5 * command line --sha512 * rcconf.yml ['defsault']['digest'] = sha512 * xml object citation numbering (docbook fictionbook) --- lib/sisu/v5/ao_doc_str.rb | 168 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 119 insertions(+), 49 deletions(-) (limited to 'lib/sisu/v5/ao_doc_str.rb') diff --git a/lib/sisu/v5/ao_doc_str.rb b/lib/sisu/v5/ao_doc_str.rb index dd7f32f3..b66f01d1 100644 --- a/lib/sisu/v5/ao_doc_str.rb +++ b/lib/sisu/v5/ao_doc_str.rb @@ -234,10 +234,10 @@ module SiSU_AO_DocumentStructureExtract @@flag[:ocn]=:on {flag: :ocn_on} when /[~]/ - @@flag[:ocn]=:off_headings_substantive - {flag: :ocn_off, mod: :headings_substantive} - when /[-]/ - @@flag[:ocn]=:off_headings_exclude + @@flag[:ocn]=:ocn_off_headings_keep + {flag: :ocn_off, mod: :headings_keep} + when /[-]/ #of particular relevance with level 1~ which is required to precede substantive text & used e.g. in html segmented text + @@flag[:ocn]=:ocn_off_headings_dummy_lev1 {flag: :ocn_off, mod: :headings_exclude} else @@flag[:ocn]=:on @@ -286,12 +286,14 @@ module SiSU_AO_DocumentStructureExtract obj=$1 note=endnote_test?(obj) obj,tags=extract_tags(obj) - if @@flag[:ocn]==:off_headings_exclude \ - or @@flag[:ocn]==:off_headings_substantive + if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \ + or @@flag[:ocn]==:ocn_off_headings_keep unless obj =~ /[~-][#]\s*$/ - if @@flag[:ocn]==:off_headings_exclude + if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \ + and t_o =~/^1\~\S*\s+/m obj << ' -#' - elsif @@flag[:ocn]==:off_headings_substantive + elsif @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \ + or @@flag[:ocn]==:ocn_off_headings_keep obj << ' ~#' end end @@ -302,12 +304,14 @@ module SiSU_AO_DocumentStructureExtract name,obj=$1,$2 note=endnote_test?(obj) obj,tags=extract_tags(obj) - if @@flag[:ocn]==:off_headings_exclude \ - or @@flag[:ocn]==:off_headings_substantive + if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \ + or @@flag[:ocn]==:ocn_off_headings_keep unless obj =~ /[~-][#]\s*$/ - if @@flag[:ocn]==:off_headings_exclude + if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \ + and t_o =~/^1\~\S*\s+/m obj << ' -#' - elsif @@flag[:ocn]==:off_headings_substantive + elsif @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \ + or @@flag[:ocn]==:ocn_off_headings_keep obj << ' ~#' end end @@ -318,12 +322,14 @@ module SiSU_AO_DocumentStructureExtract name,obj=$1,$2 note=endnote_test?(obj) obj,tags=extract_tags(obj,name) - if @@flag[:ocn]==:off_headings_exclude \ - or @@flag[:ocn]==:off_headings_substantive + if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \ + or @@flag[:ocn]==:ocn_off_headings_keep unless obj =~ /[~-][#]\s*$/ - if @@flag[:ocn]==:off_headings_exclude + if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \ + and t_o =~/^1\~\S*\s+/m obj << ' -#' - elsif @@flag[:ocn]==:off_headings_substantive + elsif @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \ + or @@flag[:ocn]==:ocn_off_headings_keep obj << ' ~#' end end @@ -345,8 +351,8 @@ module SiSU_AO_DocumentStructureExtract note=endnote_test?(obj) obj,tags=extract_tags(obj) unless obj=~/\A\s*\Z/m - if @@flag[:ocn]==:off_headings_exclude \ - or @@flag[:ocn]==:off_headings_substantive + if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \ + or @@flag[:ocn]==:ocn_off_headings_keep unless obj =~ /[~-][#]\s*$/ obj << ' ~#' end @@ -368,8 +374,8 @@ module SiSU_AO_DocumentStructureExtract note=endnote_test?(obj) obj,tags=extract_tags(obj) unless obj=~/\A\s*\Z/m - if @@flag[:ocn]==:off_headings_exclude \ - or @@flag[:ocn]==:off_headings_substantive + if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \ + or @@flag[:ocn]==:ocn_off_headings_keep unless obj =~ /[~-][#]\s*$/ obj << ' ~#' end @@ -391,8 +397,8 @@ module SiSU_AO_DocumentStructureExtract image=image_test(t_o) note=endnote_test?(t_o) obj,tags=extract_tags(t_o) - if @@flag[:ocn]==:off_headings_exclude \ - or @@flag[:ocn]==:off_headings_substantive + if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \ + or @@flag[:ocn]==:ocn_off_headings_keep unless obj =~ /[~-][#]\s*$/ obj << ' ~#' end @@ -847,18 +853,43 @@ module SiSU_AO_DocumentStructureExtract if status==:error node_ln=/^([0-6])/.match(node)[1].to_i node_parent_ln=/^([0-6])/.match(node_parent)[1].to_i - puts %{current level: #{structure_info.lv[node_ln]} (possible parent levels: #{structure_info.possible_parents(structure_info.lv[node_ln])}) + STDERR.puts %{current level: #{structure_info.lv[node_ln]} (possible parent levels: #{structure_info.possible_parents(structure_info.lv[node_ln])}) parent level: #{structure_info.lv[node_parent_ln]} (possible child levels: #{structure_info.possible_children(structure_info.lv[node_parent_ln])}) --- } +SKIPPED processing file: [#{@md.opt.lng}] "#{@md.fns}"} + $process_document = :skip end end end def warning_incorrect_parent_level_or_level(txt) - puts %{warning, -#{txt} -has incorrect level and/or parent level} + puts %{ERROR. There is an error in markup of heading levels either here or in the parent heading. +The current header reads: +"#{txt}" +has incorrect level and/or parent level +--} + end + def required_headers_present? + unless (defined? @md.title \ + and @md.title.full) + STDERR.puts %{required header missing: + +@title: +SKIPPED processing file: [#{@md.opt.lng}] "#{@md.fns}" +} + $process_document = :skip + end + unless (defined? @md.creator.author \ + and @md.creator.author) + STDERR.puts %{required header missing: + +@creator: + :author: anonymous? +SKIPPED processing file: [#{@md.opt.lng}] "#{@md.fns}" +} + $process_document = :skip + end end def ocn #and auto segment numbering increment + required_headers_present? data=@data @o_array=[] node=ocn=ocn_dv=ocn_sp=ocnh=ocnh0=ocnh1=ocnh2=ocnh3=ocnh4=ocnh5=ocnh6=ocno=ocnp=ocnt=ocnc=ocng=ocni=ocnu=0 # h heading, o other, t table, g group, i image @@ -866,6 +897,7 @@ has incorrect level and/or parent level} parent=node1=node2=node3=node4=node5=node6=nil node0='0:0;0' @collapsed_lv0=0 + @lev_occurences={ a: 0, b: 0, c: 0, d: 0, l1: 0, l2: 0, l3: 0 } data.each do |dob| h={} if (dob.obj !~ regex_exclude_ocn_and_node || dob.is==:code) \ @@ -875,7 +907,7 @@ has incorrect level and/or parent level} && dob.ocn_ #dob.ln now is determined, and set earlier, check how best to remove this --> if dob.is==:heading - ln=case dob.lv + @ln=ln=case dob.lv when 'A' then 0 when 'B' then 1 when 'C' then 2 @@ -905,11 +937,13 @@ has incorrect level and/or parent level} end if ln==0 \ or ln=~@md.lv0 + @lev_occurences[:a] += 1 if not dob.obj =~/~#|-#/ ocn_flag=true ocnh0+=1 #heading node0="0:#{ocnh0};#{ocn}" else + #document_structure_check_info(node0,node0,:error) #fix ocn_flag=false node0="0:0;0" end @@ -919,11 +953,13 @@ has incorrect level and/or parent level} node,ocn_sp,parent=node0,"h#{ocnh}",'ROOT' elsif ln==1 \ or ln=~@md.lv1 + @lev_occurences[:b] += 1 if not dob.obj =~/~#|-#/ ocn_flag=true ocnh1+=1 #heading node1="1:#{ocnh1};#{ocn}" else + #document_structure_check_info(node0,node0,:error) #fix ocn_flag=false node1="1:0;0" end @@ -932,6 +968,7 @@ has incorrect level and/or parent level} @collapsed_lv1=@collapsed_lv0+1 node0 else + warning_incorrect_parent_level_or_level(dob.obj) document_structure_check_info(node0,node0,:error) node0 end @@ -939,11 +976,13 @@ has incorrect level and/or parent level} node,ocn_sp,parent=node1,"h#{ocnh}",node0 #FIX elsif ln==2 \ or ln=~@md.lv2 + @lev_occurences[:c] += 1 if not dob.obj =~/~#|-#/ ocn_flag=true ocnh2+=1 node2="2:#{ocnh2};#{ocn}" else + #document_structure_check_info(node0,node0,:error) #fix ocn_flag=false node2="2:0;0" end @@ -952,6 +991,7 @@ has incorrect level and/or parent level} @collapsed_lv2=@collapsed_lv1+1 node1 else + warning_incorrect_parent_level_or_level(dob.obj) document_structure_check_info(node2,node0,:error) node0 end @@ -959,11 +999,13 @@ has incorrect level and/or parent level} node,ocn_sp=node2,"h#{ocnh}" elsif ln==3 \ or ln=~@md.lv3 + @lev_occurences[:d] += 1 if not dob.obj =~/~#|-#/ ocn_flag=true ocnh3+=1 node3="3:#{ocnh3};#{ocn}" else + #document_structure_check_info(node0,node0,:error) #fix ocn_flag=false node3="3:0;0" end @@ -988,6 +1030,7 @@ or this level should be level :B~ rather than #{dob.lv}} node,ocn_sp=node3,"h#{ocnh}" elsif ln==4 \ or ln=~@md.lv4 + @lev_occurences[:l1] += 1 if not dob.obj =~/~#|-#/ ocn_flag=true ocnh4+=1 @@ -1021,6 +1064,7 @@ or this level should be level :B~ rather than #{dob.lv}} node,ocn_sp=node4,"h#{ocnh}" elsif ln==5 \ or ln=~@md.lv5 + @lev_occurences[:l2] += 1 if not dob.obj =~/~#|-#/ ocn_flag=true ocnh5+=1 @@ -1049,6 +1093,7 @@ or this level should be level :B~ rather than #{dob.lv}} @collapsed_lv5=@collapsed_lv1+1 node1 else + warning_incorrect_parent_level_or_level(dob.obj) document_structure_check_info(node5,node0,:error) node0 end @@ -1056,6 +1101,7 @@ or this level should be level :B~ rather than #{dob.lv}} node,ocn_sp=node5,"h#{ocnh}" elsif ln==6 \ or ln=~@md.lv6 + @lev_occurences[:l3] += 1 if not dob.obj =~/~#|-#/ ocn_flag=true ocnh6+=1 @@ -1092,6 +1138,7 @@ or this level should be 5~ rather #{dob.lv}" #level 6 @collapsed_lv6=@collapsed_lv1+1 node1 else + warning_incorrect_parent_level_or_level(dob.obj) document_structure_check_info(node6,node0,:error) node0 end @@ -1099,6 +1146,29 @@ or this level should be 5~ rather #{dob.lv}" #level 6 node,ocn_sp=node6,"h#{ocnh}" end else + unless @lev_occurences[:l1] > 0 + STDERR.puts %{Substantive text objects must follow a level 1~ heading and there are none at this point in processing: #{@lev_occurences[:l1]} +} + end + unless @ln >= 4 + lev=case @ln + when 0 then 'A' + when 1 then 'B' + when 2 then 'C' + when 3 then 'D' + when 4 then '1' + when 5 then '2' + when 6 then '3' + when 7 then '4' + when 8 then '5' + when 9 then '6' + end + STDERR.puts %{Substantive text objects must follow a level 1~ 2~ or 3~ heading: #{lev}~ +SKIPPED processing file: [#{@md.opt.lng}] "#{@md.fns}"} + puts dob.obj.gsub(/^(.{1,80})/,'"\1"') + $process_document = :skip + break + end if not dob.obj =~/~#|-#/ ocn_flag=true else @@ -1129,9 +1199,15 @@ or this level should be 5~ rather #{dob.lv}" #level 6 dob.ln,dob.node,dob.ocn,dob.ocn_,dob.odv,dob.osp,dob.parent,dob.lc=ln,node,ocn,ocn_flag,ocn_dv,ocn_sp,parent,collapsed_level else ocnu+=1 - dob.obj=dob.obj.gsub(/#{Mx[:fa_o]}[~-]##{Mx[:fa_c]}/,'') if dob.obj - ocn_dv,ocn_sp="u#{ocnu}","u#{ocnu}" - dob.ln,dob.node,dob.ocn,dob.ocn_,dob.odv,dob.osp,dob.parent,dob.lc=ln,node,nil,ocn_flag,ocn_dv,ocn_sp,parent,collapsed_level + heading_use=:ok + if dob.obj=~/#{Mx[:pa_non_object_no_heading]}/ + dob.obj=dob.obj.gsub(/#{Mx[:pa_non_object_no_heading]}/,'') + heading_use=:ok + elsif dob.obj=~/#{Mx[:pa_non_object_dummy_heading]}/ + dob.obj=dob.obj.gsub(/#{Mx[:pa_non_object_dummy_heading]}/,'') + heading_use=:dummy + end + dob.ln,dob.node,dob.ocn,dob.ocn_,dob.use_,dob.odv,dob.osp,dob.parent,dob.lc=ln,node,nil,ocn_flag,heading_use,ocn_dv,ocn_sp,parent,collapsed_level end else if dob.of !=:meta \ @@ -1148,24 +1224,6 @@ or this level should be 5~ rather #{dob.lv}" #level 6 end end h - elsif dob.obj=~/#{Mx[:pa_non_object_no_heading]}/ - dob.obj=dob.obj.gsub(/#{Mx[:pa_non_object_no_heading]}/,'') - if dob.is==:para - h={ obj: dob.obj, ocn_: false, ocn: nil, hang: dob.hang, indent: dob.indent, bullet_: dob.bullet_, tags: dob.tags, parent: dob.parent } - dob=SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h,dob) - elsif dob.is==:heading - h={ obj: dob.obj, ocn_: false, ocn: nil, toc_: true, parent: dob.parent } - dob=SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob) - end - elsif dob.obj=~/#{Mx[:pa_non_object_dummy_heading]}/ - dob.obj=dob.obj.gsub(/#{Mx[:pa_non_object_dummy_heading]}/,'') - if dob.is==:para - h={ obj: dob.obj, ocn_: false, ocn: nil, hang: dob.hang, indent: dob.indent, bullet_: dob.bullet_, tags: dob.tags, parent: dob.parent } - dob=SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h,dob) - elsif dob.is==:heading - h={ obj: dob.obj, ocn_: false, ocn: nil, toc_: false, parent: dob.parent } - dob=SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob) - end else dob end if dob.is==:code \ @@ -1177,6 +1235,18 @@ or this level should be 5~ rather #{dob.lv}" #level 6 end @o_array << dob end + unless @lev_occurences[:a] == 1 + STDERR.puts %{The number of level A~ in this document: #{@lev_occurences[:a]} +There must be one level A~ (no more and no less) +SKIPPED processing file: [#{@md.opt.lng}] "#{@md.fns}"} + $process_document = :skip + end + unless @lev_occurences[:l1] > 0 + STDERR.puts %{The number of level 1~ in this document: #{@lev_occurences[:l1]} +There must be at least one level 1~ (and as many as required) +SKIPPED processing file: [#{@md.opt.lng}] "#{@md.fns}"} + $process_document = :skip + end @o_array end end -- cgit v1.2.3