about | summary | refs | log | tree | commit | diff | homepage
path: root/lib/sisu/v0/shared_xml.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sisu/v0/shared_xml.rb')
-rw-r--r-- lib/sisu/v0/shared_xml.rb 110
1 files changed, 42 insertions, 68 deletions
diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb
index c93eff5b..3c34e67f 100644
--- a/lib/sisu/v0/shared_xml.rb
+++ b/lib/sisu/v0/shared_xml.rb
@@ -59,75 +59,27 @@
=end
module SiSU_text_parts
- class Split_text_object
+ require "#{SiSU_lib}/shared_structure"
+ class Split_text_object < SiSU_Structure::Split_text_object
require "#{SiSU_lib}/param"
require "#{SiSU_lib}/xml_format"
include SiSU_Viz
include SiSU_XML_format
@@alt_id_count=0
@@dp=nil
- attr_reader :format,:text,:ocn,:lev_para_ocn
- def initialize(md,para)
- @md,@para=md,para
- @format,@ocn='null','null'
- #@format,@ocn=nil,nil
- @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern
- end
- def lev_segname_para_ocn #using shared_txt instead, watch #% watch closely
- if @para =~/^(\d~|<:.+?>).+?<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
- if /^([1-6])~(\S+)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
- @format,segname,@text,@ocn=$1,$2,$3,$4
- @format="#@format~#{segname}" #
- elsif /^([1-6]~)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
- @format,@text,@ocn=$1,$2,$3
- elsif /<:(.+?)>\s*(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
- @format,@text,@ocn=$1,$2,$3
- elsif /^([1-6])~(\S+)\s+(\S.+?)<~(0);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
- @@alt_id_count+=1
- @format,segname,@text,@ocn=$1,$2,$3,"x#{@@alt_id_count}"
- @format="#@format~#{segname}" #
- elsif /^([1-6]~)\s+(\S.+?)<~(0);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
- @@alt_id_count+=1
- @format,@text,@ocn=$1,$2,"x#{@@alt_id_count}"
- elsif /^(?:<:i([1-9])>\s*_\*)\s+(.+?)<~(\d+);(?:[ohu]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
- @format,@text,@ocn="_#{$1}\*",$2,$3,$4
- elsif /^(_\*)\s+(.+?)<~(\d+);(?:[ohu]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
- @format,@text,@ocn=$1,$2,$3
- elsif /<:(i[1-9])>\s*(.+?)<~(\d+);(?:[ohu]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
- @format,@text,@ocn=$1,$2,$3
- end
- else
- if /(.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
- @text,@ocn=$1,$2
- end
- if @para !~/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$|^$/ #added 2002w06
- @text=/(.+?)/m.match(@para)[1]
- end
- if /^(\d)~\S*\s+(.+)/m.match(@para)
- @format,@text=$1,$2
- end
- end
- @lev_para_ocn=if @para =~/.+<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
- SiSU_XML_format::Format_scroll.new(@md,@format,@text,@ocn)
- else
- SiSU_XML_format::Format_scroll.new(@md,@format,@text,"<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>")
- end
- self
- end
def lev_segname_para
- if @para =~/^(\d~|<:.+?>).+/
- if /^([1-6])~(\S+)\s+(\S.+)/m.match(@para)
+ if @para =~/^#{Mx[:lv_o]}(?:\d:|<:.+?>).+/
+ if /^#{Mx[:lv_o]}([1-6]):(\S+?)#{Mx[:lv_c]}\s*(\S.+)/m.match(@para)
@format,segname,@text=$1,$2,$3
- @format="#@format~#{segname}" #
- elsif /^([1-6]~)\s+(\S.+)/m.match(@para)
+ elsif /^#{Mx[:lv_o]}([1-6]):#{Mx[:lv_c]}\s*(\S.+)/m.match(@para)
@format,@text=$1,$2
elsif /<:(.+?)>\s*(\S.+?)/m.match(@para)
@format,@text=$1,$2
- elsif /^([1-6])~(\S+)\s+(\S.+?)/m.match(@para)
+ elsif /^#{Mx[:lv_o]}([1-6]):(\S+?)#{Mx[:lv_c]}\s*(\S.+?)/m.match(@para)
@@alt_id_count+=1
@format,segname,@text=$1,$2,$3
- @format="#@format~#{segname}" #
- elsif /^([1-6]~)\s+(\S.+?)/m.match(@para)
+ #@format="#@format:#{segname}" #
+ elsif /^#{Mx[:lv_o]}([1-6]):#{Mx[:lv_c]}\s*(\S.+?)/m.match(@para)
@@alt_id_count+=1
@format,@text=$1,$2
end
@@ -135,17 +87,19 @@ module SiSU_text_parts
if /(.+?)/m.match(@para)
@text=$1
end
- if @para !~/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$|^$/ #added 2002w06
+ if @para !~/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$|^$/ #added 2002w06
@text=/(.+?)/m.match(@para)[1]
end
if /^(\d)~\S*\s+(.+)/m.match(@para)
@format,@text=$1,$2
end
end
- @lev_para_ocn=if @para =~/.+<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
+ @format="#@format:#{segname}" #
+#follow this search beneath for heading_body1-6
+ @lev_para_ocn=if @para =~/.+#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/
SiSU_XML_format::Format_scroll.new(@md,@format,@text,@ocn)
else
- SiSU_XML_format::Format_scroll.new(@md,@format,@text,"<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>")
+ SiSU_XML_format::Format_scroll.new(@md,@format,@text,"#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}")
end
self
end
@@ -396,19 +350,40 @@ module SiSU_XML_munge
def markup(para='')
wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17
para=tidywords(wordlist).join(' ').strip
- para.gsub!(/(^|\s+)<\s+/,'\1&lt; '); para.gsub!(/\s+>(\s+|$)/,' &gt;\1')
- para.gsub!(/<:pb>\s*/,'')
+ para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'<br />')
+ para.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'')
+ para.gsub!(/#{Mx[:mk_o]}#([a-zA-Z]+)#{Mx[:mk_c]}/,'&\1;')
+ para.gsub!(/#{Mx[:mk_o]}(#[0-9]+)#{Mx[:mk_c]}/,'&\1;')
+ para.gsub!(/(^|#{Mx[:gl_c]}|\s+)<\s+/,'\1&lt; '); para.gsub!(/\s+>(\s+|$)/,' &gt;\1')
+ #para.gsub!(/#{Mx[:fa_emphasis_o]}(.+?)#{Mx[:fa_emphasis_c]}/,'<em>\1</em>') #reinstate
+ para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'<b>\1</b>')
+ para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'<i>\1</i>')
+ para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'<u>\1</u>')
+ para.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'<sup>\1</sup>')
+ para.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'<sub>\1</sub>')
+ para.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'<ins>\1</ins>')
+ para.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'<cite>\1</cite>')
+ para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'<del>\1</del>')
+ para.gsub!(/<:pb>\s*/,'') #Fix
para.gsub!(/<+[-~]#>+/,'')
- para.gsub!(/<0;\w\d+;[um]\d+><#@dp:#@dp>/,'')
- if para !~/^<:code>/
+ para.gsub!(/#{Mx[:id_o]}0;\w\d+;[um]\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}/,'')
+ if para !~/^#{Mx[:gr_o]}code#{Mx[:gr_c]}/
#embeds a red-bullet image -->
+ para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'<b>\1</b>')
+ para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'<i>\1</i>')
+ para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'<u>\1</u>')
+ para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'<del>\1</del>')
+ para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'<br />')
+ para.gsub!(/#{Mx[:br_page]}\s*/,'')
+ para.gsub!(/#{Mx[:br_page_new]}\s*/,'')
+ para.gsub!(/#{Mx[:pa_non_object_no_heading]}|#{Mx[:pa_non_object_dummy_heading]}/,''); para.gsub!(/<[-~]#>/,'')
para.gsub!(/(?:^|[^_\\])\{\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?\}(https?:\/\/\S+)/,
%{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\1" width="\\2" height="\\3" />[\\1] \\4})
para.gsub!(/(?:^|[^_\\])\{\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?\}(https?:\/\/\S+)/,
%{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\1"/>\\1})
- para.gsub!(/(^|\s)\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?=\s|$))/,
+ para.gsub!(/(^|#{Mx[:gl_c]}|\s)\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?=\s|$))/,
'\1<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\3">\2</link>\4') #watch, compare html_tune
- para.gsub!(/(^|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,
+ para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,
%{\\1#{@url_brace.xml_open}<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\\2">\\2</link>#{@url_brace.xml_close}\\3})
para.gsub!(/\b[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,
'<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\1">\1</link>\2') #escaped urls not linked, deal with later
@@ -429,7 +404,7 @@ module SiSU_XML_munge
para.gsub!(/<br(\s*\/)?>/,'<br />')
para.gsub!(/<:pb>\s*/,'')
para.gsub!(/<[-~]#>/,'')
- para.gsub!(/(^|\s)&\s+/,'\1&amp; ') #sort
+ para.gsub!(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1&amp; ') #sort
para.gsub!(/&([^;]{1,5})/,'&amp;\1') #sort, rough estimate, revisit #WATCH found in node not sax
para.gsub!(/(?:^|[^_\\])\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/,
"<image.path>#{@dir.url.images_local}\/\\1</image.path>")
@@ -452,7 +427,7 @@ module SiSU_XML_munge
para.gsub!(/<:\S+?>/,'')
#<-- temporary
para.gsub!(/<[-~]#>/,'')
- para.gsub!(/(^|\s)&\s+/,'\1&amp; ') #sort
+ para.gsub!(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1&amp; ') #sort
para.gsub!(/&([^;]{1,5})/,'&amp;\1') #sort, rough estimate, revisit #WATCH found in node not sax
para.gsub!(/(?:^|[^_\\])\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/,
"<image.path>#{@dir.url.images_local}\/\\1</image.path>")
@@ -728,4 +703,3 @@ module SiSU_Tables
require "#{SiSU_lib}/xml_tables"
end
__END__
-