aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/sisu/v0/xml.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sisu/v0/xml.rb')
-rw-r--r--lib/sisu/v0/xml.rb91
1 files changed, 45 insertions, 46 deletions
diff --git a/lib/sisu/v0/xml.rb b/lib/sisu/v0/xml.rb
index 23125e05..9f897266 100644
--- a/lib/sisu/v0/xml.rb
+++ b/lib/sisu/v0/xml.rb
@@ -118,7 +118,7 @@ module SiSU_XML_SAX
@env,@md,@dal_array=particulars.env,particulars.md,particulars.dal_array
@vz=SiSU_Env::Get_init.instance.skin
@dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern
- @regx=/^(?:(?:<:p[bn]>\s*)?\d~(?:(\S+))?\s+)?(.+?)\s*<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
+ @regx=/^(?:(?:#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}\s*)?#{Mx[:lv_o]}\d:(\S*?)#{Mx[:lv_c]})?\s*(.+?)\s*#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/
@tab="\t"
@trans=SiSU_XML_munge::Trans.new(@md)
@sys=SiSU_Env::System_call.new
@@ -130,16 +130,16 @@ module SiSU_XML_SAX
publish
end
protected
- def embedded_endntoes(para='')
- para.gsub!(/~\{(\d+)\s+(.+?)\s*<#@dp>\}~/,
+ def embedded_endnotes(para='')
+ para.gsub!(/#{Mx[:en_a_o]}(\d+)\s+(.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/,
'<endnote><number>\1</number><note>\2</note></endnote> ')
- para.gsub!(/~\[([*+]\d+)\s+(.+?)\s*<#@dp>\]~/,
+ para.gsub!(/#{Mx[:en_b_o]}([*+]\d+)\s+(.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_b_c]}/,
'<endnote><symbol>\1</symbol><note>\2</note></endnote> ')
- para.gsub!(/~\{([*+]+)\s+(.+?)\s*<#@dp>\}~/,
+ para.gsub!(/#{Mx[:en_a_o]}([*+]+)\s+(.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/,
'<endnote><symbol>\1</symbol><note>\2</note></endnote> ')
end
def extract_endnotes(para='')
- notes=para.scan(/~[{\[]([\d*+]+\s+.+?)\s*<#@dp>[}\]]~/)
+ notes=para.scan(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+\s+.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/)
notes.flatten.each do |e|
s=e.to_s
util=SiSU_text_utils::Wrap.new(s,70)
@@ -176,7 +176,7 @@ WOK
end
def xml_head(meta)
txt=meta.text
- txt.gsub!(/<br(?: \/)?>/,'')
+ txt.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,' ')
txt.gsub!(/ & /,' and ')
el=meta.el.gsub(/\./,'_')
el_txt=meta.el.gsub(/\./,' ')
@@ -219,7 +219,8 @@ WOK
n3=lv + 2
lv=nil if lv == 0
extract_endnotes(para)
- para.gsub!(/~[{\[]([\d*+]+)\s+(?:.+?)\s*<#@dp>[}\]]~/,'<en>\1</en>') #footnote/endnote clean
+ para.gsub!(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/,'<en>\1</en>') #footnote/endnote clean
+ para.gsub!(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_b_c]}/,'<en>\1</en>') #footnote/endnote clean
if para[@regx]
paragraph="#{para[@regx,2]}"
util=SiSU_text_utils::Wrap.new(paragraph,70)
@@ -238,9 +239,10 @@ WOK
@endnotes=[]
end
def group_structure(para='',ocn='')
- para.gsub!(/<:group(?:-end)?>/,'')
+ para.gsub!(/#{Mx[:gr_o]}group(?:-end)?#{Mx[:gr_c]}/,'')
extract_endnotes(para)
- para.gsub!(/~[{\[]([\d*+]+)\s+(?:.+?)\s*<#@dp>[}\]]~/,'<en>\1</en>') #footnote/endnote clean
+ para.gsub!(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/,'<en>\1</en>') #footnote/endnote clean
+ para.gsub!(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_b_c]}/,'<en>\1</en>') #footnote/endnote clean
para=@trans.markup_group(para)
para.strip!
@@xml[:body] << %{#{@tab*0}<object id="#{ocn}">} << "\n"
@@ -253,7 +255,7 @@ WOK
@endnotes=[]
end
def poem_structure(para='',ocn='')
- para.gsub!(/<:verse(?:-end)?>/,'')
+ para.gsub!(/#{Mx[:gr_o]}verse(?:-end)?#{Mx[:gr_c]}/,'')
para=@trans.markup_group(para)
#para.gsub!(/\s\s/,'&#160;&#160;')
para.strip!
@@ -265,7 +267,7 @@ WOK
@@xml[:body] << "#{@tab*0}</object>" << "\n"
end
def code_structure(para='',ocn='')
- para.gsub!(/<:code(?:-end)?>/,'')
+ para.gsub!(/#{Mx[:gr_o]}code(?:-end)?#{Mx[:gr_c]}/,'')
para=@trans.markup_group(para)
para.gsub!(/\s\s/,'&#160;&#160;')
para.strip!
@@ -291,82 +293,79 @@ WOK
data.each do |para|
para=@trans.markup(para)
@trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8
- if para =~/^0~(\S+)\s+(.+?)$/ # for headers
+ if para =~/^#{Rx[:meta]}\s*.+?$/ # for headers
d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta
if d_meta; xml_head(d_meta)
end
end
if @rcdc==false \
- and (para =~/~metadata/ or para =~/1~meta\s+Document Information/)
+ and (para =~/~metadata/ \
+ or para =~/#{Mx[:lv_o]}1:meta#{Mx[:lv_c]}\s*Document Information/)
@rcdc=true
end
- if para !~/(^0~|<ENDNOTES>|<EOF>)/
- if para =~/.+?<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
+ if para !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|#{Mx[:br_endnotes]})/
+ if para =~/.+?#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/
paranum=para[@regx,3]
@p_num=SiSU_XML_format::Paragraph_number.new(@md,paranum)
end
- @sto=SiSU_text_parts::Split_text_object.new(@md,para).lev_segname_para_ocn
- ### problem in scroll, it appears tables are getting paragraph numbers
+ @sto=SiSU_text_parts::Split_text_object.new(@md,para).xml
+ #@sto=SiSU_text_parts::Split_text_object.new(@md,para).lev_segname_para_ocn
unless @rcdc
- m=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
+ m=/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/
if para =~m
- format_scroll=SiSU_XML_format::Format_scroll.new(@md,@sto.text) if @sto.format =~/i[12]|null/
+ format_scroll=SiSU_XML_format::Format_scroll.new(@md,@sto.text) if @sto.format =~/i[1-9]|null/
case @sto.format
- when /^(1)~(?:(\S+))?/
+ when /^(1):(\S*)/
xml_structure(para,$1,@sto.ocn,$2)
para=@sto.lev_para_ocn.heading_body1
- when /^(2)~(?:(\S+))?/
+ when /^(2):(\S*)/
xml_structure(para,$1,@sto.ocn,$2)
para=@sto.lev_para_ocn.heading_body2
- when /^(3)~(?:(\S+))?/
+ when /^(3):(\S*)/
xml_structure(para,$1,@sto.ocn,$2)
para=@sto.lev_para_ocn.heading_body3
- when /^(4)~(\S+)/ # work on see Split_text_object
+ when /^(4):(\S+)/ # work on see Split_text_object
xml_structure(para,$1,@sto.ocn,$2)
para=@sto.lev_para_ocn.heading_body4
- when /^(5)~(?:(\S+))?/
+ when /^(5):(\S*)/
xml_structure(para,$1,@sto.ocn,$2)
para=@sto.lev_para_ocn.heading_body5
- when /^(6)~(?:(\S+))?/
+ when /^(6):(\S*)/
xml_structure(para,$1,@sto.ocn,$2)
para=@sto.lev_para_ocn.heading_body6
else
- matched=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/mi.match(para)
+ matched=/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/mi.match(para)
stamp,ocn=matched[0],matched[1]
- if para =~ /<:verse>/
+ if para =~ /#{Mx[:gr_o]}verse#{Mx[:gr_c]}/
para.gsub!(/#{stamp}/,'')
poem_structure(para,ocn)
- elsif para =~ /<:group>/
+ elsif para =~ /#{Mx[:gr_o]}group#{Mx[:gr_c]}/
para.gsub!(/#{stamp}/,'')
group_structure(para,ocn)
- elsif para =~ /<:code>/
+ elsif para =~ /#{Mx[:gr_o]}code#{Mx[:gr_c]}/
para.gsub!(/#{stamp}/,'')
code_structure(para,ocn)
- elsif para =~/<!Th?.+/ # tables come as single block #work area 2005w13
+ elsif para =~/#{Mx[:gr_o]}Th?.+/ # tables come as single block #work area 2005w13
table=SiSU_Tables::Table_xml.new(para,ocn)
para=table.table_split
table_structure(para)
- elsif para =~ /^\s*(?:<:i([1-9])> )?_\*/ #uncomment
+ elsif para =~ /^\s*(?:#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]})?#{Mx[:gl_bullet]}/ #uncomment
m=$1
- para.gsub!(/^(\s*(?:<:i[1-9]> )?)_\*/,'\1')
+ para.gsub!(/^(\s*(?:#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]})?)#{Mx[:gl_bullet]}/,'\1')
xml_structure(para,nil,nil,nil,"indent_bullet#{m}")
- elsif para =~ /<:i([1-9])>/
+ elsif para =~ /#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}/
xml_structure(para,nil,nil,nil,"indent#{$1}")
else xml_structure(para,nil,nil,nil)
end
- #@@xml[:body] << "#{@tab*6}<object>" << "\n" if para[@regx]
- #@@xml[:body] << "#{@tab*7}<ocn>#{para[@regx,3]}</ocn>" << "\n" if para[@regx,3]
- #@@xml[:body] << "#{@tab*7}<text>#{para[@regx,2]}</text>\n" if para[@regx,2] # main text, contents, body KEEP
- #@@xml[:body] << "#{@tab*6}</object>" << "\n" if para[@regx]
end
- elsif para =~/(Note|Endnotes?)/ \
- and para !~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
+ elsif para =~/(#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ \
+ and para !~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/
elsif para =~/(MetaData)/ \
- and para =~/<~(\d+);[m]\d+;\w\d+><#@dp:#@dp>$/ #debug 2003w46 add rc info
+ and para =~/#{Mx[:id_o]}~\d+;[m]\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #debug 2003w46 add rc info
format_scroll=Format_scroll.new(@md,'<br /><a name="metadata">MetaData</a>')
para=format_scroll.bold_para
elsif para =~/(Owner Details)/ \
- and para !~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
+ and para !~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/
format_scroll=Format_scroll.new(@md,'<br /><a name="owner.details">Owner Details</a>')
@@xml[:owner_details]=format_scroll.bold_para
para=''
@@ -379,8 +378,8 @@ WOK
and para =~/^(-\{{2}~\d+|<!e[:_]\d+!>)/ # -endnote
para=''
end
- if para =~/.*<:#>.*$/
- para=if para =~ /<:i[1-9]>/
+ if para =~/.*<:#>.*$/ #investigate removal
+ para=if para =~ /#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}/
format_text=Format_text_object.new(para,'')
format_text.scr_inden_ocn_e_no_paranum
end
@@ -395,7 +394,7 @@ WOK
end
else #
end
- para.gsub!(/<:\S+?>|<!.+!>/,'') if para
+ para.gsub!(/#{Mx[:pa_o]}:\S+#{Mx[:pa_c]}/,'') if para
end
end
6.downto(4) do |x|
@@ -451,7 +450,7 @@ WOK
SiSU_Env::SiSU_file.new(@md).mkdir
filename_xml=SiSU_Env::SiSU_file.new(@md,@md.fn[:sax]).mkfile
@data.each do |para|
- para.gsub!(/<:\S+?>|<!.+?!>/,'')
+ para.gsub!(/#{Mx[:pa_o]}:\S+#{Mx[:pa_c]}/,'') #; para.gsub!(/<:\S+?>|<!.+?!>/,'')
para.gsub!(/^\s*\n$/,'')
filename_xml.puts para unless para.empty?
end