From ec9282e23d3262a2746f9837e0bc57e9aa7a48d5 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sun, 24 Feb 2008 21:05:47 +0000 Subject: enable conversion from sst to various forms of input xml and back including 0.66 tags sst_to_s_xml (sax/dom/node) semantic xml mockup naive conversion of known abbreviated tags, in flux, e.g. sisu --to-sax autonomy_markup0.sst back to sst markup with: sisu --from-sxml autonomy_markup0.sxs.xml sst_to_s_dom fix to footnote/endnote conversion --- lib/sisu/v0/shared_xml.rb | 89 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 84 insertions(+), 5 deletions(-) (limited to 'lib/sisu/v0/shared_xml.rb') diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb index fd27c664..abc6cc1a 100644 --- a/lib/sisu/v0/shared_xml.rb +++ b/lib/sisu/v0/shared_xml.rb @@ -1,3 +1,4 @@ +# coding: utf-8 =begin * Name: SiSU @@ -154,10 +155,44 @@ module SiSU_XML_munge class Trans require "#{SiSU_lib}/defaults" def initialize(md) + @md=md @sys=SiSU_Env::System_call.new - @dir=SiSU_Env::Info_env.new(md.fns) + @dir=SiSU_Env::Info_env.new(@md.fns) @dp=SiSU_Env::Info_env.new.digest.pattern @url_brace=SiSU_Viz::Skin.new.url_decoration + if @md.sem_tag + @ab ||=semantic_tags.default + end + end + def semantic_tags + def default + { + :pub => 'publication', + :ref => 'reference', + :desc => 'description', + :conv => 'convention', + :vol => 'volume', + :pg => 'page', + :ct => 'cite', + :cty => 'city', + :org => 'organization', + :d => 'date', + :t => 'title', + :a => 'author', + :n => 'name', + :fn => 'firstname', + :f => 'firstname', + :mn => 'middlename', + :m => 'middlename', + :ln => 'lastname', + :l => 'lastname', + :i => 'initials', + :q => 'quote', + :y => 'year', + :ab => 'abreviation', + } + end + self end def char_enc #character encode def utf8(para='') @@ -348,10 +383,8 @@ module SiSU_XML_munge end end def markup(para='') - #if para !~/^<:code>/ - wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17 - para=tidywords(wordlist).join(' ').strip - #end + wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17 + para=tidywords(wordlist).join(' ').strip para.gsub!(/(^|\s+)<\s+/,'\1< '); para.gsub!(/\s+>(\s+|$)/,' >\1') para.gsub!(/<:pb>\s*/,'') para.gsub!(/<+[-~]#>+/,'') @@ -426,6 +459,52 @@ module SiSU_XML_munge para.gsub!(/<(\/?en)>/,'<\1>') para end + def xml_sem_block_paired(matched) # colon depth: many, recurs + matched.gsub!(/\b(a):\{(.+?)\}:\1\b/m, %{\\2}) # sem : + matched.gsub!(/\b(vol):\{(.+?)\}:\1\b/m, %{\\2}) # sem : + matched.gsub!(/\b(pub):\{(.+?)\}:\1\b/m, %{\\2}) # sem : + matched.gsub!(/\b(ref):\{(.+?)\}:\1\b/m, %{\\2}) # sem : + matched.gsub!(/\b(desc):\{(.+?)\}:\1\b/m,%{\\2}) # sem : + matched.gsub!(/\b(conv):\{(.+?)\}:\1\b/m,%{\\2}) # sem : + matched.gsub!(/\b(ct):\{(.+?)\}:\1\b/m, %{\\2}) # sem : + matched.gsub!(/\b(cty):\{(.+?)\}:\1\b/m, %{\\2}) # sem : + matched.gsub!(/\b(org):\{(.+?)\}:\1\b/m, %{\\2}) # sem : + matched.gsub!(/\b(d):\{(.+?)\}:\1\b/m, %{\\2}) # sem : + matched.gsub!(/\b(n):\{(.+?)\}:\1\b/m, %{\\2}) # sem : + matched.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m,'\2') # sem : + end + def xml_semantic_tags(para) + if @md.sem_tag + para.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } # sem : + para.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } # sem : + para.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } # sem : + #colon one / single / flat / shallow + para.gsub!(/:\{(.+?)\}:a\b/m, %{\\1}) # sem : + para.gsub!(/:\{(.+?)\}:n\b/m, %{\\1}) # sem : + para.gsub!(/:\{(.+?)\}:t\b/m, %{\\1}) # sem : + para.gsub!(/:\{(.+?)\}:ref\b/m, %{\\1}) # sem : + para.gsub!(/:\{(.+?)\}:desc\b/m, %{\\1}) # sem : + para.gsub!(/:\{(.+?)\}:cty\b/m, %{\\1}) # sem : + para.gsub!(/:\{(.+?)\}:org\b/m, %{\\1}) # sem : + para.gsub!(/:\{(.+?)\}:([a-z]+(?:[_:.][a-z]+)*)/m,'\1') # sem : + #semicolon zero / none + para.gsub!(/;\{([^}]+(?![;]))\};t\b/m, %{\\1}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};q\b/m, %{\\1}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};ref\b/m, %{\\1}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};desc\b/m,%{\\1}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};y\b/m, %{\\1}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};ab\b/m, %{\\1}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};pg\b/m, %{\\1}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};fn?\b/m, %{\\1}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};mn?\b/m, %{\\1}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};ln?\b/m, %{\\1}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};i\b/m, %{\\1}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};org\b/m, %{\\1}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};cty\b/m, %{\\1}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};([a-z]+(?:[_:.][a-z]+)*)/m,'\1') # sem ; + end + para + end end end module SiSU_XML_tags #Format -- cgit v1.2.3