about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author Ralph Amissah <ralph@amissah.com> 2008-02-24 21:05:47 +0000
committer Ralph Amissah <ralph@amissah.com> 2008-02-24 21:05:47 +0000
commit ec9282e23d3262a2746f9837e0bc57e9aa7a48d5 (patch)
tree f1fdab5e27bad144a1e2472a8242348a45d03914
parent make possible to strip 0.66 markup in dal commit (diff)
enable conversion from sst to various forms of input xml and back including 0.66 tags
sst_to_s_xml (sax/dom/node): semantic xml mockup, a naive conversion of known abbreviated tags (in flux), e.g. sisu --to-sax autonomy_markup0.sst; convert back to sst markup with: sisu --from-sxml autonomy_markup0.sxs.xml. sst_to_s_dom: fix to footnote/endnote conversion.
-rw-r--r-- lib/sisu/v0/shared_xml.rb 89
-rw-r--r-- lib/sisu/v0/sst_from_xml.rb 7
-rw-r--r-- lib/sisu/v0/sst_to_s_xml_dom.rb 14
-rw-r--r-- lib/sisu/v0/sst_to_s_xml_node.rb 11
-rw-r--r-- lib/sisu/v0/sst_to_s_xml_sax.rb 10
5 files changed, 121 insertions, 10 deletions
diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb
index fd27c664..abc6cc1a 100644
--- a/lib/sisu/v0/shared_xml.rb
+++ b/lib/sisu/v0/shared_xml.rb
@@ -1,3 +1,4 @@
+# coding: utf-8
=begin
* Name: SiSU
@@ -154,10 +155,44 @@ module SiSU_XML_munge
class Trans
require "#{SiSU_lib}/defaults"
def initialize(md)
+ @md=md
@sys=SiSU_Env::System_call.new
- @dir=SiSU_Env::Info_env.new(md.fns)
+ @dir=SiSU_Env::Info_env.new(@md.fns)
@dp=SiSU_Env::Info_env.new.digest.pattern
@url_brace=SiSU_Viz::Skin.new.url_decoration
+ if @md.sem_tag
+ @ab ||=semantic_tags.default
+ end
+ end
+ def semantic_tags
+ def default
+ {
+ :pub => 'publication',
+ :ref => 'reference',
+ :desc => 'description',
+ :conv => 'convention',
+ :vol => 'volume',
+ :pg => 'page',
+ :ct => 'cite',
+ :cty => 'city',
+ :org => 'organization',
+ :d => 'date',
+ :t => 'title',
+ :a => 'author',
+ :n => 'name',
+ :fn => 'firstname',
+ :f => 'firstname',
+ :mn => 'middlename',
+ :m => 'middlename',
+ :ln => 'lastname',
+ :l => 'lastname',
+ :i => 'initials',
+ :q => 'quote',
+ :y => 'year',
+ :ab => 'abreviation',
+ }
+ end
+ self
end
def char_enc #character encode
def utf8(para='')
@@ -348,10 +383,8 @@ module SiSU_XML_munge
end
end
def markup(para='')
- #if para !~/^<:code>/
- wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17
- para=tidywords(wordlist).join(' ').strip
- #end
+ wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17
+ para=tidywords(wordlist).join(' ').strip
para.gsub!(/(^|\s+)<\s+/,'\1&lt; '); para.gsub!(/\s+>(\s+|$)/,' &gt;\1')
para.gsub!(/<:pb>\s*/,'')
para.gsub!(/<+[-~]#>+/,'')
@@ -426,6 +459,52 @@ module SiSU_XML_munge
para.gsub!(/&lt;(\/?en)&gt;/,'<\1>')
para
end
+ def xml_sem_block_paired(matched) # colon depth: many, recurs
+ matched.gsub!(/\b(a):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:a]} depth="many">\\2</sem:#{@ab[:a]}>}) # sem :
+ matched.gsub!(/\b(vol):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:vol]} depth="many">\\2</sem:#{@ab[:vol]}>}) # sem :
+ matched.gsub!(/\b(pub):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:pub]} depth="many">\\2</sem:#{@ab[:pub]}>}) # sem :
+ matched.gsub!(/\b(ref):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:ref]} depth="many">\\2</sem:#{@ab[:ref]}>}) # sem :
+ matched.gsub!(/\b(desc):\{(.+?)\}:\1\b/m,%{<sem:#{@ab[:desc]} depth="many">\\2</sem:#{@ab[:desc]}>}) # sem :
+ matched.gsub!(/\b(conv):\{(.+?)\}:\1\b/m,%{<sem:#{@ab[:conv]} depth="many">\\2</sem:#{@ab[:conv]}>}) # sem :
+ matched.gsub!(/\b(ct):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:ct]} depth="many">\\2</sem:#{@ab[:ct]}>}) # sem :
+ matched.gsub!(/\b(cty):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:cty]} depth="many">\\2</sem:#{@ab[:cty]}>}) # sem :
+ matched.gsub!(/\b(org):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:org]} depth="many">\\2</sem:#{@ab[:org]}>}) # sem :
+ matched.gsub!(/\b(d):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:d]} depth="many">\\2</sem:#{@ab[:d]}>}) # sem :
+ matched.gsub!(/\b(n):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:n]} depth="many">\\2</sem:#{@ab[:n]}>}) # sem :
+ matched.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m,'<sem:\1 depth="many">\2</sem:\1>') # sem :
+ end
+ def xml_semantic_tags(para)
+ if @md.sem_tag
+ para.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } # sem :
+ para.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } # sem :
+ para.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } # sem :
+ #colon one / single / flat / shallow
+ para.gsub!(/:\{(.+?)\}:a\b/m, %{<sem:#{@ab[:a]} depth="one">\\1</sem:#{@ab[:a]}>}) # sem :
+ para.gsub!(/:\{(.+?)\}:n\b/m, %{<sem:#{@ab[:n]} depth="one">\\1</sem:#{@ab[:n]}>}) # sem :
+ para.gsub!(/:\{(.+?)\}:t\b/m, %{<sem:#{@ab[:t]} depth="one">\\1</sem:#{@ab[:t]}>}) # sem :
+ para.gsub!(/:\{(.+?)\}:ref\b/m, %{<sem:#{@ab[:ref]} depth="one">\\1</sem:#{@ab[:ref]}>}) # sem :
+ para.gsub!(/:\{(.+?)\}:desc\b/m, %{<sem:#{@ab[:desc]} depth="one">\\1</sem:#{@ab[:desc]}>}) # sem :
+ para.gsub!(/:\{(.+?)\}:cty\b/m, %{<sem:#{@ab[:cty]} depth="one">\\1</sem:#{@ab[:cty]}>}) # sem :
+ para.gsub!(/:\{(.+?)\}:org\b/m, %{<sem:#{@ab[:org]} depth="one">\\1</sem:#{@ab[:org]}>}) # sem :
+ para.gsub!(/:\{(.+?)\}:([a-z]+(?:[_:.][a-z]+)*)/m,'<sem:\2 depth="one">\1</sem:\2>') # sem :
+ #semicolon zero / none
+ para.gsub!(/;\{([^}]+(?![;]))\};t\b/m, %{<sem:#{@ab[:t]} depth="zero">\\1</sem:#{@ab[:t]}>}) # sem ;
+ para.gsub!(/;\{([^}]+(?![;]))\};q\b/m, %{<sem:#{@ab[:q]} depth="zero">\\1</sem:#{@ab[:q]}>}) # sem ;
+ para.gsub!(/;\{([^}]+(?![;]))\};ref\b/m, %{<sem:#{@ab[:ref]} depth="zero">\\1</sem:#{@ab[:ref]}>}) # sem ;
+ para.gsub!(/;\{([^}]+(?![;]))\};desc\b/m,%{<sem:#{@ab[:desc]} depth="zero">\\1</sem:#{@ab[:desc]}>}) # sem ;
+ para.gsub!(/;\{([^}]+(?![;]))\};y\b/m, %{<sem:#{@ab[:y]} depth="zero">\\1</sem:#{@ab[:y]}>}) # sem ;
+ para.gsub!(/;\{([^}]+(?![;]))\};ab\b/m, %{<sem:#{@ab[:ab]} depth="zero">\\1</sem:#{@ab[:ab]}>}) # sem ;
+ para.gsub!(/;\{([^}]+(?![;]))\};pg\b/m, %{<sem:#{@ab[:pg]} depth="zero">\\1</sem:#{@ab[:pg]}>}) # sem ;
+ para.gsub!(/;\{([^}]+(?![;]))\};fn?\b/m, %{<sem:#{@ab[:fn]} depth="zero">\\1</sem:#{@ab[:fn]}>}) # sem ;
+ para.gsub!(/;\{([^}]+(?![;]))\};mn?\b/m, %{<sem:#{@ab[:mn]} depth="zero">\\1</sem:#{@ab[:mn]}>}) # sem ;
+ para.gsub!(/;\{([^}]+(?![;]))\};ln?\b/m, %{<sem:#{@ab[:ln]} depth="zero">\\1</sem:#{@ab[:ln]}>}) # sem ;
+ para.gsub!(/;\{([^}]+(?![;]))\};i\b/m, %{<sem:#{@ab[:i]} depth="zero">\\1</sem:#{@ab[:i]}>}) # sem ;
+ para.gsub!(/;\{([^}]+(?![;]))\};org\b/m, %{<sem:#{@ab[:org]} depth="zero">\\1</sem:#{@ab[:org]}>}) # sem ;
+ para.gsub!(/;\{([^}]+(?![;]))\};cty\b/m, %{<sem:#{@ab[:cty]} depth="zero">\\1</sem:#{@ab[:cty]}>}) # sem ;
+ para.gsub!(/;\{([^}]+(?![;]))\};([a-z]+(?:[_:.][a-z]+)*)/m,'<sem:\2 depth="zero">\1</sem:\2>') # sem ;
+ end
+ para
+ end
end
end
module SiSU_XML_tags #Format
diff --git a/lib/sisu/v0/sst_from_xml.rb b/lib/sisu/v0/sst_from_xml.rb
index d93e68b8..af43e611 100644
--- a/lib/sisu/v0/sst_from_xml.rb
+++ b/lib/sisu/v0/sst_from_xml.rb
@@ -1,3 +1,4 @@
+# coding: utf-8
=begin
* Name: SiSU
@@ -99,6 +100,10 @@ module SiSU_sst_from_xml
text.gsub!(/<i>(.+?)<\/i>/,'/{\1}/')
text.gsub!(/<b>(.+?)<\/b>/,'*{\1}*')
text.gsub!(/<u>(.+?)<\/u>/,'_{\1}_')
+ text.gsub!(/<sem:([a-z_]+)\s+depth=['"]zero['"]>(\s*.+?\s*)<\/sem:\1>/,';{ \2 };\1')
+ text.gsub!(/<sem:([a-z_]+)\s+depth=['"]one['"]>(\s*.+?\s*)<\/sem:\1>/,':{ \2 }:\1')
+ text.gsub!(/<sem:([a-z_]+)\s+depth=['"]many['"]>(\s*.+?\s*)<\/sem:\1>/,'\1:{ \2 }:\1')
+ text.gsub!(/<sem:([a-z_]+)>(\s*.+?\s*)<\/sem:\1>/,'\1:{ \2 }:\1')
text.gsub!(/\s +/,' ')
text.strip!
#text.gsub!(/<header>(.+?)<\/header/,"@#{x.name}: \\1\n\n")
@@ -126,7 +131,7 @@ module SiSU_sst_from_xml
sax
end
def dom
- raise "#{__FILE}::#{__LINE__} xml dom representation to sst not yet implemented (experimental simple xml representations sax and node to sst are in place)."
+ raise "#{__FILE__}::#{__LINE__} xml dom representation to sst not yet implemented (experimental simple xml representations sax and node to sst are in place)."
end
def xml_to_sisu
unless @opt.files.empty?
diff --git a/lib/sisu/v0/sst_to_s_xml_dom.rb b/lib/sisu/v0/sst_to_s_xml_dom.rb
index a1c81532..f9c190bf 100644
--- a/lib/sisu/v0/sst_to_s_xml_dom.rb
+++ b/lib/sisu/v0/sst_to_s_xml_dom.rb
@@ -1,3 +1,4 @@
+# coding: utf-8
=begin
* Name: SiSU
@@ -161,7 +162,7 @@ module SiSU_simple_xml_model_dom
para.gsub!(/~\{([*+]+)\s+(.+?)\}~/,
'<endnote><symbol>\1</symbol><note>\2</note></endnote> ')
para.gsub!(/~\{(.+?)\}~/,
- '<endnote><note>\2</note></endnote> ')
+ '<endnote><note>\1</note></endnote> ')
end
def xml_head(meta)
txt=meta.text
@@ -356,7 +357,7 @@ WOK
(0..6).each { |x| @cont[x]=@level[x]=false }
(4..6).each { |x| @xml_contents_close[x]='' }
data.each do |para|
- para=SiSU_sem::Tags.new(para).rm.all
+ #para=SiSU_sem::Tags.new(para,@md).rm.all
wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17
para=tidywords(wordlist).join(' ').strip
para.gsub!(/<[-~]#>/,'')
@@ -546,10 +547,19 @@ WOK
include SiSU_Env
def initialize(data,md)
@data,@md=data,md
+ @trans=SiSU_XML_munge::Trans.new(@md)
end
def xml
@sisu=[]
@data.each do |para|
+ if para !~/^\s*(?:%+ |<:code>)/
+ if @md.sem_tag and para =~/[:;]\{|\}[:;]/
+ para=@trans.xml_semantic_tags(para)
+ end
+ if para =~/[:;]\{|\}[:;]/
+ para=SiSU_sem::Tags.new(para,@md).rm.all
+ end
+ end
para.gsub!(/\/\{(.+?)\}\//,'<italic>\1</italic>')
para.gsub!(/\*\{(.+?)\}\*/,'<bold>\1</bold>')
para.gsub!(/!\{(.+?)\}!/,'<bold>\1</bold>')
diff --git a/lib/sisu/v0/sst_to_s_xml_node.rb b/lib/sisu/v0/sst_to_s_xml_node.rb
index b6d7c9b2..a2656e3e 100644
--- a/lib/sisu/v0/sst_to_s_xml_node.rb
+++ b/lib/sisu/v0/sst_to_s_xml_node.rb
@@ -1,3 +1,4 @@
+# coding: utf-8
=begin
* Name: SiSU
@@ -399,7 +400,6 @@ WOK
@data=@data.join.split("\n\n")
@data=SiSU_document_structure::Code.new(@md,@data).code
@data.each do |para|
- para=SiSU_sem::Tags.new(para).rm.all
data << SiSU_document_structure::Structure.new(@md,para).structure
end
data=Syntax::Markup.new(@md,data).songsheet
@@ -416,6 +416,15 @@ WOK
obj.each do |o|
para=o.txt unless o.txt =~/^%% / #comments are lost, consider
if para
+ if para !~/^\s*(?:%+ |<:code>)/
+ if @md.sem_tag and para =~/[:;]\{|\}[:;]/
+ para=@trans.xml_semantic_tags(para)
+ end
+ if para =~/[:;]\{|\}[:;]/
+ para=SiSU_sem::Tags.new(para,@md).rm.all
+ end
+ end
+ para=SiSU_sem::Tags.new(para,@md).rm.all
para=@trans.markup_light(para)
@trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8
if para =~/\A(?:@|0~)(\S+?):?\s+(.+?)\Z/m # for headers
diff --git a/lib/sisu/v0/sst_to_s_xml_sax.rb b/lib/sisu/v0/sst_to_s_xml_sax.rb
index 5e4eb5ea..4625779d 100644
--- a/lib/sisu/v0/sst_to_s_xml_sax.rb
+++ b/lib/sisu/v0/sst_to_s_xml_sax.rb
@@ -1,3 +1,4 @@
+# coding: utf-8
=begin
* Name: SiSU
@@ -262,7 +263,14 @@ WOK
data << SiSU_document_structure::Structure.new(@md,para).structure
end
data.each do |para|
- para=SiSU_sem::Tags.new(para).rm.all
+ if para !~/^\s*(?:%+ |<:code>)/
+ if @md.sem_tag and para =~/[:;]\{|\}[:;]/
+ para=@trans.xml_semantic_tags(para)
+ end
+ if para =~/[:;]\{|\}[:;]/
+ para=SiSU_sem::Tags.new(para,@md).rm.all
+ end
+ end
para=@trans.markup_light(para)
@trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8
if para =~/\A(?:@|0~)(\S+?):?\s+(.+?)\Z/m # for headers