diff options
Diffstat (limited to 'lib/sisu/v1/shared_xml.rb')
-rw-r--r-- | lib/sisu/v1/shared_xml.rb | 739 |
1 files changed, 0 insertions, 739 deletions
# coding: utf-8
# lib/sisu/v1/shared_xml.rb
=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search

 * Author: Ralph Amissah

 * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
   <http://www.fsf.org/licensing/licenses/gpl.html>
   <http://www.gnu.org/licenses/gpl.html>

   <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:
   <http://www.jus.uio.no/sisu>
   <http://www.sisudoc.org>

 * Download:
   <http://www.jus.uio.no/sisu/SiSU/download.html>

 * Ralph Amissah
   <ralph@amissah.com>
   <ralph.amissah@gmail.com>

 ** Description: common file for xml generation

=end
module SiSU_text_parts
  require "#{SiSU_lib}/shared_structure"
  # Splits one paragraph object (@para) into its level/format marker and text,
  # and wraps the result in a SiSU_XML_format::Format_scroll.
  class Split_text_object < SiSU_Structure::Split_text_object
    require "#{SiSU_lib}/param"
    require "#{SiSU_lib}/xml_format"
    include SiSU_Viz
    include SiSU_XML_format
    @@alt_id_count=0
    @@dp=nil
    # Parses @para, setting @format and @text, then stores a Format_scroll in
    # @lev_para_ocn (built with @ocn when the paragraph ends in an object
    # citation marker, otherwise with ocn 0). Returns self.
    def lev_segname_para
      segname=nil
      if @para =~/^#{Mx[:lv_o]}(?:\d:|<:.+?>).+/
        # Heading forms: "level:segname", "level:" and "<:name>".
        # (Two further lazy variants of the first two patterns were removed:
        # anything they match is already matched by the greedy forms above.)
        if /^#{Mx[:lv_o]}([1-6]):(\S+?)#{Mx[:lv_c]}\s*(\S.+)/m.match(@para)
          @format,segname,@text=$1,$2,$3
        elsif /^#{Mx[:lv_o]}([1-6]):#{Mx[:lv_c]}\s*(\S.+)/m.match(@para)
          @format,@text=$1,$2
        elsif /<:(.+?)>\s*(\S.+?)/m.match(@para)
          @format,@text=$1,$2
        end
      else
        # NOTE(review): as shown, this capture is unanchored and lazy, so it
        # yields only the first character of @para; the pattern may have lost
        # markup in extraction - confirm against the repository. A second,
        # identical match guarded by the ocn pattern was redundant and dropped.
        if /(.+?)/m.match(@para)
          @text=$1
        end
        if /^(\d)~\S*\s+(.+)/m.match(@para)
          @format,@text=$1,$2
        end
      end
      # The ":" separator is appended even when there is no segment name.
      @format="#{@format}:#{segname}"
      has_ocn=(@para =~/.+#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#{@dp}:#{@dp}#{Mx[:id_c]}$/)
      t_o={:format=>@format,:txt=>@text,:ocn=>(has_ocn ? @ocn : 0)}
      @lev_para_ocn=SiSU_XML_format::Format_scroll.new(@md,t_o)
      self
    end
  end
end
module SiSU_XML_munge
  # Text transformations used when generating XML: character-reference
  # encoding, inline markup conversion and optional semantic tagging.
  class Trans
    require "#{SiSU_lib}/defaults"
    # Characters that #utf8 rewrites as numeric character references.
    NUMERIC_REFERENCE_CHARS=(
      '<>¢£¥§©ª«®°±²³µ¶¹º»¼½¾×÷¿' +
      'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞß' +
      'àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ' +
      '‘’“”–—∝∞™✠'
    ).freeze
    NUMERIC_REFERENCE_RX=Regexp.union(*NUMERIC_REFERENCE_CHARS.chars.to_a)
    # Non-breaking space, as an entity or as the raw character.
    NBSP_RX=/&nbsp;|\u00A0/
    # Character => entity name (without "&" and ";") used by #html.
    NAMED_ENTITY={
      '¢'=>'cent',   '£'=>'pound',  '¥'=>'yen',    '§'=>'sect',   '©'=>'copy',   'ª'=>'ordf',
      '«'=>'laquo',  '®'=>'reg',    '°'=>'deg',    '±'=>'plusmn', '²'=>'sup2',   '³'=>'sup3',
      'µ'=>'micro',  '¶'=>'para',   '¹'=>'sup1',   'º'=>'ordm',   '»'=>'raquo',  '¼'=>'frac14',
      '½'=>'frac12', '¾'=>'frac34', '×'=>'times',  '÷'=>'divide', '¿'=>'iquest',
      'À'=>'Agrave', 'Á'=>'Aacute', 'Â'=>'Acirc',  'Ã'=>'Atilde', 'Ä'=>'Auml',   'Å'=>'Aring',
      'Æ'=>'AElig',  'Ç'=>'Ccedil', 'È'=>'Egrave', 'É'=>'Eacute', 'Ê'=>'Ecirc',  'Ë'=>'Euml',
      'Ì'=>'Igrave', 'Í'=>'Iacute', 'Î'=>'Icirc',  'Ï'=>'Iuml',   'Ð'=>'ETH',    'Ñ'=>'Ntilde',
      'Ò'=>'Ograve', 'Ó'=>'Oacute', 'Ô'=>'Ocirc',  'Õ'=>'Otilde', 'Ö'=>'Ouml',   'Ø'=>'Oslash',
      'Ù'=>'Ugrave', 'Ú'=>'Uacute', 'Û'=>'Ucirc',  'Ü'=>'Uuml',   'Ý'=>'Yacute', 'Þ'=>'THORN',
      'ß'=>'szlig',
      'à'=>'agrave', 'á'=>'aacute', 'â'=>'acirc',  'ã'=>'atilde', 'ä'=>'auml',   'å'=>'aring',
      'æ'=>'aelig',  'ç'=>'ccedil', 'è'=>'egrave', 'é'=>'eacute', 'ê'=>'ecirc',  'ë'=>'euml',
      'ì'=>'igrave', 'í'=>'iacute', 'î'=>'icirc',  'ï'=>'iuml',   'ð'=>'eth',    'ñ'=>'ntilde',
      'ò'=>'ograve', 'ó'=>'oacute', 'ô'=>'ocirc',  'õ'=>'otilde', 'ö'=>'ouml',   'ø'=>'oslash',
      'ù'=>'ugrave', 'ú'=>'uacute', 'û'=>'ucirc',  'ü'=>'uuml',   'ý'=>'yacute', 'þ'=>'thorn',
      'ÿ'=>'yuml',
      '‘'=>'lsquo',  '’'=>'rsquo',  '“'=>'ldquo',  '”'=>'rdquo',  '–'=>'ndash',  '—'=>'mdash',
      '∝'=>'prop',   '∞'=>'infin',  '™'=>'trade',  '✠'=>'#10016',
    }.freeze
    NAMED_ENTITY_RX=Regexp.union(*NAMED_ENTITY.keys)
    # "name:{ ... }:name" paired semantic block.
    SEM_PAIRED_RX=/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m
    # Keys given their full tag name in paired (depth "many") blocks.
    SEM_MANY_KEYS=%w[au vol pub ref desc conv ct cty org dt n].freeze
    # Keys given their full tag name in ":{ ... }:key" (depth "one") blocks.
    SEM_ONE_KEYS=%w[au n ti ref desc cty org].freeze
    # [suffix pattern, tag key] for ";{ ... };key" (depth "zero") blocks.
    SEM_ZERO_KEYS=[
      ['ti',:ti],['qt',:qt],['ref',:ref],['ed',:ed],['v',:v],['desc',:desc],
      ['def',:def],['trans',:trans],['y',:y],['ab',:ab],['pg',:pg],
      ['fn?',:fn],['mn?',:mn],['ln?',:ln],['in',:in],['uni',:uni],['fac',:fac],
      ['inst',:inst],['dept',:dept],['org',:org],['com?',:com],['cty',:cty],
    ].freeze
    # md: document metadata object; must answer #fns and #sem_tag.
    def initialize(md)
      @md=md
      @sys=SiSU_Env::System_call.new
      @dir=SiSU_Env::Info_env.new(@md.fns)
      @dp=SiSU_Env::Info_env.new.digest.pattern
      @url_brace=SiSU_Viz::Skin.new.url_decoration
      if @md.sem_tag
        @ab ||=semantic_tags.default
      end
    end
    # Accessor kept for the call form `semantic_tags.default`; returns self.
    def semantic_tags
      self
    end
    # Returns the abbreviation => XML tag name table for semantic markup.
    # NOTE(review): 'abreviation' is spelled as in SOURCE; it is emitted as a
    # tag name, so it is left unchanged here.
    def default
      {
        :pub   => 'publication',
        :conv  => 'convention',
        :vol   => 'volume',
        :pg    => 'page',
        :cty   => 'city',
        :org   => 'organization',
        :uni   => 'university',
        :dept  => 'department',
        :fac   => 'faculty',
        :inst  => 'institute',
        :co    => 'company',
        :com   => 'company',
        :dt    => 'date',
        :y     => 'year',
        :m     => 'month',
        :d     => 'day',
        :ti    => 'title',
        :au    => 'author',
        :ed    => 'editor', #editor?
        :v     => 'version', #edition
        :n     => 'name',
        :fn    => 'firstname',
        :mn    => 'middlename',
        :ln    => 'lastname',
        :in    => 'initials',
        :qt    => 'quote',
        :ct    => 'cite',
        :ref   => 'reference',
        :ab    => 'abreviation',
        :def   => 'define',
        :desc  => 'description',
        :trans => 'translate',
      }
    end
    # Accessor kept for the call forms `char_enc.utf8` / `char_enc.html`;
    # returns self.
    def char_enc #character encode
      self
    end
    # When the system locale is UTF-8, rewrites (in place) the characters in
    # NUMERIC_REFERENCE_CHARS and non-breaking spaces as numeric character
    # references. Returns para.
    # NOTE(review): SOURCE renders each substitution with identical pattern
    # and replacement; numeric references ("&#N;", per the template left in
    # the original comments) are assumed to be the intended output - confirm.
    # Each character now gets its own code point (ù..ü previously received the
    # next character's reference) and ý is included.
    def utf8(para='')
      if @sys.locale =~/utf-?8/i
        para.gsub!(NUMERIC_REFERENCE_RX) {|ch| "&##{ch.ord};" }
        para.gsub!(NBSP_RX,'&#160;') # space identify
      end
      para
    end
    # Normalizes non-breaking spaces to "&#160;"; when the locale is not
    # UTF-8 also rewrites (in place) the characters in NAMED_ENTITY as named
    # entities. Returns para.
    # NOTE(review): entity names are assumed from the character set listed in
    # SOURCE; é, ê, í and the single quotes previously mapped to wrong or
    # invalid entities.
    def html(para='')
      unless @sys.locale =~/utf-?8/i
        para.gsub!(NAMED_ENTITY_RX) {|ch| "&#{NAMED_ENTITY[ch]};" }
      end
      para.gsub!(NBSP_RX,'&#160;') # space identify
      para
    end
    # Escapes bare ampersands in each word (in place) and returns wordlist.
    # Words that already contain an entity are only fixed for "&" followed by
    # a capital letter.
    def tidywords(wordlist)
      wordlist.each do |x|
        #imperfect solution will not catch all possible cases
        x.gsub!(/&/,'&amp;') unless x =~/&\S+;/
        x.gsub!(/&([A-Z])/,'&amp;\1')
      end
    end
    # Converts SiSU inline markup in para to XML and returns the new string
    # (whitespace between words is normalized to single spaces).
    def markup(para='')
      wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
      para=tidywords(wordlist).join(' ').strip
      para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'<br />')
      para.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'')
      para.gsub!(/#{Mx[:mk_o]}#([a-zA-Z]+)#{Mx[:mk_c]}/,'&\1;')
      para.gsub!(/#{Mx[:mk_o]}(#[0-9]+)#{Mx[:mk_c]}/,'&\1;')
      # NOTE(review): free-standing angle brackets are assumed to be escaped
      # here; SOURCE renders pattern and replacement identically.
      para.gsub!(/(^|#{Mx[:gl_c]}|\s+)<\s+/,'\1&lt; ')
      para.gsub!(/\s+>(\s+|$)/,' &gt;\1')
      #para.gsub!(/#{Mx[:fa_emphasis_o]}(.+?)#{Mx[:fa_emphasis_c]}/,'<em>\1</em>') #reinstate
      para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'<b>\1</b>')
      para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'<i>\1</i>')
      para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'<u>\1</u>')
      para.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'<sup>\1</sup>')
      para.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'<sub>\1</sub>')
      para.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'<ins>\1</ins>')
      para.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'<cite>\1</cite>')
      para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'<del>\1</del>')
      para.gsub!(/<:pb>\s*/,'') #Fix
      para.gsub!(/<+[-~]#>+/,'')
      para.gsub!(/#{Mx[:id_o]}0;\w\d+;[um]\d+#{Mx[:id_c]}#{Mx[:id_o]}#{@dp}:#{@dp}#{Mx[:id_c]}/,'')
      if para !~/^#{Mx[:gr_o]}code#{Mx[:gr_c]}/
        # (the bold/italics/underscore/strike/line-break substitutions that
        # were repeated here had nothing left to match and were removed)
        para.gsub!(/#{Mx[:br_page]}\s*/,'')
        para.gsub!(/#{Mx[:br_page_new]}\s*/,'')
        para.gsub!(/#{Mx[:pa_non_object_no_heading]}|#{Mx[:pa_non_object_dummy_heading]}/,'')
        # images, with and without explicit dimensions, linked or plain
        para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?#{Mx[:lnk_c]}(https?:\/\/\S+)/,
          %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\1" width="\\2" height="\\3" />[\\1] \\4})
        para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?#{Mx[:lnk_c]}(https?:\/\/\S+)/,
          %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\1"/>\\1})
        para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?#{Mx[:lnk_c]}image/,
          %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\1" width="\\2" height="\\3" />[\\1] \\4})
        para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?#{Mx[:lnk_c]}image/,
          %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\1"/>\\1})
        # named links, bare urls, then escaped urls
        para.gsub!(/(^|#{Mx[:gl_c]}|\s)#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}(https?:\/\/[^"><]+?)([,.:;"><]?(?=\s|$))/,
          '\1<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\3">\2</link>\4') #watch, compare html_tune
        para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,
          %{\\1#{@url_brace.xml_open}<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\\2">\\2</link>#{@url_brace.xml_close}\\3})
        para.gsub!(/\b[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,
          '<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\1">\1</link>\2') #escaped urls not linked, deal with later
      else
        #code-block: angle brackets special characters
        # NOTE(review): "_<" / "_>" are assumed to become escaped brackets;
        # SOURCE renders the replacement as a literal bracket - confirm.
        para.gsub!(/(^|[^}])_</m,'\1&lt;')
        para.gsub!(/(^|[^}])_>/m,'\1&gt;')
      end
      para.gsub!(/&nbsp;|#{Mx[:nbsp]}/m,'&#160;')
      para
    end
    # Lightweight conversion of "x{ ... }x" style inline markup; mutates para
    # for the substitutions and returns the tidied string.
    def markup_light(para='')
      light_markup(para,false)
    end
    # As #markup_light, but first reduces footnotes to "[n]" and also strips
    # remaining "<:...>" markers.
    def markup_fictionbook(para='')
      para.gsub!(/~\{([\d*+]+).+?\}~/,'[\1]')
      light_markup(para,true)
    end
    # Escapes every angle bracket in para (in place), then restores the
    # whitelisted tags: br, link and en. Returns para.
    # NOTE(review): the escape/restore pairing is inferred from the sequence
    # of substitutions; SOURCE renders them with identical pattern and
    # replacement.
    def markup_group(para='')
      para.gsub!(/</,'&lt;')
      para.gsub!(/>/,'&gt;')
      para.gsub!(/&lt;:?br(?:\s+\/)?&gt;/,'<br />')
      para.gsub!(/&lt;(link xmlns:xlink=".+?")&gt;/,'<\1>')
      para.gsub!(/&lt;(\/link)&gt;/,'<\1>')
      para.gsub!(/&lt;(\/?en)&gt;/,'<\1>')
      para
    end
    # Rewrites one paired semantic block "key:{ ... }:key" (colon, depth
    # "many") as a <sem:...> element and returns the rewritten string.
    # Always returns matched: the method is used as a gsub! block, and
    # returning the nil of a final non-matching gsub! erased the block.
    def xml_sem_block_paired(matched) # colon depth: many, recurs
      SEM_MANY_KEYS.each do |key|
        matched.gsub!(/\b(#{key}):\{(.+?)\}:\1\b/m,sem_element(@ab[key.to_sym],'many','\2'))
      end
      matched.gsub!(SEM_PAIRED_RX,'<sem:\1 depth="many">\2</sem:\1>')
      matched
    end
    # Applies semantic tagging to para (in place) when the document enables
    # it (@md.sem_tag); returns para either way.
    def xml_semantic_tags(para)
      if @md.sem_tag
        # three passes, to reach nested paired blocks
        3.times do
          para.gsub!(SEM_PAIRED_RX) {|c| xml_sem_block_paired(c) }
        end
        #colon one / single / flat / shallow
        SEM_ONE_KEYS.each do |key|
          para.gsub!(/:\{(.+?)\}:#{key}\b/m,sem_element(@ab[key.to_sym],'one','\1'))
        end
        para.gsub!(/:\{(.+?)\}:([a-z]+(?:[_:.][a-z]+)*)/m,'<sem:\2 depth="one">\1</sem:\2>')
        #semicolon zero / none
        SEM_ZERO_KEYS.each do |suffix,key|
          para.gsub!(/;\{([^}]+(?![;]))\};#{suffix}\b/m,sem_element(@ab[key],'zero','\1'))
        end
        para.gsub!(/;\{([^}]+(?![;]))\};([a-z]+(?:[_:.][a-z]+)*)/m,'<sem:\2 depth="zero">\1</sem:\2>')
      end
      para
    end
    private
    # Replacement template for one semantic element; content_ref is the
    # back-reference ('\1' or '\2') holding the element content.
    def sem_element(tag,depth,content_ref)
      %{<sem:#{tag} depth="#{depth}">#{content_ref}</sem:#{tag}>}
    end
    # Shared body of #markup_light and #markup_fictionbook.
    def light_markup(para,strip_markers)
      para.gsub!(/\/\{(.+?)\}\//,'<i>\1</i>')
      para.gsub!(/[*!]\{(.+?)\}[*!]/,'<b>\1</b>')
      para.gsub!(/_\{(.+?)\}_/,'<u>\1</u>')
      para.gsub!(/-\{(.+?)\}-/,'<del>\1</del>')
      para.gsub!(/<br(\s*\/)?>/,'<br />')
      para.gsub!(/<:pb>\s*/,'')
      para.gsub!(/<[-~]#>/,'')
      if strip_markers
        para.gsub!(/<:\S+?>/,'') #temporary
        para.gsub!(/<[-~]#>/,'')
      end
      # NOTE(review): the next two substitutions are reproduced as rendered
      # in SOURCE (the second changes nothing); the intended entity
      # replacement cannot be determined from here.
      para.gsub!(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1& ') #sort
      para.gsub!(/&([^;]{1,5})/,'&\1') #sort, rough estimate, revisit #WATCH found in node not sax
      para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\/\/\S+|image)/,
        "<image.path>#{@dir.url.images_local}/\\1</image.path>")
      para.gsub!(/&nbsp;|#{Mx[:nbsp]}/,'&#160;')
      wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
      tidywords(wordlist).join(' ').strip
    end
  end
end
module SiSU_XML_tags #Format
  require "#{SiSU_lib}/param"
  include SiSU_Param
  include SiSU_Viz
  # Builds Dublin Core <meta> tags and processing-information comments from
  # document metadata.
  class RDF
    # Instance variables interpolated by #metatag_html; start out empty.
    META_FIELDS=%w[
      full_title subtitle author subject description publisher contributor
      date date_created date_issued date_available date_valid date_modified
      type format identifier source language relation coverage rights
      copyright owner keywords
    ].freeze
    # Dublin Core fields whose content has double quotes replaced.
    CLEANED_DC_FIELDS=%w[
      author subject description publisher contributor type format
      identifier source relation coverage rights
    ].freeze
    # Dublin Core date fields; each has a matching "<field>_scheme" on md.
    DATE_DC_FIELDS=%w[
      date date_created date_issued date_available date_valid date_modified
    ].freeze
    # md: document metadata object; seg_name: prefix for the rdf title;
    # tracker: unused, kept for interface compatibility.
    def initialize(md='',seg_name=[],tracker=0)
      # each field gets its own empty string (previously one shared object)
      META_FIELDS.each {|field| instance_variable_set("@#{field}",'') }
      @md=md
      @rdfurl=%{ rdf:about="http://www.jus.uio.no/lm/toc"\n}
      if @md.full_title # DublinCore 1 - title
        # Array(..).join keeps the default (empty) seg_name from appearing as "[]"
        @rdf_title=%{ dc.title="#{Array(seg_name).join}#{@md.full_title}"\n}
        @full_title=%{ <meta name="dc.title" content="#{@md.full_title}" />\n}
      end
      # DublinCore 2-6, 8-11, 13-15
      CLEANED_DC_FIELDS.each do |field|
        value=@md.public_send(field)
        next unless value
        instance_variable_set("@rdf_#{field}",%{ dc.#{field}="#{value}"\n})
        instance_variable_set("@#{field}",%{ <meta name="dc.#{field}" content="#{meta_content_clean(value)}" />\n})
      end
      # DublinCore 7 - dates, year-mm-dd
      DATE_DC_FIELDS.each do |field|
        value=@md.public_send(field)
        next unless value
        dc_name="dc.#{field.tr('_','.')}"
        scheme=@md.public_send("#{field}_scheme")
        instance_variable_set("@rdf_#{field}",%{ #{dc_name}="#{value}"\n})
        instance_variable_set("@#{field}",%{ <meta name="#{dc_name}" content="#{value}" #{scheme} />\n})
      end
      if @md.language \
      and @md.language[:name] # DublinCore 12 - language (English)
        @rdf_language=%{ dc.language="#{@md.language[:name]}"\n}
        @language=%{ <meta name="dc.language" content="#{@md.language[:name]}" />\n}
      end
      if @md.language_original \
      and @md.language_original[:name]
        @rdf_language_original=%{ dc.language="#{@md.language_original[:name]}"\n}
        @language_original=%{ <meta name="dc.language" content="#{@md.language_original[:name]}" />\n}
      end
      if @md.keywords
        @keywords=%{ <meta name="keywords" content="#{meta_content_clean(@md.keywords)}" />\n}
      end
      @vz=SiSU_Env::Get_init.instance.skin
    end
    # Returns content with double quotes replaced by single quotes so it can
    # sit inside a double-quoted attribute; nil is returned unchanged. The
    # argument itself is no longer modified.
    def meta_content_clean(content='')
      content.nil? ? content : content.tr('"',"'")
    end
    # Placeholder for the RDF description block (toc head); the XML-only
    # output is disabled, so this returns nil.
    def rdftoc #tocHead
      #<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
      #  xmlns:dc="http://purl.org/dc/elements/1.1/">
      #  <rdf:Description rdf:about="http://www.jus.uio.no/lm/doc"
      #    dc:creator="Author"
      #    dc:title="Title"
      #    dc:description="Description if any"
      #    dc:date="Publication Date"
      #  />
      #</rdf:RDF>
      #Dublin Core
      #### XML only :-( KEEP
      # (values strung together, because some are empty, and the resulting
      # line breaks are much better)
      # <rdf:Description
      #   @rdfurl @rdf_title @rdf_subtitle @rdf_creator @rdf_subject
      #   @rdf_description @rdf_publisher @rdf_contributor @rdf_date
      #   @rdf_dateCreated @rdf_dateIssued @rdf_dateAvailable @rdf_dateValid
      #   @rdf_dateModified @rdf_type @rdf_format @rdf_identifier @rdf_source
      #   @rdf_language @rdf_relation @rdf_coverage @rdf_rights
      # />
    end
    # Segment head; same as #rdftoc.
    def rdfseg #segHead
      rdftoc
    end
    # Returns an XML comment with processing information (generator, Ruby
    # version, source file details, generation time). A non-empty extra is
    # listed first.
    def comment_xml(extra='')
      generator="Generated by: #{@md.sisu_version[:project]} #{@md.sisu_version[:version]} of #{@md.sisu_version[:date_stamp]} (#{@md.sisu_version[:date]})" if @md.sisu_version[:version]
      lastdone="Last Generated on: #{Time.now}"
      rubyv="Ruby version: #{@md.ruby_version}"
      sc=if @md.sc_info
        "Source file: #{@md.sc_filename} version: #{@md.sc_number} of: #{@md.sc_date}"
      else ''
      end
      items=[generator,rubyv,sc,lastdone,'SiSU http://www.jus.uio.no/sisu']
      items.unshift(extra) unless extra.empty?
      "<!-- Document processing information:\n" +
        items.map {|item| " * #{item}\n" }.join +
        "-->\n"
    end
    # Processing comment labelled for the SAX representation.
    def comment_xml_sax
      comment_xml('SiSU XML, SAX type representation')
    end
    # Processing comment labelled for the Node representation.
    def comment_xml_node
      comment_xml('SiSU XML, Node type representation')
    end
    # Processing comment labelled for the DOM representation.
    def comment_xml_dom
      comment_xml('SiSU XML, DOM type representation')
    end
    # Returns the collected <meta> tags followed by the skin's generator and
    # icon lines. Values are strung together on one line because some are
    # empty, which gives much better line breaks in the output.
    def metatag_html
      # (@vz.js_head is not emitted; it was a commented-out line)
<<WOK
#{@full_title}#{@subtitle}#{@author}#{@subject}#{@description}#{@publisher}#{@contributor}#{@date}#{@date_created}#{@date_issued}#{@date_available}#{@date_valid}#{@date_modified}#{@type}#{@format}#{@identifier}#{@source}#{@language}#{@relation}#{@coverage}#{@rights}#{@copyright}#{@owner}
#{@vz.txt_generator}
#{@vz.png_ico}
WOK
    end
  end
end
module SiSU_Tables
  require "#{SiSU_lib}/xml_tables"
end
__END__