diff options
author | Ralph Amissah <ralph@amissah.com> | 2009-07-04 11:57:29 -0400 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2009-07-04 11:57:29 -0400 |
commit | 7372f56054259457f77c64cbdb34e736531cfc0e (patch) | |
tree | e46b3ff01bd379cfb476dc8333b397765aef9681 /lib/sisu/v1/shared_xml.rb | |
parent | changelog, update (diff) |
move lib to version 1 directory, (lib/sisu/v1) and make related changes
Diffstat (limited to 'lib/sisu/v1/shared_xml.rb')
-rw-r--r-- | lib/sisu/v1/shared_xml.rb | 739 |
1 files changed, 739 insertions, 0 deletions
diff --git a/lib/sisu/v1/shared_xml.rb b/lib/sisu/v1/shared_xml.rb new file mode 100644 index 00000000..05161c41 --- /dev/null +++ b/lib/sisu/v1/shared_xml.rb @@ -0,0 +1,739 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2009 Ralph Amissah All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see <http://www.gnu.org/licenses/>. + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + <http://www.fsf.org/licensing/licenses/gpl.html> + <http://www.gnu.org/licenses/gpl.html> + + <http://www.jus.uio.no/sisu/gpl.fsf/toc.html> + <http://www.jus.uio.no/sisu/gpl.fsf/doc.html> + <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt> + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + <http://www.jus.uio.no/sisu> + <http://www.sisudoc.org> + + * Download: + <http://www.jus.uio.no/sisu/SiSU/download.html> + + * Ralph Amissah + <ralph@amissah.com> + <ralph.amissah@gmail.com> + + ** Description: common file for xml generation + +=end +module SiSU_text_parts + require "#{SiSU_lib}/shared_structure" + class Split_text_object < SiSU_Structure::Split_text_object + require "#{SiSU_lib}/param" + require "#{SiSU_lib}/xml_format" + include SiSU_Viz + include SiSU_XML_format + @@alt_id_count=0 + @@dp=nil + def lev_segname_para + if @para =~/^#{Mx[:lv_o]}(?:\d:|<:.+?>).+/ + if /^#{Mx[:lv_o]}([1-6]):(\S+?)#{Mx[:lv_c]}\s*(\S.+)/m.match(@para) + @format,segname,@text=$1,$2,$3 + elsif /^#{Mx[:lv_o]}([1-6]):#{Mx[:lv_c]}\s*(\S.+)/m.match(@para) + @format,@text=$1,$2 + elsif /<:(.+?)>\s*(\S.+?)/m.match(@para) + @format,@text=$1,$2 + elsif /^#{Mx[:lv_o]}([1-6]):(\S+?)#{Mx[:lv_c]}\s*(\S.+?)/m.match(@para) + @@alt_id_count+=1 + @format,segname,@text=$1,$2,$3 + #@format="#@format:#{segname}" # + elsif /^#{Mx[:lv_o]}([1-6]):#{Mx[:lv_c]}\s*(\S.+?)/m.match(@para) + @@alt_id_count+=1 + @format,@text=$1,$2 + end + else + if /(.+?)/m.match(@para) + @text=$1 + end + if @para !~/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$|^$/ #added 2002w06 + @text=/(.+?)/m.match(@para)[1] + end + if /^(\d)~\S*\s+(.+)/m.match(@para) + @format,@text=$1,$2 + end + end + @format="#@format:#{segname}" # +#follow this search beneath for heading_body1-6 + @lev_para_ocn=if @para =~/.+#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ + t_o={:format=>@format,:txt=>@text,:ocn=>@ocn} #(@format,@text,@ocn) + SiSU_XML_format::Format_scroll.new(@md,t_o) + else + t_o={:format=>@format,:txt=>@text,:ocn=>0} #(@format,@text,@ocn) + SiSU_XML_format::Format_scroll.new(@md,t_o) + #SiSU_XML_format::Format_scroll.new(@md,@format,@text,"#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}") + end + self + end + end +end +module SiSU_XML_munge + class Trans + require "#{SiSU_lib}/defaults" + def initialize(md) + @md=md + @sys=SiSU_Env::System_call.new + @dir=SiSU_Env::Info_env.new(@md.fns) + @dp=SiSU_Env::Info_env.new.digest.pattern + @url_brace=SiSU_Viz::Skin.new.url_decoration + if @md.sem_tag + #@ab ||=SiSU_Viz::Skin.new.semantic_tags.default + @ab ||=semantic_tags.default + end + end + def semantic_tags + def default + { + :pub => 'publication', + :conv => 'convention', + :vol => 'volume', + :pg => 'page', + :cty => 'city', + :org => 'organization', + :uni => 'university', + :dept => 'department', + :fac => 'faculty', + :inst => 'institute', + :co => 'company', + :com => 'company', + :conv => 'convention', + :dt => 'date', + :y => 'year', + :m => 'month', + :d => 'day', + :ti => 'title', + :au => 'author', + :ed => 'editor', #editor? + :v => 'version', #edition + :n => 'name', + :fn => 'firstname', + :mn => 'middlename', + :ln => 'lastname', + :in => 'initials', + :qt => 'quote', + :ct => 'cite', + :ref => 'reference', + :ab => 'abreviation', + :def => 'define', + :desc => 'description', + :trans => 'translate', + } + end + self + end + def char_enc #character encode + def utf8(para='') + if @sys.locale =~/utf-?8/i # instead ucs for utf8 #require 'iconv' ? Iñtërnâtiônàlizætiøn + #¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûü + #¢£¥§©ª«®°±²³µ¶¹º»¼½¾×÷ + ##para.gsub!(//, '&#;') + ##para.gsub!(//, '&;') + para.gsub!(/</u, '<') # '<' # < + para.gsub!(/>/u, '>') # '>' # > + para.gsub!(/¢/u, '¢') # '¢' # ¢ + para.gsub!(/£/u, '£') # '£' # £ + para.gsub!(/¥/u, '¥') # '¥' # ¥ + para.gsub!(/§/u, '§') # '§' # § + para.gsub!(/©/u, '©') # '©' # © + para.gsub!(/ª/u, 'ª') # 'ª' # ª + para.gsub!(/«/u, '«') # '«' # « + para.gsub!(/®/u, '®') # '®' # ® + para.gsub!(/°/u, '°') # '°' # ° + para.gsub!(/±/u, '±') # '±' # ± + para.gsub!(/²/u, '²') # '²' # ² + para.gsub!(/³/u, '³') # '³' # ³ + para.gsub!(/µ/u, 'µ') # 'µ' # µ + para.gsub!(/¶/u, '¶') # '¶' # ¶ + para.gsub!(/¹/u, '¹') # '¹' # ¹ + para.gsub!(/º/u, 'º') # 'º' # º + para.gsub!(/»/u, '»') # '»' # » + para.gsub!(/¼/u, '¼') # '¼' # ¼ + para.gsub!(/½/u, '½') # '½' # ½ + para.gsub!(/¾/u, '¾') # '¾' # ¾ + para.gsub!(/×/u, '×') # '×' # × + para.gsub!(/÷/u, '÷') # '÷' # ÷ + para.gsub!(/¿/u, '¿') # '¿' # ¿ + para.gsub!(/À/u, 'À') # 'À' # À + para.gsub!(/Á/u, 'Á') # 'Á' # Á + para.gsub!(/Â/u, 'Â') # 'Â' #  + para.gsub!(/Ã/u, 'Ã') # 'Ã' # à + para.gsub!(/Ä/u, 'Ä') # 'Ä' # Ä + para.gsub!(/Å/u, 'Å') # 'Å' # Å + para.gsub!(/Æ/u, 'Æ') # 'Æ' # Æ + para.gsub!(/Ç/u, 'Ç') # 'Ç' # Ç + para.gsub!(/È/u, 'È') # 'È' # È + para.gsub!(/É/u, 'É') # 'É' # É + para.gsub!(/Ê/u, 'Ê') # 'Ê' # Ê + para.gsub!(/Ë/u, 'Ë') # 'Ë' # Ë + para.gsub!(/Ì/u, 'Ì') # 'Ì' # Ì + para.gsub!(/Í/u, 'Í') # 'Í' # Í + para.gsub!(/Î/u, 'Î') # 'Î' # Î + para.gsub!(/Ï/u, 'Ï') # 'Ï' # Ï + para.gsub!(/Ð/u, 'Ð') # 'Ð' # Ð + para.gsub!(/Ñ/u, 'Ñ') # 'Ñ' # Ñ + para.gsub!(/Ò/u, 'Ò') # 'Ò' # Ò + para.gsub!(/Ó/u, 'Ó') # 'Ó' # Ó + para.gsub!(/Ô/u, 'Ô') # 'Ô' # Ô + para.gsub!(/Õ/u, 'Õ') # 'Õ' # Õ + para.gsub!(/Ö/u, 'Ö') # 'Ö' # Ö + para.gsub!(/Ø/u, 'Ø') # 'Ø' # Ø + para.gsub!(/Ù/u, 'Ù') # 'Ù' # Ù + para.gsub!(/Ú/u, 'Ú') # 'Ú' # Ú + para.gsub!(/Û/u, 'Û') # 'Û' # Û + para.gsub!(/Ü/u, 'Ü') # 'Ü' # Ü + para.gsub!(/Ý/u, 'Ý') # 'Ý' # Ý + para.gsub!(/Þ/u, 'Þ') # 'Þ' # Þ + para.gsub!(/ß/u, 'ß') # 'ß' # ß + para.gsub!(/à/u, 'à') # 'à' # à + para.gsub!(/á/u, 'á') # 'á' # á + para.gsub!(/â/u, 'â') # 'â' # â + para.gsub!(/ã/u, 'ã') # 'ã' # ã + para.gsub!(/ä/u, 'ä') # 'ä' # ä + para.gsub!(/å/u, 'å') # 'å' # å + para.gsub!(/æ/u, 'æ') # 'æ' # æ + para.gsub!(/ç/u, 'ç') # 'ç' # ç + para.gsub!(/è/u, 'è') # 'è' # è + para.gsub!(/é/u, 'é') # '´' # é + para.gsub!(/ê/u, 'ê') # 'ˆ' # ê + para.gsub!(/ë/u, 'ë') # 'ë' # ë + para.gsub!(/ì/u, 'ì') # 'ì' # ì + para.gsub!(/í/u, 'í') # '´' # í + para.gsub!(/î/u, 'î') # 'î' # î + para.gsub!(/ï/u, 'ï') # 'ï' # ï + para.gsub!(/ð/u, 'ð') # 'ð' # ð + para.gsub!(/ñ/u, 'ñ') # 'ñ' # ñ + para.gsub!(/ò/u, 'ò') # 'ò' # ò + para.gsub!(/ó/u, 'ó') # 'ó' # ó + para.gsub!(/ô/u, 'ô') # 'ô' # ô + para.gsub!(/õ/u, 'õ') # 'õ' # õ + para.gsub!(/ö/u, 'ö') # 'ö' # ö + para.gsub!(/ø/u, 'ø') # 'ø' # ø + para.gsub!(/ù/u, 'ú') # 'ù' # ú + para.gsub!(/ú/u, 'û') # 'ú' # û + para.gsub!(/û/u, 'ü') # 'û' # ü + para.gsub!(/ü/u, 'ý') # 'ü' # ý + para.gsub!(/þ/u, 'þ') # 'þ' # þ + para.gsub!(/ÿ/u, 'ÿ') # 'ÿ' # ÿ + para.gsub!(/‘/u, '‘') # '‘' # ‘ + para.gsub!(/’/u, '’') # '’' # ’ + para.gsub!(/“/u, '“') # “ # “ + para.gsub!(/”/u, '”') # ” # ” + para.gsub!(/–/u, '–') # – # – + para.gsub!(/—/u, '—') # — # — + para.gsub!(/∝/u, '∝') # ∝ # ∝ + para.gsub!(/∞/u, '∞') # ∞ # ∞ + para.gsub!(/™/u, '™') # ™ # ™ + para.gsub!(/✠/u, '✠') # ✗ # ✠ + para.gsub!(/ /u, ' ') # space identify + para.gsub!(/ /u, ' ') # space identify + end + end + def html(para='') + if @sys.locale =~/utf-?8/i # instead ucs for utf8 #require 'iconv' ? Iñtërnâtiônàlizætiøn + para.gsub!(/ /u, ' ') # space identify + para.gsub!(/ /u, ' ') # space identify + else + para.gsub!(/¢/u, '¢') # ¢ + para.gsub!(/£/u, '£') # £ + para.gsub!(/¥/u, '¥') # ¥ + para.gsub!(/§/u, '§') # § + para.gsub!(/©/u, '©') # © + para.gsub!(/ª/u, 'ª') # ª + para.gsub!(/«/u, '«') # « + para.gsub!(/®/u, '®') # ® + para.gsub!(/°/u, '°') # ° + para.gsub!(/±/u, '±') # ± + para.gsub!(/²/u, '²') # ² + para.gsub!(/³/u, '³') # ³ + para.gsub!(/µ/u, 'µ') # µ + para.gsub!(/¶/u, '¶') # ¶ + para.gsub!(/¹/u, '¹') # ¹ + para.gsub!(/º/u, 'º') # º + para.gsub!(/»/u, '»') # » + para.gsub!(/¼/u, '¼') # ¼ + para.gsub!(/½/u, '½') # ½ + para.gsub!(/¾/u, '¾') # ¾ + para.gsub!(/×/u, '×') # × + para.gsub!(/÷/u, '÷') # ÷ + para.gsub!(/¿/u, '¿') # ¿ + para.gsub!(/À/u, 'À') # À + para.gsub!(/Á/u, 'Á') # Á + para.gsub!(/Â/u, 'Â') #  + para.gsub!(/Ã/u, 'Ã') # à + para.gsub!(/Ä/u, 'Ä') # Ä + para.gsub!(/Å/u, 'Å') # Å + para.gsub!(/Æ/u, 'Æ') # Æ + para.gsub!(/Ç/u, 'Ç') # Ç + para.gsub!(/È/u, 'È') # È + para.gsub!(/É/u, 'É') # É + para.gsub!(/Ê/u, 'Ê') # Ê + para.gsub!(/Ë/u, 'Ë') # Ë + para.gsub!(/Ì/u, 'Ì') # Ì + para.gsub!(/Í/u, 'Í') # Í + para.gsub!(/Î/u, 'Î') # Î + para.gsub!(/Ï/u, 'Ï') # Ï + para.gsub!(/Ð/u, 'Ð') # Ð + para.gsub!(/Ñ/u, 'Ñ') # Ñ + para.gsub!(/Ò/u, 'Ò') # Ò + para.gsub!(/Ó/u, 'Ó') # Ó + para.gsub!(/Ô/u, 'Ô') # Ô + para.gsub!(/Õ/u, 'Õ') # Õ + para.gsub!(/Ö/u, 'Ö') # Ö + para.gsub!(/Ø/u, 'Ø') # Ø + para.gsub!(/Ù/u, 'Ù') # Ù + para.gsub!(/Ú/u, 'Ú') # Ú + para.gsub!(/Û/u, 'Û') # Û + para.gsub!(/Ü/u, 'Ü') # Ü + para.gsub!(/Ý/u, 'Ý') # Ý + para.gsub!(/Þ/u, 'Þ') # Þ + para.gsub!(/ß/u, 'ß') # ß + para.gsub!(/à/u, 'à') # à + para.gsub!(/á/u, 'á') # á + para.gsub!(/â/u, 'â') # â + para.gsub!(/ã/u, 'ã') # ã + para.gsub!(/ä/u, 'ä') # ä + para.gsub!(/å/u, 'å') # å + para.gsub!(/æ/u, 'æ') # æ + para.gsub!(/ç/u, 'ç') # ç + para.gsub!(/è/u, 'è') # è + para.gsub!(/é/u, '´') # é + para.gsub!(/ê/u, 'ˆ') # ê + para.gsub!(/ë/u, 'ë') # ë + para.gsub!(/ì/u, 'ì') # ì + para.gsub!(/í/u, '´') # í + para.gsub!(/î/u, 'î') # î + para.gsub!(/ï/u, 'ï') # ï + para.gsub!(/ð/u, 'ð') # ð + para.gsub!(/ñ/u, 'ñ') # ñ + para.gsub!(/ò/u, 'ò') # ò + para.gsub!(/ó/u, 'ó') # ó + para.gsub!(/ô/u, 'ô') # ô + para.gsub!(/õ/u, 'õ') # õ + para.gsub!(/ö/u, 'ö') # ö + para.gsub!(/ø/u, 'ø') # ø + para.gsub!(/ù/u, 'ù') # ú + para.gsub!(/ú/u, 'ú') # û + para.gsub!(/û/u, 'û') # ü + para.gsub!(/ü/u, 'ü') # ý + para.gsub!(/þ/u, 'þ') # þ + para.gsub!(/ÿ/u, 'ÿ') # ÿ + para.gsub!(/‘/u, '&#lsquo;') # ‘ # ‘ + para.gsub!(/’/u, '&#rsquo;') # ’ # ’ + para.gsub!(/“/u, '“') # “ # “ + para.gsub!(/”/u, '”') # ” # ” + para.gsub!(/–/u, '–') # – # – + para.gsub!(/—/u, '—') # — # — + para.gsub!(/∝/u, '∝') # ∝ # ∝ + para.gsub!(/∞/u, '∞') # ∞ # ∞ + para.gsub!(/™/u, '™') # ™ # ™ + para.gsub!(/✠/u, '✠') # ✠ + #para.gsub!(/✠/u, '†') # † # † incorrect replacement † + para.gsub!(/ /u, ' ') # space identify + para.gsub!(/ /u, ' ') # space identify + end + end + self + end + def tidywords(wordlist) + wordlist.each do |x| + #imperfect solution will not catch all possible cases + x.gsub!(/&/,'&') unless x =~/&\S+;/ + x.gsub!(/&([A-Z])/,'&\1') + end + end + def markup(para='') + wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17 + para=tidywords(wordlist).join(' ').strip + para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'<br />') + para.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'') + para.gsub!(/#{Mx[:mk_o]}#([a-zA-Z]+)#{Mx[:mk_c]}/,'&\1;') + para.gsub!(/#{Mx[:mk_o]}(#[0-9]+)#{Mx[:mk_c]}/,'&\1;') + para.gsub!(/(^|#{Mx[:gl_c]}|\s+)<\s+/,'\1< '); para.gsub!(/\s+>(\s+|$)/,' >\1') + #para.gsub!(/#{Mx[:fa_emphasis_o]}(.+?)#{Mx[:fa_emphasis_c]}/,'<em>\1</em>') #reinstate + para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'<b>\1</b>') + para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'<i>\1</i>') + para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'<u>\1</u>') + para.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'<sup>\1</sup>') + para.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'<sub>\1</sub>') + para.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'<ins>\1</ins>') + para.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'<cite>\1</cite>') + para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'<del>\1</del>') + para.gsub!(/<:pb>\s*/,'') #Fix + para.gsub!(/<+[-~]#>+/,'') + para.gsub!(/#{Mx[:id_o]}0;\w\d+;[um]\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}/,'') + if para !~/^#{Mx[:gr_o]}code#{Mx[:gr_c]}/ + #embeds a red-bullet image --> + para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'<b>\1</b>') + para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'<i>\1</i>') + para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'<u>\1</u>') + para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'<del>\1</del>') + para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'<br />') + para.gsub!(/#{Mx[:br_page]}\s*/,'') + para.gsub!(/#{Mx[:br_page_new]}\s*/,'') + para.gsub!(/#{Mx[:pa_non_object_no_heading]}|#{Mx[:pa_non_object_dummy_heading]}/,''); para.gsub!(/<[-~]#>/,'') + para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?#{Mx[:lnk_c]}(https?:\/\/\S+)/, + %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\1" width="\\2" height="\\3" />[\\1] \\4}) + para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?#{Mx[:lnk_c]}(https?:\/\/\S+)/, + %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\1"/>\\1}) + para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?#{Mx[:lnk_c]}image/, + %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\1" width="\\2" height="\\3" />[\\1] \\4}) + para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?#{Mx[:lnk_c]}image/, + %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\1"/>\\1}) + para.gsub!(/(^|#{Mx[:gl_c]}|\s)#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}(https?:\/\/[^"><]+?)([,.:;"><]?(?=\s|$))/, + '\1<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\3">\2</link>\4') #watch, compare html_tune + para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/, + %{\\1#{@url_brace.xml_open}<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\\2">\\2</link>#{@url_brace.xml_close}\\3}) + para.gsub!(/\b[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, + '<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\1">\1</link>\2') #escaped urls not linked, deal with later + else + para.gsub!(/(^|[^}])_</m,'\1<'); para.gsub!(/(^|[^}])_>/m,'\1>') #code-block: angle brackets special characters + para.gsub!(/(^|[^}])_</m,'\1<'); para.gsub!(/(^|[^}])_>/m,'\1>') + end + para.gsub!(/ |#{Mx[:nbsp]}/m,' ') + para + end + def markup_light(para='') + para.gsub!(/\/\{(.+?)\}\//,'<i>\1</i>') + para.gsub!(/[*!]\{(.+?)\}[*!]/,'<b>\1</b>') + para.gsub!(/_\{(.+?)\}_/,'<u>\1</u>') + para.gsub!(/-\{(.+?)\}-/,'<del>\1</del>') + para.gsub!(/<br(\s*\/)?>/,'<br />') + para.gsub!(/<:pb>\s*/,'') + para.gsub!(/<[-~]#>/,'') + para.gsub!(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1& ') #sort + para.gsub!(/&([^;]{1,5})/,'&\1') #sort, rough estimate, revisit #WATCH found in node not sax + para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\/\/\S+|image)/, + "<image.path>#{@dir.url.images_local}\/\\1</image.path>") + para.gsub!(/ |#{Mx[:nbsp]}/,' ') + #para.gsub!(/ /,' ') #clean + wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17 + para=tidywords(wordlist).join(' ').strip + para + end + def markup_fictionbook(para='') + para.gsub!(/~\{([\d*+]+).+?\}~/,'[\1]') + para.gsub!(/\/\{(.+?)\}\//,'<i>\1</i>') + para.gsub!(/[*!]\{(.+?)\}[*!]/,'<b>\1</b>') + para.gsub!(/_\{(.+?)\}_/,'<u>\1</u>') + para.gsub!(/-\{(.+?)\}-/,'<del>\1</del>') + para.gsub!(/<br(\s*\/)?>/,'<br />') + para.gsub!(/<:pb>\s*/,'') + para.gsub!(/<[-~]#>/,'') + #temporary --> + para.gsub!(/<:\S+?>/,'') + #<-- temporary + para.gsub!(/<[-~]#>/,'') + para.gsub!(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1& ') #sort + para.gsub!(/&([^;]{1,5})/,'&\1') #sort, rough estimate, revisit #WATCH found in node not sax + para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\/\/\S+|image)/, + "<image.path>#{@dir.url.images_local}\/\\1</image.path>") + para.gsub!(/ |#{Mx[:nbsp]}/,' ') + #para.gsub!(/ /,' ') #clean + wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17 + para=tidywords(wordlist).join(' ').strip + para + end + def markup_group(para='') + para.gsub!(/</,'<'); para.gsub!(/>/,'>') + para.gsub!(/<:?br(?:\s+\/)?>/,'<br />') + para.gsub!(/<(link xmlns:xlink=".+?")>/,'<\1>') + para.gsub!(/<(\/link)>/,'<\1>') + para.gsub!(/<(\/?en)>/,'<\1>') + para + end + def xml_sem_block_paired(matched) # colon depth: many, recurs + matched.gsub!(/\b(au):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:au]} depth="many">\\2</sem:#{@ab[:au]}>}) # sem : + matched.gsub!(/\b(vol):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:vol]} depth="many">\\2</sem:#{@ab[:vol]}>}) # sem : + matched.gsub!(/\b(pub):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:pub]} depth="many">\\2</sem:#{@ab[:pub]}>}) # sem : + matched.gsub!(/\b(ref):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:ref]} depth="many">\\2</sem:#{@ab[:ref]}>}) # sem : + matched.gsub!(/\b(desc):\{(.+?)\}:\1\b/m,%{<sem:#{@ab[:desc]} depth="many">\\2</sem:#{@ab[:desc]}>}) # sem : + matched.gsub!(/\b(conv):\{(.+?)\}:\1\b/m,%{<sem:#{@ab[:conv]} depth="many">\\2</sem:#{@ab[:conv]}>}) # sem : + matched.gsub!(/\b(ct):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:ct]} depth="many">\\2</sem:#{@ab[:ct]}>}) # sem : + matched.gsub!(/\b(cty):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:cty]} depth="many">\\2</sem:#{@ab[:cty]}>}) # sem : + matched.gsub!(/\b(org):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:org]} depth="many">\\2</sem:#{@ab[:org]}>}) # sem : + matched.gsub!(/\b(dt):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:dt]} depth="many">\\2</sem:#{@ab[:dt]}>}) # sem : + matched.gsub!(/\b(n):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:n]} depth="many">\\2</sem:#{@ab[:n]}>}) # sem : + matched.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m,'<sem:\1 depth="many">\2</sem:\1>') # sem : + end + def xml_semantic_tags(para) + if @md.sem_tag + para.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } # sem : + para.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } # sem : + para.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } # sem : + #colon one / single / flat / shallow + para.gsub!(/:\{(.+?)\}:au\b/m, %{<sem:#{@ab[:au]} depth="one">\\1</sem:#{@ab[:au]}>}) # sem : + para.gsub!(/:\{(.+?)\}:n\b/m, %{<sem:#{@ab[:n]} depth="one">\\1</sem:#{@ab[:n]}>}) # sem : + para.gsub!(/:\{(.+?)\}:ti\b/m, %{<sem:#{@ab[:ti]} depth="one">\\1</sem:#{@ab[:ti]}>}) # sem : + para.gsub!(/:\{(.+?)\}:ref\b/m, %{<sem:#{@ab[:ref]} depth="one">\\1</sem:#{@ab[:ref]}>}) # sem : + para.gsub!(/:\{(.+?)\}:desc\b/m, %{<sem:#{@ab[:desc]} depth="one">\\1</sem:#{@ab[:desc]}>}) # sem : + para.gsub!(/:\{(.+?)\}:cty\b/m, %{<sem:#{@ab[:cty]} depth="one">\\1</sem:#{@ab[:cty]}>}) # sem : + para.gsub!(/:\{(.+?)\}:org\b/m, %{<sem:#{@ab[:org]} depth="one">\\1</sem:#{@ab[:org]}>}) # sem : + para.gsub!(/:\{(.+?)\}:([a-z]+(?:[_:.][a-z]+)*)/m,'<sem:\2 depth="one">\1</sem:\2>') # sem : + #semicolon zero / none + para.gsub!(/;\{([^}]+(?![;]))\};ti\b/m, %{<sem:#{@ab[:ti]} depth="zero">\\1</sem:#{@ab[:ti]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};qt\b/m, %{<sem:#{@ab[:qt]} depth="zero">\\1</sem:#{@ab[:qt]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};ref\b/m, %{<sem:#{@ab[:ref]} depth="zero">\\1</sem:#{@ab[:ref]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};ed\b/m, %{<sem:#{@ab[:ed]} depth="zero">\\1</sem:#{@ab[:ed]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};v\b/m, %{<sem:#{@ab[:v]} depth="zero">\\1</sem:#{@ab[:v]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};desc\b/m, %{<sem:#{@ab[:desc]} depth="zero">\\1</sem:#{@ab[:desc]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};def\b/m, %{<sem:#{@ab[:def]} depth="zero">\\1</sem:#{@ab[:def]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};trans\b/m, %{<sem:#{@ab[:trans]} depth="zero">\\1</sem:#{@ab[:trans]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};y\b/m, %{<sem:#{@ab[:y]} depth="zero">\\1</sem:#{@ab[:y]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};ab\b/m, %{<sem:#{@ab[:ab]} depth="zero">\\1</sem:#{@ab[:ab]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};pg\b/m, %{<sem:#{@ab[:pg]} depth="zero">\\1</sem:#{@ab[:pg]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};fn?\b/m, %{<sem:#{@ab[:fn]} depth="zero">\\1</sem:#{@ab[:fn]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};mn?\b/m, %{<sem:#{@ab[:mn]} depth="zero">\\1</sem:#{@ab[:mn]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};ln?\b/m, %{<sem:#{@ab[:ln]} depth="zero">\\1</sem:#{@ab[:ln]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};in\b/m, %{<sem:#{@ab[:in]} depth="zero">\\1</sem:#{@ab[:in]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};uni\b/m, %{<sem:#{@ab[:uni]} depth="zero">\\1</sem:#{@ab[:uni]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};fac\b/m, %{<sem:#{@ab[:fac]} depth="zero">\\1</sem:#{@ab[:fac]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};inst\b/m, %{<sem:#{@ab[:inst]} depth="zero">\\1</sem:#{@ab[:inst]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};dept\b/m, %{<sem:#{@ab[:dpt]} depth="zero">\\1</sem:#{@ab[:dept]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};org\b/m, %{<sem:#{@ab[:org]} depth="zero">\\1</sem:#{@ab[:org]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};com?\b/m, %{<sem:#{@ab[:com]} depth="zero">\\1</sem:#{@ab[:com]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};cty\b/m, %{<sem:#{@ab[:cty]} depth="zero">\\1</sem:#{@ab[:cty]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};([a-z]+(?:[_:.][a-z]+)*)/m,'<sem:\2 depth="zero">\1</sem:\2>') # sem ; + end + para + end + end +end +module SiSU_XML_tags #Format + require "#{SiSU_lib}/param" + include SiSU_Param + include SiSU_Viz + class RDF + def initialize(md='',seg_name=[],tracker=0) + @full_title=@subtitle=@author=@subject=@description=@publisher=@contributor=@date=@date_created=@date_issued=@date_available=@date_valid=@date_modified=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@copyright=@owner=@keywords='' + #seg_name=%{#{@@seg_name[@@tracker]} - } if @@seg_name[@@tracker] + @md=md + @rdfurl=%{ rdf:about="http://www.jus.uio.no/lm/toc"\n} + if @md.full_title # DublinCore 1 - title + @rdf_title=%{ dc.title="#{seg_name}#{@md.full_title}"\n} + @full_title=%{ <meta name="dc.title" content="#{@md.full_title}" />\n} + #@full_title=%{ <meta name="dc.title" content="#{seg_name}#{@md.full_title}" />\n} + end + if @md.author # DublinCore 2 - creator/author (author) + @rdf_author=%{ dc.author="#{@md.author}"\n} + content=meta_content_clean(@md.author) + @author=%{ <meta name="dc.author" content="#{content}" />\n} + end + if @md.subject # DublinCore 3 - subject (us library of congress, eric or udc, or schema???) + @rdf_subject=%{ dc.subject="#{@md.subject}"\n} + content=meta_content_clean(@md.subject) + @subject=%{ <meta name="dc.subject" content="#{content}" />\n} + end + if @md.description # DublinCore 4 - description + @rdf_description=%{ dc.description="#{@md.description}"\n} + content=meta_content_clean(@md.description) + @description=%{ <meta name="dc.description" content="#{content}" />\n} + end + if @md.publisher # DublinCore 5 - publisher (current copy published by) + @rdf_publisher=%{ dc.publisher="#{@md.publisher}"\n} + content=meta_content_clean(@md.publisher) + @publisher=%{ <meta name="dc.publisher" content="#{content}" />\n} + end + if @md.contributor # DublinCore 6 - contributor + @rdf_contributor=%{ dc.contributor="#{@md.contributor}"\n} + content=meta_content_clean(@md.contributor) + @contributor=%{ <meta name="dc.contributor" content="#{content}" />\n} + end + if @md.date # DublinCore 7 - date year-mm-dd + @rdf_date=%{ dc.date="#{@md.date}"\n} + @date=%{ <meta name="dc.date" content="#{@md.date}" #{@md.date_scheme} />\n} + end + if @md.date_created # DublinCore 7 - date.created year-mm-dd + @rdf_date_created=%{ dc.date.created="#{@md.date_created}"\n} + @date_created=%{ <meta name="dc.date.created" content="#{@md.date_created}" #{@md.date_created_scheme} />\n} + end + if @md.date_issued # DublinCore 7 - date.issued year-mm-dd + @rdf_date_issued=%{ dc.date.issued="#{@md.date_issued}"\n} + @date_issued=%{ <meta name="dc.date.issued" content="#{@md.date_issued}" #{@md.date_issued_scheme} />\n} + end + if @md.date_available # DublinCore 7 - date.available year-mm-dd + @rdf_date_available=%{ dc.date.available="#{@md.date_available}"\n} + @date_available=%{ <meta name="dc.date.available" content="#{@md.date_available}" #{@md.date_available_scheme} />\n} + end + if @md.date_valid # DublinCore 7 - date.valid year-mm-dd + @rdf_date_valid=%{ dc.date.valid="#{@md.date_valid}"\n} + @date_valid=%{ <meta name="dc.date.valid" content="#{@md.date_valid}" #{@md.date_valid_scheme} />\n} + end + if @md.date_modified # DublinCore 7 - date.modified year-mm-dd + @rdf_date_modified=%{ dc.date.modified="#{@md.date_modified}"\n} + @date_modified=%{ <meta name="dc.date.modified" content="#{@md.date_modified}" #{@md.date_modified_scheme} />\n} + end + if @md.type # DublinCore 8 - type (genre eg. report, convention etc) + @rdf_type=%{ dc.type="#{@md.type}"\n} + content=meta_content_clean(@md.type) + @type=%{ <meta name="dc.type" content="#{content}" />\n} + end + if @md.format # DublinCore 9 - format (use your mime type) + @rdf_format=%{ dc.format="#{@md.format}"\n} + content=meta_content_clean(@md.format) + @format=%{ <meta name="dc.format" content="#{content}" />\n} + end + if @md.identifier # DublinCore 10 - identifier (your identifier, could use urn which is free) + @rdf_identifier=%{ dc.identifier="#{@md.identifier}"\n} + content=meta_content_clean(@md.identifier) + @identifier=%{ <meta name="dc.identifier" content="#{content}" />\n} + end + if @md.source # DublinCore 11 - source (document source) + @rdf_source=%{ dc.source="#{@md.source}"\n} + content=meta_content_clean(@md.source) + @source=%{ <meta name="dc.source" content="#{content}" />\n} + end + if @md.language \ + and @md.language[:name] # DublinCore 12 - language (English) + @rdf_language=%{ dc.language="#{@md.language[:name]}"\n} + @language=%{ <meta name="dc.language" content="#{@md.language[:name]}" />\n} + end + if @md.language_original \ + and @md.language_original[:name] + @rdf_language_original=%{ dc.language="#{@md.language_original[:name]}"\n} + @language_original=%{ <meta name="dc.language" content="#{@md.language_original[:name]}" />\n} + end + if @md.relation # DublinCore 13 - relation + @rdf_relation=%{ dc.relation="#{@md.relation}"\n} + content=meta_content_clean(@md.relation) + @relation=%{ <meta name="dc.relation" content="#{content}" />\n} + end + if @md.coverage # DublinCore 14 - coverage + @rdf_coverage=%{ dc.coverage="#{@md.coverage}"\n} + content=meta_content_clean(@md.coverage) + @coverage=%{ <meta name="dc.coverage" content="#{content}" />\n} + end + if @md.rights # DublinCore 15 - rights + @rdf_rights=%{ dc.rights="#{@md.rights}"\n} + content=meta_content_clean(@md.rights) + @rights=%{ <meta name="dc.rights" content="#{content}" />\n} + end + content=meta_content_clean(@md.keywords) + @keywords=%{ <meta name="keywords" content="#{content}" />\n} if @md.keywords + @vz=SiSU_Env::Get_init.instance.skin + end + def meta_content_clean(content='') + unless content.nil? + content.tr!('"',"'") + end + content + end + def rdftoc #tocHead #values strung together, because some empty, and resulting output (line breaks) is much better + #<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + # xmlns:dc="http://purl.org/dc/elements/1.1/"> + # <rdf:Description rdf:about="http://www.jus.uio.no/lm/doc" + # dc:creator="Author" + # dc:title="Title" + # dc:description="Description if any" + # dc:date="Publication Date" + # /> + #</rdf:RDF> + #Dublin Core +#### XML only :-( KEEP +#<<WOK +#<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" +# xmlns:dc="http://purl.org/dc/elements/1.1/"> +# <rdf:Description +# #@rdfurl#@rdf_title#@rdf_subtitle#@rdf_creator#@rdf_subject#@rdf_description#@rdf_publisher#@rdf_contributor#@rdf_date#@rdf_dateCreated#@rdf_dateIssued#@rdf_dateAvailable#@rdf_dateValid#@rdf_dateModified#@rdf_type#@rdf_format#@rdf_identifier#@rdf_source#@rdf_language#@rdf_relation #@rdf_coverage#@rdf_rights +# />\n +#</rdf:RDF>\n +#WOK + end + def rdfseg #segHead + rdftoc + end + def comment_xml(extra='') + generator="Generated by: #{@md.sisu_version[:project]} #{@md.sisu_version[:version]} of #{@md.sisu_version[:date_stamp]} (#{@md.sisu_version[:date]})" if @md.sisu_version[:version] + lastdone="Last Generated on: #{Time.now}" + rubyv="Ruby version: #{@md.ruby_version}" + sc=if @md.sc_info + "Source file: #{@md.sc_filename} version: #{@md.sc_number} of: #{@md.sc_date}" + else '' + end + if extra.empty? +<<WOK +<!-- Document processing information: + * #{generator} + * #{rubyv} + * #{sc} + * #{lastdone} + * SiSU http://www.jus.uio.no/sisu +--> +WOK + else +<<WOK +<!-- Document processing information: + * #{extra} + * #{generator} + * #{rubyv} + * #{sc} + * #{lastdone} + * SiSU http://www.jus.uio.no/sisu +--> +WOK + end + end + def comment_xml_sax + desc='SiSU XML, SAX type representation' + comment_xml(desc) + end + def comment_xml_node + desc='SiSU XML, Node type representation' + comment_xml(desc) + end + def comment_xml_dom + desc='SiSU XML, DOM type representation' + comment_xml(desc) + end + def metatag_html #values strung together, because some empty, and resulting output (line breaks) is much better +#{@vz.js_head} +<<WOK +#@full_title#@subtitle#@author#@subject#@description#@publisher#@contributor#@date#@date_created#@date_issued#@date_available#@date_valid#@date_modified#@type#@format#@identifier#@source#@language#@relation#@coverage#@rights#@copyright#@owner +#{@vz.txt_generator} +#{@vz.png_ico} +WOK + end + end +end +module SiSU_Tables + require "#{SiSU_lib}/xml_tables" +end +__END__ |