# coding: utf-8 =begin * Name: SiSU * Description: a framework for document structuring, publishing and search * Author: Ralph Amissah * Copyright: (C) 1997 - 2009 Ralph Amissah All Rights Reserved. * License: GPL 3 or later: SiSU, a framework for document structuring, publishing and search Copyright (C) Ralph Amissah This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . If you have Internet connection, the latest version of the GPL should be available at these locations: * SiSU uses: * Standard SiSU markup syntax, * Standard SiSU meta-markup syntax, and the * Standard SiSU object citation numbering and system * Hompages: * Download: * Ralph Amissah ** Description: common file for xml generation =end module SiSU_text_parts require "#{SiSU_lib}/shared_structure" class Split_text_object < SiSU_Structure::Split_text_object require "#{SiSU_lib}/param" require "#{SiSU_lib}/xml_format" include SiSU_Viz include SiSU_XML_format @@alt_id_count=0 @@dp=nil def lev_segname_para if @para =~/^#{Mx[:lv_o]}(?:\d:|<:.+?>).+/ if /^#{Mx[:lv_o]}([1-6]):(\S+?)#{Mx[:lv_c]}\s*(\S.+)/m.match(@para) @format,segname,@text=$1,$2,$3 elsif /^#{Mx[:lv_o]}([1-6]):#{Mx[:lv_c]}\s*(\S.+)/m.match(@para) @format,@text=$1,$2 elsif /<:(.+?)>\s*(\S.+?)/m.match(@para) @format,@text=$1,$2 elsif /^#{Mx[:lv_o]}([1-6]):(\S+?)#{Mx[:lv_c]}\s*(\S.+?)/m.match(@para) @@alt_id_count+=1 @format,segname,@text=$1,$2,$3 #@format="#@format:#{segname}" # elsif /^#{Mx[:lv_o]}([1-6]):#{Mx[:lv_c]}\s*(\S.+?)/m.match(@para) @@alt_id_count+=1 @format,@text=$1,$2 end else if /(.+?)/m.match(@para) @text=$1 end if @para !~/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$|^$/ #added 2002w06 @text=/(.+?)/m.match(@para)[1] end if /^(\d)~\S*\s+(.+)/m.match(@para) @format,@text=$1,$2 end end @format="#@format:#{segname}" # #follow this search beneath for heading_body1-6 @lev_para_ocn=if @para =~/.+#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ t_o={:format=>@format,:txt=>@text,:ocn=>@ocn} #(@format,@text,@ocn) SiSU_XML_format::Format_scroll.new(@md,t_o) else t_o={:format=>@format,:txt=>@text,:ocn=>0} #(@format,@text,@ocn) SiSU_XML_format::Format_scroll.new(@md,t_o) #SiSU_XML_format::Format_scroll.new(@md,@format,@text,"#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}") end self end end end module SiSU_XML_munge class Trans require "#{SiSU_lib}/defaults" def initialize(md) @md=md @sys=SiSU_Env::System_call.new @dir=SiSU_Env::Info_env.new(@md.fns) @dp=SiSU_Env::Info_env.new.digest.pattern @url_brace=SiSU_Viz::Skin.new.url_decoration if @md.sem_tag #@ab ||=SiSU_Viz::Skin.new.semantic_tags.default @ab ||=semantic_tags.default end end def semantic_tags def default { :pub => 'publication', :conv => 'convention', :vol => 'volume', :pg => 'page', :cty => 'city', :org => 'organization', :uni => 'university', :dept => 'department', :fac => 'faculty', :inst => 'institute', :co => 'company', :com => 'company', :conv => 'convention', :dt => 'date', :y => 'year', :m => 'month', :d => 'day', :ti => 'title', :au => 'author', :ed => 'editor', #editor? :v => 'version', #edition :n => 'name', :fn => 'firstname', :mn => 'middlename', :ln => 'lastname', :in => 'initials', :qt => 'quote', :ct => 'cite', :ref => 'reference', :ab => 'abreviation', :def => 'define', :desc => 'description', :trans => 'translate', } end self end def char_enc #character encode def utf8(para='') if @sys.locale =~/utf-?8/i # instead ucs for utf8 #require 'iconv' ? Iñtërnâtiônàlizætiøn #¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûü #¢£¥§©ª«®°±²³µ¶¹º»¼½¾×÷ ##para.gsub!(//, '&#;') ##para.gsub!(//, '&;') para.gsub!(//u, '>') # '>' # > para.gsub!(/¢/u, '¢') # '¢' # ¢ para.gsub!(/£/u, '£') # '£' # £ para.gsub!(/¥/u, '¥') # '¥' # ¥ para.gsub!(/§/u, '§') # '§' # § para.gsub!(/©/u, '©') # '©' # © para.gsub!(/ª/u, 'ª') # 'ª' # ª para.gsub!(/«/u, '«') # '«' # « para.gsub!(/®/u, '®') # '®' # ® para.gsub!(/°/u, '°') # '°' # ° para.gsub!(/±/u, '±') # '±' # ± para.gsub!(/²/u, '²') # '²' # ² para.gsub!(/³/u, '³') # '³' # ³ para.gsub!(/µ/u, 'µ') # 'µ' # µ para.gsub!(/¶/u, '¶') # '¶' # ¶ para.gsub!(/¹/u, '¹') # '¹' # ¹ para.gsub!(/º/u, 'º') # 'º' # º para.gsub!(/»/u, '»') # '»' # » para.gsub!(/¼/u, '¼') # '¼' # ¼ para.gsub!(/½/u, '½') # '½' # ½ para.gsub!(/¾/u, '¾') # '¾' # ¾ para.gsub!(/×/u, '×') # '×' # × para.gsub!(/÷/u, '÷') # '÷' # ÷ para.gsub!(/¿/u, '¿') # '¿' # ¿ para.gsub!(/À/u, 'À') # 'À' # À para.gsub!(/Á/u, 'Á') # 'Á' # Á para.gsub!(/Â/u, 'Â') # 'Â' #  para.gsub!(/Ã/u, 'Ã') # 'Ã' # à para.gsub!(/Ä/u, 'Ä') # 'Ä' # Ä para.gsub!(/Å/u, 'Å') # 'Å' # Å para.gsub!(/Æ/u, 'Æ') # 'Æ' # Æ para.gsub!(/Ç/u, 'Ç') # 'Ç' # Ç para.gsub!(/È/u, 'È') # 'È' # È para.gsub!(/É/u, 'É') # 'É' # É para.gsub!(/Ê/u, 'Ê') # 'Ê' # Ê para.gsub!(/Ë/u, 'Ë') # 'Ë' # Ë para.gsub!(/Ì/u, 'Ì') # 'Ì' # Ì para.gsub!(/Í/u, 'Í') # 'Í' # Í para.gsub!(/Î/u, 'Î') # 'Î' # Î para.gsub!(/Ï/u, 'Ï') # 'Ï' # Ï para.gsub!(/Ð/u, 'Ð') # 'Ð' # Ð para.gsub!(/Ñ/u, 'Ñ') # 'Ñ' # Ñ para.gsub!(/Ò/u, 'Ò') # 'Ò' # Ò para.gsub!(/Ó/u, 'Ó') # 'Ó' # Ó para.gsub!(/Ô/u, 'Ô') # 'Ô' # Ô para.gsub!(/Õ/u, 'Õ') # 'Õ' # Õ para.gsub!(/Ö/u, 'Ö') # 'Ö' # Ö para.gsub!(/Ø/u, 'Ø') # 'Ø' # Ø para.gsub!(/Ù/u, 'Ù') # 'Ù' # Ù para.gsub!(/Ú/u, 'Ú') # 'Ú' # Ú para.gsub!(/Û/u, 'Û') # 'Û' # Û para.gsub!(/Ü/u, 'Ü') # 'Ü' # Ü para.gsub!(/Ý/u, 'Ý') # 'Ý' # Ý para.gsub!(/Þ/u, 'Þ') # 'Þ' # Þ para.gsub!(/ß/u, 'ß') # 'ß' # ß para.gsub!(/à/u, 'à') # 'à' # à para.gsub!(/á/u, 'á') # 'á' # á para.gsub!(/â/u, 'â') # 'â' # â para.gsub!(/ã/u, 'ã') # 'ã' # ã para.gsub!(/ä/u, 'ä') # 'ä' # ä para.gsub!(/å/u, 'å') # 'å' # å para.gsub!(/æ/u, 'æ') # 'æ' # æ para.gsub!(/ç/u, 'ç') # 'ç' # ç para.gsub!(/è/u, 'è') # 'è' # è para.gsub!(/é/u, 'é') # '´' # é para.gsub!(/ê/u, 'ê') # 'ˆ' # ê para.gsub!(/ë/u, 'ë') # 'ë' # ë para.gsub!(/ì/u, 'ì') # 'ì' # ì para.gsub!(/í/u, 'í') # '´' # í para.gsub!(/î/u, 'î') # 'î' # î para.gsub!(/ï/u, 'ï') # 'ï' # ï para.gsub!(/ð/u, 'ð') # 'ð' # ð para.gsub!(/ñ/u, 'ñ') # 'ñ' # ñ para.gsub!(/ò/u, 'ò') # 'ò' # ò para.gsub!(/ó/u, 'ó') # 'ó' # ó para.gsub!(/ô/u, 'ô') # 'ô' # ô para.gsub!(/õ/u, 'õ') # 'õ' # õ para.gsub!(/ö/u, 'ö') # 'ö' # ö para.gsub!(/ø/u, 'ø') # 'ø' # ø para.gsub!(/ù/u, 'ú') # 'ù' # ú para.gsub!(/ú/u, 'û') # 'ú' # û para.gsub!(/û/u, 'ü') # 'û' # ü para.gsub!(/ü/u, 'ý') # 'ü' # ý para.gsub!(/þ/u, 'þ') # 'þ' # þ para.gsub!(/ÿ/u, 'ÿ') # 'ÿ' # ÿ para.gsub!(/‘/u, '‘') # '‘' # ‘ para.gsub!(/’/u, '’') # '’' # ’ para.gsub!(/“/u, '“') # “ # “ para.gsub!(/”/u, '”') # ” # ” para.gsub!(/–/u, '–') # – # – para.gsub!(/—/u, '—') # — # — para.gsub!(/∝/u, '∝') # ∝ # ∝ para.gsub!(/∞/u, '∞') # ∞ # ∞ para.gsub!(/™/u, '™') # ™ # ™ para.gsub!(/✠/u, '✠') # ✗ # ✠ para.gsub!(/ /u, ' ') # space identify para.gsub!(/ /u, ' ') # space identify end end def html(para='') if @sys.locale =~/utf-?8/i # instead ucs for utf8 #require 'iconv' ? Iñtërnâtiônàlizætiøn para.gsub!(/ /u, ' ') # space identify para.gsub!(/ /u, ' ') # space identify else para.gsub!(/¢/u, '¢') # ¢ para.gsub!(/£/u, '£') # £ para.gsub!(/¥/u, '¥') # ¥ para.gsub!(/§/u, '§') # § para.gsub!(/©/u, '©') # © para.gsub!(/ª/u, 'ª') # ª para.gsub!(/«/u, '«') # « para.gsub!(/®/u, '®') # ® para.gsub!(/°/u, '°') # ° para.gsub!(/±/u, '±') # ± para.gsub!(/²/u, '²') # ² para.gsub!(/³/u, '³') # ³ para.gsub!(/µ/u, 'µ') # µ para.gsub!(/¶/u, '¶') # ¶ para.gsub!(/¹/u, '¹') # ¹ para.gsub!(/º/u, 'º') # º para.gsub!(/»/u, '»') # » para.gsub!(/¼/u, '¼') # ¼ para.gsub!(/½/u, '½') # ½ para.gsub!(/¾/u, '¾') # ¾ para.gsub!(/×/u, '×') # × para.gsub!(/÷/u, '÷') # ÷ para.gsub!(/¿/u, '¿') # ¿ para.gsub!(/À/u, 'À') # À para.gsub!(/Á/u, 'Á') # Á para.gsub!(/Â/u, 'Â') #  para.gsub!(/Ã/u, 'Ã') # à para.gsub!(/Ä/u, 'Ä') # Ä para.gsub!(/Å/u, 'Å') # Å para.gsub!(/Æ/u, 'Æ') # Æ para.gsub!(/Ç/u, 'Ç') # Ç para.gsub!(/È/u, 'È') # È para.gsub!(/É/u, 'É') # É para.gsub!(/Ê/u, 'Ê') # Ê para.gsub!(/Ë/u, 'Ë') # Ë para.gsub!(/Ì/u, 'Ì') # Ì para.gsub!(/Í/u, 'Í') # Í para.gsub!(/Î/u, 'Î') # Î para.gsub!(/Ï/u, 'Ï') # Ï para.gsub!(/Ð/u, 'Ð') # Ð para.gsub!(/Ñ/u, 'Ñ') # Ñ para.gsub!(/Ò/u, 'Ò') # Ò para.gsub!(/Ó/u, 'Ó') # Ó para.gsub!(/Ô/u, 'Ô') # Ô para.gsub!(/Õ/u, 'Õ') # Õ para.gsub!(/Ö/u, 'Ö') # Ö para.gsub!(/Ø/u, 'Ø') # Ø para.gsub!(/Ù/u, 'Ù') # Ù para.gsub!(/Ú/u, 'Ú') # Ú para.gsub!(/Û/u, 'Û') # Û para.gsub!(/Ü/u, 'Ü') # Ü para.gsub!(/Ý/u, 'Ý') # Ý para.gsub!(/Þ/u, 'Þ') # Þ para.gsub!(/ß/u, 'ß') # ß para.gsub!(/à/u, 'à') # à para.gsub!(/á/u, 'á') # á para.gsub!(/â/u, 'â') # â para.gsub!(/ã/u, 'ã') # ã para.gsub!(/ä/u, 'ä') # ä para.gsub!(/å/u, 'å') # å para.gsub!(/æ/u, 'æ') # æ para.gsub!(/ç/u, 'ç') # ç para.gsub!(/è/u, 'è') # è para.gsub!(/é/u, '´') # é para.gsub!(/ê/u, 'ˆ') # ê para.gsub!(/ë/u, 'ë') # ë para.gsub!(/ì/u, 'ì') # ì para.gsub!(/í/u, '´') # í para.gsub!(/î/u, 'î') # î para.gsub!(/ï/u, 'ï') # ï para.gsub!(/ð/u, 'ð') # ð para.gsub!(/ñ/u, 'ñ') # ñ para.gsub!(/ò/u, 'ò') # ò para.gsub!(/ó/u, 'ó') # ó para.gsub!(/ô/u, 'ô') # ô para.gsub!(/õ/u, 'õ') # õ para.gsub!(/ö/u, 'ö') # ö para.gsub!(/ø/u, 'ø') # ø para.gsub!(/ù/u, 'ù') # ú para.gsub!(/ú/u, 'ú') # û para.gsub!(/û/u, 'û') # ü para.gsub!(/ü/u, 'ü') # ý para.gsub!(/þ/u, 'þ') # þ para.gsub!(/ÿ/u, 'ÿ') # ÿ para.gsub!(/‘/u, '&#lsquo;') # ‘ # ‘ para.gsub!(/’/u, '&#rsquo;') # ’ # ’ para.gsub!(/“/u, '“') # “ # “ para.gsub!(/”/u, '”') # ” # ” para.gsub!(/–/u, '–') # – # – para.gsub!(/—/u, '—') # — # — para.gsub!(/∝/u, '∝') # ∝ # ∝ para.gsub!(/∞/u, '∞') # ∞ # ∞ para.gsub!(/™/u, '™') # ™ # ™ para.gsub!(/✠/u, '✠') # ✠ #para.gsub!(/✠/u, '†') # † # † incorrect replacement † para.gsub!(/ /u, ' ') # space identify para.gsub!(/ /u, ' ') # space identify end end self end def tidywords(wordlist) wordlist.each do |x| #imperfect solution will not catch all possible cases x.gsub!(/&/,'&') unless x =~/&\S+;/ x.gsub!(/&([A-Z])/,'&\1') end end def markup(para='') wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17 para=tidywords(wordlist).join(' ').strip para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'
') para.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'') para.gsub!(/#{Mx[:mk_o]}#([a-zA-Z]+)#{Mx[:mk_c]}/,'&\1;') para.gsub!(/#{Mx[:mk_o]}(#[0-9]+)#{Mx[:mk_c]}/,'&\1;') para.gsub!(/(^|#{Mx[:gl_c]}|\s+)<\s+/,'\1< '); para.gsub!(/\s+>(\s+|$)/,' >\1') #para.gsub!(/#{Mx[:fa_emphasis_o]}(.+?)#{Mx[:fa_emphasis_c]}/,'\1') #reinstate para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1') para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1') para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1') para.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1') para.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1') para.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1') para.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1') para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1') para.gsub!(/<:pb>\s*/,'') #Fix para.gsub!(/<+[-~]#>+/,'') para.gsub!(/#{Mx[:id_o]}0;\w\d+;[um]\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}/,'') if para !~/^#{Mx[:gr_o]}code#{Mx[:gr_c]}/ #embeds a red-bullet image --> para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1') para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1') para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1') para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1') para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'
') para.gsub!(/#{Mx[:br_page]}\s*/,'') para.gsub!(/#{Mx[:br_page_new]}\s*/,'') para.gsub!(/#{Mx[:pa_non_object_no_heading]}|#{Mx[:pa_non_object_dummy_heading]}/,''); para.gsub!(/<[-~]#>/,'') para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?#{Mx[:lnk_c]}(https?:\/\/\S+)/, %{[\\1] \\4}) para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?#{Mx[:lnk_c]}(https?:\/\/\S+)/, %{\\1}) para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?#{Mx[:lnk_c]}image/, %{[\\1] \\4}) para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?#{Mx[:lnk_c]}image/, %{\\1}) para.gsub!(/(^|#{Mx[:gl_c]}|\s)#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}(https?:\/\/[^"><]+?)([,.:;"><]?(?=\s|$))/, '\1\2\4') #watch, compare html_tune para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/, %{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) para.gsub!(/\b[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, '\1\2') #escaped urls not linked, deal with later else para.gsub!(/(^|[^}])_/m,'\1>') #code-block: angle brackets special characters para.gsub!(/(^|[^}])_/m,'\1>') end para.gsub!(/ |#{Mx[:nbsp]}/m,' ') para end def markup_light(para='') para.gsub!(/\/\{(.+?)\}\//,'\1') para.gsub!(/[*!]\{(.+?)\}[*!]/,'\1') para.gsub!(/_\{(.+?)\}_/,'\1') para.gsub!(/-\{(.+?)\}-/,'\1') para.gsub!(//,'
') para.gsub!(/<:pb>\s*/,'') para.gsub!(/<[-~]#>/,'') para.gsub!(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1& ') #sort para.gsub!(/&([^;]{1,5})/,'&\1') #sort, rough estimate, revisit #WATCH found in node not sax para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\/\/\S+|image)/, "#{@dir.url.images_local}\/\\1") para.gsub!(/ |#{Mx[:nbsp]}/,' ') #para.gsub!(/ /,' ') #clean wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17 para=tidywords(wordlist).join(' ').strip para end def markup_fictionbook(para='') para.gsub!(/~\{([\d*+]+).+?\}~/,'[\1]') para.gsub!(/\/\{(.+?)\}\//,'\1') para.gsub!(/[*!]\{(.+?)\}[*!]/,'\1') para.gsub!(/_\{(.+?)\}_/,'\1') para.gsub!(/-\{(.+?)\}-/,'\1') para.gsub!(//,'
') para.gsub!(/<:pb>\s*/,'') para.gsub!(/<[-~]#>/,'') #temporary --> para.gsub!(/<:\S+?>/,'') #<-- temporary para.gsub!(/<[-~]#>/,'') para.gsub!(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1& ') #sort para.gsub!(/&([^;]{1,5})/,'&\1') #sort, rough estimate, revisit #WATCH found in node not sax para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\/\/\S+|image)/, "#{@dir.url.images_local}\/\\1") para.gsub!(/ |#{Mx[:nbsp]}/,' ') #para.gsub!(/ /,' ') #clean wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17 para=tidywords(wordlist).join(' ').strip para end def markup_group(para='') para.gsub!(//,'>') para.gsub!(/<:?br(?:\s+\/)?>/,'
') para.gsub!(/<(link xmlns:xlink=".+?")>/,'<\1>') para.gsub!(/<(\/link)>/,'<\1>') para.gsub!(/<(\/?en)>/,'<\1>') para end def xml_sem_block_paired(matched) # colon depth: many, recurs matched.gsub!(/\b(au):\{(.+?)\}:\1\b/m, %{\\2}) # sem : matched.gsub!(/\b(vol):\{(.+?)\}:\1\b/m, %{\\2}) # sem : matched.gsub!(/\b(pub):\{(.+?)\}:\1\b/m, %{\\2}) # sem : matched.gsub!(/\b(ref):\{(.+?)\}:\1\b/m, %{\\2}) # sem : matched.gsub!(/\b(desc):\{(.+?)\}:\1\b/m,%{\\2}) # sem : matched.gsub!(/\b(conv):\{(.+?)\}:\1\b/m,%{\\2}) # sem : matched.gsub!(/\b(ct):\{(.+?)\}:\1\b/m, %{\\2}) # sem : matched.gsub!(/\b(cty):\{(.+?)\}:\1\b/m, %{\\2}) # sem : matched.gsub!(/\b(org):\{(.+?)\}:\1\b/m, %{\\2}) # sem : matched.gsub!(/\b(dt):\{(.+?)\}:\1\b/m, %{\\2}) # sem : matched.gsub!(/\b(n):\{(.+?)\}:\1\b/m, %{\\2}) # sem : matched.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m,'\2') # sem : end def xml_semantic_tags(para) if @md.sem_tag para.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } # sem : para.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } # sem : para.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } # sem : #colon one / single / flat / shallow para.gsub!(/:\{(.+?)\}:au\b/m, %{\\1}) # sem : para.gsub!(/:\{(.+?)\}:n\b/m, %{\\1}) # sem : para.gsub!(/:\{(.+?)\}:ti\b/m, %{\\1}) # sem : para.gsub!(/:\{(.+?)\}:ref\b/m, %{\\1}) # sem : para.gsub!(/:\{(.+?)\}:desc\b/m, %{\\1}) # sem : para.gsub!(/:\{(.+?)\}:cty\b/m, %{\\1}) # sem : para.gsub!(/:\{(.+?)\}:org\b/m, %{\\1}) # sem : para.gsub!(/:\{(.+?)\}:([a-z]+(?:[_:.][a-z]+)*)/m,'\1') # sem : #semicolon zero / none para.gsub!(/;\{([^}]+(?![;]))\};ti\b/m, %{\\1}) # sem ; para.gsub!(/;\{([^}]+(?![;]))\};qt\b/m, %{\\1}) # sem ; para.gsub!(/;\{([^}]+(?![;]))\};ref\b/m, %{\\1}) # sem ; para.gsub!(/;\{([^}]+(?![;]))\};ed\b/m, %{\\1}) # sem ; para.gsub!(/;\{([^}]+(?![;]))\};v\b/m, %{\\1}) # sem ; para.gsub!(/;\{([^}]+(?![;]))\};desc\b/m, %{\\1}) # sem ; para.gsub!(/;\{([^}]+(?![;]))\};def\b/m, %{\\1}) # sem ; para.gsub!(/;\{([^}]+(?![;]))\};trans\b/m, %{\\1}) # sem ; para.gsub!(/;\{([^}]+(?![;]))\};y\b/m, %{\\1}) # sem ; para.gsub!(/;\{([^}]+(?![;]))\};ab\b/m, %{\\1}) # sem ; para.gsub!(/;\{([^}]+(?![;]))\};pg\b/m, %{\\1}) # sem ; para.gsub!(/;\{([^}]+(?![;]))\};fn?\b/m, %{\\1}) # sem ; para.gsub!(/;\{([^}]+(?![;]))\};mn?\b/m, %{\\1}) # sem ; para.gsub!(/;\{([^}]+(?![;]))\};ln?\b/m, %{\\1}) # sem ; para.gsub!(/;\{([^}]+(?![;]))\};in\b/m, %{\\1}) # sem ; para.gsub!(/;\{([^}]+(?![;]))\};uni\b/m, %{\\1}) # sem ; para.gsub!(/;\{([^}]+(?![;]))\};fac\b/m, %{\\1}) # sem ; para.gsub!(/;\{([^}]+(?![;]))\};inst\b/m, %{\\1}) # sem ; para.gsub!(/;\{([^}]+(?![;]))\};dept\b/m, %{\\1}) # sem ; para.gsub!(/;\{([^}]+(?![;]))\};org\b/m, %{\\1}) # sem ; para.gsub!(/;\{([^}]+(?![;]))\};com?\b/m, %{\\1}) # sem ; para.gsub!(/;\{([^}]+(?![;]))\};cty\b/m, %{\\1}) # sem ; para.gsub!(/;\{([^}]+(?![;]))\};([a-z]+(?:[_:.][a-z]+)*)/m,'\1') # sem ; end para end end end module SiSU_XML_tags #Format require "#{SiSU_lib}/param" include SiSU_Param include SiSU_Viz class RDF def initialize(md='',seg_name=[],tracker=0) @full_title=@subtitle=@author=@subject=@description=@publisher=@contributor=@date=@date_created=@date_issued=@date_available=@date_valid=@date_modified=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@copyright=@owner=@keywords='' #seg_name=%{#{@@seg_name[@@tracker]} - } if @@seg_name[@@tracker] @md=md @rdfurl=%{ rdf:about="http://www.jus.uio.no/lm/toc"\n} if @md.full_title # DublinCore 1 - title @rdf_title=%{ dc.title="#{seg_name}#{@md.full_title}"\n} @full_title=%{ \n} #@full_title=%{ \n} end if @md.author # DublinCore 2 - creator/author (author) @rdf_author=%{ dc.author="#{@md.author}"\n} content=meta_content_clean(@md.author) @author=%{ \n} end if @md.subject # DublinCore 3 - subject (us library of congress, eric or udc, or schema???) @rdf_subject=%{ dc.subject="#{@md.subject}"\n} content=meta_content_clean(@md.subject) @subject=%{ \n} end if @md.description # DublinCore 4 - description @rdf_description=%{ dc.description="#{@md.description}"\n} content=meta_content_clean(@md.description) @description=%{ \n} end if @md.publisher # DublinCore 5 - publisher (current copy published by) @rdf_publisher=%{ dc.publisher="#{@md.publisher}"\n} content=meta_content_clean(@md.publisher) @publisher=%{ \n} end if @md.contributor # DublinCore 6 - contributor @rdf_contributor=%{ dc.contributor="#{@md.contributor}"\n} content=meta_content_clean(@md.contributor) @contributor=%{ \n} end if @md.date # DublinCore 7 - date year-mm-dd @rdf_date=%{ dc.date="#{@md.date}"\n} @date=%{ \n} end if @md.date_created # DublinCore 7 - date.created year-mm-dd @rdf_date_created=%{ dc.date.created="#{@md.date_created}"\n} @date_created=%{ \n} end if @md.date_issued # DublinCore 7 - date.issued year-mm-dd @rdf_date_issued=%{ dc.date.issued="#{@md.date_issued}"\n} @date_issued=%{ \n} end if @md.date_available # DublinCore 7 - date.available year-mm-dd @rdf_date_available=%{ dc.date.available="#{@md.date_available}"\n} @date_available=%{ \n} end if @md.date_valid # DublinCore 7 - date.valid year-mm-dd @rdf_date_valid=%{ dc.date.valid="#{@md.date_valid}"\n} @date_valid=%{ \n} end if @md.date_modified # DublinCore 7 - date.modified year-mm-dd @rdf_date_modified=%{ dc.date.modified="#{@md.date_modified}"\n} @date_modified=%{ \n} end if @md.type # DublinCore 8 - type (genre eg. report, convention etc) @rdf_type=%{ dc.type="#{@md.type}"\n} content=meta_content_clean(@md.type) @type=%{ \n} end if @md.format # DublinCore 9 - format (use your mime type) @rdf_format=%{ dc.format="#{@md.format}"\n} content=meta_content_clean(@md.format) @format=%{ \n} end if @md.identifier # DublinCore 10 - identifier (your identifier, could use urn which is free) @rdf_identifier=%{ dc.identifier="#{@md.identifier}"\n} content=meta_content_clean(@md.identifier) @identifier=%{ \n} end if @md.source # DublinCore 11 - source (document source) @rdf_source=%{ dc.source="#{@md.source}"\n} content=meta_content_clean(@md.source) @source=%{ \n} end if @md.language \ and @md.language[:name] # DublinCore 12 - language (English) @rdf_language=%{ dc.language="#{@md.language[:name]}"\n} @language=%{ \n} end if @md.language_original \ and @md.language_original[:name] @rdf_language_original=%{ dc.language="#{@md.language_original[:name]}"\n} @language_original=%{ \n} end if @md.relation # DublinCore 13 - relation @rdf_relation=%{ dc.relation="#{@md.relation}"\n} content=meta_content_clean(@md.relation) @relation=%{ \n} end if @md.coverage # DublinCore 14 - coverage @rdf_coverage=%{ dc.coverage="#{@md.coverage}"\n} content=meta_content_clean(@md.coverage) @coverage=%{ \n} end if @md.rights # DublinCore 15 - rights @rdf_rights=%{ dc.rights="#{@md.rights}"\n} content=meta_content_clean(@md.rights) @rights=%{ \n} end content=meta_content_clean(@md.keywords) @keywords=%{ \n} if @md.keywords @vz=SiSU_Env::Get_init.instance.skin end def meta_content_clean(content='') unless content.nil? content.tr!('"',"'") end content end def rdftoc #tocHead #values strung together, because some empty, and resulting output (line breaks) is much better # # # #Dublin Core #### XML only :-( KEEP #< # \n #\n #WOK end def rdfseg #segHead rdftoc end def comment_xml(extra='') generator="Generated by: #{@md.sisu_version[:project]} #{@md.sisu_version[:version]} of #{@md.sisu_version[:date_stamp]} (#{@md.sisu_version[:date]})" if @md.sisu_version[:version] lastdone="Last Generated on: #{Time.now}" rubyv="Ruby version: #{@md.ruby_version}" sc=if @md.sc_info "Source file: #{@md.sc_filename} version: #{@md.sc_number} of: #{@md.sc_date}" else '' end if extra.empty? < WOK else < WOK end end def comment_xml_sax desc='SiSU XML, SAX type representation' comment_xml(desc) end def comment_xml_node desc='SiSU XML, Node type representation' comment_xml(desc) end def comment_xml_dom desc='SiSU XML, DOM type representation' comment_xml(desc) end def metatag_html #values strung together, because some empty, and resulting output (line breaks) is much better #{@vz.js_head} <