From ee91e6282464321d5f1e8694c048c24abf8d15d7 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Wed, 24 Feb 2016 23:21:06 -0500 Subject: json, an output representation, first pass --- lib/sisu/json_shared.rb | 681 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 681 insertions(+) create mode 100644 lib/sisu/json_shared.rb (limited to 'lib/sisu/json_shared.rb') diff --git a/lib/sisu/json_shared.rb b/lib/sisu/json_shared.rb new file mode 100644 index 00000000..28bb0ad3 --- /dev/null +++ b/lib/sisu/json_shared.rb @@ -0,0 +1,681 @@ +# encoding: utf-8 +=begin + +* Name: SiSU + +** Description: documents, structuring, processing, publishing, search +*** Description: json output logic, flow + +** Author: Ralph Amissah + [ralph@amissah.com] + [ralph.amissah@gmail.com] + +** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Ralph Amissah, + All Rights Reserved. + +** License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see [http://www.gnu.org/licenses/]. + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + [http://www.fsf.org/licensing/licenses/gpl.html] + [http://www.gnu.org/licenses/gpl.html] + +** SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + +** Hompages: + [http://www.jus.uio.no/sisu] + [http://www.sisudoc.org] + +** Git + [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary] + [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/json_shared.rb;hb=HEAD] + +=end +module SiSU_JSONutils + require_relative 'generic_parts' # generic_parts.rb + class Clean + def initialize(para='') + @para=para + #@para,@n_char_max,@n_indent,@post,=para,n_char_max,n_indent,post + #@n_char_max_extend = n_char_max + #@n_hang=n_hang ? n_hang : @n_indent + end + def line_json_clean + @para=@para.gsub(/
/,' \\ '). + gsub(/#{Mx[:br_nl]}/,"\n\n"). + gsub(/"/,'\"'). + gsub(/'/,"\\\\'") + @para + end + end +end +module SiSU_JSON_Munge + require_relative 'json_parts' # json_parts.rb + class Trans + include SiSU_Parts_JSON + def initialize(md) + @md=md + @sys=SiSU_Env::SystemCall.new + @dir=SiSU_Env::InfoEnv.new(@md.fns) + if @md.sem_tag + @ab ||=semantic_tags.default + end + end + def semantic_tags + def default + { + pub: 'publication', + conv: 'convention', + vol: 'volume', + pg: 'page', + cty: 'city', + org: 'organization', + uni: 'university', + dept: 'department', + fac: 'faculty', + inst: 'institute', + co: 'company', + com: 'company', + conv: 'convention', + dt: 'date', + y: 'year', + m: 'month', + d: 'day', + ti: 'title', + au: 'author', + ed: 'editor', #editor? + v: 'version', #edition + n: 'name', + fn: 'firstname', + mn: 'middlename', + ln: 'lastname', + in: 'initials', + qt: 'quote', + ct: 'cite', + ref: 'reference', + ab: 'abreviation', + def: 'define', + desc: 'description', + trans: 'translate', + } + end + self + end + def char_enc #character encode + def utf8(dob='') + if @sys.locale =~/utf-?8/i # instead ucs for utf8 # String#encode Iñtërnâtiônàlizætiøn + str=if defined? dob.obj then dob.obj + elsif dob.is_a?(String) then dob + end + if str + #¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûü + #¢£¥§©ª«®°±²³µ¶¹º»¼½¾×÷ + str=str.gsub(//um,'>'). # '>' # > + gsub(/¢/um,'¢'). # '¢' # ¢ + gsub(/£/um,'£'). # '£' # £ + gsub(/¥/um,'¥'). # '¥' # ¥ + gsub(/§/um,'§'). # '§' # § + gsub(/©/um,'©'). # '©' # © + gsub(/ª/um,'ª'). # 'ª' # ª + gsub(/«/um,'«'). # '«' # « + gsub(/®/um,'®'). # '®' # ® + gsub(/°/um,'°'). # '°' # ° + gsub(/±/um,'±'). # '±' # ± + gsub(/²/um,'²'). # '²' # ² + gsub(/³/um,'³'). # '³' # ³ + gsub(/µ/um,'µ'). # 'µ' # µ + gsub(/¶/um,'¶'). # '¶' # ¶ + gsub(/¹/um,'¹'). # '¹' # ¹ + gsub(/º/um,'º'). # 'º' # º + gsub(/»/um,'»'). # '»' # » + gsub(/¼/um,'¼'). # '¼' # ¼ + gsub(/½/um,'½'). # '½' # ½ + gsub(/¾/um,'¾'). # '¾' # ¾ + gsub(/×/um,'×'). # '×' # × + gsub(/÷/um,'÷'). # '÷' # ÷ + gsub(/¿/um,'¿'). # '¿' # ¿ + gsub(/À/um,'À'). # 'À' # À + gsub(/Á/um,'Á'). # 'Á' # Á + gsub(/Â/um,'Â'). # 'Â' #  + gsub(/Ã/um,'Ã'). # 'Ã' # à + gsub(/Ä/um,'Ä'). # 'Ä' # Ä + gsub(/Å/um,'Å'). # 'Å' # Å + gsub(/Æ/um,'Æ'). # 'Æ' # Æ + gsub(/Ç/um,'Ç'). # 'Ç' # Ç + gsub(/È/um,'È'). # 'È' # È + gsub(/É/um,'É'). # 'É' # É + gsub(/Ê/um,'Ê'). # 'Ê' # Ê + gsub(/Ë/um,'Ë'). # 'Ë' # Ë + gsub(/Ì/um,'Ì'). # 'Ì' # Ì + gsub(/Í/um,'Í'). # 'Í' # Í + gsub(/Î/um,'Î'). # 'Î' # Î + gsub(/Ï/um,'Ï'). # 'Ï' # Ï + gsub(/Ð/um,'Ð'). # 'Ð' # Ð + gsub(/Ñ/um,'Ñ'). # 'Ñ' # Ñ + gsub(/Ò/um,'Ò'). # 'Ò' # Ò + gsub(/Ó/um,'Ó'). # 'Ó' # Ó + gsub(/Ô/um,'Ô'). # 'Ô' # Ô + gsub(/Õ/um,'Õ'). # 'Õ' # Õ + gsub(/Ö/um,'Ö'). # 'Ö' # Ö + gsub(/Ø/um,'Ø'). # 'Ø' # Ø + gsub(/Ù/um,'Ù'). # 'Ù' # Ù + gsub(/Ú/um,'Ú'). # 'Ú' # Ú + gsub(/Û/um,'Û'). # 'Û' # Û + gsub(/Ü/um,'Ü'). # 'Ü' # Ü + gsub(/Ý/um,'Ý'). # 'Ý' # Ý + gsub(/Þ/um,'Þ'). # 'Þ' # Þ + gsub(/ß/um,'ß'). # 'ß' # ß + gsub(/à/um,'à'). # 'à' # à + gsub(/á/um,'á'). # 'á' # á + gsub(/â/um,'â'). # 'â' # â + gsub(/ã/um,'ã'). # 'ã' # ã + gsub(/ä/um,'ä'). # 'ä' # ä + gsub(/å/um,'å'). # 'å' # å + gsub(/æ/um,'æ'). # 'æ' # æ + gsub(/ç/um,'ç'). # 'ç' # ç + gsub(/è/um,'è'). # 'è' # è + gsub(/é/um,'é'). # '´' # é + gsub(/ê/um,'ê'). # 'ˆ' # ê + gsub(/ë/um,'ë'). # 'ë' # ë + gsub(/ì/um,'ì'). # 'ì' # ì + gsub(/í/um,'í'). # '´' # í + gsub(/î/um,'î'). # 'î' # î + gsub(/ï/um,'ï'). # 'ï' # ï + gsub(/ð/um,'ð'). # 'ð' # ð + gsub(/ñ/um,'ñ'). # 'ñ' # ñ + gsub(/ò/um,'ò'). # 'ò' # ò + gsub(/ó/um,'ó'). # 'ó' # ó + gsub(/ô/um,'ô'). # 'ô' # ô + gsub(/õ/um,'õ'). # 'õ' # õ + gsub(/ö/um,'ö'). # 'ö' # ö + gsub(/ø/um,'ø'). # 'ø' # ø + gsub(/ù/um,'ú'). # 'ù' # ú + gsub(/ú/um,'û'). # 'ú' # û + gsub(/û/um,'ü'). # 'û' # ü + gsub(/ü/um,'ý'). # 'ü' # ý + gsub(/þ/um,'þ'). # 'þ' # þ + gsub(/ÿ/um,'ÿ'). # 'ÿ' # ÿ + gsub(/‘/um,'‘'). # '‘' # ‘ + gsub(/’/um,'’'). # '’' # ’ + gsub(/“/um,'“'). # “ # “ + gsub(/”/um,'”'). # ” # ” + gsub(/–/um,'–'). # – # – + gsub(/—/um,'—'). # — # — + gsub(/∝/um,'∝'). # ∝ # ∝ + gsub(/∞/um,'∞'). # ∞ # ∞ + gsub(/™/um,'™'). # ™ # ™ + gsub(/✠/um,'✠'). # ✗ # ✠ + gsub(/ /um,' '). # space identify + gsub(/ /um,' ') # space identify + end + dob=if defined? dob.obj + dob.obj=str + dob + elsif dob.is_a?(String) + str + end + dob + end + end + def html(dob='') + if @sys.locale =~/utf-?8/i # instead ucs for utf8 # String#encode Iñtërnâtiônàlizætiøn + dob.obj=dob.obj.gsub(/ /u,' '). # space identify + gsub(/ /u,' ') # space identify + end + end + self + end + def tidywords(wordlist) + wordlist_new=[] + wordlist.each do |x| + #imperfect solution will not catch all possible cases + x=x.gsub(/&/,'&') unless x =~/&\S+;/ + x=x.gsub(/&([A-Z])/,'&\1') + wordlist_new << x + end + wordlist_new + end + def markup(dob='') + wordlist=dob.obj.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17 + dob.obj=tidywords(wordlist).join(' ').strip + unless dob.is==:table + dob.obj=dob.obj.gsub(/#{Mx[:br_line]}/u,'
'). + gsub(/#{Mx[:br_paragraph]}/u,'
'). + gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'
') + end + dob.obj=dob.obj.gsub(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,''). + gsub(/#{Mx[:mk_o]}#([a-zA-Z]+)#{Mx[:mk_c]}/,'&\1;'). + gsub(/#{Mx[:mk_o]}(#[0-9]+)#{Mx[:mk_c]}/,'&\1;'). + gsub(/(^|#{Mx[:gl_c]}|\s+)<\s+/,'\1< ').gsub(/\s+>(\s+|$)/,' >\1'). + #gsub(/#{Mx[:fa_emphasis_o]}(.+?)#{Mx[:fa_emphasis_c]}/,'\1'). #reinstate + gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/m,'\1'). + gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/m,'\1'). + gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1'). + gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1'). + gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1'). + gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1'). + gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1'). + gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1'). + gsub(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\1'). + gsub(/<:pb>\s*/,''). #Fix + gsub(/<+[-~]#>+/,'') + if dob.is !=:code + #embeds a red-bullet image --> + dob.obj=dob.obj.gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1'). + gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1'). + gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1'). + gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1') + dob.obj=dob.obj.gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'
') unless dob.is==:table + dob.obj=dob.obj.gsub(/#{Mx[:br_page]}\s*/,''). + gsub(/#{Mx[:br_page_new]}\s*/,''). + gsub(/#{Mx[:br_page_line]}\s*/,''). + gsub(/#{Mx[:pa_non_object_no_heading]}|#{Mx[:pa_non_object_dummy_heading]}/,''). + gsub(/<[-~]#>/,''). + gsub(/href="#{Xx[:segment]}/m,'href="'). + gsub(/#{Mx[:lnk_o]}([^#{Mx[:lnk_o]}#{Mx[:lnk_c]}#{Mx[:rel_o]}#{Mx[:rel_c]}]+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}(\.\.\/\S+?)#{Mx[:rel_c]}/, + '\1'). + gsub(/#{Mx[:lnk_o]}([^#{Mx[:lnk_o]}#{Mx[:lnk_c]}#{Mx[:rel_o]}#{Mx[:rel_c]}]+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}:(\S+?)#{Mx[:rel_c]}/, + '\1'). + gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}(\S+?)#{Mx[:rel_c]}/, + '\1'). + gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}[ ]*(\S+?\.(?:jpg|png|gif))[ ]+(\d+)x(\d+)(\s+[^}]+)?#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/, + %{[\\1] \\4}). + gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}[ ]*(\S+?\.(?:jpg|png|gif))([ ]+[^}]+)?#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/, + %{\\1}). + gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}[ ]*(\S+?\.(?:jpg|png|gif))[ ]+(\d+)x(\d+)(\s+[^}]+)?#{Mx[:lnk_c]}image/, + %{[\\1] \\4}). + gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}[ ]*(\S+?\.(?:jpg|png|gif))([ ]+[^}]+)?#{Mx[:lnk_c]}image/, + %{\\1}). + gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/, + '\1'). #watch, compare html_tune + gsub(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/, + %{#{the_url_decoration.xml_open}\\1#{the_url_decoration.xml_close}}). + gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/, + '\1') #escaped urls not linked, deal with later + else + dob.obj=dob.obj.gsub(//m,'>') + end + if dob.of==:block + dob.obj=dob.obj.gsub(/#{Mx[:gl_bullet]}/,'● ') + end + dob.obj=dob.obj.gsub(/#{Mx[:url_o]}([a-zA-Z0-9._-]+\@\S+?\.[a-zA-Z0-9._-]+)#{Mx[:url_c]}/, + %{#{the_url_decoration.xml_open}\\1#{the_url_decoration.xml_close}}). + gsub(/#{Dx[:url_o]}/,"#{Dx[:url_o_xml]}"). + gsub(/#{Dx[:url_c]}/,"#{Dx[:url_c_xml]}"). + gsub(/ |#{Mx[:nbsp]}/m,' '). + gsub(/;&([^#]|(?:[^gl][^t]|[^a][^m][^p]|[^n][^b][^s][^p])[^;])/,';&\1') # pattern not to match + dob + end + def markup_light(dob='') + dob.obj=dob.obj.gsub(/\/\{(.+?)\}\//,'\1'). + gsub(/[*!]\{(.+?)\}[*!]/,'\1'). + gsub(/_\{(.+?)\}_/,'\1'). + gsub(/-\{(.+?)\}-/,'\1'). + gsub(//,'
'). + gsub(/<:pb>\s*/,''). + gsub(/<[-~]#>/,''). + gsub(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1& '). #sort + gsub(/&([^;]{1,5})/,'&\1'). #sort, rough estimate, revisit #WATCH found in node not sax + gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif))[ ]+.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/, + "#{@md.file.output_path.xml.rel_image}\/\\1"). + gsub(/ |#{Mx[:nbsp]}/,' '). + gsub(/;&([^#]|(?:[^gl][^t]|[^a][^m][^p]|[^n][^b][^s][^p])[^;])/,';&\1') # pattern not to match + wordlist=dob.obj.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17 + dob.obj=tidywords(wordlist).join(' ').strip + dob + end + def clean(str) + str=str.gsub(/#{Mx[:gl_o]}(#[0-9]{3})#{Mx[:gl_c]}/u,'&\1;'). + gsub(/#{Mx[:gl_o]}#([a-z]{2,4})#{Mx[:gl_c]}/u,'&\1;') + end + def markup_fictionbook(str='',is='') + str=str.gsub(/#{Mx[:en_a_o]}([\d+*]+).+?#{Mx[:en_a_c]}/m,'[\1]'). + gsub(/&/,'&'). #sort + gsub(/#{Mx[:mk_o]}#([a-zA-Z]+)#{Mx[:mk_c]}/,'&\1;'). + gsub(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1& '). #sort + gsub(/#{Mx[:mk_o]}(#[0-9]+)#{Mx[:mk_c]}/,'&\1;') + str=str.gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'
') unless is==:table + str=str.gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1'). + gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1'). + gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1'). + gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1'). + gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1'). + gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1'). + gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1'). + gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1'). + gsub(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\1'). # tt, kbd + gsub(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg|gif)).+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}|image)/m,''). + gsub(/#{Mx[:url_o]}(.+?)#{Mx[:url_c]}/,"#{Dx[:url_o]}\\1#{Dx[:url_c]}"). + gsub(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,''). + gsub(/#{Mx[:gl_bullet]}/m,'● '). #  not available + gsub(/#{Mx[:nbsp]}/,' '). #  not available + gsub(/<(p|br)>/,'<\1 />') + clean(str) + end + def markup_docbook(dob='') # work on, initially a copy of fictionbook! + if dob.is !=:code + dob.obj=dob.obj.gsub(/#{Mx[:en_a_o]}(\d+)\s*(.+?)#{Mx[:en_a_c]}/m,'\2'). + gsub(/\\\\/,''). + gsub(/&/,'&'). #sort + gsub(/#{Mx[:mk_o]}#([a-zA-Z]+)#{Mx[:mk_c]}/,'&\1;'). + gsub(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1& '). #sort + gsub(/#{Mx[:mk_o]}(#[0-9]+)#{Mx[:mk_c]}/,'&\1;') + dob.obj=dob.obj.gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'
') unless dob.is==:table + dob.obj=dob.obj.gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1'). + gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1'). + gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1'). + gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1'). + gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1'). + gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1'). + gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1'). + gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1'). + gsub(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\1'). # tt, kbd + gsub(/#{Mx[:lnk_o]}\s*(\S+?)\.(png|jpg|gif).+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}|image)/m, + %{#{Xx[:split]}:spaces0:
\n:spaces1:\n:spaces1:\n:spaces0:
#{Xx[:split]}}). # common image location, else use ./images + gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:url_o]}(.+?)#{Mx[:url_c]}/, + '\1'). + gsub(/#{Mx[:url_o]}(.+?)#{Mx[:url_c]}/, + '\1'). + gsub(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,''). + gsub(/#{Mx[:gl_bullet]}/m,'● '). #  not available + gsub(/#{Mx[:nbsp]}/,' '). #  not available + gsub(/<(p|br)>/,'<\1 />') + dob.obj=clean(dob.obj) + elsif dob.is == :code + dob.obj=dob.obj.gsub(/&/m,'&'). #sort + gsub(//,'>') + else # p dob.is ?? + end + dob + end + def markup_group(dob='') + dob.obj=dob.obj.gsub(//,'>'). + gsub(/<:?br(?:\s+\/)?>/,'
'). + gsub(/<(link xmlns:xl=".+?")>/,'<\1>'). + gsub(/<(\/link)>/,'<\1>'). + gsub(/<(\/?en)>/,'<\1>') + dob + end + def markup_block(dob='') + dob.obj=dob.obj.gsub(//,'>'). + gsub(/<:?br(?:\s+\/)?>/,'
'). + gsub(/<(link xmlns:xl=".+?")>/,'<\1>'). + gsub(/<(\/link)>/,'<\1>'). + gsub(/<(\/?en)>/,'<\1>') + dob + end + def xml_sem_block_paired(matched) # colon depth: many, recurs + matched=matched.gsub(/\b(au):\{(.+?)\}:\1\b/m, %{\\2}). + gsub(/\b(vol):\{(.+?)\}:\1\b/m, %{\\2}). + gsub(/\b(pub):\{(.+?)\}:\1\b/m, %{\\2}). + gsub(/\b(ref):\{(.+?)\}:\1\b/m, %{\\2}). + gsub(/\b(desc):\{(.+?)\}:\1\b/m,%{\\2}). + gsub(/\b(conv):\{(.+?)\}:\1\b/m,%{\\2}). + gsub(/\b(ct):\{(.+?)\}:\1\b/m, %{\\2}). + gsub(/\b(cty):\{(.+?)\}:\1\b/m, %{\\2}). + gsub(/\b(org):\{(.+?)\}:\1\b/m, %{\\2}). + gsub(/\b(dt):\{(.+?)\}:\1\b/m, %{\\2}). + gsub(/\b(n):\{(.+?)\}:\1\b/m, %{\\2}). + gsub(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m,'\2') + end + def xml_semantic_tags(dob) + if @md.sem_tag + dob.obj.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } + dob.obj.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } + dob.obj.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } + dob.obj=dob.obj.gsub(/:\{(.+?)\}:au\b/m, %{\\1}). + gsub(/:\{(.+?)\}:n\b/m, %{\\1}). + gsub(/:\{(.+?)\}:ti\b/m, %{\\1}). + gsub(/:\{(.+?)\}:ref\b/m, %{\\1}). + gsub(/:\{(.+?)\}:desc\b/m, %{\\1}). + gsub(/:\{(.+?)\}:cty\b/m, %{\\1}). + gsub(/:\{(.+?)\}:org\b/m, %{\\1}). + gsub(/:\{(.+?)\}:([a-z]+(?:[_:.][a-z]+)*)/m,'\1'). + gsub(/;\{([^}]+(?![;]))\};ti\b/m, %{\\1}). + gsub(/;\{([^}]+(?![;]))\};qt\b/m, %{\\1}). + gsub(/;\{([^}]+(?![;]))\};ref\b/m, %{\\1}). + gsub(/;\{([^}]+(?![;]))\};ed\b/m, %{\\1}). + gsub(/;\{([^}]+(?![;]))\};v\b/m, %{\\1}). + gsub(/;\{([^}]+(?![;]))\};desc\b/m, %{\\1}). + gsub(/;\{([^}]+(?![;]))\};def\b/m, %{\\1}). + gsub(/;\{([^}]+(?![;]))\};trans\b/m, %{\\1}). + gsub(/;\{([^}]+(?![;]))\};y\b/m, %{\\1}). + gsub(/;\{([^}]+(?![;]))\};ab\b/m, %{\\1}). + gsub(/;\{([^}]+(?![;]))\};pg\b/m, %{\\1}). + gsub(/;\{([^}]+(?![;]))\};fn?\b/m, %{\\1}). + gsub(/;\{([^}]+(?![;]))\};mn?\b/m, %{\\1}). + gsub(/;\{([^}]+(?![;]))\};ln?\b/m, %{\\1}). + gsub(/;\{([^}]+(?![;]))\};in\b/m, %{\\1}). + gsub(/;\{([^}]+(?![;]))\};uni\b/m, %{\\1}). + gsub(/;\{([^}]+(?![;]))\};fac\b/m, %{\\1}). + gsub(/;\{([^}]+(?![;]))\};inst\b/m, %{\\1}). + gsub(/;\{([^}]+(?![;]))\};dept\b/m, %{\\1}). + gsub(/;\{([^}]+(?![;]))\};org\b/m, %{\\1}). + gsub(/;\{([^}]+(?![;]))\};com?\b/m, %{\\1}). + gsub(/;\{([^}]+(?![;]))\};cty\b/m, %{\\1}). + gsub(/;\{([^}]+(?![;]))\};([a-z]+(?:[_:.][a-z]+)*)/m,'\1') + end + dob + end + end +end +module SiSU_XML_Tags #Format + require_relative 'dp' # dp.rb + include SiSU_Param + class RDF + include SiSU_Parts_JSON + def initialize(md='',seg_name=[],tracker=0) + @full_title=@subtitle=@author=@subject=@description=@publisher=@contributor=@date=@date_created=@date_issued=@date_available=@date_valid=@date_modified=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@copyright=@owner=@keywords='' + @md=md + @rdfurl=%{ rdf:about="http://www.jus.uio.no/lm/toc"\n} + if defined? @md.title.full \ + and @md.title.full # DublinCore 1 - title + @rdf_title=%{ dc.title="#{seg_name}#{@md.title.full}"\n} + @full_title=%{ \n} + end + if defined? @md.creator.author \ + and @md.creator.author=~/\S+/ # DublinCore 2 - creator/author (author) + @rdf_author=%{ dc.author="#{@md.creator.author}"\n} + content=meta_content_clean(@md.creator.author) + @author=%{ \n} + end + if defined? @md.publisher \ + and @md.publisher # DublinCore 5 - publisher (current copy published by) + @rdf_publisher=%{ dc.publisher="#{@md.publisher}"\n} + content=meta_content_clean(@md.publisher) + @publisher=%{ \n} + end + if defined? @md.creator.contributor \ + and @md.creator.contributor=~/\S+/ # DublinCore 6 - contributor + @rdf_contributor=%{ dc.contributor="#{@md.creator.contributor}"\n} + content=meta_content_clean(@md.creator.contributor) + @contributor=%{ \n} + end + if defined? @md.date.published \ + and @md.date.published=~/\S+/ # DublinCore 7 - date year-mm-dd + @rdf_date=%{ dc.date="#{@md.date.published}"\n} + @date=%{ \n} # fix @md.date_scheme + end + if defined? @md.date.created \ + and @md.date.created=~/\S+/ # DublinCore 7 - date.created year-mm-dd + @rdf_date_created=%{ dc.date.created="#{@md.date.created}"\n} + @date_created=%{ \n} + end + if defined? @md.date.issued \ + and @md.date.issued=~/\S+/ # DublinCore 7 - date.issued year-mm-dd + @rdf_date_issued=%{ dc.date.issued="#{@md.date.issued}"\n} + @date_issued=%{ \n} + end + if defined? @md.date.available \ + and @md.date.available=~/\S+/ # DublinCore 7 - date.available year-mm-dd + @rdf_date_available=%{ dc.date.available="#{@md.date.available}"\n} + @date_available=%{ \n} + end + if defined? @md.date.valid \ + and @md.date.valid=~/\S+/ # DublinCore 7 - date.valid year-mm-dd + @rdf_date_valid=%{ dc.date.valid="#{@md.date.valid}"\n} + @date_valid=%{ \n} + end + if defined? @md.date.modified \ + and @md.date.modified=~/\S+/ # DublinCore 7 - date.modified year-mm-dd + @rdf_date_modified=%{ dc.date.modified="#{@md.date.modified}"\n} + @date_modified=%{ \n} + end + if defined? @md.rights.all \ + and @md.rights.all # DublinCore 15 - rights + @rdf_rights=%{ dc.rights="#{@md.rights.all}"\n} + content=meta_content_clean(@md.rights.all) + @rights=%{ \n} + end + if defined? @md.classify.subject \ + and @md.classify.subject=~/\S+/ # DublinCore 3 - subject (us library of congress, eric or udc, or schema???) + @rdf_subject=%{ dc.subject="#{@md.classify.subject}"\n} + content=meta_content_clean(@md.classify.subject) + @subject=%{ \n} + end + if defined? @md.notes.description \ + and @md.notes.description=~/\S+/ # DublinCore 4 - description + @rdf_description=%{ dc.description="#{@md.notes.description}"\n} + content=meta_content_clean(@md.notes.description) + @description=%{ \n} + end + if defined? @md.notes.coverage \ + and @md.notes.coverage=~/\S+/ # DublinCore 14 - coverage + @rdf_coverage=%{ dc.coverage="#{@md.notes.coverage}"\n} + content=meta_content_clean(@md.notes.coverage) + @coverage=%{ \n} + end + if defined? @md.notes.relation \ + and @md.notes.relation=~/\S+/ # DublinCore 13 - relation + @rdf_relation=%{ dc.relation="#{@md.notes.relation}"\n} + content=meta_content_clean(@md.notes.relation) + @relation=%{ \n} + end + if defined? @md.notes.type \ + and @md.notes.type # DublinCore 8 - type (genre eg. report, convention etc) + @rdf_type=%{ dc.type="#{@md.notes.type}"\n} + content=meta_content_clean(@md.notes.type) + @type=%{ \n} + end + if defined? @md.notes.format \ + and @md.notes.format=~/\S+/ # DublinCore 9 - format (use your mime type) + @rdf_format=%{ dc.format="#{@md.notes.format}"\n} + content=meta_content_clean(@md.notes.format) + @format=%{ \n} + end + #if defined? @md.identifier.sisupod \ + #and @md.identifier.sisupod=~/\S+/ # DublinCore 10 - identifier (your identifier, could use urn which is free) + # @rdf_identifier=%{ dc.identifier="#{@md.identifier.sisupod}"\n} + # content=meta_content_clean(@md.identifier.sisupod) + # @identifier=%{ \n} + #end + if defined? @md.original.source \ + and @md.original.source=~/\S+/ # DublinCore 11 - source (document source) + @rdf_source=%{ dc.source="#{@md.original.source}"\n} + content=meta_content_clean(@md.original.source) + @source=%{ \n} + end + if defined? @md.title.language \ + and @md.title.language=~/\S+/ # DublinCore 12 - language (English) + @rdf_language=%{ dc.language="#{@md.title.language}"\n} + @language=%{ \n} + end + if defined? @md.original.language \ + and @md.original.language=~/\S+/ + @rdf_language_original=%{ dc.language="#{@md.original.language}"\n} + @language_original=%{ \n} + end + content=meta_content_clean(@md.keywords) + @keywords=%{ \n} if @md.keywords + end + def meta_content_clean(content='') + content=if not content.nil? + content=content.tr('"',"'"). + gsub(/&/,'&') + content=SiSU_XML_Munge::Trans.new(@md).char_enc.utf8(content) + else content + end + end + def rdfseg #segHead + rdftoc + end + def comment_xml(extra='') + generator="Generated by: #{@md.project_details.project} #{@md.project_details.version} of #{@md.project_details.date_stamp} (#{@md.project_details.date})" if @md.project_details.version + lastdone="Last Generated on: #{Time.now}" + rubyv="Ruby version: #{@md.ruby_version}" + sc=if @md.sc_info + "Source file: #{@md.sc_filename} version: #{@md.sc_number} of: #{@md.sc_date}" + else '' + end + if extra.empty? +< +WOK + else +< +WOK + end + end + def comment_xml_sax + desc='SiSU XML, SAX type representation' + comment_xml(desc) + end + def comment_xml_node + desc='SiSU XML, Node type representation' + comment_xml(desc) + end + def comment_xml_dom + desc='SiSU XML, DOM type representation' + comment_xml(desc) + end + def metatag_html #values strung together, because some empty, and resulting output (line breaks) is much better +<