diff options
| | | |
|---|---|---|
| author | Ralph Amissah <ralph@amissah.com> | 2013-01-27 16:26:29 -0500 |
| committer | Ralph Amissah <ralph@amissah.com> | 2013-01-27 16:28:29 -0500 |
| commit | 11907e10c73883e5dcdaba11a093ef01c7ee2de8 (patch) | |
| tree | 65660c532f372936f79544f4d7c2705cfcc56c7d /lib/sisu/v4 | |
| parent | v4 v3: epub, toc.ncx fix, navpoint_close (diff) | |
v4: check xml representation of characters (& < > in particular)
Diffstat (limited to 'lib/sisu/v4')
-rw-r--r-- | lib/sisu/v4/epub_format.rb | 63 |
-rw-r--r-- | lib/sisu/v4/shared_metadata.rb | 109 |
2 files changed, 47 insertions, 125 deletions
diff --git a/lib/sisu/v4/epub_format.rb b/lib/sisu/v4/epub_format.rb index 84d32000..dd3273d0 100644 --- a/lib/sisu/v4/epub_format.rb +++ b/lib/sisu/v4/epub_format.rb @@ -1217,6 +1217,18 @@ module SiSU_EPUB_Format WOK end end + module SanitizeXML + def self.xml(x) + if x.is_a?(String) + x.gsub(/&/,'&'). + gsub(/</,'<').gsub(/>/,'>'). + #gsub(/</,'<').gsub(/>/,'>'). + gsub(/\\\\/,'<br />'). + gsub(/<br(?: \/)?>/,'<br />') + else x + end + end + end class HeadInformation include SiSU_Viz attr_reader :md,:rdf,:vz @@ -1347,10 +1359,12 @@ output_epub_cont_seg.close end def head depth=@md.lvs[1] + @md.lvs[2] + @md.lvs[3] + @md.lvs[4] + title=SanitizeXML.xml(@md.title.full) + author=SanitizeXML.xml(@md.author) <<-WOK <!-- four required metadata items (for all NCX documents, (including the relaxed constraints of OPS 2.0) --> - <title>#{@md.title.full} by #{@md.author}</title> + <title>#{title} by #{author}</title> <link href="css/xhtml.css" rel="stylesheet" type="text/css" id="main-css" /> <meta name="dtb:uid" content="urn:uuid:#{@md.dgst[1]}" /> <!-- <meta name="epub-creator" content="#{@md.publisher}" /> --> @@ -1365,16 +1379,18 @@ output_epub_cont_seg.close WOK end def doc_title + txt=SanitizeXML.xml(@md.title.full) <<-WOK <docTitle> - <text>#{@md.title.full}</text> + <text>#{txt}</text> </docTitle> WOK end def doc_author + txt=SanitizeXML.xml(@md.author) <<-WOK <docAuthor> - <text>#{@md.author}</text> + <text>#{txt}</text> </docAuthor> WOK end @@ -1466,12 +1482,10 @@ output_epub_cont_seg.close m=(m.empty?) \ ? (surname + other_names) : (m + '; ' + surname + ', ' + other_names) - m=m.gsub(/</,'<').gsub(/>/,'>'). - gsub(/<br(?: \/)?>/,';') + m=SanitizeXML.xml(m) end x=@md.creator.author.dup - x=x.gsub(/</,'<').gsub(/>/,'>'). - gsub(/<br(?: \/)?>/,'<br />') + x=SanitizeXML.xml(x) %{\n <dc:creator opf:file-as="#{m}" opf:role="aut">#{x}</dc:creator>} else '' end @@ -1488,12 +1502,10 @@ output_epub_cont_seg.close m=(m.empty?) \ ? 
(surname + other_names) : (m + '; ' + surname + ', ' + other_names) - m=m.gsub(/</,'<').gsub(/>/,'>'). - gsub(/<br(?: \/)?>/,';') + m=SanitizeXML.xml(m) end x=@md.creator.editor.dup - x=x.gsub(/</,'<').gsub(/>/,'>'). - gsub(/<br(?: \/)?>/,'<br />') + x=SanitizeXML.xml(x) %{\n <dc:creator opf:file-as="#{m}" opf:role="edt">#{x}</dc:creator>} else '' end @@ -1510,12 +1522,10 @@ output_epub_cont_seg.close m=(m.empty?) \ ? (surname + other_names) : (m + '; ' + surname + ', ' + other_names) - m=m.gsub(/</,'<').gsub(/>/,'>'). - gsub(/<br(?: \/)?>/,';') + m=SanitizeXML.xml(m) end x=@md.creator.translator.dup - x=x.gsub(/</,'<').gsub(/>/,'>'). - gsub(/<br(?: \/)?>/,'<br />') + x=SanitizeXML.xml(x) %{\n <dc:creator opf:file-as="#{m}" opf:role="trl">#{x}</dc:creator>} else '' end @@ -1532,28 +1542,24 @@ output_epub_cont_seg.close m=(m.empty?) \ ? (surname + other_names) : (m + '; ' + surname + ', ' + other_names) - m=m.gsub(/</,'<').gsub(/>/,'>'). - gsub(/<br(?: \/)?>/,';') + m=SanitizeXML.xml(m) end x=@md.creator.illustrator.dup - x=x.gsub(/</,'<').gsub(/>/,'>'). - gsub(/<br(?: \/)?>/,'<br />') + x=SanitizeXML.xml(x) %{\n <dc:creator opf:file-as="#{m}" opf:role="ill">#{x}</dc:creator>} else '' end date_published=if defined? @md.date.published \ and @md.date.published =~/\S+/ x=@md.date.published.dup - x=x.gsub(/</,'<').gsub(/>/,'>'). - gsub(/<br(?: \/)?>/,'<br />') + x=SanitizeXML.xml(x) %{\n <dc:date opf:event="published">#{x}</dc:date>} else '' end subject=if defined? @md.classify.subject \ and @md.classify.subject =~/\S+/ x=@md.classify.subject.dup - x=x.gsub(/</,'<').gsub(/>/,'>'). - gsub(/<br(?: \/)?>/,'<br />') + x=SanitizeXML.xml(x) %{\n <dc:subject>#{x}</dc:subject>} else '' end @@ -1565,7 +1571,7 @@ output_epub_cont_seg.close end rights=if defined? 
@md.rights.all \ and @md.rights.all =~/\S+/ - rights=@md.rights.all.gsub(/<br>/,'<br />') + rights=SanitizeXML.xml(@md.rights.all) %{\n <dc:rights>#{rights}</dc:rights>} else '' end @@ -1750,9 +1756,8 @@ output_epub_cont_seg.close end def rights def all - rghts=@md.rights.all.gsub(/<br>/,'<br />') - rghts=rghts.gsub(/^\s*Copyright\s+\(C\)/,'Copyright <sup>©</sup> ') - %{<p class="small_left">Rights: #{rghts}</p>} + rights=SanitizeXML.xml(@md.rights.all) + %{<p class="small_left">Rights: #{rights}</p>} end self end @@ -2068,15 +2073,19 @@ output_epub_cont_seg.close #{@vz.table_close}} end def toc_head_copy_at + @txt=SanitizeXML.xml(@txt) %{<p class="center">#{@txt}</p>\n} end def center + @txt=SanitizeXML.xml(@txt) %{<p class="center">#{@txt}</p>\n} end def bold + @txt=SanitizeXML.xml(@txt) %{<p class="bold">#{@txt}</p>\n} end def center_bold + @txt=SanitizeXML.xml(@txt) %{<p class="centerbold">#{@txt}</p>\n} end end diff --git a/lib/sisu/v4/shared_metadata.rb b/lib/sisu/v4/shared_metadata.rb index 44c7243e..8b660208 100644 --- a/lib/sisu/v4/shared_metadata.rb +++ b/lib/sisu/v4/shared_metadata.rb @@ -725,107 +725,19 @@ module SiSU_Metadata end def char_enc(str) @s=str + def amp + if @s \ + and @s.is_a?(String) + @s=@s.gsub(/&/u,'&') + end + @s + end def utf8 if @s \ and @s.is_a?(String) @s=@s.gsub(/<br(?: \/)?>/u,Mx[:br_paragraph]). - gsub(/</um,'<'). # '<' # < - gsub(/</um,'<'). # '<' # < - gsub(/>/um,'>'). # '>' # > - gsub(/¢/um,'¢'). # '¢' # ¢ - gsub(/£/um,'£'). # '£' # £ - gsub(/¥/um,'¥'). # '¥' # ¥ - gsub(/§/um,'§'). # '§' # § - gsub(/©/um,'©'). # '©' # © - gsub(/ª/um,'ª'). # 'ª' # ª - gsub(/«/um,'«'). # '«' # « - gsub(/®/um,'®'). # '®' # ® - gsub(/°/um,'°'). # '°' # ° - gsub(/±/um,'±'). # '±' # ± - gsub(/²/um,'²'). # '²' # ² - gsub(/³/um,'³'). # '³' # ³ - gsub(/µ/um,'µ'). # 'µ' # µ - gsub(/¶/um,'¶'). # '¶' # ¶ - gsub(/¹/um,'¹'). # '¹' # ¹ - gsub(/º/um,'º'). # 'º' # º - gsub(/»/um,'»'). # '»' # » - gsub(/¼/um,'¼'). # '¼' # ¼ - gsub(/½/um,'½'). 
# '½' # ½ - gsub(/¾/um,'¾'). # '¾' # ¾ - gsub(/×/um,'×'). # '×' # × - gsub(/÷/um,'÷'). # '÷' # ÷ - gsub(/¿/um,'¿'). # '¿' # ¿ - gsub(/À/um,'À'). # 'À' # À - gsub(/Á/um,'Á'). # 'Á' # Á - gsub(/Â/um,'Â'). # 'Â' # Â - gsub(/Ã/um,'Ã'). # 'Ã' # Ã - gsub(/Ä/um,'Ä'). # 'Ä' # Ä - gsub(/Å/um,'Å'). # 'Å' # Å - gsub(/Æ/um,'Æ'). # 'Æ' # Æ - gsub(/Ç/um,'Ç'). # 'Ç' # Ç - gsub(/È/um,'È'). # 'È' # È - gsub(/É/um,'É'). # 'É' # É - gsub(/Ê/um,'Ê'). # 'Ê' # Ê - gsub(/Ë/um,'Ë'). # 'Ë' # Ë - gsub(/Ì/um,'Ì'). # 'Ì' # Ì - gsub(/Í/um,'Í'). # 'Í' # Í - gsub(/Î/um,'Î'). # 'Î' # Î - gsub(/Ï/um,'Ï'). # 'Ï' # Ï - gsub(/Ð/um,'Ð'). # 'Ð' # Ð - gsub(/Ñ/um,'Ñ'). # 'Ñ' # Ñ - gsub(/Ò/um,'Ò'). # 'Ò' # Ò - gsub(/Ó/um,'Ó'). # 'Ó' # Ó - gsub(/Ô/um,'Ô'). # 'Ô' # Ô - gsub(/Õ/um,'Õ'). # 'Õ' # Õ - gsub(/Ö/um,'Ö'). # 'Ö' # Ö - gsub(/Ø/um,'Ø'). # 'Ø' # Ø - gsub(/Ù/um,'Ù'). # 'Ù' # Ù - gsub(/Ú/um,'Ú'). # 'Ú' # Ú - gsub(/Û/um,'Û'). # 'Û' # Û - gsub(/Ü/um,'Ü'). # 'Ü' # Ü - gsub(/Ý/um,'Ý'). # 'Ý' # Ý - gsub(/Þ/um,'Þ'). # 'Þ' # Þ - gsub(/ß/um,'ß'). # 'ß' # ß - gsub(/à/um,'à'). # 'à' # à - gsub(/á/um,'á'). # 'á' # á - gsub(/â/um,'â'). # 'â' # â - gsub(/ã/um,'ã'). # 'ã' # ã - gsub(/ä/um,'ä'). # 'ä' # ä - gsub(/å/um,'å'). # 'å' # å - gsub(/æ/um,'æ'). # 'æ' # æ - gsub(/ç/um,'ç'). # 'ç' # ç - gsub(/è/um,'è'). # 'è' # è - gsub(/é/um,'é'). # '´' # é - gsub(/ê/um,'ê'). # 'ˆ' # ê - gsub(/ë/um,'ë'). # 'ë' # ë - gsub(/ì/um,'ì'). # 'ì' # ì - gsub(/í/um,'í'). # '´' # í - gsub(/î/um,'î'). # 'î' # î - gsub(/ï/um,'ï'). # 'ï' # ï - gsub(/ð/um,'ð'). # 'ð' # ð - gsub(/ñ/um,'ñ'). # 'ñ' # ñ - gsub(/ò/um,'ò'). # 'ò' # ò - gsub(/ó/um,'ó'). # 'ó' # ó - gsub(/ô/um,'ô'). # 'ô' # ô - gsub(/õ/um,'õ'). # 'õ' # õ - gsub(/ö/um,'ö'). # 'ö' # ö - gsub(/ø/um,'ø'). # 'ø' # ø - gsub(/ù/um,'ú'). # 'ù' # ú - gsub(/ú/um,'û'). # 'ú' # û - gsub(/û/um,'ü'). # 'û' # ü - gsub(/ü/um,'ý'). # 'ü' # ý - gsub(/þ/um,'þ'). # 'þ' # þ - gsub(/ÿ/um,'ÿ'). # 'ÿ' # ÿ - gsub(/‘/um,'‘'). # '‘' # ‘ - gsub(/’/um,'’'). # '’' # ’ - gsub(/“/um,'“'). # “ # “ - gsub(/”/um,'”'). 
# ” # ” - gsub(/–/um,'–'). # – # – - gsub(/—/um,'—'). # — # — - gsub(/∝/um,'∝'). # ∝ # ∝ - gsub(/∞/um,'∞'). # ∞ # ∞ - gsub(/™/um,'™'). # ™ # ™ - gsub(/✠/um,'✠'). # ✗ # ✠ + gsub(/</um,'<').gsub(/>/um,'>'). + #gsub(/</um,'<').gsub(/>/um,'>'). gsub(/ /um,' '). # space identify gsub(/ /um,' '). # space identify gsub(/#{Mx[:br_paragraph]}/u,'<br />') @@ -888,7 +800,8 @@ WOK end def xhtml_display def meta_para - inf_xml=char_enc(@inf).utf8 + inf_xml=char_enc(@inf).amp + inf_xml=char_enc(inf_xml).utf8 %{<p class="norm"> <b>#{@tag}</b>: #{inf_xml} </p>} |