From bbd1100d9674b4d4b57e62999414beaf339ff4a5 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 22 Mar 2008 17:05:50 +0000 Subject: tex to pdf, xetex (utf8) added as alternative to pdftex --- CHANGELOG | 12 ++ lib/sisu/v0/sysenv.rb | 48 +++-- lib/sisu/v0/texpdf_format.rb | 472 ++++++++++++++++++++++++++++++------------- 3 files changed, 368 insertions(+), 164 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 8d4a255a..cf2ab222 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -9,6 +9,18 @@ Reverse Chronological: %% STABLE MANIFEST +%% sisu_0.66.1.orig.tar.gz (2008-03-22:11/6) +http://www.jus.uio.no/sisu/pkg/src/sisu_0.66.1.orig.tar.gz + sisu_0.66.1.orig.tar.gz + sisu_0.66.1-1.dsc + sisu_0.66.1-1.diff.gz + + * tex to pdf, xetex (utf8) added as alternative to pdftex + [for now special character processing is separate, consider merging common + parts, that is, most of it] + + * debian [add] texlive-xetex + %% sisu_0.66.0.orig.tar.gz (2008-02-24:07/7) http://www.jus.uio.no/sisu/pkg/src/sisu_0.66.0.orig.tar.gz b45d81d949590a9b24924589bc98032b 1492653 sisu_0.66.0.orig.tar.gz diff --git a/lib/sisu/v0/sysenv.rb b/lib/sisu/v0/sysenv.rb index 9cf14507..816c72b7 100644 --- a/lib/sisu/v0/sysenv.rb +++ b/lib/sisu/v0/sysenv.rb @@ -1,4 +1,4 @@ -# coding: utf-8 +# coding: utf-8 =begin * Name: SiSU @@ -647,30 +647,36 @@ module SiSU_Env else puts "\tWARN: #{program} is not installed #{program_ref}" end end - def latex2pdf #convert from latex to pdf - prog=[] - prog=['pdflatex','pdfetex','pdftex'] - program_ref="\n\t\tSee http://www.tug.org/applications/pdftex/\n\t\tOn Debian this is is included in tetex-extra" + def tex2pdf_engine + prog=['xetex','xelatex','pdflatex','pdfetex','pdftex'] @pdfetex_flag=false @cmd ||='' - tell=if @cmd =~/[MVv]/; '' - else '> /dev/null' - end - mode='batchmode' - #mode='nonstopmode' + @texpdf=nil prog.each do |program| if program_found?(program) - case program - when /pdflatex/; system("#{program} -interaction=#{mode} #@input #{tell}\n") - when 
/pdfetex/; system("#{program} -interaction=#{mode} -fmt=pdflatex #@input #{tell}\n") # debian specific paramters ? - #system("#{program} -interaction=batchmode -progname=pdflatex #@input\n") - when /pdftex/; system("#{program} -interaction=#{mode} -fmt=pdflatex #@input #{tell}\n") - end + @texpdf=program if program =~/xetex|xelatex|pdftex|pdflatex/ @pdfetex_flag=true break end - unless @pdfetex_flag; puts "\tWARN: none of the following programs are installed: #{program[0]}, #{program[1]}, #{program[2]} is installed. #{program_ref}" + end + @texpdf + end + def latex2pdf #convert from latex to pdf + tell=if @cmd =~/[MVv]/; '' + else '> /dev/null' + end + mode='batchmode' + #mode='nonstopmode' + program_ref="\n\t\tSee http://www.tug.org/applications/pdftex/\n\t\tOn Debian this is is included in tetex-extra" + texpdf=tex2pdf_engine + if @pdfetex_flag; + texpdf_cmd=case texpdf + when /xetex/; "#{texpdf} -interaction=#{mode} -fmt=xelatex #@input #{tell}\n" + when /pdftex/; "#{texpdf} -interaction=#{mode} -fmt=pdflatex #@input #{tell}\n" + when /xelatex|pdflatex/; "#{texpdf} -interaction=#{mode} #@input #{tell}\n" end + system(texpdf_cmd) + else puts "\tWARN: none of the following programs are installed: #{program[0]}, #{program[1]}, #{program[2]} is installed. 
#{program_ref}" end end def makeinfo #texinfo @@ -2558,11 +2564,11 @@ WOK end def images unless FileTest.directory?("#{@env.path.output}/_sisu") - mkdir_p("#{@env.path.output}/_sisu") + mkdir_p("#{@env.path.output}/_sisu") end unless File.exist?("#{@env.path.output}/_sisu/image_sys") \ or File.symlink?("#{@env.path.output}/_sisu/image_sys") - File.symlink("../../_sisu/image_sys", "#{@env.path.output}/_sisu/image_sys") + File.symlink("../../_sisu/image_sys", "#{@env.path.output}/_sisu/image_sys") end end def man_forms @@ -2657,7 +2663,7 @@ WOK def dbi if psql.host =~/(?:\S{1,3}\.){3}\S{1,3}|\S+?\.\S+/ "DBI:Pg:database=#{psql.db};host=#{psql.host};port=#{psql.port}" - else "DBI:Pg:database=#{psql.db};port=#{psql.port}" + else "DBI:Pg:database=#{psql.db};port=#{psql.port}" end end self @@ -3138,7 +3144,7 @@ fns_array=unless fns =~/\.ssm.sst$/ IO.readlines(fns,'') else IO.readlines(fns,'r:utf-8') end -else +else if RUBY_VERSION < '1.9' IO.readlines("#{path.composite_file}/#{fns}",'') else IO.readlines("#{path.composite_file}/#{fns}",'r:utf-8') diff --git a/lib/sisu/v0/texpdf_format.rb b/lib/sisu/v0/texpdf_format.rb index 03bdd184..9e7fccde 100644 --- a/lib/sisu/v0/texpdf_format.rb +++ b/lib/sisu/v0/texpdf_format.rb @@ -284,6 +284,7 @@ WOK @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern @tx=SiSU_Env::Get_init.instance.tex @url_brace=SiSU_Viz::Skin.new.url_decoration + @tex2pdf=@@tex2pdf ||=SiSU_Env::System_call.new.tex2pdf_engine end def longtable_landscape @end_table='\end{longtable}' @@ -432,14 +433,14 @@ WOK end @string end - def special_characters_1(para) # ~ ^ $ & % _ { } #LaTeX special characters - KEEP list + def pdftex_special_characters_1(string) # ~ ^ $ & % _ { } #LaTeX special characters - KEEP list #p @@utf_8.list #@string=Iconv.conv('ISO-8859-1', 'UTF-8', @string) - word=@string.scan(/\S+|\n/) #unless line =~/^(?:0~\S|%+\s)/ + word=string.scan(/\S+|\n/) #unless line =~/^(?:0~\S|%+\s)/ para_array=[] - if word + string=if word word.each do |w| # _ - / # | : 
! ^ ~ - unless para =~/^(?:0~|%+ |') unless w=~/^[1-6]~|~\{|\}~|~\[|\]~|^\^~\s|~\^|\*~\S+|~#|\{t~|<~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+>/ w.gsub!(/&#(?:126|152);/,'<=tilde>') #126 usual #w.gsub!(/&#(?:126|152);/,'<=tilde>') unless w=~/https?:\/\/\S+/ #126 usual @@ -447,162 +448,334 @@ WOK end para_array << w end - para=para_array.join(' ') - @string=para.strip + string=para_array.join(' ') + string=string.strip + string + else '' end - @string.gsub!(/<~\d+;(?:\w|[0-6]:)\d+;[umdv]\d+><#@dp:#@dp>/,'') - @string.gsub!(/.+?<-#>/,'') - @string.gsub!(//,'') - @string.gsub!(//,'') + string.gsub!(/<~\d+;(?:\w|[0-6]:)\d+;[umdv]\d+><#@dp:#@dp>/,'') + string.gsub!(/.+?<-#>/,'') + string.gsub!(//,'') + string.gsub!(//,'') #problem sequence -> - @string.gsub!(/&(?:nbsp);/,'<=hardspace>') # < SiSU special character also LaTeX - @string.gsub!(/&(?:lt|#060);/,'<=lt>') # < SiSU special character also LaTeX - @string.gsub!(/&(?:gt|#062);/,'<=gt>') # > SiSU special character also LaTeX - @string.gsub!(/{/,'<=curlyopen>') # { SiSU special character also LaTeX - @string.gsub!(/}/,'<=curlyclose>') # } SiSU special character also LaTeX - @string.gsub!(/&#(?:126|152);/,'<=tilde>') # ~ SiSU special character also LaTeX - @string.gsub!(/#/,'\#') # # SiSU special character also LaTeX - @string.gsub!(/!/,'!') # ! SiSU not really special sisu character but done, also LaTeX - @string.gsub!(/*/,'*') # * should you wish to escape astrisk e.g. 
describing \*{bold}* - @string.gsub!(/-/,'-') # - SiSU special character also LaTeX - @string.gsub!(/+/,'+') # + SiSU special character also LaTeX - @string.gsub!(/,/,',') # + SiSU special character also LaTeX - @string.gsub!(/&/,'<=amp>') #unless @string=~/<:code>/ # / SiSU special character also LaTeX - @string.gsub!(///,'<=slash>') # / SiSU special character also LaTeX - @string.gsub!(/\/,'<=backslash>') # \ SiSU special character also LaTeX - @string.gsub!(/_/,'<=underscore>') # _ SiSU special character also LaTeX - @string.gsub!(/|/,'|') # | SiSU not really special sisu character but done, also LaTeX - @string.gsub!(/:/,':') # : SiSU not really special sisu character but done, also LaTeX - @string.gsub!(/^|\^/,'<=caret>') # ^ SiSU not really special sisu character but done, also LaTeX - @string.gsub!(/\#/,'<=hash>') + string.gsub!(/&(?:nbsp);/,'<=hardspace>') # < SiSU special character also LaTeX + string.gsub!(/&(?:lt|#060);/,'<=lt>') # < SiSU special character also LaTeX + string.gsub!(/&(?:gt|#062);/,'<=gt>') # > SiSU special character also LaTeX + string.gsub!(/{/,'<=curlyopen>') # { SiSU special character also LaTeX + string.gsub!(/}/,'<=curlyclose>') # } SiSU special character also LaTeX + string.gsub!(/&#(?:126|152);/,'<=tilde>') # ~ SiSU special character also LaTeX + string.gsub!(/#/,'\#') # # SiSU special character also LaTeX + string.gsub!(/!/,'!') # ! SiSU not really special sisu character but done, also LaTeX + string.gsub!(/*/,'*') # * should you wish to escape astrisk e.g. 
describing \*{bold}* + string.gsub!(/-/,'-') # - SiSU special character also LaTeX + string.gsub!(/+/,'+') # + SiSU special character also LaTeX + string.gsub!(/,/,',') # + SiSU special character also LaTeX + string.gsub!(/&/,'<=amp>') #unless @string=~/<:code>/ # / SiSU special character also LaTeX + string.gsub!(///,'<=slash>') # / SiSU special character also LaTeX + string.gsub!(/\/,'<=backslash>') # \ SiSU special character also LaTeX + string.gsub!(/_/,'<=underscore>') # _ SiSU special character also LaTeX + string.gsub!(/|/,'|') # | SiSU not really special sisu character but done, also LaTeX + string.gsub!(/:/,':') # : SiSU not really special sisu character but done, also LaTeX + string.gsub!(/^|\^/,'<=caret>') # ^ SiSU not really special sisu character but done, also LaTeX + string.gsub!(/\#/,'<=hash>') ##watch placement, problem sequence ^ - @string.gsub!(/&atild;<\/font><\/sup>/,' ') - @string.gsub!(/<:pb>/,'\newpage') - @string.gsub!(/<:pn>/,'\clearpage') - @string.gsub!(/\\copy(right|mark)?/,'<=copymark>') # ok problem with superscript - end - def special_characters_2(para) - @string.gsub!(/œ/,'\oe ') - @string.gsub!(/\$/,'\$') - @string.gsub!(/\#/,'\#') - @string.gsub!(/\%/,'\%') - @string.gsub!(/\~/,'\~') #revist, should not be necessary to mark remaining tildes - if @string !~/^\s*<:image|\}:image\s/ - @string.gsub!(/_/,'\_') + string.gsub!(/&atild;<\/font><\/sup>/,' ') + string.gsub!(/<:pb>/,'\newpage') + string.gsub!(/<:pn>/,'\clearpage') + string.gsub!(/\\copy(right|mark)?/,'<=copymark>') # ok problem with superscript + string + end + def pdftex_special_characters_2(string) + string.gsub!(/œ/,'\oe ') + string.gsub!(/\$/,'\$') + string.gsub!(/\#/,'\#') + string.gsub!(/\%/,'\%') + string.gsub!(/\~/,'\~') #revist, should not be necessary to mark remaining tildes + if string !~/^\s*<:image|\}:image\s/ + string.gsub!(/_/,'\_') end - @string.gsub!(/\{/,'\{') - @string.gsub!(/\}/,'\}') - @string.gsub!(/ /,'~') # ~ character for hardspace + 
string.gsub!(/\{/,'\{') + string.gsub!(/\}/,'\}') + string.gsub!(/ /,'~') # ~ character for hardspace # sequence important must appear after removal of { and } - @string.gsub!(/&\S+?;/,'') #hmmm + string.gsub!(/&\S+?;/,'') #hmmm # sequence imortant place before removal of & - if @string=~/<:code>/; @@flag_code=true - elsif @string=~/<:code-end>/; @@flag_code=false + if string=~/<:code>/; @@flag_code=true + elsif string=~/<:code-end>/; @@flag_code=false end - if @@flag_code; @string.gsub!(/&/,'{\\\&}') - else @string.gsub!(/(\s+&\s+)/,' and ') + if @@flag_code; string.gsub!(/&/,'{\\\&}') + else string.gsub!(/(\s+&\s+)/,' and ') end - @string.gsub!(/§/u,'\S') #latex: space between next character not preserved? #@string.gsub!(/§ /,'\S ') - @string.gsub!(/£/u,'\pounds') - @string.gsub!(/&\S+?;/,' ') - @string.gsub!(//,' ') - @string.gsub!(/<\/a>/,' ') - @string.gsub!(/[^\}>_]((?:https?|file|ftp):\/\/\S+?)(<\/\S>)/,' \begin{scriptsize}\href{\1}{\1} \end{scriptsize}\2') #special case - @string.gsub!(/((?:^|\s)[}])((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #special case \{ e.g. \}http://url - @string.gsub!(/\B(?:\\_|\\)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\begin{scriptsize}\\href{\1}{\1}\end{scriptsize}\2') #specially escaped url no decoration + string.gsub!(/§/u,'\S') #latex: space between next character not preserved? #string.gsub!(/§ /,'\S ') + string.gsub!(/£/u,'\pounds') + string.gsub!(/&\S+?;/,' ') + string.gsub!(//,' ') + string.gsub!(/<\/a>/,' ') + string.gsub!(/[^\}>_]((?:https?|file|ftp):\/\/\S+?)(<\/\S>)/,' \begin{scriptsize}\href{\1}{\1} \end{scriptsize}\2') #special case + string.gsub!(/((?:^|\s)[}])((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #special case \{ e.g. 
\}http://url + string.gsub!(/\B(?:\\_|\\)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\begin{scriptsize}\\href{\1}{\1}\end{scriptsize}\2') #specially escaped url no decoration unless @@flag_code - @string.gsub!(/(^|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration positive lookahead, sequence issue with { linked }http://url cannot use \b at start + string.gsub!(/(^|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration positive lookahead, sequence issue with { linked }http://url cannot use \b at start else #code-block: angle brackets special characters, note _ already escaped - @string.gsub!(/\\_/,'{\UseTextSymbol{OML}{>}}') + string.gsub!(/\\_/,'{\UseTextSymbol{OML}{>}}') end - @string.gsub!(/<:ee>/,'') - @string.gsub!(//,' ') + string.gsub!(/<:ee>/,'') + string.gsub!(//,' ') #proposed change, insert, but may be redundant - @string.gsub!(/ \/><:i[12]>(.+?)(?:\}~||<\/\s*(br|p)>|<(br|p)\s*\/>/," #{@@tex_backslash*2} ") # Work Area - @string.gsub!(/(.+?)<\/b>/,'\begin{bfseries}\1 \end{bfseries}') - @string.gsub!(/(.+?)<\/em>/,'\begin{bfseries}\1 \end{bfseries}') - @string.gsub!(/<(bold|strong)>(.+?)<\/(bold|strong)>/,'\begin{bfseries}\1 \end{bfseries}') - @string.gsub!(/(.+?)<\/h\d+>/,'\begin{bfseries}\1 \end{bfseries}') - @string.gsub!(/(.+?)<\/i>/,'\emph{\1}') - @string.gsub!(/(.+?)<\/italic>/,'\emph{\1}') - @string.gsub!(/(.+?)<\/u>/,'\uline{\1}') # ulem - @string.gsub!(/(.+?)<\/cite>/,"``\\1''") # quote - @string.gsub!(/(.+?)<\/ins>/,'\uline{\1}') # ulem - @string.gsub!(/(.+?)<\/del>/,'\sout{\1}') # ulem - @string.gsub!(/(.+?)<\/sub>/,"\$_{\\textrm{\\1}}\$") - @string.gsub!(/(.+?)<\/sup>/,"\$^{\\textrm{\\1}}\$") + string.gsub!(/ 
\/><:i[12]>(.+?)(?:\}~||<\/\s*(br|p)>|<(br|p)\s*\/>/," #{@@tex_backslash*2} ") # Work Area + string.gsub!(/(.+?)<\/b>/,'\begin{bfseries}\1 \end{bfseries}') + string.gsub!(/(.+?)<\/em>/,'\begin{bfseries}\1 \end{bfseries}') + string.gsub!(/<(bold|strong)>(.+?)<\/(bold|strong)>/,'\begin{bfseries}\1 \end{bfseries}') + string.gsub!(/(.+?)<\/h\d+>/,'\begin{bfseries}\1 \end{bfseries}') + string.gsub!(/(.+?)<\/i>/,'\emph{\1}') + string.gsub!(/(.+?)<\/italic>/,'\emph{\1}') + string.gsub!(/(.+?)<\/u>/,'\uline{\1}') # ulem + string.gsub!(/(.+?)<\/cite>/,"``\\1''") # quote + string.gsub!(/(.+?)<\/ins>/,'\uline{\1}') # ulem + string.gsub!(/(.+?)<\/del>/,'\sout{\1}') # ulem + string.gsub!(/(.+?)<\/sub>/,"\$_{\\textrm{\\1}}\$") + string.gsub!(/(.+?)<\/sup>/,"\$^{\\textrm{\\1}}\$") unless @@flag_code - @string.gsub!(/"(.+?)"/,"``\\1''") # quote marks / quotations open & close " need condition exclude for code - @string.gsub!(/\s+"/,' ``') # open " - @string.gsub!(/^([1-6-]#{@@tilde}\S*|<.+?>)?\s*"/,'\1``') # open " - @string.gsub!(/"(\s|\.|,|:|;)/,"''\\1") # close " - @string.gsub!(/"([1-6-]#{@@tilde}\S*|<.+?>)?\s*$/,"''\\1") # close " - @string.gsub!(/"(\.|,)/,"''") # close " - @string.gsub!(/\s+'/,' `') # open ' - @string.gsub!(/^([1-6-]#{@@tilde}\S*|<.+?>)?\s*'/,'\1`') # open ' + string.gsub!(/"(.+?)"/,'“\1”') # quote marks / quotations open & close " need condition exclude for code + string.gsub!(/\s+"/,' “') # open " + string.gsub!(/^([1-6-]#{@@tilde}\S*|<.+?>)?\s*"/,'\1“') # open " + string.gsub!(/"(\s|\.|,|:|;)/,'”\1') # close " + string.gsub!(/"([1-6-]#{@@tilde}\S*|<.+?>)?\s*$/,'”\1') # close " + string.gsub!(/"(\.|,)/,'”') # close " + string.gsub!(/\s+'/,' `') # open ' + string.gsub!(/^([1-6-]#{@@tilde}\S*|<.+?>)?\s*'/,'\1`') # open ' end - @string.gsub!(/^(<:i[1-9]>)?\s*\\_\*\s*/,'\1 \begin{math} \bullet \end{math}~~') #bullets - added 2004w17 watch \\_ - @string.gsub!(/(|<\/font>)/,'') - @string.gsub!(/\s*(\S+?)<\/sup>/,'^\1') - @string.gsub!(/(|<\/sup>)/,'') - @string 
+ string.gsub!(/^(<:i[1-9]>)?\s*\\_\*\s*/,'\1 \begin{math} \bullet \end{math}~~') #bullets - added 2004w17 watch \\_ + string.gsub!(/(|<\/font>)/,'') + string.gsub!(/\s*(\S+?)<\/sup>/,'^\1') + string.gsub!(/(|<\/sup>)/,'') + string + end + def pdftex_special_characters_3(string) + string.gsub!(/])/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder + string.gsub!(/([^<][^b][^r]\s+)\/>/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder + #problem sequence (another kludge) -> + string.gsub!(/<=lt>/,'{\UseTextSymbol{OML}{<}}') + string.gsub!(/<=gt>/,'{\UseTextSymbol{OML}{>}}') + #string.gsub!(/<=lt>/,'\<') + #string.gsub!(/<=gt>/,'\>') + string.gsub!(/<=underscore>/,'\_') + string.gsub!(/(\href\{http:\/\/\S+?)(?:(?:<=tilde>)(\S+))+\}/,'\1\~\2}') #tildes in urls \href treated differently from text + string.gsub!(/<=tilde>/,'{\~~}') + string.gsub!(/<=pipe>/,'{\textbar}') + string.gsub!(/<=caret>/,'{\^{~}}') + #string.gsub!(/<=caret>/,'\^{}') + string.gsub!(/<=exclaim>/,'\Verbatim{!}') + string.gsub!(/<=hash>/,'{\#}') + #string.gsub!(/<=hash>/,'{\UseTextSymbol{OT1}{#}}') + #string.gsub!(/<=slash>/,'{\slash}') + string.gsub!(/<=hardspace>/,'{~}') #changed ... 2005 + string.gsub!(/<=amp>/,'{\\\&}') #changed ... 
2005 + #string.gsub!(/<=amp>/,'{\UseTextSymbol{OT1}{&}}') + string.gsub!(/<=slash>/,'{/}') + string.gsub!(/<=backslash>/,'{\textbackslash}') + #string.gsub!(/<=asterisk>/,'*') + #string.gsub!(/<=exclaim>/,'!') + #string.gsub!(/<=asterisk>/,'{\ast}') + #string.gsub!(/<=copymark>/,"^{\\copyright} ") # watch has been problematic + #copymark='{\\begin{small}\\raisebox{1ex}{\\copyright}\\end{small}} ' + string.gsub!(/<=copymark>\s*(.+)?\s+(<\\~\d+;\w(?:[0-6]:)?\d+;\w\d+><#@dp:#@dp>)/,"^\\copyright \\textnormal{\\1} \\2") # watch likely to be problematic + string end - def special_characters_3(para) - @string.gsub!(/])/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder - @string.gsub!(/([^<][^b][^r]\s+)\/>/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder + def xetex_special_characters_1(string) # ~ ^ $ & % _ { } #LaTeX special characters - KEEP list + #p @@utf_8.list + #string=Iconv.conv('ISO-8859-1', 'UTF-8', @string) + word=string.scan(/\S+|\n/) #unless line =~/^(?:0~\S|%+\s)/ + para_array=[] + string=if word + word.each do |w| # _ - / # | : ! 
^ ~ + unless string =~/^(?:0~|%+ |') unless w=~/^[1-6]~|~\{|\}~|~\[|\]~|^\^~\s|~\^|\*~\S+|~#|\{t~|<~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+>/ + w.gsub!(/&#(?:126|152);/,'<=tilde>') #126 usual + #w.gsub!(/&#(?:126|152);/,'<=tilde>') unless w=~/https?:\/\/\S+/ #126 usual + w.gsub!(/\\?\|||/,'<=pipe>') #unless w=~/<~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+>/ # | SiSU not really special sisu character but done, also LaTeX + end + para_array << w + end + string=para_array.join(' ') + string=string.strip + string + else '' + end + string.gsub!(/<~\d+;(?:\w|[0-6]:)\d+;[umdv]\d+><#@dp:#@dp>/,'') + string.gsub!(/.+?<-#>/,'') + string.gsub!(//,'') + string.gsub!(//,'') + #problem sequence -> + string.gsub!(/&(?:nbsp);/,'<=hardspace>') # < SiSU special character also LaTeX + string.gsub!(/&(?:lt|#060);/,'<=lt>') # < SiSU special character also LaTeX + string.gsub!(/&(?:gt|#062);/,'<=gt>') # > SiSU special character also LaTeX + string.gsub!(/{/,'<=curlyopen>') # { SiSU special character also LaTeX + string.gsub!(/}/,'<=curlyclose>') # } SiSU special character also LaTeX + string.gsub!(/&#(?:126|152);/,'<=tilde>') # ~ SiSU special character also LaTeX + string.gsub!(/#/,'\#') # # SiSU special character also LaTeX + string.gsub!(/!/,'!') # ! SiSU not really special sisu character but done, also LaTeX + string.gsub!(/*/,'*') # * should you wish to escape astrisk e.g. 
describing \*{bold}* + string.gsub!(/-/,'-') # - SiSU special character also LaTeX + string.gsub!(/+/,'+') # + SiSU special character also LaTeX + string.gsub!(/,/,',') # + SiSU special character also LaTeX + string.gsub!(/&/,'<=amp>') #unless @string=~/<:code>/ # / SiSU special character also LaTeX + string.gsub!(///,'<=slash>') # / SiSU special character also LaTeX + string.gsub!(/\/,'<=backslash>') # \ SiSU special character also LaTeX + string.gsub!(/_/,'<=underscore>') # _ SiSU special character also LaTeX + string.gsub!(/|/,'|') # | SiSU not really special sisu character but done, also LaTeX + string.gsub!(/:/,':') # : SiSU not really special sisu character but done, also LaTeX + string.gsub!(/^|\^/,'<=caret>') # ^ SiSU not really special sisu character but done, also LaTeX + string.gsub!(/\#/,'<=hash>') + ##watch placement, problem sequence ^ + string.gsub!(/&atild;<\/font><\/sup>/,' ') + string.gsub!(/<:pb>/,'\newpage') + string.gsub!(/<:pn>/,'\clearpage') + string.gsub!(/\\copy(right|mark)?/,'<=copymark>') # ok problem with superscript + string + end + def xetex_special_characters_2(string) + string.gsub!(/œ/,'\oe ') + string.gsub!(/\$/,'\$') + string.gsub!(/\#/,'\#') + string.gsub!(/\%/,'\%') + string.gsub!(/\~/,'\~') #revist, should not be necessary to mark remaining tildes + if string !~/^\s*<:image|\}:image\s/ + string.gsub!(/_/,'\_') + end + string.gsub!(/\{/,'\{') + string.gsub!(/\}/,'\}') + string.gsub!(/ /,'~') # ~ character for hardspace + # sequence important must appear after removal of { and } + string.gsub!(/&\S+?;/,'') #hmmm + # sequence imortant place before removal of & + if string=~/<:code>/; @@flag_code=true + elsif string=~/<:code-end>/; @@flag_code=false + end + if @@flag_code; string.gsub!(/&/,'{\\\&}') + else string.gsub!(/(\s+&\s+)/,' and ') + end + string.gsub!(/§/u,'\S') #latex: space between next character not preserved? 
#string.gsub!(/§ /,'\S ') + string.gsub!(/£/u,'\pounds') + string.gsub!(/&\S+?;/,' ') + string.gsub!(//,' ') + string.gsub!(/<\/a>/,' ') + string.gsub!(/[^\}>_]((?:https?|file|ftp):\/\/\S+?)(<\/\S>)/,' \begin{scriptsize}\href{\1}{\1} \end{scriptsize}\2') #special case + string.gsub!(/((?:^|\s)[}])((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #special case \{ e.g. \}http://url + string.gsub!(/\B(?:\\_|\\)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\begin{scriptsize}\\href{\1}{\1}\end{scriptsize}\2') #specially escaped url no decoration + unless @@flag_code + string.gsub!(/(^|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration positive lookahead, sequence issue with { linked }http://url cannot use \b at start + else #code-block: angle brackets special characters, note _ already escaped + string.gsub!(/\\_/,'{\UseTextSymbol{OML}{>}}') + end + string.gsub!(/<:ee>/,'') + string.gsub!(//,' ') + #proposed change, insert, but may be redundant + string.gsub!(/ \/><:i[12]>(.+?)(?:\}~||<\/\s*(br|p)>|<(br|p)\s*\/>/," #{@@tex_backslash*2} ") # Work Area + string.gsub!(/(.+?)<\/b>/,'\begin{bfseries}\1 \end{bfseries}') + string.gsub!(/(.+?)<\/em>/,'\begin{bfseries}\1 \end{bfseries}') + string.gsub!(/<(bold|strong)>(.+?)<\/(bold|strong)>/,'\begin{bfseries}\1 \end{bfseries}') + string.gsub!(/(.+?)<\/h\d+>/,'\begin{bfseries}\1 \end{bfseries}') + string.gsub!(/(.+?)<\/i>/,'\emph{\1}') + string.gsub!(/(.+?)<\/italic>/,'\emph{\1}') + string.gsub!(/(.+?)<\/u>/,'\uline{\1}') # ulem + string.gsub!(/(.+?)<\/cite>/,"``\\1''") # quote + string.gsub!(/(.+?)<\/ins>/,'\uline{\1}') # ulem + string.gsub!(/(.+?)<\/del>/,'\sout{\1}') # ulem + string.gsub!(/(.+?)<\/sub>/,"\$_{\\textrm{\\1}}\$") + string.gsub!(/(.+?)<\/sup>/,"\$^{\\textrm{\\1}}\$") + unless 
@@flag_code + string.gsub!(/"(.+?)"/,'“\1”') # quote marks / quotations open & close " need condition exclude for code + string.gsub!(/\s+"/,' “') # open " + string.gsub!(/^([1-6-]#{@@tilde}\S*|<.+?>)?\s*"/,'\1“') # open " + string.gsub!(/"(\s|\.|,|:|;)/,'”\1') # close " + string.gsub!(/"([1-6-]#{@@tilde}\S*|<.+?>)?\s*$/,'”\1') # close " + string.gsub!(/"(\.|,)/,'”') # close " + string.gsub!(/\s+'/,' `') # open ' + string.gsub!(/^([1-6-]#{@@tilde}\S*|<.+?>)?\s*'/,'\1`') # open ' + end + #string.gsub!(/^(<:i[1-9]>)?\s*\\_\*\s*/,'\1 \begin{math} \bullet \end{math}~~') #bullets - added 2004w17 watch \\_ + string.gsub!(/^(<:i[1-9]>)?\s*\\_\*\s*/,'\1 ● ~~') + string.gsub!(/(|<\/font>)/,'') + string.gsub!(/\s*(\S+?)<\/sup>/,'^\1') + string.gsub!(/(|<\/sup>)/,'') + string + end + def xetex_special_characters_3(string) + string.gsub!(/])/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder + string.gsub!(/([^<][^b][^r]\s+)\/>/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! 
Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder #problem sequence (another kludge) -> - @string.gsub!(/<=lt>/,'{\UseTextSymbol{OML}{<}}') - @string.gsub!(/<=gt>/,'{\UseTextSymbol{OML}{>}}') - #@string.gsub!(/<=lt>/,'\<') - #@string.gsub!(/<=gt>/,'\>') - @string.gsub!(/<=underscore>/,'\_') - @string.gsub!(/(\href\{http:\/\/\S+?)(?:(?:<=tilde>)(\S+))+\}/,'\1\~\2}') #tildes in urls \href treated differently from text - @string.gsub!(/<=tilde>/,'{\~~}') - @string.gsub!(/<=pipe>/,'{\textbar}') - @string.gsub!(/<=caret>/,'{\^{~}}') - #@string.gsub!(/<=caret>/,'\^{}') - @string.gsub!(/<=exclaim>/,'\Verbatim{!}') - @string.gsub!(/<=hash>/,'{\#}') - #@string.gsub!(/<=hash>/,'{\UseTextSymbol{OT1}{#}}') - #@string.gsub!(/<=slash>/,'{\slash}') - @string.gsub!(/<=hardspace>/,'{~}') #changed ... 2005 - @string.gsub!(/<=amp>/,'{\\\&}') #changed ... 2005 - #@string.gsub!(/<=amp>/,'{\UseTextSymbol{OT1}{&}}') - @string.gsub!(/<=slash>/,'{/}') - @string.gsub!(/<=backslash>/,'{\textbackslash}') - #@string.gsub!(/<=asterisk>/,'*') - #@string.gsub!(/<=exclaim>/,'!') - #@string.gsub!(/<=asterisk>/,'{\ast}') - #@string.gsub!(/<=copymark>/,"^{\\copyright} ") # watch has been problematic + string.gsub!(/<=lt>/,'{\UseTextSymbol{OML}{<}}') + string.gsub!(/<=gt>/,'{\UseTextSymbol{OML}{>}}') + #string.gsub!(/<=lt>/,'\<') + #string.gsub!(/<=gt>/,'\>') + string.gsub!(/<=underscore>/,'\_') + string.gsub!(/(\href\{http:\/\/\S+?)(?:(?:<=tilde>)(\S+))+\}/,'\1\~\2}') #tildes in urls \href treated differently from text + string.gsub!(/<=tilde>/,'{\~~}') + string.gsub!(/<=pipe>/,'{\textbar}') + string.gsub!(/<=caret>/,'{\^{~}}') + #string.gsub!(/<=caret>/,'\^{}') + string.gsub!(/<=exclaim>/,'\Verbatim{!}') + string.gsub!(/<=hash>/,'{\#}') + #string.gsub!(/<=hash>/,'{\UseTextSymbol{OT1}{#}}') + #string.gsub!(/<=slash>/,'{\slash}') + string.gsub!(/<=hardspace>/,'{~}') #changed ... 2005 + string.gsub!(/<=amp>/,'{\\\&}') #changed ... 
2005 + #string.gsub!(/<=amp>/,'{\UseTextSymbol{OT1}{&}}') + string.gsub!(/<=slash>/,'{/}') + string.gsub!(/<=backslash>/,'{\textbackslash}') + #string.gsub!(/<=asterisk>/,'*') + #string.gsub!(/<=exclaim>/,'!') + #string.gsub!(/<=asterisk>/,'{\ast}') + #string.gsub!(/<=copymark>/,"^{\\copyright} ") # watch has been problematic #copymark='{\\begin{small}\\raisebox{1ex}{\\copyright}\\end{small}} ' - @string.gsub!(/<=copymark>\s*(.+)?\s+(<\\~\d+;\w(?:[0-6]:)?\d+;\w\d+><#@dp:#@dp>)/,"^\\copyright \\textnormal{\\1} \\2") # watch likely to be problematic - @string + string.gsub!(/<=copymark>\s*(.+)?\s+(<\\~\d+;\w(?:[0-6]:)?\d+;\w\d+><#@dp:#@dp>)/,"^\\copyright \\textnormal{\\1} \\2") # watch likely to be problematic + string end - def special_characters_curly(para) - @string.gsub!(/<=curlyopen>/,'\{') - @string.gsub!(/<=curlyclose>/,'\}') - @string + def special_characters_curly(string) + string.gsub!(/<=curlyopen>/,'\{') + string.gsub!(/<=curlyclose>/,'\}') + string end - def special_characters_unsafe_1(para) #depreciated, make obsolete + + + def special_characters_unsafe_1(string) #depreciated, make obsolete # some substitutions are sequence sensitive, rearrange with care. - @string.gsub!(/\\backslash (copyright|clearpage|newpage)/,"\\\\\\1") #kludge bad solution, find out where tail is sent through specChar ! - end - def special_characters_unsafe_2(para) - end - def special_characters_unsafe_3(para) + string.gsub!(/\\backslash (copyright|clearpage|newpage)/,"\\\\\\1") #kludge bad solution, find out where tail is sent through specChar ! + string end def special_characters #special characters - some substitutions are sequence sensitive, rearrange with care. - special_characters_1(@string) - special_characters_unsafe_1(@string) - special_characters_2(@string) - special_characters_3(@string) + string=@string + case @tex2pdf + when /pdf/ + string=pdftex_special_characters_1(string) unless string.nil? + string=special_characters_unsafe_1(string) unless string.nil? 
#pdftex_special_characters_unsafe_1(@string) + string=pdftex_special_characters_2(string) unless string.nil? + string=pdftex_special_characters_3(string) unless string.nil? + when /xe/ + string=xetex_special_characters_1(string) unless string.nil? + string=special_characters_unsafe_1(string) unless string.nil? #xetex_special_characters_unsafe_1(@string) + string=xetex_special_characters_2(string) unless string.nil? #issues with xetex + string=xetex_special_characters_3(string) unless string.nil? + end + @string=string end def special_characters_safe #special characters - some substitutions are sequence sensitive, rearrange with care. - special_characters_1(@string) - special_characters_2(@string) - #special_characters_3(@string) + string=@string + case @tex2pdf + when /pdf/ + string=pdftex_special_characters_1(@string) unless string.nil? + string=pdftex_special_characters_2(@string) unless string.nil? + #special_characters_3(@string) + when /xe/ + string=xetex_special_characters_1(@string) unless string.nil? + string=xetex_special_characters_2(@string) unless string.nil? # remove this to start with, causes issues + end + @string=string end def heading_major(para,lev) title=@md.title @@ -947,17 +1120,27 @@ WOK end end def tex_head_encode - case @md.file_encoding - when /iso-?8859/i #% iso8859 - <