aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorRalph Amissah <ralph@amissah.com>2008-03-22 17:05:50 +0000
committerRalph Amissah <ralph@amissah.com>2008-03-22 17:05:50 +0000
commitbbd1100d9674b4d4b57e62999414beaf339ff4a5 (patch)
tree14e57887d095272b2d77a474ae78af6c91952430
parenttrack a couple of questions (diff)
tex to pdf, xetex (utf8) added as alternative to pdftex
-rw-r--r--CHANGELOG12
-rw-r--r--lib/sisu/v0/sysenv.rb48
-rw-r--r--lib/sisu/v0/texpdf_format.rb472
3 files changed, 368 insertions, 164 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 8d4a255a..cf2ab222 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -9,6 +9,18 @@ Reverse Chronological:
%% STABLE MANIFEST
+%% sisu_0.66.1.orig.tar.gz (2008-03-22:11/6)
+http://www.jus.uio.no/sisu/pkg/src/sisu_0.66.1.orig.tar.gz
+ sisu_0.66.1.orig.tar.gz
+ sisu_0.66.1-1.dsc
+ sisu_0.66.1-1.diff.gz
+
+ * tex to pdf, xetex (utf8) added as alternative to pdftex
+ [for now special character processing is separate, consider merging common
+ parts, that is, most of it]
+
+ * debian [add] texlive-xetex
+
%% sisu_0.66.0.orig.tar.gz (2008-02-24:07/7)
http://www.jus.uio.no/sisu/pkg/src/sisu_0.66.0.orig.tar.gz
b45d81d949590a9b24924589bc98032b 1492653 sisu_0.66.0.orig.tar.gz
diff --git a/lib/sisu/v0/sysenv.rb b/lib/sisu/v0/sysenv.rb
index 9cf14507..816c72b7 100644
--- a/lib/sisu/v0/sysenv.rb
+++ b/lib/sisu/v0/sysenv.rb
@@ -1,4 +1,4 @@
-# coding: utf-8
+# coding: utf-8
=begin
* Name: SiSU
@@ -647,30 +647,36 @@ module SiSU_Env
else puts "\tWARN: #{program} is not installed #{program_ref}"
end
end
- def latex2pdf #convert from latex to pdf
- prog=[]
- prog=['pdflatex','pdfetex','pdftex']
- program_ref="\n\t\tSee http://www.tug.org/applications/pdftex/\n\t\tOn Debian this is is included in tetex-extra"
+ def tex2pdf_engine
+ prog=['xetex','xelatex','pdflatex','pdfetex','pdftex']
@pdfetex_flag=false
@cmd ||=''
- tell=if @cmd =~/[MVv]/; ''
- else '> /dev/null'
- end
- mode='batchmode'
- #mode='nonstopmode'
+ @texpdf=nil
prog.each do |program|
if program_found?(program)
- case program
- when /pdflatex/; system("#{program} -interaction=#{mode} #@input #{tell}\n")
- when /pdfetex/; system("#{program} -interaction=#{mode} -fmt=pdflatex #@input #{tell}\n") # debian specific paramters ?
- #system("#{program} -interaction=batchmode -progname=pdflatex #@input\n")
- when /pdftex/; system("#{program} -interaction=#{mode} -fmt=pdflatex #@input #{tell}\n")
- end
+ @texpdf=program if program =~/xetex|xelatex|pdftex|pdflatex/
@pdfetex_flag=true
break
end
- unless @pdfetex_flag; puts "\tWARN: none of the following programs are installed: #{program[0]}, #{program[1]}, #{program[2]} is installed. #{program_ref}"
+ end
+ @texpdf
+ end
+ def latex2pdf #convert from latex to pdf
+ tell=if @cmd =~/[MVv]/; ''
+ else '> /dev/null'
+ end
+ mode='batchmode'
+ #mode='nonstopmode'
+ program_ref="\n\t\tSee http://www.tug.org/applications/pdftex/\n\t\tOn Debian this is is included in tetex-extra"
+ texpdf=tex2pdf_engine
+ if @pdfetex_flag;
+ texpdf_cmd=case texpdf
+ when /xetex/; "#{texpdf} -interaction=#{mode} -fmt=xelatex #@input #{tell}\n"
+ when /pdftex/; "#{texpdf} -interaction=#{mode} -fmt=pdflatex #@input #{tell}\n"
+ when /xelatex|pdflatex/; "#{texpdf} -interaction=#{mode} #@input #{tell}\n"
end
+ system(texpdf_cmd)
+ else puts "\tWARN: none of the following programs are installed: #{program[0]}, #{program[1]}, #{program[2]} is installed. #{program_ref}"
end
end
def makeinfo #texinfo
@@ -2558,11 +2564,11 @@ WOK
end
def images
unless FileTest.directory?("#{@env.path.output}/_sisu")
- mkdir_p("#{@env.path.output}/_sisu")
+ mkdir_p("#{@env.path.output}/_sisu")
end
unless File.exist?("#{@env.path.output}/_sisu/image_sys") \
or File.symlink?("#{@env.path.output}/_sisu/image_sys")
- File.symlink("../../_sisu/image_sys", "#{@env.path.output}/_sisu/image_sys")
+ File.symlink("../../_sisu/image_sys", "#{@env.path.output}/_sisu/image_sys")
end
end
def man_forms
@@ -2657,7 +2663,7 @@ WOK
def dbi
if psql.host =~/(?:\S{1,3}\.){3}\S{1,3}|\S+?\.\S+/
"DBI:Pg:database=#{psql.db};host=#{psql.host};port=#{psql.port}"
- else "DBI:Pg:database=#{psql.db};port=#{psql.port}"
+ else "DBI:Pg:database=#{psql.db};port=#{psql.port}"
end
end
self
@@ -3138,7 +3144,7 @@ fns_array=unless fns =~/\.ssm.sst$/
IO.readlines(fns,'')
else IO.readlines(fns,'r:utf-8')
end
-else
+else
if RUBY_VERSION < '1.9'
IO.readlines("#{path.composite_file}/#{fns}",'')
else IO.readlines("#{path.composite_file}/#{fns}",'r:utf-8')
diff --git a/lib/sisu/v0/texpdf_format.rb b/lib/sisu/v0/texpdf_format.rb
index 03bdd184..9e7fccde 100644
--- a/lib/sisu/v0/texpdf_format.rb
+++ b/lib/sisu/v0/texpdf_format.rb
@@ -284,6 +284,7 @@ WOK
@dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern
@tx=SiSU_Env::Get_init.instance.tex
@url_brace=SiSU_Viz::Skin.new.url_decoration
+ @tex2pdf=@@tex2pdf ||=SiSU_Env::System_call.new.tex2pdf_engine #cache the detected tex engine in a class variable named like the instance variable
end
def longtable_landscape
@end_table='\end{longtable}'
@@ -432,14 +433,14 @@ WOK
end
@string
end
- def special_characters_1(para) # ~ ^ $ & % _ { } #LaTeX special characters - KEEP list
+ def pdftex_special_characters_1(string) # ~ ^ $ & % _ { } #LaTeX special characters - KEEP list
#p @@utf_8.list
#@string=Iconv.conv('ISO-8859-1', 'UTF-8', @string)
- word=@string.scan(/\S+|\n/) #unless line =~/^(?:0~\S|%+\s)/
+ word=string.scan(/\S+|\n/) #unless line =~/^(?:0~\S|%+\s)/
para_array=[]
- if word
+ string=if word
word.each do |w| # _ - / # | : ! ^ ~
- unless para =~/^(?:0~|%+ |<!Th?¡ )/um
+ unless string =~/^(?:0~|%+ |<!Th?¡ )/um
w.gsub!(/[\\]?~/,'<=tilde>') unless w=~/^[1-6]~|~\{|\}~|~\[|\]~|^\^~\s|~\^|\*~\S+|~#|\{t~|<~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+>/
w.gsub!(/&#(?:126|152);/,'<=tilde>') #126 usual
#w.gsub!(/&#(?:126|152);/,'<=tilde>') unless w=~/https?:\/\/\S+/ #126 usual
@@ -447,162 +448,334 @@ WOK
end
para_array << w
end
- para=para_array.join(' ')
- @string=para.strip
+ string=para_array.join(' ')
+ string=string.strip
+ string
+ else ''
end
- @string.gsub!(/<~\d+;(?:\w|[0-6]:)\d+;[umdv]\d+><#@dp:#@dp>/,'')
- @string.gsub!(/.+?<-#>/,'')
- @string.gsub!(/<EOF>/,'')
- @string.gsub!(/<ENDNOTES?>/,'')
+ string.gsub!(/<~\d+;(?:\w|[0-6]:)\d+;[umdv]\d+><#@dp:#@dp>/,'')
+ string.gsub!(/.+?<-#>/,'')
+ string.gsub!(/<EOF>/,'')
+ string.gsub!(/<ENDNOTES?>/,'')
#problem sequence ->
- @string.gsub!(/&(?:nbsp);/,'<=hardspace>') # < SiSU special character also LaTeX
- @string.gsub!(/&(?:lt|#060);/,'<=lt>') # < SiSU special character also LaTeX
- @string.gsub!(/&(?:gt|#062);/,'<=gt>') # > SiSU special character also LaTeX
- @string.gsub!(/&#123;/,'<=curlyopen>') # { SiSU special character also LaTeX
- @string.gsub!(/&#125;/,'<=curlyclose>') # } SiSU special character also LaTeX
- @string.gsub!(/&#(?:126|152);/,'<=tilde>') # ~ SiSU special character also LaTeX
- @string.gsub!(/&#035;/,'\#') # # SiSU special character also LaTeX
- @string.gsub!(/&#033;/,'!') # ! SiSU not really special sisu character but done, also LaTeX
- @string.gsub!(/&#042;/,'*') # * should you wish to escape astrisk e.g. describing \*{bold}*
- @string.gsub!(/&#045;/,'-') # - SiSU special character also LaTeX
- @string.gsub!(/&#043;/,'+') # + SiSU special character also LaTeX
- @string.gsub!(/&#044;/,',') # + SiSU special character also LaTeX
- @string.gsub!(/&#038;/,'<=amp>') #unless @string=~/<:code>/ # / SiSU special character also LaTeX
- @string.gsub!(/&#047;/,'<=slash>') # / SiSU special character also LaTeX
- @string.gsub!(/&#092;/,'<=backslash>') # \ SiSU special character also LaTeX
- @string.gsub!(/&#095;/,'<=underscore>') # _ SiSU special character also LaTeX
- @string.gsub!(/&#124;/,'|') # | SiSU not really special sisu character but done, also LaTeX
- @string.gsub!(/&#058;/,':') # : SiSU not really special sisu character but done, also LaTeX
- @string.gsub!(/&#094;|\^/,'<=caret>') # ^ SiSU not really special sisu character but done, also LaTeX
- @string.gsub!(/\#/,'<=hash>')
+ string.gsub!(/&(?:nbsp);/,'<=hardspace>') # < SiSU special character also LaTeX
+ string.gsub!(/&(?:lt|#060);/,'<=lt>') # < SiSU special character also LaTeX
+ string.gsub!(/&(?:gt|#062);/,'<=gt>') # > SiSU special character also LaTeX
+ string.gsub!(/&#123;/,'<=curlyopen>') # { SiSU special character also LaTeX
+ string.gsub!(/&#125;/,'<=curlyclose>') # } SiSU special character also LaTeX
+ string.gsub!(/&#(?:126|152);/,'<=tilde>') # ~ SiSU special character also LaTeX
+ string.gsub!(/&#035;/,'\#') # # SiSU special character also LaTeX
+ string.gsub!(/&#033;/,'!') # ! SiSU not really special sisu character but done, also LaTeX
+ string.gsub!(/&#042;/,'*') # * should you wish to escape astrisk e.g. describing \*{bold}*
+ string.gsub!(/&#045;/,'-') # - SiSU special character also LaTeX
+ string.gsub!(/&#043;/,'+') # + SiSU special character also LaTeX
+ string.gsub!(/&#044;/,',') # + SiSU special character also LaTeX
+ string.gsub!(/&#038;/,'<=amp>') #unless @string=~/<:code>/ # / SiSU special character also LaTeX
+ string.gsub!(/&#047;/,'<=slash>') # / SiSU special character also LaTeX
+ string.gsub!(/&#092;/,'<=backslash>') # \ SiSU special character also LaTeX
+ string.gsub!(/&#095;/,'<=underscore>') # _ SiSU special character also LaTeX
+ string.gsub!(/&#124;/,'|') # | SiSU not really special sisu character but done, also LaTeX
+ string.gsub!(/&#058;/,':') # : SiSU not really special sisu character but done, also LaTeX
+ string.gsub!(/&#094;|\^/,'<=caret>') # ^ SiSU not really special sisu character but done, also LaTeX
+ string.gsub!(/\#/,'<=hash>')
##watch placement, problem sequence ^
- @string.gsub!(/<sup><font face=symbol>&atild;<\/font><\/sup>/,' ')
- @string.gsub!(/<:pb>/,'\newpage')
- @string.gsub!(/<:pn>/,'\clearpage')
- @string.gsub!(/\\copy(right|mark)?/,'<=copymark>') # ok problem with superscript
- end
- def special_characters_2(para)
- @string.gsub!(/&#156;/,'\oe ')
- @string.gsub!(/\$/,'\$')
- @string.gsub!(/\#/,'\#')
- @string.gsub!(/\%/,'\%')
- @string.gsub!(/\~/,'\~') #revist, should not be necessary to mark remaining tildes
- if @string !~/^\s*<:image|\}:image\s/
- @string.gsub!(/_/,'\_')
+ string.gsub!(/<sup><font face=symbol>&atild;<\/font><\/sup>/,' ')
+ string.gsub!(/<:pb>/,'\newpage')
+ string.gsub!(/<:pn>/,'\clearpage')
+ string.gsub!(/\\copy(right|mark)?/,'<=copymark>') # ok problem with superscript
+ string
+ end
+ def pdftex_special_characters_2(string)
+ string.gsub!(/&#156;/,'\oe ')
+ string.gsub!(/\$/,'\$')
+ string.gsub!(/\#/,'\#')
+ string.gsub!(/\%/,'\%')
+ string.gsub!(/\~/,'\~') #revist, should not be necessary to mark remaining tildes
+ if string !~/^\s*<:image|\}:image\s/
+ string.gsub!(/_/,'\_')
end
- @string.gsub!(/\{/,'\{')
- @string.gsub!(/\}/,'\}')
- @string.gsub!(/&nbsp;/,'~') # ~ character for hardspace
+ string.gsub!(/\{/,'\{')
+ string.gsub!(/\}/,'\}')
+ string.gsub!(/&nbsp;/,'~') # ~ character for hardspace
# sequence important must appear after removal of { and }
- @string.gsub!(/&\S+?;/,'') #hmmm
+ string.gsub!(/&\S+?;/,'') #hmmm
# sequence imortant place before removal of &
- if @string=~/<:code>/; @@flag_code=true
- elsif @string=~/<:code-end>/; @@flag_code=false
+ if string=~/<:code>/; @@flag_code=true
+ elsif string=~/<:code-end>/; @@flag_code=false
end
- if @@flag_code; @string.gsub!(/&/,'{\\\&}')
- else @string.gsub!(/(\s+&\s+)/,' and ')
+ if @@flag_code; string.gsub!(/&/,'{\\\&}')
+ else string.gsub!(/(\s+&\s+)/,' and ')
end
- @string.gsub!(/§/u,'\S') #latex: space between next character not preserved? #@string.gsub!(/§ /,'\S ')
- @string.gsub!(/£/u,'\pounds')
- @string.gsub!(/&\S+?;/,' ')
- @string.gsub!(/<a href=".+?">/,' ')
- @string.gsub!(/<\/a>/,' ')
- @string.gsub!(/[^\}>_]((?:https?|file|ftp):\/\/\S+?)(<\/\S>)/,' \begin{scriptsize}\href{\1}{\1} \end{scriptsize}\2') #special case
- @string.gsub!(/((?:^|\s)[}])((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #special case \{ e.g. \}http://url
- @string.gsub!(/\B(?:\\_|\\)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\begin{scriptsize}\\href{\1}{\1}\end{scriptsize}\2') #specially escaped url no decoration
+ string.gsub!(/§/u,'\S') #latex: space between next character not preserved? #string.gsub!(/§ /,'\S ')
+ string.gsub!(/£/u,'\pounds')
+ string.gsub!(/&\S+?;/,' ')
+ string.gsub!(/<a href=".+?">/,' ')
+ string.gsub!(/<\/a>/,' ')
+ string.gsub!(/[^\}>_]((?:https?|file|ftp):\/\/\S+?)(<\/\S>)/,' \begin{scriptsize}\href{\1}{\1} \end{scriptsize}\2') #special case
+ string.gsub!(/((?:^|\s)[}])((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #special case \{ e.g. \}http://url
+ string.gsub!(/\B(?:\\_|\\)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\begin{scriptsize}\\href{\1}{\1}\end{scriptsize}\2') #specially escaped url no decoration
unless @@flag_code
- @string.gsub!(/(^|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration <url> positive lookahead, sequence issue with { linked }http://url cannot use \b at start
+ string.gsub!(/(^|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration <url> positive lookahead, sequence issue with { linked }http://url cannot use \b at start
else #code-block: angle brackets special characters, note _ already escaped
- @string.gsub!(/\\_</,'{\UseTextSymbol{OML}{<}}')
- @string.gsub!(/\\_>/,'{\UseTextSymbol{OML}{>}}')
+ string.gsub!(/\\_</,'{\UseTextSymbol{OML}{<}}')
+ string.gsub!(/\\_>/,'{\UseTextSymbol{OML}{>}}')
end
- @string.gsub!(/<:ee>/,'')
- @string.gsub!(/<!>/,' ')
+ string.gsub!(/<:ee>/,'')
+ string.gsub!(/<!>/,' ')
#proposed change, insert, but may be redundant
- @string.gsub!(/ \/><:i[12]>(.+?)(?:\}~|<br)/,' \begin{ParagraphIndent}{0.01\columnwidth}\1\end{ParagraphIndent} ') # footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder
- @string.gsub!(/<(br|p)>|<\/\s*(br|p)>|<(br|p)\s*\/>/," #{@@tex_backslash*2} ") # Work Area
- @string.gsub!(/<b>(.+?)<\/b>/,'\begin{bfseries}\1 \end{bfseries}')
- @string.gsub!(/<em>(.+?)<\/em>/,'\begin{bfseries}\1 \end{bfseries}')
- @string.gsub!(/<(bold|strong)>(.+?)<\/(bold|strong)>/,'\begin{bfseries}\1 \end{bfseries}')
- @string.gsub!(/<h\d+>(.+?)<\/h\d+>/,'\begin{bfseries}\1 \end{bfseries}')
- @string.gsub!(/<i>(.+?)<\/i>/,'\emph{\1}')
- @string.gsub!(/<italic>(.+?)<\/italic>/,'\emph{\1}')
- @string.gsub!(/<u>(.+?)<\/u>/,'\uline{\1}') # ulem
- @string.gsub!(/<cite>(.+?)<\/cite>/,"``\\1''") # quote
- @string.gsub!(/<ins>(.+?)<\/ins>/,'\uline{\1}') # ulem
- @string.gsub!(/<del>(.+?)<\/del>/,'\sout{\1}') # ulem
- @string.gsub!(/<sub>(.+?)<\/sub>/,"\$_{\\textrm{\\1}}\$")
- @string.gsub!(/<sup>(.+?)<\/sup>/,"\$^{\\textrm{\\1}}\$")
+ string.gsub!(/ \/><:i[12]>(.+?)(?:\}~|<br)/,' \begin{ParagraphIndent}{0.01\columnwidth}\1\end{ParagraphIndent} ') # footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder
+ string.gsub!(/<(br|p)>|<\/\s*(br|p)>|<(br|p)\s*\/>/," #{@@tex_backslash*2} ") # Work Area
+ string.gsub!(/<b>(.+?)<\/b>/,'\begin{bfseries}\1 \end{bfseries}')
+ string.gsub!(/<em>(.+?)<\/em>/,'\begin{bfseries}\1 \end{bfseries}')
+ string.gsub!(/<(bold|strong)>(.+?)<\/(bold|strong)>/,'\begin{bfseries}\2 \end{bfseries}') # \2 is the enclosed text, \1 is only the tag name
+ string.gsub!(/<h\d+>(.+?)<\/h\d+>/,'\begin{bfseries}\1 \end{bfseries}')
+ string.gsub!(/<i>(.+?)<\/i>/,'\emph{\1}')
+ string.gsub!(/<italic>(.+?)<\/italic>/,'\emph{\1}')
+ string.gsub!(/<u>(.+?)<\/u>/,'\uline{\1}') # ulem
+ string.gsub!(/<cite>(.+?)<\/cite>/,"``\\1''") # quote
+ string.gsub!(/<ins>(.+?)<\/ins>/,'\uline{\1}') # ulem
+ string.gsub!(/<del>(.+?)<\/del>/,'\sout{\1}') # ulem
+ string.gsub!(/<sub>(.+?)<\/sub>/,"\$_{\\textrm{\\1}}\$")
+ string.gsub!(/<sup>(.+?)<\/sup>/,"\$^{\\textrm{\\1}}\$")
unless @@flag_code
- @string.gsub!(/"(.+?)"/,"``\\1''") # quote marks / quotations open & close " need condition exclude for code
- @string.gsub!(/\s+"/,' ``') # open "
- @string.gsub!(/^([1-6-]#{@@tilde}\S*|<.+?>)?\s*"/,'\1``') # open "
- @string.gsub!(/"(\s|\.|,|:|;)/,"''\\1") # close "
- @string.gsub!(/"([1-6-]#{@@tilde}\S*|<.+?>)?\s*$/,"''\\1") # close "
- @string.gsub!(/"(\.|,)/,"''") # close "
- @string.gsub!(/\s+'/,' `') # open '
- @string.gsub!(/^([1-6-]#{@@tilde}\S*|<.+?>)?\s*'/,'\1`') # open '
+ string.gsub!(/"(.+?)"/,'“\1”') # quote marks / quotations open & close " need condition exclude for code
+ string.gsub!(/\s+"/,' “') # open "
+ string.gsub!(/^([1-6-]#{@@tilde}\S*|<.+?>)?\s*"/,'\1“') # open "
+ string.gsub!(/"(\s|\.|,|:|;)/,'”\1') # close "
+ string.gsub!(/"([1-6-]#{@@tilde}\S*|<.+?>)?\s*$/,'”\1') # close "
+ string.gsub!(/"(\.|,)/,'”') # close "
+ string.gsub!(/\s+'/,' `') # open '
+ string.gsub!(/^([1-6-]#{@@tilde}\S*|<.+?>)?\s*'/,'\1`') # open '
end
- @string.gsub!(/^(<:i[1-9]>)?\s*\\_\*\s*/,'\1 \begin{math} \bullet \end{math}~~') #bullets - added 2004w17 watch \\_
- @string.gsub!(/(<font.*?>|<\/font>)/,'')
- @string.gsub!(/\s*<sup>(\S+?)<\/sup>/,'^\1')
- @string.gsub!(/(<sup>|<\/sup>)/,'')
- @string
+ string.gsub!(/^(<:i[1-9]>)?\s*\\_\*\s*/,'\1 \begin{math} \bullet \end{math}~~') #bullets - added 2004w17 watch \\_
+ string.gsub!(/(<font.*?>|<\/font>)/,'')
+ string.gsub!(/\s*<sup>(\S+?)<\/sup>/,'^\1')
+ string.gsub!(/(<sup>|<\/sup>)/,'')
+ string
+ end
+ def pdftex_special_characters_3(string)
+ string.gsub!(/<br(\s*[^\/][^>])/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder
+ string.gsub!(/([^<][^b][^r]\s+)\/>/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder
+ #problem sequence (another kludge) ->
+ string.gsub!(/<=lt>/,'{\UseTextSymbol{OML}{<}}')
+ string.gsub!(/<=gt>/,'{\UseTextSymbol{OML}{>}}')
+ #string.gsub!(/<=lt>/,'\<')
+ #string.gsub!(/<=gt>/,'\>')
+ string.gsub!(/<=underscore>/,'\_')
+ string.gsub!(/(\href\{http:\/\/\S+?)(?:(?:<=tilde>)(\S+))+\}/,'\1\~\2}') #tildes in urls \href treated differently from text
+ string.gsub!(/<=tilde>/,'{\~~}')
+ string.gsub!(/<=pipe>/,'{\textbar}')
+ string.gsub!(/<=caret>/,'{\^{~}}')
+ #string.gsub!(/<=caret>/,'\^{}')
+ string.gsub!(/<=exclaim>/,'\Verbatim{!}')
+ string.gsub!(/<=hash>/,'{\#}')
+ #string.gsub!(/<=hash>/,'{\UseTextSymbol{OT1}{#}}')
+ #string.gsub!(/<=slash>/,'{\slash}')
+ string.gsub!(/<=hardspace>/,'{~}') #changed ... 2005
+ string.gsub!(/<=amp>/,'{\\\&}') #changed ... 2005
+ #string.gsub!(/<=amp>/,'{\UseTextSymbol{OT1}{&}}')
+ string.gsub!(/<=slash>/,'{/}')
+ string.gsub!(/<=backslash>/,'{\textbackslash}')
+ #string.gsub!(/<=asterisk>/,'*')
+ #string.gsub!(/<=exclaim>/,'!')
+ #string.gsub!(/<=asterisk>/,'{\ast}')
+ #string.gsub!(/<=copymark>/,"^{\\copyright} ") # watch has been problematic
+ #copymark='{\\begin{small}\\raisebox{1ex}{\\copyright}\\end{small}} '
+ string.gsub!(/<=copymark>\s*(.+)?\s+(<\\~\d+;\w(?:[0-6]:)?\d+;\w\d+><#@dp:#@dp>)/,"^\\copyright \\textnormal{\\1} \\2") # watch likely to be problematic
+ string
end
- def special_characters_3(para)
- @string.gsub!(/<br(\s*[^\/][^>])/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder
- @string.gsub!(/([^<][^b][^r]\s+)\/>/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder
+ def xetex_special_characters_1(string) # ~ ^ $ & % _ { } #LaTeX special characters - KEEP list
+ #p @@utf_8.list
+ #string=Iconv.conv('ISO-8859-1', 'UTF-8', @string)
+ word=string.scan(/\S+|\n/) #unless line =~/^(?:0~\S|%+\s)/
+ para_array=[]
+ string=if word
+ word.each do |w| # _ - / # | : ! ^ ~
+ unless string =~/^(?:0~|%+ |<!Th?¡ )/um
+ w.gsub!(/[\\]?~/,'<=tilde>') unless w=~/^[1-6]~|~\{|\}~|~\[|\]~|^\^~\s|~\^|\*~\S+|~#|\{t~|<~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+>/
+ w.gsub!(/&#(?:126|152);/,'<=tilde>') #126 usual
+ #w.gsub!(/&#(?:126|152);/,'<=tilde>') unless w=~/https?:\/\/\S+/ #126 usual
+ w.gsub!(/\\?\||&#124;/,'<=pipe>') #unless w=~/<~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+>/ # | SiSU not really special sisu character but done, also LaTeX
+ end
+ para_array << w
+ end
+ string=para_array.join(' ')
+ string=string.strip
+ string
+ else ''
+ end
+ string.gsub!(/<~\d+;(?:\w|[0-6]:)\d+;[umdv]\d+><#@dp:#@dp>/,'')
+ string.gsub!(/.+?<-#>/,'')
+ string.gsub!(/<EOF>/,'')
+ string.gsub!(/<ENDNOTES?>/,'')
+ #problem sequence ->
+ string.gsub!(/&(?:nbsp);/,'<=hardspace>') # < SiSU special character also LaTeX
+ string.gsub!(/&(?:lt|#060);/,'<=lt>') # < SiSU special character also LaTeX
+ string.gsub!(/&(?:gt|#062);/,'<=gt>') # > SiSU special character also LaTeX
+ string.gsub!(/&#123;/,'<=curlyopen>') # { SiSU special character also LaTeX
+ string.gsub!(/&#125;/,'<=curlyclose>') # } SiSU special character also LaTeX
+ string.gsub!(/&#(?:126|152);/,'<=tilde>') # ~ SiSU special character also LaTeX
+ string.gsub!(/&#035;/,'\#') # # SiSU special character also LaTeX
+ string.gsub!(/&#033;/,'!') # ! SiSU not really special sisu character but done, also LaTeX
+ string.gsub!(/&#042;/,'*') # * should you wish to escape astrisk e.g. describing \*{bold}*
+ string.gsub!(/&#045;/,'-') # - SiSU special character also LaTeX
+ string.gsub!(/&#043;/,'+') # + SiSU special character also LaTeX
+ string.gsub!(/&#044;/,',') # + SiSU special character also LaTeX
+ string.gsub!(/&#038;/,'<=amp>') #unless @string=~/<:code>/ # / SiSU special character also LaTeX
+ string.gsub!(/&#047;/,'<=slash>') # / SiSU special character also LaTeX
+ string.gsub!(/&#092;/,'<=backslash>') # \ SiSU special character also LaTeX
+ string.gsub!(/&#095;/,'<=underscore>') # _ SiSU special character also LaTeX
+ string.gsub!(/&#124;/,'|') # | SiSU not really special sisu character but done, also LaTeX
+ string.gsub!(/&#058;/,':') # : SiSU not really special sisu character but done, also LaTeX
+ string.gsub!(/&#094;|\^/,'<=caret>') # ^ SiSU not really special sisu character but done, also LaTeX
+ string.gsub!(/\#/,'<=hash>')
+ ##watch placement, problem sequence ^
+ string.gsub!(/<sup><font face=symbol>&atild;<\/font><\/sup>/,' ')
+ string.gsub!(/<:pb>/,'\newpage')
+ string.gsub!(/<:pn>/,'\clearpage')
+ string.gsub!(/\\copy(right|mark)?/,'<=copymark>') # ok problem with superscript
+ string
+ end
+ def xetex_special_characters_2(string)
+ string.gsub!(/&#156;/,'\oe ')
+ string.gsub!(/\$/,'\$')
+ string.gsub!(/\#/,'\#')
+ string.gsub!(/\%/,'\%')
+ string.gsub!(/\~/,'\~') #revist, should not be necessary to mark remaining tildes
+ if string !~/^\s*<:image|\}:image\s/
+ string.gsub!(/_/,'\_')
+ end
+ string.gsub!(/\{/,'\{')
+ string.gsub!(/\}/,'\}')
+ string.gsub!(/&nbsp;/,'~') # ~ character for hardspace
+ # sequence important must appear after removal of { and }
+ string.gsub!(/&\S+?;/,'') #hmmm
+ # sequence imortant place before removal of &
+ if string=~/<:code>/; @@flag_code=true
+ elsif string=~/<:code-end>/; @@flag_code=false
+ end
+ if @@flag_code; string.gsub!(/&/,'{\\\&}')
+ else string.gsub!(/(\s+&\s+)/,' and ')
+ end
+ string.gsub!(/§/u,'\S') #latex: space between next character not preserved? #string.gsub!(/§ /,'\S ')
+ string.gsub!(/£/u,'\pounds')
+ string.gsub!(/&\S+?;/,' ')
+ string.gsub!(/<a href=".+?">/,' ')
+ string.gsub!(/<\/a>/,' ')
+ string.gsub!(/[^\}>_]((?:https?|file|ftp):\/\/\S+?)(<\/\S>)/,' \begin{scriptsize}\href{\1}{\1} \end{scriptsize}\2') #special case
+ string.gsub!(/((?:^|\s)[}])((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #special case \{ e.g. \}http://url
+ string.gsub!(/\B(?:\\_|\\)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\begin{scriptsize}\\href{\1}{\1}\end{scriptsize}\2') #specially escaped url no decoration
+ unless @@flag_code
+ string.gsub!(/(^|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration <url> positive lookahead, sequence issue with { linked }http://url cannot use \b at start
+ else #code-block: angle brackets special characters, note _ already escaped
+ string.gsub!(/\\_</,'{\UseTextSymbol{OML}{<}}')
+ string.gsub!(/\\_>/,'{\UseTextSymbol{OML}{>}}')
+ end
+ string.gsub!(/<:ee>/,'')
+ string.gsub!(/<!>/,' ')
+ #proposed change, insert, but may be redundant
+ string.gsub!(/ \/><:i[12]>(.+?)(?:\}~|<br)/,' \begin{ParagraphIndent}{0.01\columnwidth}\1\end{ParagraphIndent} ') # footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder
+ string.gsub!(/<(br|p)>|<\/\s*(br|p)>|<(br|p)\s*\/>/," #{@@tex_backslash*2} ") # Work Area
+ string.gsub!(/<b>(.+?)<\/b>/,'\begin{bfseries}\1 \end{bfseries}')
+ string.gsub!(/<em>(.+?)<\/em>/,'\begin{bfseries}\1 \end{bfseries}')
+ string.gsub!(/<(bold|strong)>(.+?)<\/(bold|strong)>/,'\begin{bfseries}\2 \end{bfseries}') # \2 is the enclosed text, \1 is only the tag name
+ string.gsub!(/<h\d+>(.+?)<\/h\d+>/,'\begin{bfseries}\1 \end{bfseries}')
+ string.gsub!(/<i>(.+?)<\/i>/,'\emph{\1}')
+ string.gsub!(/<italic>(.+?)<\/italic>/,'\emph{\1}')
+ string.gsub!(/<u>(.+?)<\/u>/,'\uline{\1}') # ulem
+ string.gsub!(/<cite>(.+?)<\/cite>/,"``\\1''") # quote
+ string.gsub!(/<ins>(.+?)<\/ins>/,'\uline{\1}') # ulem
+ string.gsub!(/<del>(.+?)<\/del>/,'\sout{\1}') # ulem
+ string.gsub!(/<sub>(.+?)<\/sub>/,"\$_{\\textrm{\\1}}\$")
+ string.gsub!(/<sup>(.+?)<\/sup>/,"\$^{\\textrm{\\1}}\$")
+ unless @@flag_code
+ string.gsub!(/"(.+?)"/,'“\1”') # quote marks / quotations open & close " need condition exclude for code
+ string.gsub!(/\s+"/,' “') # open "
+ string.gsub!(/^([1-6-]#{@@tilde}\S*|<.+?>)?\s*"/,'\1“') # open "
+ string.gsub!(/"(\s|\.|,|:|;)/,'”\1') # close "
+ string.gsub!(/"([1-6-]#{@@tilde}\S*|<.+?>)?\s*$/,'”\1') # close "
+ string.gsub!(/"(\.|,)/,'”') # close "
+ string.gsub!(/\s+'/,' `') # open '
+ string.gsub!(/^([1-6-]#{@@tilde}\S*|<.+?>)?\s*'/,'\1`') # open '
+ end
+ #string.gsub!(/^(<:i[1-9]>)?\s*\\_\*\s*/,'\1 \begin{math} \bullet \end{math}~~') #bullets - added 2004w17 watch \\_
+ string.gsub!(/^(<:i[1-9]>)?\s*\\_\*\s*/,'\1 ● ~~')
+ string.gsub!(/(<font.*?>|<\/font>)/,'')
+ string.gsub!(/\s*<sup>(\S+?)<\/sup>/,'^\1')
+ string.gsub!(/(<sup>|<\/sup>)/,'')
+ string
+ end
+ def xetex_special_characters_3(string)
+ string.gsub!(/<br(\s*[^\/][^>])/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder
+ string.gsub!(/([^<][^b][^r]\s+)\/>/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder
#problem sequence (another kludge) ->
- @string.gsub!(/<=lt>/,'{\UseTextSymbol{OML}{<}}')
- @string.gsub!(/<=gt>/,'{\UseTextSymbol{OML}{>}}')
- #@string.gsub!(/<=lt>/,'\<')
- #@string.gsub!(/<=gt>/,'\>')
- @string.gsub!(/<=underscore>/,'\_')
- @string.gsub!(/(\href\{http:\/\/\S+?)(?:(?:<=tilde>)(\S+))+\}/,'\1\~\2}') #tildes in urls \href treated differently from text
- @string.gsub!(/<=tilde>/,'{\~~}')
- @string.gsub!(/<=pipe>/,'{\textbar}')
- @string.gsub!(/<=caret>/,'{\^{~}}')
- #@string.gsub!(/<=caret>/,'\^{}')
- @string.gsub!(/<=exclaim>/,'\Verbatim{!}')
- @string.gsub!(/<=hash>/,'{\#}')
- #@string.gsub!(/<=hash>/,'{\UseTextSymbol{OT1}{#}}')
- #@string.gsub!(/<=slash>/,'{\slash}')
- @string.gsub!(/<=hardspace>/,'{~}') #changed ... 2005
- @string.gsub!(/<=amp>/,'{\\\&}') #changed ... 2005
- #@string.gsub!(/<=amp>/,'{\UseTextSymbol{OT1}{&}}')
- @string.gsub!(/<=slash>/,'{/}')
- @string.gsub!(/<=backslash>/,'{\textbackslash}')
- #@string.gsub!(/<=asterisk>/,'*')
- #@string.gsub!(/<=exclaim>/,'!')
- #@string.gsub!(/<=asterisk>/,'{\ast}')
- #@string.gsub!(/<=copymark>/,"^{\\copyright} ") # watch has been problematic
+ string.gsub!(/<=lt>/,'{\UseTextSymbol{OML}{<}}')
+ string.gsub!(/<=gt>/,'{\UseTextSymbol{OML}{>}}')
+ #string.gsub!(/<=lt>/,'\<')
+ #string.gsub!(/<=gt>/,'\>')
+ string.gsub!(/<=underscore>/,'\_')
+ string.gsub!(/(\href\{http:\/\/\S+?)(?:(?:<=tilde>)(\S+))+\}/,'\1\~\2}') #tildes in urls \href treated differently from text
+ string.gsub!(/<=tilde>/,'{\~~}')
+ string.gsub!(/<=pipe>/,'{\textbar}')
+ string.gsub!(/<=caret>/,'{\^{~}}')
+ #string.gsub!(/<=caret>/,'\^{}')
+ string.gsub!(/<=exclaim>/,'\Verbatim{!}')
+ string.gsub!(/<=hash>/,'{\#}')
+ #string.gsub!(/<=hash>/,'{\UseTextSymbol{OT1}{#}}')
+ #string.gsub!(/<=slash>/,'{\slash}')
+ string.gsub!(/<=hardspace>/,'{~}') #changed ... 2005
+ string.gsub!(/<=amp>/,'{\\\&}') #changed ... 2005
+ #string.gsub!(/<=amp>/,'{\UseTextSymbol{OT1}{&}}')
+ string.gsub!(/<=slash>/,'{/}')
+ string.gsub!(/<=backslash>/,'{\textbackslash}')
+ #string.gsub!(/<=asterisk>/,'*')
+ #string.gsub!(/<=exclaim>/,'!')
+ #string.gsub!(/<=asterisk>/,'{\ast}')
+ #string.gsub!(/<=copymark>/,"^{\\copyright} ") # watch has been problematic
#copymark='{\\begin{small}\\raisebox{1ex}{\\copyright}\\end{small}} '
- @string.gsub!(/<=copymark>\s*(.+)?\s+(<\\~\d+;\w(?:[0-6]:)?\d+;\w\d+><#@dp:#@dp>)/,"^\\copyright \\textnormal{\\1} \\2") # watch likely to be problematic
- @string
+ string.gsub!(/<=copymark>\s*(.+)?\s+(<\\~\d+;\w(?:[0-6]:)?\d+;\w\d+><#@dp:#@dp>)/,"^\\copyright \\textnormal{\\1} \\2") # watch likely to be problematic
+ string
end
- def special_characters_curly(para)
- @string.gsub!(/<=curlyopen>/,'\{')
- @string.gsub!(/<=curlyclose>/,'\}')
- @string
+ def special_characters_curly(string)
+ string.gsub!(/<=curlyopen>/,'\{')
+ string.gsub!(/<=curlyclose>/,'\}')
+ string
end
- def special_characters_unsafe_1(para) #depreciated, make obsolete
+
+
+ def special_characters_unsafe_1(string) #depreciated, make obsolete
# some substitutions are sequence sensitive, rearrange with care.
- @string.gsub!(/\\backslash (copyright|clearpage|newpage)/,"\\\\\\1") #kludge bad solution, find out where tail is sent through specChar !
- end
- def special_characters_unsafe_2(para)
- end
- def special_characters_unsafe_3(para)
+ string.gsub!(/\\backslash (copyright|clearpage|newpage)/,"\\\\\\1") #kludge bad solution, find out where tail is sent through specChar !
+ string
end
def special_characters #special characters - some substitutions are sequence sensitive, rearrange with care.
- special_characters_1(@string)
- special_characters_unsafe_1(@string)
- special_characters_2(@string)
- special_characters_3(@string)
+ string=@string
+ case @tex2pdf
+ when /pdf/
+ string=pdftex_special_characters_1(string) unless string.nil?
+ string=special_characters_unsafe_1(string) unless string.nil? #pdftex_special_characters_unsafe_1(@string)
+ string=pdftex_special_characters_2(string) unless string.nil?
+ string=pdftex_special_characters_3(string) unless string.nil?
+ when /xe/
+ string=xetex_special_characters_1(string) unless string.nil?
+ string=special_characters_unsafe_1(string) unless string.nil? #xetex_special_characters_unsafe_1(@string)
+ string=xetex_special_characters_2(string) unless string.nil? #issues with xetex
+ string=xetex_special_characters_3(string) unless string.nil?
+ end
+ @string=string
end
def special_characters_safe #special characters - some substitutions are sequence sensitive, rearrange with care.
- special_characters_1(@string)
- special_characters_2(@string)
- #special_characters_3(@string)
+ string=@string
+ case @tex2pdf
+ when /pdf/
+ string=pdftex_special_characters_1(@string) unless string.nil?
+ string=pdftex_special_characters_2(@string) unless string.nil?
+ #special_characters_3(@string)
+ when /xe/
+ string=xetex_special_characters_1(@string) unless string.nil?
+ string=xetex_special_characters_2(@string) unless string.nil? # remove this to start with, causes issues
+ end
+ @string=string
end
def heading_major(para,lev)
title=@md.title
@@ -947,17 +1120,27 @@ WOK
end
end
def tex_head_encode
- case @md.file_encoding
- when /iso-?8859/i #% iso8859
- <<WOK
-\\usepackage[latin1]{inputenc}
+ case @tex2pdf
+ when /xe/
+ <<WOK
+\\usepackage{babel}
+\\usepackage{ucs}
+\\usepackage{fontspec}
+\\usepackage{xunicode}
WOK
- else #% utf-8 assumed
- <<WOK
+ when /pdf/
+ if @md.file_encoding =~ /iso-?8859/i #% iso8859
+ <<WOK
+% \\usepackage[latin1]{inputenc}
+\\usepackage{fontspec}
+WOK
+ else #% utf-8 assumed
+ <<WOK
\\usepackage{babel}
\\usepackage{ucs}
\\usepackage[utf8x]{inputenc}
WOK
+ end
end
end
def tex_head_info
@@ -1099,7 +1282,7 @@ WOK
\\usepackage{url}
\\usepackage{alltt}
\\usepackage{thumbpdf}
-\\usepackage[pdftex,
+\\usepackage[#{@tex2pdf},
#{color.strip}
pdftitle={#@string1},
% pdftitle={Untitled},
@@ -1125,6 +1308,9 @@ WOK
pdfstartview=FitH
]
{hyperref}
+%% trace lost characters
+% \\tracinglostchars = 1
+% \\tracingonline = 1
\\usepackage[usenames]{color}
\\definecolor{myblack}{rgb}{0,0,0}
\\definecolor{myred}{rgb}{0.75,0,0}