# coding: utf-8
=begin
* Name: SiSU
* Description: a framework for document structuring, publishing and search
* Author: Ralph Amissah
* Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
2007, 2008 Ralph Amissah All Rights Reserved.
* License: GPL 3 or later:
SiSU, a framework for document structuring, publishing and search
Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
2007, 2008 Ralph Amissah
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
If you have Internet connection, the latest version of the GPL should be
available at these locations:
* SiSU uses:
* Standard SiSU markup syntax,
* Standard SiSU meta-markup syntax, and the
* Standard SiSU object citation numbering and system
* Homepages:
* Download:
* Ralph Amissah
** Description: preprocessing, (document abstraction), data abstraction used
in subsequent processing
=end
module SiSU_DAL
require "#{SiSU_lib}/defaults"
require "#{SiSU_lib}/sysenv"
require "#{SiSU_lib}/param"
require "#{SiSU_lib}/dal_syntax"
require "#{SiSU_lib}/dal_doc_str"
require "#{SiSU_lib}/dal_idx"
require "#{SiSU_lib}/dal_numbering"
require "#{SiSU_lib}/i18n"
require "#{SiSU_lib}/shared_sem"
include SiSU_Env
include SiSU_Param
include SiSU_Viz
include SiSU_Syntax
# Creating an instance (re)initialises the class-level endnote/vocabulary bookkeeping.
# initialize does not call super.
class Instantiate < SiSU_Param::Parameters::Instructions
  def initialize
    @@flag_vocab=0
    @@endnote=Hash.new
    # NOTE(review): both names are bound to one and the same Array object - confirm this sharing is wanted
    @@word_mode=[]
    @@endnote_array=@@word_mode
    @@endnote_counter=1
    @@endnote_counter_asterisk=1
    @@endnote_counter_dag=1
    @@line_mode=''
  end
end
class Source #{@my_make_fns.meta}") if @md.cmd =~/M/
tell.txt_grey unless @md.cmd =~/q/
dal.each{|s| dal_array << "#{s.strip}\n\n" unless s.strip.empty?}
dal_array
end
# Returns the document abstraction: unmarshalled from the file @fnm when that file exists,
# otherwise built afresh through SiSU_DAL::Source#create_dal.
def read_fnm
  return SiSU_DAL::Source.new(@opt).create_dal unless FileTest.file?(@fnm)
  # Ruby before 1.9 has no encoding suffix for the open mode
  open_mode=(RUBY_VERSION < '1.9') ? 'r' : 'r:utf-8'
  File.open(@fnm,open_mode){ |cache| Marshal.load(cache) }
end
end
# Persists a document abstraction: as readable text (.meta file) and as a Marshal dump.
class Output
  # md:: document parameters (cmd and fns are read)
  # data:: the document abstraction, an enumerable of strings
  def initialize(md,data)
    @md=md
    @data=data
    @my_make=SiSU_Env::Create_file.new(@md.cmd,@md.fns)
    env=SiSU_Env::Info_env.new(@md.fns)
    @hard="#{env.path.dal}/#{@md.fns}.meta"
  end
  # With 'M' in the command flags every non-blank object is written, followed by a blank line,
  # to the handle supplied by @my_make.file_meta; without it a previously written .meta file is removed.
  def hard_output
    if @md.cmd !~/M/
      File.unlink(@hard) if FileTest.file?(@hard)
    else
      meta_file=@my_make.file_meta
      @data.each do |obj|
        text=obj.strip
        meta_file.puts text + "\n\n" unless text.empty?
      end
    end
  end
  # Dumps the abstraction (as an Array) to the path given by @my_make.marshal_meta.
  def marshal
    target=@my_make.marshal_meta
    File.open(target,'w'){ |out| Marshal.dump(@data.to_a,out) }
  end
end
class Make
# class-wide state shared by all Make instances
@@endnote=Hash.new
# NOTE(review): one Array object is reachable under both names below - confirm the sharing is intended
@@word_mode=[]
@@endnote_array=@@word_mode
# running endnote counters: plain, asterisk and dagger series
@@endnote_counter=1
@@endnote_counter_asterisk=1
@@endnote_counter_dag=1
@@comment='%' # prefix of a comment line in the markup
@@dp=nil # digest pattern, filled in on first instantiation
# md:: document parameters (fns is read here)
# data:: the source document
# Sets up environment, skin, digest pattern (looked up once and cached in @@dp), language and translation helpers.
def initialize(md,data)
  @md=md
  @data=data
  @@word_mode=[]
  @env=SiSU_Env::Info_env.new(@md.fns)
  @skin=SiSU_Env::Info_skin.new(@md)
  @@dp ||=SiSU_Env::Info_env.new.digest.pattern
  @dp=@@dp
  lang=SiSU_Env::Standardise_language.new.file_to_language(@md.fns)
  @language=lang[:l]
  @tr=SiSU_Translate::Source.new(@md,@language)
end
# Puts the class-level endnote and vocabulary state back to its starting values.
def reset
  @@flag_vocab=0
  @@endnote=Hash.new
  # NOTE(review): the two names share a single Array object - confirm the sharing is intended
  @@word_mode=[]
  @@endnote_array=@@word_mode
  @@endnote_counter=@@endnote_counter_asterisk=@@endnote_counter_dag=1
  @@line_mode=''
end
# Runs the whole abstraction pipeline over @data and returns the resulting objects with the
# metadata section appended. When the command flags contain m or M the result is also
# written out through SiSU_DAL::Output. Class-level state is reset before and after.
def song
  reset
  @metafile="#{@env.path.dal}/#{@md.fns}.meta"
  SiSU_Env::Create_file.new(@md.cmd,@md.fns) # instantiated as before; the object itself is not used in this method
  paras=@data.join.split("\n\n")
  paras=expand_insertions?(paras)
  paras=SiSU_document_structure::Code.new(@md,paras).code
  paras=substitutions_and_insertions?(paras)
  # anything that still holds a blank line is broken up into separate objects
  paras=paras.map{|x| (x =~ /\n\n/m) ? x.split(/\n\n+/) : x }.flatten
  paras=SiSU_Syntax::Markup.new(@md,paras).songsheet
  paras=character_check(paras)
  paras=images(paras)
  paras=SiSU_document_structure::Tables.new(@md,paras).tables
  paras=SiSU_numbering::Numbering.new(@md,paras).numbering_song
  paras=SiSU_book_index::Book_index.new(paras).indexing_song if @md.book_index
  paras=endnotes(paras)
  paras=object_digest(paras)
  dal=paras + metadata(paras)
  if @md.cmd =~/[mM]/
    SiSU_DAL::Output.new(@md,dal).hard_output
    SiSU_DAL::Output.new(@md,dal).marshal
  end
  reset
  dal
end
protected
# Normalises each paragraph (comments, font/line-break shortcuts, a few special characters)
# and gathers "binary" endnotes ("^~ text" paragraphs) into @@endnote_array for later insertion.
# data:: array of paragraph strings (each is stripped and edited in place)
# Returns the tuned paragraphs, each terminated by "\n"; gathered endnote paragraphs are dropped.
def character_check(data)
  begin
    require 'iconv' # not shipped with Ruby >= 2.0 and not needed by anything below
  rescue LoadError
  end
  reset
  @tuned_file=[]
  endnote_no=1
  data.each do |para|
    para.strip!
    para.gsub!(/^([12])~\?\s+/,'\1~ ') #conditional header for incorporated document 2004w12
    para.gsub!(/^[{~}]\s*$/,'')
    para.gsub!(/^#{@@comment}.*/,'') #remove comment and divider #%
    para.gsub!(/<~#>|~#\s*/,"#{Mx[:fa_o]}~##{Mx[:fa_c]}")
    para.gsub!(/-#\s*/,"#{Mx[:fa_o]}-##{Mx[:fa_c]}#{Mx[:fa_o]}~##{Mx[:fa_c]}")
    #para.gsub!(/(#\{{3} arch-tag:|0\{{3}~cvs)\s+/, "0{{~rcs ") #KEEP ... ENABLE WIDER USE OF REVISION CONTROL
    para.gsub!(/(#{Mx[:en_a_o]})\s*\s+/,'\1 '); para.gsub!(/(~\{\s*)\s+/,'\1 ')
    para.gsub!(/ \/\//,"#{Mx[:br_line]}") #added 2004w29
    # NOTE(review): the pattern below is a literal newline, so every line end inside a paragraph
    # becomes a line-break marker; the trailing comment suggests a markup tag was meant - confirm
    para.gsub!(/
/,"#{Mx[:br_line]}") #needed by xml, xhtml etc.
    para.gsub!(/\t/,' ')
    para.gsub!(/\342\200\231/u,"'") #if para =~/’/ #Avoid #‘ ’ #“ ”
    # NOTE(review): the character matched below is U+FFFD (replacement character) - confirm intended character
    para.gsub!(/�/u,' ') #watch, replace with char code
    para.gsub!(/·/u,'*')
    para.gsub!(/\\copy(?:right)?\b/,'©')
    para.gsub!(/\\trademark\b|\\tm\b/,'®')
    #non_utf8(para)
    para=para + "\n"
    unless para =~/^#{Mx[:gr_o]}code#{Mx[:gr_c]}/
      case para
      when /\^~/ # endnotes
        #% Note must do this first (earlier loop) and then enter gathered data into ~^\d+
        note=para.gsub(/\n/,'')
        # only a marker that is followed by note text is gathered; chaining bang methods here
        # raised NoMethodError whenever the second substitution found nothing to replace
        if note =~/\^~\s+.+/
          @@endnote_array << note.gsub(/\^~\s+(.+)\s*/,%{#{Mx[:en_a_o]}#{endnote_no} \\1 #{Mx[:en_a_c]}}).strip
          endnote_no+=1
        end
        para=nil if para =~/\^~ .+/ #removes 'binary' endnote now in endnote array for later insertion
      end
    end
    @tuned_file << para unless para.nil?
  end
  @tuned_file=@tuned_file.flatten
end
# Adds pixel dimensions to image links that lack them (needs RMagick), scaling anything
# larger than 640 pixels down proportionally; an image found in none of the source
# directories is replaced by a "[ name (ext missing) ]" marker.
# data:: array of paragraph strings (stripped and edited in place)
# Returns the array of processed paragraphs.
def images(data)
  tuned_file=[]
  @rmgk=false
  if SiSU_Env::Info_settings.new.program?('rmagick'); @rmgk=SiSU_Env::Load.new('RMagick').prog
  else tell=SiSU_Screen::Ansi.new(@md.cmd,'use of RMagick is not enabled in sisurc.yml')
    tell.warn if @md.cmd =~/[vVM]/
  end
  data.each do |para|
    para.strip!
    if para =~/#{Mx[:lnk_o]}\s*\S+\.(?:png|jpg|gif)(?:\s*|\s+.+)?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\S+|image)/
      if para !~/#{Mx[:lnk_o]}\s*\S+\.(?:png|jpg|gif)\s+\d+x\d+\s+/
        m=/#{Mx[:lnk_o]}\s*(\S+\.(?:png|jpg|gif))/
        if @rmgk
          # each distinct image once, so a repeated image does not get its size appended twice
          para.scan(m).flatten.uniq.each do |image|
            dir=SiSU_Env::Info_env.new(@md.fns)
            path_image=[dir.path.image_source_local_tex,dir.path.image_source_remote_tex,dir.path.image_source_tex]
            # first directory holding the image; a block parameter is local to its block
            # (Ruby >= 1.9), so the hit has to be returned rather than assigned from inside
            image_path=path_image.find{|candidate| FileTest.exist?("#{candidate}/#{image}") }
            if image_path
              img=Magick::ImageList.new("#{image_path}/#{image}")
              img_col,img_row=img.columns,img.rows
              if img_col > img_row #landscape
                if img_col> 640 #480
                  img_col=640 #480
                  img_row=((1.00*img_col/img.columns)*img.rows).round
                end
              else #portrait
                if img_col> 640 #480
                  img_col=640 #480
                  img_row=((1.00*img_col/img.columns)*img.rows).round
                end
                if img_row > 640
                  img_row=640
                  img_col=((1.00*img_row/img.rows)*img.columns).round
                end
              end
              # the file name is matched literally (its dot must not act as a wildcard)
              para.gsub!(/(#{Regexp.escape(image)})/,"#{image} #{img_col}x#{img_row}")
            else para.gsub!(/#{Mx[:lnk_o]}\s*(\S+)\.(png|jpg|gif).+?#{Mx[:lnk_c]}((?:https?|file|ftp):\S+|image)/,'[ \1 (\2 missing) ]')
            end
          end
        else
          para.scan(m) do |image|
            tell=SiSU_Screen::Ansi.new(@md.cmd,'where image dimensions have not been provided RMagick is required',image)
            tell.warn #unless @opt.cmd =~/q/
          end
        end
      end
    end
    para.gsub!(/(#{Mx[:lnk_o]})\s*(\S+\.(?:png|jpg|gif))\s+/i,'\1\2 ') if para =~/#{Mx[:lnk_o]}\s*\S+\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\S+|image)/
    tuned_file << para unless para.nil?
  end
  tuned_file
end
# Lists the output file names implied by a shortcut command such as "3" or "3sS"
# (the flag set behind each digit comes from SiSU_Env::Info_processing_flag).
# cmd_shortcut:: shortcut digit, optionally followed by s (source) and/or S (zipped source)
# source:: source file name, used for the :src entries
# Returns {:gen=>[generated file names], :src=>[source file names]}; both lists hold only
# the selected names (no nil placeholders for unselected types).
def output_filetypes_in_cmd(cmd_shortcut,source=nil)
  #make list of file types in shortcut command (as configured), e.g. when sisu -3 is used
  cf_defaults=SiSU_Env::Info_processing_flag.new
  cmd_list=case cmd_shortcut.inspect
  when /0/; cf_defaults.cf_0
  when /1/; cf_defaults.cf_1
  when /2/; cf_defaults.cf_2
  when /3/; cf_defaults.cf_3
  when /4/; cf_defaults.cf_4
  when /5/; cf_defaults.cf_5
  end
  # processing flag => file name(s) produced, in listing order
  generated=[
    [/y/,'sisu_manifest.html'],
    [/h/,['toc.html', 'doc.html']],
    [/p/,['landscape.pdf', 'portrait.pdf']],
    #[/i/,'manpage.1'],
    [/o/,'opendocument.odt'],
    [/b/,'scroll.xhtml'],
    [/x/,'sax.xml'],
    [/X/,'dom.xml'],
    [/a/,'plain.txt'],
    [/g/,'wiki.txt'],
    [/w/,'concordance.html'],
    [/N/,'digest.txt'],
  ]
  file_type_names={}
  file_type_names[:gen]=generated.map{|flag,names| names if cmd_list =~ flag }.flatten.compact
  sources=[]
  sources << source if source and cmd_shortcut =~ /s/
  sources << "#{source}.zip" if cmd_shortcut =~ /S/
  file_type_names[:src]=sources.flatten.compact
  file_type_names
end
# Expands a "{ text [3sS] }document.sst" style reference to another SiSU document into a
# group block that lists links to that document's outputs.
# data:: array of paragraph strings
# Returns a new array; paragraphs without such a reference (and comment lines) pass through unchanged.
def expand_insertions?(data)
  tuned_file,tuned_file_tmp=[],[]
  data.each do |para|
    if para !~/^%+\s/ \
    and para =~/\{(?:~\^\s+)?(.+?)\s\[(?:\d(?:[sS]*))\]\}(?:\.\.\/\S+?\/|\S+?\.ss[tm]\b)/
      pre,txt,cmd,source,url_dir,note,manifest=nil,nil,nil,nil,nil,nil,nil
      @u=SiSU_Env::Info_env.new.url
      # NOTE(review): defined? only tells whether the call can be made, not whether a remote url is set
      if defined? @u.remote
        if para =~/(.+?)\{(.+?)\s\[(\d[sS]*)\]\}((\S+?)\.ss[tm]\b)(.*)/m
          pre,txt,cmd,source,url_dir,note="#{$1.strip} ",$2,$3,$4,$5,$6
        elsif para =~/\{(.+?)\s\[(\d[sS]*)\]\}((\S+?)\.ss[tm]\b)(.*)/
          pre,txt,cmd,source,url_dir,note='',$1,$2,$3,$4,$5
        end
        manifest="#{pre}{#{txt} }#{@u.remote}/#{url_dir}/toc.html#{note}\n\n"
      else
        puts "error, does not currently support relative paths (relative paths were removed, as had problems for citation, and was not suited to all output types should possibly reconsider) #{__FILE__} #{__LINE__}"
        if para =~/\{(?:~\^\s+)?(.+?)\s\[(\d[sS]*)\]\}\.\.\/(\S+?)\/(\s+#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]})?/
          txt,cmd,url_dir,note=$1,$2,$3,$4
          manifest="{ #{txt} }../#{url_dir}/toc.html#{note}\n\n"
        end
      end
      tuned_file_tmp << manifest
      output_filetypes=output_filetypes_in_cmd(cmd,source)
      # one indented link line per generated output type
      output_filetypes[:gen].each do |o_f|
        describe = case o_f
        when /sisu_manifest.html/; '~^ document manifest'
        when /toc.html/; ' html, segmented text'
        when /doc.html/; ' html, scroll, document in one'
        when /landscape.pdf/; ' pdf, landscape'
        when /portrait.pdf/; ' pdf, portrait'
        when /opendocument.odt/; ' odf:odt, open document text'
        when /scroll.xhtml/; ' xhtml scroll'
        when /sax.xml/; ' xml, sax'
        when /dom.xml/; ' xml, dom'
        when /plain.txt/; ' plain text utf-8'
        #when /manpage.1/; ' man, 1'
        when /wiki.txt/; ' wiki text'
        when /concordance.html/; ' concordance'
        when /digest.txt/; ' dcc, document content certificate (digests)'
        else nil
        end
        if describe
          tuned_file_tmp << if @u.remote #to double space <:br> at beginning of entry
            if describe =~/^~\^ /
              "#{Mx[:nbsp]*4} {#{describe} }#{@u.remote}/#{url_dir}/#{o_f} "
            else
              "#{Mx[:nbsp]*4} { #{describe} }#{@u.remote}/#{url_dir}/#{o_f} "
            end
          else
            if describe =~/^~\^ /
              "#{Mx[:nbsp]*4} {#{describe} }../#{url_dir}/#{o_f} "
            else "#{Mx[:nbsp]*4} { #{describe} }../#{url_dir}/#{o_f} "
            end
          end
        end
      end
      # one link line per requested source form (plain markup and/or zipped pod)
      output_filetypes[:src].each do |o_f|
        describe=case o_f
        when /#{source}\.zip/; ' markup source (zipped) pod'
        when /#{source}/; ' markup source text'
        else nil
        end
        if describe
          tuned_file_tmp << if @u.remote
            x=if describe =~/zip/
              "#{Mx[:nbsp]*4} {#{describe} }#{@u.src_pod}/#{o_f} "
            else "#{Mx[:nbsp]*4} {#{describe} }#{@u.src_txt}/#{o_f} "
            end
          else
            # NOTE(review): plain source text is linked under ../zip/ and the zipped pod under ../pod/ - confirm directories
            x=if describe =~/zip/
              "#{Mx[:nbsp]*4} { #{describe} }../pod/#{o_f} "
            else "#{Mx[:nbsp]*4} { #{describe} }../zip/#{o_f} "
            end
          end
        end
      end
      tuned_file << 'group{' << tuned_file_tmp.join("\n") << '}group'
      #tuned_file << 'group{' << tuned_file_tmp.join("\n").strip << '}group'
      tuned_file_tmp=[]
    else tuned_file << para
    end
  end
  tuned_file
end
# Removes a leading "#!...sisu" line, comments out a leading SiSU version identifier, expands
# "<:insertN>" paragraphs with the text supplied by the skin, and converts "@tag:" headers
# to their internal meta form.
# data:: array of paragraph strings (edited in place where possible)
# Returns the expanded array of paragraphs.
def substitutions_and_insertions?(data)
  data_expand=[]
  if data[0] =~ /^#!\s*(?:\/usr\/bin\/env sisu|\/usr\/bin\/sisu)/ # remove bang from top #! (however file is stripped, so will be removed provided no content precedes it)
    data[0].gsub!(/^#!\s*\/usr\/bin\/sisu/,'')
    data[0].gsub!(/^#!\s*\/usr\/bin\/env sisu/,'')
  end
  if data[0] =~ /^(SiSU\s+[\d.]*|sisu-[\d.]+)$/ # SiSU identifier
    data[0].gsub!(/^(SiSU\s*[\d.]*)$/,'% \1')
    data[0].gsub!(/^(sisu-[\d.]+)$/,'% \1')
  end
  data.each do |para|
    if para =~/<:insert\d+!?>/ \
    and para !~/^%\s+/
      @skin.select
      ins=SiSU_Viz::Inserts.new
      case para
      when /^\s*<:insert1>\s*$/
        para=[]
        ins.insert1.split(/\n\n/).each{|x| para << x }
      when /^\s*<:insert2>\s*$/
        para=[]
        ins.insert2.split(/\n\n/).each{|x| para << x }
      when /^\s*<:insert3>\s*$/
        # NOTE(review): inserts 3 and 4 are taken as one string and added line by line,
        # unlike the others which are split on blank lines - confirm that this is intended
        para=ins.insert3
      when /^\s*<:insert4>\s*$/
        para=ins.insert4
      when /^\s*<:insert5>\s*$/
        para=[]
        ins.insert5.split(/\n\n/).each{|x| para << x << "\n"}
      when /^\s*<:insert6>\s*$/
        para=[]
        ins.insert6.split(/\n\n/).each{|x| para << x << "\n"}
      when /^\s*<:insert7>\s*$/
        para=[]
        ins.insert7.split(/\n\n/).each{|x| para << x << "\n"}
      end
      # a String has no #each from Ruby 1.9 on; each_line is what String#each did before that
      if para.respond_to?(:each_line); para.each_line{|x| data_expand << x }
      else para.each{|x| data_expand << x }
      end
    else data_expand << para
    end
  end
  data_expand.flatten!
  data_expand.compact!
  # NOTE(review): if the Structure call returns a new string rather than the paragraph itself,
  # the substitutions below do not reach data_expand - confirm
  data_expand.each do |para|
    para=if @md.markup_version.to_f >= 0.38
      SiSU_document_structure::Structure.new(@md,para).structure_markup_normalize
    else
      SiSU_document_structure::Structure.new(@md,para).structure_marks
    end
    para.gsub!(/^((?:[1-9]|:?[A-C])~\S*)\s*$/,'\1~ [Note: heading marker::required title missing]~#') #conditional header for incorporated document 2004w12
    if para =~/^@\S+?:/
      para.gsub!(/^@(\S+?):\s+/,"#{Mx[:meta_o]}\\1#{Mx[:meta_c]}")
      para.gsub!(/^@(\S+?):([+-])\s+/,"#{Mx[:meta_o]}\\1\\2#{Mx[:meta_c]}")
    end
  end
end
# Numbers endnotes and inserts previously gathered ones, paragraph by paragraph; code blocks are
# left alone. With --no-asterisk / --no-dagger / --no-annotate in the modifiers the matching
# annotation notes are removed first.
# data:: array of paragraph strings (edited in place)
# Returns the array of processed paragraphs.
def endnotes(data)
  @tuned_file=[]
  tokens=/#{Mx[:gr_o]}group#{Mx[:gr_c]}\n|\n#{Mx[:gr_o]}group-end#{Mx[:gr_c]}|\S+/m
  #% endnote work zone
  data.each do |para|
    modifiers=@md.mod.inspect
    # manually numbered endnotes -->
    para.gsub!(/#{Mx[:en_b_o]}\s.+?#{Mx[:en_b_c]}/,'') if modifiers =~/--no-asterisk|--no-annotate/
    para.gsub!(/#{Mx[:en_b_o]}[+]\s.+?#{Mx[:en_b_c]}/,'') if modifiers =~/--no-dagger|--no-annotate/
    unless para =~/^#{Mx[:gr_o]}code#{Mx[:gr_c]}/
      # auto-numbered endnotes -->
      inline_note=para =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}[*+]\s+.+?#{Mx[:en_b_c]}/
      #%Note the second form inserts endnotes gathered in an earlier loop
      if inline_note or para =~/~\^(?:\s|$)|<:e>/
        para.gsub!(/\s*(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/,' \1') if inline_note # required 2003w31
        words=para.scan(tokens)
        para=endnote_call_number(words).join(' ')
      end
    end
    @tuned_file << para
  end
  @tuned_file=@tuned_file.flatten
end
# Gives every endnote opener in a tokenised paragraph its running number (separate counters for
# plain, asterisk and dagger notes) and replaces "~^" / "<:e>" markers with the next gathered endnote.
# data:: array of word tokens (edited in place)
# Returns the same array.
def endnote_call_number(data)
  data.each do |word|
    # code blocks are filtered out by the caller; a pattern match against the token array
    # itself never matched and is an error on current Ruby, so no such test is made here
    case word
    when /#{Mx[:en_a_o]}/
      unless word =~/#{Mx[:en_a_o]}[*+]+/
        word.gsub!(/#{Mx[:en_a_o]}/,"#{Mx[:en_a_o]}#{@@endnote_counter} ")
        @@endnote_counter+=1
      end
    when /#{Mx[:en_b_o]}/
      if word =~/#{Mx[:en_b_o]}[+]/
        word.gsub!(/#{Mx[:en_b_o]}[+]/,"#{Mx[:en_b_o]}+#{@@endnote_counter_dag} ")
        @@endnote_counter_dag+=1
      else
        word.gsub!(/#{Mx[:en_b_o]}[*]?/,"#{Mx[:en_b_o]}*#{@@endnote_counter_asterisk} ")
        @@endnote_counter_asterisk+=1
      end
    when /~\^|<:e>/
      # block form: the endnote text is inserted verbatim, backslashes included
      word.gsub!(/~\^|<:e>/){ @@endnote_array[@@endnote_counter-1].to_s }
      @@endnote_counter+=1
    end
  end
end
# Builds the closing "Document Information (metadata)" section of the document abstraction.
# data:: processed document objects (strings); objects starting with a Mx[:meta_o]...Mx[:meta_c]
#        header tag and "0~cvs+ " / "0~rcs+ " lines are looked at
# Returns the result of object_digest applied to the assembled section.
# ocnm, ocnd and ocnv are running counters written into the "~0;m..;d.." / "~0;m..;v.." identifiers;
# they are incremented in a fixed sequence, so the order of the statements below matters.
def metadata(data)
meta,@dc,@rc,@cvs,dctitle,add=Array.new(6){[]}
dir=SiSU_Env::Info_env.new(@md.fns)
base_html="#{dir.url.root}/#{@md.fnb}"
ocnm=ocnd=ocnv=0
ocnm+=1
header1="\n#{Mx[:lv_o_1]}meta#{Mx[:lv_c]}Document Information (metadata) #{Mx[:id_o]}~0;0:0;m#{ocnm}#{Mx[:id_c]}"
ocnm+=1
header4="\n#{Mx[:lv_o_4]}metadata#{Mx[:lv_c]}Metadata #{Mx[:id_o]}~0;m#{ocnm};m#{ocnm}#{Mx[:id_c]}"
ocnm+=1; ocnd+=1
head_no_dc="#{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
ocnm+=1; ocnd+=1
head_no_dc_tag="#{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
# first pass: one @dc entry per recognised header tag, with the value taken from @md
data.each do |para|
case para
# NOTE(review): only the names in this pattern get as far as the inner case; its branches for
# other names (subject, abstract, language.original, relation, coverage, keywords, comments,
# cls_*, prefix*) look unreachable - confirm whether the list here should be longer
when /^#{Mx[:meta_o]}(title|creator|author|translator|translated_by|illustrator|illustrated_by|prepared_by|digitized_by|description|publisher|contributor|date\.created|date\.issued|date\.available|date\.valid|date\.modified|date|type|format|rights|identifier|source|language)#{Mx[:meta_c]}/i
m=$1
ocnm+=1; ocnd+=1
@dc << case para
when /^#{Mx[:meta_o]}title#{Mx[:meta_c]}/
"\n#{@tr.dc_title}: #{Mx[:fa_underscore_o]}#{@md.dc_title}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}(?:creator|author)#{Mx[:meta_c]}/
"\n#{@tr.creator}: #{Mx[:fa_underscore_o]}#{@md.dc_creator}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /#{Mx[:meta_o]}(?:translator|translated_by)#{Mx[:meta_c]}/
"\n#{@tr.translator}: #{Mx[:fa_underscore_o]}#{@md.translator}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}(?:illustrator|illustrated_by)#{Mx[:meta_c]}/
"\n#{@tr.illustrator}: #{Mx[:fa_underscore_o]}#{@md.illustrator}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}prepared_by#{Mx[:meta_c]}/
"\n#{@tr.prepared_by}: #{Mx[:fa_underscore_o]}#{@md.prepared_by}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}digitized_by#{Mx[:meta_c]}/
"\n#{@tr.digitized_by}: #{Mx[:fa_underscore_o]}#{@md.digitized_by}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}description#{Mx[:meta_c]}/
"\n#{@tr.description}: #{Mx[:fa_underscore_o]}#{@md.dc_description}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}subject#{Mx[:meta_c]}/
"\n#{@tr.subject}: #{Mx[:fa_underscore_o]}#{@md.dc_subject}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}abstract#{Mx[:meta_c]}/
"\n#{@tr.abstract}: #{Mx[:fa_underscore_o]}#{@md.dc_abstract}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}publisher#{Mx[:meta_c]}/
"\n#{@tr.publisher}: #{Mx[:fa_underscore_o]}#{@md.dc_publisher}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}contributor#{Mx[:meta_c]}/
"\n#{@tr.contributor}: #{Mx[:fa_underscore_o]}#{@md.dc_contributor}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}date.created#{Mx[:meta_c]}/
"\n#{@tr.date_created}: #{Mx[:fa_underscore_o]}#{@md.dc_date_created}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}date.issued#{Mx[:meta_c]}/
"\n#{@tr.date_issued}: #{Mx[:fa_underscore_o]}#{@md.dc_date_issued}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}date.available#{Mx[:meta_c]}/
"\n#{@tr.date_available}: #{Mx[:fa_underscore_o]}#{@md.dc_date_available}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}date.modified#{Mx[:meta_c]}/
"\n#{@tr.date_modified}: #{Mx[:fa_underscore_o]}#{@md.dc_date_modified}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}date.valid#{Mx[:meta_c]}/
"\n#{@tr.date_valid}: #{Mx[:fa_underscore_o]}#{@md.dc_date_valid}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}date#{Mx[:meta_c]}/
"\n#{@tr.date}: #{Mx[:fa_underscore_o]}#{@md.dc_date}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}type#{Mx[:meta_c]}/
"\n#{@tr.type}: #{Mx[:fa_underscore_o]}#{@md.dc_type}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}format#{Mx[:meta_c]}/
"\n#{@tr.format}: #{Mx[:fa_underscore_o]}#{@md.dc_format}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}rights#{Mx[:meta_c]}/
"\n#{@tr.rights}: #{Mx[:fa_underscore_o]}#{@md.dc_rights}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}identifier#{Mx[:meta_c]}/
"\n#{@tr.identifier}: #{Mx[:fa_underscore_o]}#{@md.dc_identifier}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}source#{Mx[:meta_c]}/
"\n#{@tr.source}: #{Mx[:fa_underscore_o]}#{@md.dc_source}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}language#{Mx[:meta_c]}/
"\n#{@tr.language}: #{Mx[:fa_underscore_o]}#{@md.dc_language}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}language.original#{Mx[:meta_c]}/
"\n#{@tr.language_original}: #{Mx[:fa_underscore_o]}#{@md.language_original}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}relation#{Mx[:meta_c]}/
"\n#{@tr.relation}: #{Mx[:fa_underscore_o]}#{@md.dc_relation}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}coverage#{Mx[:meta_c]}/
"\n#{@tr.coverage}: #{Mx[:fa_underscore_o]}#{@md.dc_coverage}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}keywords#{Mx[:meta_c]}/
"\n#{@tr.keywords}: #{Mx[:fa_underscore_o]}#{@md.keywords}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}comments#{Mx[:meta_c]}/
"\n#{@tr.comments}: #{Mx[:fa_underscore_o]}#{@md.comments}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
# NOTE(review): the cls_loc branch prints @cls_dewey (not set anywhere in this method) and
# @md.cls_dewey - looks like a copy of the cls_dewey branch; confirm the intended fields
when /^#{Mx[:meta_o]}cls_loc#{Mx[:meta_c]}/
"\n#{@cls_dewey}: #{Mx[:fa_underscore_o]}#{@md.cls_dewey}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}cls_dewey#{Mx[:meta_c]}/
"\n#{@tr.cls_dewey}: #{Mx[:fa_underscore_o]}#{@md.cls_dewey}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}(?:cls_gutenberg|cls_pg)#{Mx[:meta_c]}/
"\n#{@tr.cls_gutenberg}: #{Mx[:fa_underscore_o]}#{@md.cls_gutenberg}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
#"\n#{@tr.cls_gutenberg}: #{@md.cls_pg} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}cls_isbn#{Mx[:meta_c]}/
"\n#{@tr.cls_isbn}: #{Mx[:fa_underscore_o]}#{@md.cls_isbn}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}prefix(?:_a)?#{Mx[:meta_c]}/
"\n#{@tr.prefix_a}: #{Mx[:fa_underscore_o]}#{@md.prefix_a}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
when /^#{Mx[:meta_o]}prefix_b#{Mx[:meta_c]}/
"\n#{@tr.prefix_b}: #{Mx[:fa_underscore_o]}#{@md.prefix_b}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"
# fallback (e.g. a tag name in different letter case): the value is taken from the paragraph text itself
else para.gsub(/^#{Mx[:meta_o]}(#{m})\s+(.+)/m,"\n#{m.capitalize}: #{Mx[:fa_underscore_o]}\\2#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}")
end
end
end
ocnm+=1; ocnv+=1
head_no_rc="#{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}"
ocnm+=1; ocnv+=1
head_no_rc_tag="#{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}"
# second pass: revision control details, only for documents that ask for them with "0~cvs+" / "0~rcs+"
data.each do |para|
case para
when /^0~(?:cvs|rcs)\+\s+/ #note the + sign to turn on use of cvs id
ocnm+=1; ocnv+=1
@cvs << "#{@tr.sc_number}: #{Mx[:fa_underscore_o]}#{@md.sc_number}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}"
ocnm+=1; ocnv+=1
@cvs << "#{@tr.sc_date}: #{Mx[:fa_underscore_o]}#{@md.sc_date}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}"
ocnm+=1; ocnv+=1
@cvs << "CVS/RCS time: #{Mx[:fa_underscore_o]}#{@md.sc_time}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}"
ocnm+=1; ocnv+=1
# the two branches below are deliberately empty (no output for these lines)
when /^0~cvs[+\s]/ #enable pattern above instead if you wish the default to be to include cvs tags from all documents KEEP
when /^0~cvs\s+/ #enable pattern above instead if you wish the default to be to include cvs tags from all documents KEEP
end
end
if true #default version information
ocnm+=1; ocnv+=1
if @md.sc_filename \
and @md.sc_filename.length > 3
@rc << "#{@tr.sourcefile}: #{Mx[:fa_underscore_o]}#{@md.sc_filename}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}"
else @rc << "#{@tr.sourcefile}: #{Mx[:fa_underscore_o]}#{@md.fns}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}"
end
ocnm+=1; ocnv+=1
if @md.file_encoding \
and @md.file_encoding.length > 3 #translate
@rc << "Filetype: #{Mx[:fa_underscore_o]}#{@md.file_encoding}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}"
end
ocnm+=1; ocnv+=1
if @md.dgst #change. enable by default
@rc << "#{@tr.sourcefile_digest}, #{@md.dgst[0]} #{Mx[:fa_underscore_o]}#{@md.dgst[1]}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}"
ocnm+=1; ocnv+=1
end
if @md.dgst_skin #change. enable by default
@rc << "Skin_Digest: #{@md.dgst_skin[0]} #{Mx[:fa_underscore_o]}#{@md.dgst_skin[1]}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}"
ocnm+=1; ocnv+=1
end
@rc << "#{Mx[:fa_bold_o]}Generated#{Mx[:fa_bold_c]} #{head_no_rc}" if @rc.length > 0
@rc << "#{@tr.last_generated}: #{Mx[:fa_underscore_o]}#{Time.now}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}"
ocnm+=1; ocnv+=1
if @md.sisu_version[:version]
@rc << "#{@tr.sisu_version}: #{Mx[:fa_underscore_o]}#{@md.sisu_version[:project]}#{Mx[:fa_underscore_c]} #{Mx[:fa_underscore_o]}#{@md.sisu_version[:version]}#{Mx[:fa_underscore_c]} of #{@md.sisu_version[:date_stamp]} (#{@md.sisu_version[:date]}) #{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}"
ocnm+=1; ocnv+=1
end
@rc << "#{@tr.ruby_version}: #{Mx[:fa_underscore_o]} #{@md.ruby_version}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}"
end
# assembly: page break, headings, manifest link, Dublin Core entries, then version information
meta << Mx[:br_page_new] << header1 << header4
meta << "Document Manifest @\n #{base_html}/#{@md.fn[:manifest]} #{Mx[:id_o]}~0;m#{ocnm};m#{ocnm}#{Mx[:id_c]}"
meta << "#{Mx[:fa_bold_o]}Dublin Core#{Mx[:fa_bold_c]} (DC) #{head_no_dc}" if @dc.length > 0
meta << "#{Mx[:fa_italics_o]}DC tags included with this document are provided here.#{Mx[:fa_italics_c]} #{head_no_dc_tag}" if @dc.length > 0
@dc.each { |x| meta << x }
meta << "#{Mx[:fa_bold_o]}Version Information#{Mx[:fa_bold_c]} #{head_no_rc}" if @rc.length > 0
if @cvs.length > 0
meta << "#{Mx[:fa_italics_o]}Note the version information provided here, is specific to the host site.#{Mx[:fa_italics_c]} #{head_no_rc_tag}"
@cvs.each { |x| meta << x }
end
@rc.each { |x| meta << x }
## ENDNOTE RELATED endnote related
meta << "\n#{Mx[:br_eof]}"
meta=object_digest(meta)
end
# Appends a digest stamp to one object: Mx[:id_o] + digest of the markup-free text + ':' +
# digest of the whole (space-cleaned) text + Mx[:id_c]. Where the object carries numbered
# endnotes, each endnote is given its own digest first (see endnote_digest).
# para:: the object text
# hash_class:: digest class answering hexdigest, e.g. Digest::MD5
# Returns the stamped text.
def stamped(para,hash_class)
  @tuned=[]
  para=strip_clean_extra_spaces(para)
  digest_all=hash_class.hexdigest(para) #length==32 or 64
  digest_strip=hash_class.hexdigest(strip_clean_of_markup(para))
  body=para
  numbered_notes=/#{Mx[:en_a_o]}[\d*+]+\s+.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}[*+]\d+\s+.+?#{Mx[:en_b_c]}/m
  if para !~/#{Mx[:fa_o]}code#{Mx[:fa_c]}/ and para =~ numbered_notes
    para.gsub!(/\s*(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/m,' \1') #watch
    # text up to and including each endnote, one piece per endnote ...
    pieces=para.scan(/.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m)
    # ... followed by whatever comes after the last endnote, up to the object identifier
    tail=''
    if para =~/(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+([\s\S]+)/m
      tail=/(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+(.*?#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]})/m.match(para)[1]
    end
    pieces << tail
    body=[endnote_digest(pieces)].join(' ')
  end
  @tuned << body + Mx[:id_o] + digest_strip + ':' + digest_all + Mx[:id_c] unless para.nil?
  @tuned.join
end
# Stamps every object that carries an object identifier with its digests (see stamped);
# other objects pass through. Digest type follows @env.digest.type: sha256, otherwise md5.
# digests taken:
# 1. clean/stripped text without any markup, paragraph, headings etc. without endnotes
# 2. endnotes clean/stripped text digest only (there may be several endnotes within a paragraph)
# 3. whole object, text with markup and any endnotes
# [digests should not include other digests]
# data:: array of object strings (nil entries are removed, strings stripped in place)
# Returns the array of processed objects.
def object_digest(data)
  require 'digest/md5'
  require 'digest/sha2'
  @tuned_file=[]
  data.compact!
  data.each do |para|
    para.strip!
    identified=(para=~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}/)
    if identified && para !~/#{Rx[:meta]}/ #test should not be necessary remove
      hash_class=(@env.digest.type =~/sha256/) ? Digest::SHA256 : Digest::MD5
      @tuned_file << stamped(para,hash_class)
    else
      @tuned_file << para unless para.nil?
    end
  end
  @tuned_file=@tuned_file.flatten
end
# Adds, inside every endnote, the digest of that endnote's markup-free text (wrapped in
# Mx[:id_o] ... Mx[:id_c]) just before the endnote closes.
# data:: pieces of one object, each holding at most one endnote
# Returns the pieces joined into one string.
def endnote_digest(data)
  opener=/#{Mx[:en_a_o]}|#{Mx[:en_b_o]}/
  pieces=data.map do |en_plus|
    if not opener === en_plus
      en_plus
    elsif en_plus =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/
      parts=/(.*?)(#{Mx[:en_a_o]}|#{Mx[:en_b_o]})(.+?)(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/m.match(en_plus)
      para_txt,en_open,en_txt,en_close=parts[1],parts[2],parts[3],parts[4]
      stripped_en=strip_clean_of_markup(en_txt)
      digester=(@env.digest.type =~/sha256/) ? Digest::SHA256 : Digest::MD5
      para_txt + en_open + en_txt + Mx[:id_o] + digester.hexdigest(stripped_en) + Mx[:id_c] + en_close
    else
      # an opener without its close: reported, and the piece contributes nothing to the result
      puts "Error Exception - problem encountered with:\n#{en_plus}" #arbitrary exception, tidy up
    end
  end
  pieces.join
end
# Tidies spacing in a copy of s (dal output tuned): no space before , . ; : ? (skipped when s
# holds endnotes), runs of spaces reduced to one, and no space between a closing bold/italics
# mark plus apostrophe and a following "s ".
# Returns the tidied copy; s itself is not changed.
def strip_clean_extra_spaces(s)
  text=s.dup
  has_endnotes=(text =~/#{Mx[:en_a_o]}|#{Mx[:en_b_o]}/)
  text=text.gsub(/[ ]+([,.;:?](?:$|\s))/,'\1') unless has_endnotes
  [ [/ [ ]+/,' '], [/^ [ ]+/,''], [/ [ ]+$/,''] ].each do |pattern,replacement|
    text=text.gsub(pattern,replacement)
  end
  # applied twice, as before
  2.times do
    text=text.gsub(/((?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})')[ ]+(s )/,'\1\2')
  end
  text
end
# Reduces a copy of s to plain text for digesting (rules meant to be the same as the db clean):
# bold/italic tags, object identifiers, digest stamps, heading level marks and numbered
# endnotes are removed; deletions become DELETED(text), superscript numbers become [n],
# non-breaking space marks become a space, image links become " [image] ".
# Returns the cleaned, stripped copy; s itself is not changed.
def strip_clean_of_markup(s) # used for digest, define rules, make same as in db clean
  s=s.dup
  s=s.gsub(/(?:<\/?[ib]>|#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}|#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}|^#{Mx[:lv_o]}[1-6]:\S+?#{Mx[:lv_c]}|#{Mx[:en_a_o]}\d+\s.+?#{Mx[:en_a_c]})/m,'') # markup and endnotes removed
  #% same as db clean -->
  # both patterns need their opening tag: without it the match starts at arbitrary preceding
  # text and the opening tag is left behind in the result
  s=s.gsub(/<del>(.+?)<\/del>/,'DELETED(\1)') # deletions
  s=s.gsub(/<sup>(\d+)<\/sup>/,'[\1]')
  s=s.gsub(/(?:#{Mx[:nbsp]})+/,' ')
  s=s.gsub(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:https?|file|ftp)\\\:\S+ /,' [image] ') # else image names found in search
  s=s.gsub(/\s\s+/,' ')
  s=s.strip
end
end
end
__END__
dal output, rules to simplify parsing
nodes === objects === paragraphs === text blocks separated by \n\n
dal output:
:verse :group and :code have -end
:table is not used