# coding: utf-8
=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search

 * Author: Ralph Amissah

 * Copyright: (C) 1997 - 2009 Ralph Amissah All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
   <http://www.fsf.org/licensing/licenses/gpl.html>
   <http://www.gnu.org/copyleft/gpl.html>

   <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:
   <http://www.jus.uio.no/sisu>
   <http://www.sisudoc.org>

 * Download:
   <http://www.jus.uio.no/sisu/SiSU/download.html>

 * Ralph Amissah
   <ralph@amissah.com>
   <ralph.amissah@gmail.com>

 ** Description: digests (md5 or sha256) for document objects and their endnotes

=end

module SiSU_hash
  require 'digest/md5'
  require 'digest/sha2'
  # Adds content digests to document objects. Every object that carries an
  # object id marker gets a stamp
  #   Mx[:id_o] + digest of stripped text + ':' + digest of whole object + Mx[:id_c]
  # appended, and every endnote inside it gets a digest of its own stripped
  # text inserted just before the endnote's closing marker.
  # Digests are SHA256 when env.digest.type matches /sha256/, MD5 otherwise.
  class Object_digest
    # a hex digest as written into a stamp: SHA256 (64 digits) or MD5 (32 digits)
    DIGEST_HEX='(?:[0-9a-f]{64}|[0-9a-f]{32})'
    # md:   document metadata object, only consulted (md.fns) when env is not given
    # data: array of document objects (strings); nil entries are tolerated
    # env:  environment object, must answer digest.type
    def initialize(md,data,env=nil)
      @md,@data,@env=md,data,env
      @env ||=SiSU_Env::Info_env.new(@md.fns)
    end
    # Returns the data array with a digest stamp appended to every object that
    # carries an object id marker; other entries are passed through stripped.
    # NOTE: works on the array and strings given to new (compact!, strip!), so
    # the caller's data is modified in place.
    def object_digest
    # 1. clean/stripped text without any markup, paragraph, headings etc. without endnotes
    # 2. endnotes clean/stripped text digest only (there may be several endnotes within a paragraph)
    # 3. whole object, text with markup and any endnotes, (question: with or without the endnote digests??? presumption better without, [however may be easier to check with?])
    # [digests should not include other digests]
      data=@data
      data.compact!
      ocn_rx=/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}/
      meta_rx=/#{Rx[:meta]}/
      hash_class=nil                                                             # looked up only once an object needs stamping
      @tuned_file=data.map do |para|
        para.strip!
        if para =~ocn_rx \
        and para !~meta_rx #test should not be necessary remove
          hash_class ||=digest_class
          stamped(para,hash_class)
        else para
        end
      end
      @tuned_file=@tuned_file.flatten
    end
    # Takes an array of text fragments, inserts into every complete endnote a
    # digest of the endnote's stripped text (placed before the closing marker)
    # and returns the fragments joined into one string.
    # A fragment with an endnote opening marker but no complete endnote is
    # reported and kept unchanged, so that no text is lost.
    def endnote_digest(data)
      hash_class=nil
      data.map do |en_plus|
        case en_plus
        when /#{Mx[:en_a_o]}|#{Mx[:en_b_o]}/
          # /m as in the scan that produces the fragments: endnote text may span lines
          if en_plus =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m
            # gsub rather than a single match: a fragment may hold more than one
            # endnote (e.g. a type b endnote followed by a type a endnote)
            en_plus.gsub(/(#{Mx[:en_a_o]}|#{Mx[:en_b_o]})(.+?)(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/m) do
              en_open,en_txt,en_close=$1,$2,$3
              hash_class ||=digest_class
              digest_en_strip=hash_class.hexdigest(strip_clean_of_markup(en_txt))
              en_open + en_txt + Mx[:id_o] + digest_en_strip + Mx[:id_c] + en_close
            end
          else
            puts "Error Exception - problem encountered with:\n#{en_plus}" #arbitrary exception, tidy up
            en_plus
          end
        else en_plus
        end
      end.join
    end
    # Returns para (after strip_clean_extra_spaces) followed by its digest stamp.
    # Both digests are taken before endnote digests are inserted. Objects
    # marked as code are stamped without any endnote processing.
    # hash_class: digest class answering hexdigest, e.g. Digest::MD5
    def stamped(para,hash_class)
      para=strip_clean_extra_spaces(para)
      digest_all=hash_class.hexdigest(para) #length==32 or 64
      digest_strip=hash_class.hexdigest(strip_clean_of_markup(para))
      stamp=Mx[:id_o] + digest_strip + ':' + digest_all + Mx[:id_c]
      if para !~/#{Mx[:fa_o]}code#{Mx[:fa_c]}/ &&
         para =~/#{Mx[:en_a_o]}[\d*+]+\s+.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}[*+]\d+\s+.+?#{Mx[:en_b_c]}/m
        para.gsub!(/\s*(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/m,' \1') #watch
        para_plus_en=para.scan(/.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m)
        para_plus_en << endnote_tail(para,para_plus_en)
        endnote_digest(para_plus_en) + stamp
      else para + stamp
      end
    end
    # Returns a tidied copy of s: spaces before , . ; : ? are dropped (only when
    # s has no endnote opening marker), runs of spaces are collapsed to one and
    # spaces between a bold/italics closing marker followed by ' and "s " are removed.
    def strip_clean_extra_spaces(s)                                              # dal output tuned
      s=s.dup
      s=s.gsub(/[ ]+([,.;:?](?:$|\s))/,'\1') unless s =~/#{Mx[:en_a_o]}|#{Mx[:en_b_o]}/
      s=s.gsub(/ [ ]+/,' ')
      # NOTE(review): the next two patterns need two or more spaces, which the
      # line above has already collapsed, so they cannot match; single leading or
      # trailing spaces are left in place - confirm whether that is intended
      s=s.gsub(/^ [ ]+/,'')
      s=s.gsub(/ [ ]+$/,'')
      s.gsub(/((?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})')[ ]+(s )/,'\1\2')
    end
    # Returns a copy of s reduced to the text that is digested: <i>/<b> tags,
    # object id markers, digest stamps, level markers and numbered (type a)
    # endnotes are removed, deletions and superscript numbers are rewritten,
    # image links are replaced by " [image] " and whitespace is collapsed.
    def strip_clean_of_markup(s)                                                 # used for digest, define rules, make same as in db clean
      #consider: <\/?[ib]>|<(?:\/ )?br>|<del>(.+?)<\/del>
      s=s.dup
      s=s.gsub(/(?:<\/?[ib]>|#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}|#{Mx[:id_o]}#{DIGEST_HEX}:#{DIGEST_HEX}#{Mx[:id_c]}|^#{Mx[:lv_o]}[1-6]:\S+?#{Mx[:lv_c]}|#{Mx[:en_a_o]}\d+\s.+?#{Mx[:en_a_c]})/m,'') # markup and endnotes removed
                                                                                 #% same as db clean -->
      s=s.gsub(/<del>(.+?)<\/del>/,'DELETED(\1)')                             # deletions
      s=s.gsub(/<sup>(\d+)<\/sup>/,'[\1]')
      s=s.gsub(/(?:#{Mx[:nbsp]})+/,' ')
      #s=s.gsub(/<!T[h]?¡.+?!>/,"[TABLE]\n")                                   # tables
      #s=s.gsub(/<!¡¡\d+(.+?)!>/,'\1')                                         # tables
      #s=s.gsub(/¡¡\d+¡/,' ')                                                  # tables
      #s=s.gsub(/¡/,' ')                                                       # tables tidy later
      #s=s.gsub(/<.+?>/,'')
      s=s.gsub(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:https?|file|ftp)\\\:\S+ /,' [image] ')             # else image names found in search
      s=s.gsub(/\s\s+/,' ')
      s.strip
    end
    private
    # digest class selected by the environment: SHA256 if asked for, else MD5
    def digest_class
      (@env.digest.type =~/sha256/) ? Digest::SHA256 : Digest::MD5
    end
    # Text following the last endnote of para, up to and including the object
    # id marker; '' when nothing follows the endnotes. When no object id marker
    # follows, whatever comes after the scanned endnote chunks is returned.
    def endnote_tail(para,chunks)
      return '' unless para =~/(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+([\s\S]+)/m
      tail=/(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+(.*?#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]})/m.match(para)
      tail ? tail[1] : para[chunks.join.length..-1].to_s
    end
  end
end
__END__