#{@md.dc_title}
#{@md.dc_creator}
TOC - table of contents for individual articles
Full text (with indexed table of contents)
Word index links are to html versions of the text the segmented version followed by the scroll (single document) version.
[For segmented text references [T1], [T2] or [T3] appearing without a link, indicates that the word appears in a title (or subtitle) of the text (that is identifiable by the appended object citation number).]
#{@vz.banner_home_button_only} | #{@env.widget_static.search_form} |
(The word listing/index is Case sensitive: Capitalized words appear before lower case)
word (number of occurences)
linked references to word within document
[if number of occurences exceed number of references - word occurs more than once in at least one reference. Footnote/endnotes are either assigned to the paragraph from which they are referenced or ignored, so it is relevant to check the footnotes referenced from within a paragraph as well.]
(After the page is fully loaded) you can jump directly to a word by appending a hash (#) and the word to the url for this text, (do not forget that words are case sensitive, and may be listed twice (starting with and without an upper case letter)), #your_word # [ http://[web host]/#@fnb/concordance.html#your_word ]
WOK end end class Word @@word_previous='' def initialize(word,freq) @word,@freq=word,freq end def html w=if @word.capitalize==@@word_previous %{\n#@word
(#@freq)
\n\t} else n=@word.strip.gsub(/\s+/,'_') #also need to convert extended character set to html %{\n
(#@freq)
\n\t }
end
# Record the capitalised form of this word in the class variable; the next
# call to html compares its own capitalised word against it to pick a branch.
@@word_previous=@word.capitalize
# w is the string selected by the if/else above and is the return value of html.
w
end
end
class Words
require "#{SiSU_lib}/defaults"
require "#{SiSU_lib}/param"
include SiSU_Viz
include SiSU_Param
require "#{SiSU_lib}/html_format_css"
include SiSU_HTML_Format
require "#{SiSU_lib}/sysenv"
include SiSU_Screen
@@dp=nil
# Sets up the state used to build a concordance for one document.
#
# md:: document metadata object; this method reads md.fns and md.fnb, and
#      md.cmd when reporting an error.
#
# Any StandardError raised during set-up is passed to
# SiSU_Errors::Info_error and reported through its #error method.
def initialize(md)
  @vz = SiSU_Env::Get_init.instance.skin
  @md = md
  @env = SiSU_Env::Info_env.new(@md.fns)
  @path = "#{@env.path.output}/#{@md.fnb}"
  @dal_array = SiSU_DAL::Source.new(@md).get # dal file drawn here
  @freq = Hash.new(0) # counters start at 0 for keys not yet seen
  # The digest pattern is looked up once and then reused via the class variable.
  @dp = @@dp ||= SiSU_Env::Info_env.new.digest.pattern
  # Trailing object marker; capture group 1 is the leading number.
  @rxp_to = /<~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
  # line start markers removed, ('^1~') for exceptions \n\n4{{{
  @rxp_lv1 = /^1~/
  @rxp_lv2 = /^2~/
  @rxp_lv3 = /^3~/
  @rxp_seg = /^4~(.+?)\s+/
  @rxp_title = /^0~title\s*(.+?)\s*$/
  @rxp_t1 = /^T1/
  @rxp_t2 = /^T2/
  @rxp_t3 = /^T3/
  @rxp_excluded1=/(?:https?|ftp):\/\/\S+/mi
  @rxp_excluded0=/^(?:to\d+|\d+| |EOF|thumb_\S+|snap_\S+|_+|-+|ii+|iv|vi+|ix|xi+|xiv|xv|xvi+|xix|xx|\S+?_\S+|[\d_]+\w\S+|[\w\d]{1,2}|\d{1,3}\w?|#@dp|[0-9a-f]{16,64}|\d{2,3}x\d{2,3}|\S{0,2}sha\d|\S{0,3}\d{4}w\d\d|\b\w\d+|\d_all\b|e\.?g\.?)$/mi #this regex causes and cures a stack dump in ruby 1.9 !!!
  @rgx_scanlist=%r{(?:(?:[a-zA-Z0-9"\s]){2,7}|(?:[a-zA-Z0-9"\s]){2,7}|http://\S+)|code\{.+?\}code|<\S+?>|\w+}mi
rescue StandardError => e
  SiSU_Errors::Info_error.new(e, e.backtrace, @md.cmd, @md.fns).error
end
# Creates the output directory if needed, opens the concordance file for
# writing, and runs map_para to produce its content.
#
# Reads @path and @md (fn[:concordance], and cmd/fns when reporting an error);
# sets @file_index_all to the opened file.
#
# Any StandardError is passed to SiSU_Errors::Info_error and reported, not
# re-raised. The file handle is closed on every exit path.
def songsheet
  # NOTE(review): File.mkpath is not part of core Ruby's File class; presumably
  # it is provided elsewhere in the project or by a legacy library - confirm.
  File.mkpath(@path) unless FileTest.directory?(@path)
  @file_index_all = File.open("#@path/#{@md.fn[:concordance]}", 'w')
  map_para
rescue
  SiSU_Errors::Info_error.new($!, $@, @md.cmd, @md.fns).error
ensure
  # If creating the directory or opening the file failed there is no handle to
  # close; calling close on nil here would raise a second error after the
  # first one has already been reported.
  @file_index_all.close if @file_index_all && !@file_index_all.closed?
end
protected
# Formats one word location for the scroll (single document) listing.
#
# wordlocation:: location text; also stored in @wordlocation
# show::         not used by this method
#
# Returns the location followed by "; ".
def location_scroll(wordlocation, show)
  @wordlocation = wordlocation
  "#{@wordlocation}; "
end
# Formats one word location for the segmented listing.
#
# wordlocation:: location text, either "name#number" or a title marker
#                matched by @rxp_t1, @rxp_t2 or @rxp_t3; may be nil
# show::         text displayed for this reference
#
# Stores both arguments in @wordlocation and @show, sets @sfx, and (when
# wordlocation is not nil) stores in @word_location_seg the location with
# "name#number" rewritten using @md.fnl[:pre], [:mid], [:post] and @sfx.
#
# Returns "[H]<show>, " for title markers and "<show>, " otherwise.
def location_seg(wordlocation, show)
  @wordlocation = wordlocation
  @show = show
  @sfx = '.html' # used for hardlinks, previous setting @sfx='', web server takes care of suffix
  unless wordlocation.nil?
    # \\1 is the part before '#', \\2 the digits after it
    template = "#{@md.fnl[:pre]}\\1#{@md.fnl[:mid]}#{@sfx}#{@md.fnl[:post]}#\\2"
    @word_location_seg = wordlocation.gsub(/(.+?)\#(\d+)/, template)
  end
  case @wordlocation
  when @rxp_t1, @rxp_t2, @rxp_t3 then "[H]#{@show}, "
  else "#{@show}, "
  end
end
def map_para
@seg,toy=nil,nil
@word_map={}
@dal_array.each do |line|
if line !~/<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>$/ # lines to ignore: # are added but not part of authors substantive text; 0 are mostly machine generated
if line =~@rxp_seg; @seg=line[@rxp_seg,1]
end
if line =~@rxp_to; toy=line[@rxp_to,1]
end
if toy =~/\d+/ and toy !~/^0$/
for word in line.scan(@rgx_scanlist) #%take in word or other match
word=nil if word =~@rxp_excluded0 #watch
word=nil if word =~@rxp_excluded1 #watch
if word
#word.gsub!(/<\/?[i]>/,'')
word.gsub!(/<\/?\S+?>/,'')
word.strip!
word.gsub!(/[\.,;:"]$/,'')
word.gsub!(/["]/,'')
word.gsub!(/^\s*[\(]/,'')
word.gsub!(/[\(]\s*$/,'')
word.gsub!(/^(?:See|e\.?g\.?).+/,'')
word.gsub!(/^\s*[.,;:]\s*/,'')
word.strip!
word.gsub!(/^\d+(st|nd|rd|th)$/,'')
word.gsub!(/^(\d+\.?)+$/, '')
word=nil if word =~/^\s*$/ #watch
if word
word.capitalize! unless word =~/[A-Z][A-Z]/ or word =~/\w+\s\w+/
#word.downcase! if word =~lesser
#word.capitalize! if word =~greater
@freq[word] +=1
@word_map[word] ||= []
if line !~@rxp_lv1 and line !~@rxp_lv2 and line !~@rxp_lv3
@word_map[word] << location_seg("#@seg\##{toy}",toy)
else
@word_map[word] << case line
when @rxp_lv1; location_seg('T1',toy)
when @rxp_lv2; location_seg('T2',toy)
when @rxp_lv3; location_seg('T3',toy)
end
end
end
end
end
end
end
end
scr=' scroll: doc# '
seg=''
@file_index_all << SiSU_Concordance::Source::Doc_title.new('toc',@md).create
for word in @freq.keys.sort! {|a,b| a.downcase<=>b.downcase}
keyword=SiSU_Concordance::Source::Word.new(word,@freq[word]).html
if keyword !~ @rxp_excluded0
if @word_map[word][0] =~ /\d+/
wm=[]
@file_index_all << %{#{keyword}#{seg}#{@word_map[word].uniq.compact.join}}
end
@file_index_all << '