From 6811ac91f21a434fc7d967c11e1b20f33918c6ea Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Mon, 19 Mar 2012 22:07:29 -0400 Subject: v3: 3.2 branch is main (v3dv --> v3); dev (v3dv) branch directories removed * v3dv (3.2) "merged" into v3 (previously 3.1) (& removed) * conf/sisu/v3dv --> conf/sisu/v3 * data/sisu/v3dv --> data/sisu/v3 * lib/sisu/v3dv --> lib/sisu/v3 * bin/sisu* (v3dv references changed to v3) * (--dev modifier (superfluous for the time being) runs main v3 branch) --- lib/sisu/v3/concordance.rb | 127 +++++++++++++++++++++++++-------------------- 1 file changed, 70 insertions(+), 57 deletions(-) (limited to 'lib/sisu/v3/concordance.rb') diff --git a/lib/sisu/v3/concordance.rb b/lib/sisu/v3/concordance.rb index 96101585..3d8ab38a 100644 --- a/lib/sisu/v3/concordance.rb +++ b/lib/sisu/v3/concordance.rb @@ -70,7 +70,7 @@ module SiSU_Concordance class Source def initialize(opt) @opt=opt - @particulars=SiSU_Particulars::Combined_singleton.instance.get_all(opt) + @particulars=SiSU_Particulars::CombinedSingleton.instance.get_all(opt) end def read begin @@ -95,23 +95,23 @@ module SiSU_Concordance SiSU_Screen::Ansi.new(@md.opt.cmd,"wc (word count) is off, concordance will be processed for all files including those over the max set size of: #{wordmax} words").warn unless @md.opt.cmd =~/q/ SiSU_Concordance::Source::Words.new(@particulars).songsheet end - rescue; SiSU_Errors::Info_error.new($!,$@,@md.opt.cmd,@md.fns).error + rescue; SiSU_Errors::InfoError.new($!,$@,@md.opt.cmd,@md.fns).error ensure end end private - class Doc_title + class DocTitle include SiSU_Viz #revisit, both requires (html & shared_xml) needed for stand alone operation (sisu -w [filename]) require_relative 'shared_xml' # shared_xml.rb require_relative 'html' # html.rb def initialize(particulars) @particulars,@md=particulars,particulars.md - @data=SiSU_HTML::Source::Html_environment.new(particulars).tuned_file_instructions - @file=SiSU_Env::SiSU_file.new(@md) - @vz=SiSU_Env::Get_init.instance.skin + @data=SiSU_HTML::Source::HTML_Environment.new(particulars).tuned_file_instructions + @file=SiSU_Env::FileOp.new(@md) + @vz=SiSU_Env::GetInit.instance.skin txt_path=%{#{@md.dir_out}} - SiSU_Env::Info_skin.new(@md).select + SiSU_Env::InfoSkin.new(@md).select @fnb=@md.fnb @lex_button=%{SiSU home -->} @doc_details =<' + head_banner=SiSU_HTML_Format::HeadToc.new(@md) + minitoc=SiSU_HTML_MiniToc::TocMini.new(@md,@data).songsheet.join("\n") + stylesheet=SiSU_Style::CSS_HeadInfo.new(@md).stylesheet + make=SiSU_Env::ProcessingSettings.new(@md) + if make.build.manifest_minitoc? + toc='
' + minitoc + '
' + div_class='content' + else + toc='' + div_class='content0' + end + top_band=if make.build.html_top_band? + head_banner.concordance_navigation_band('pdf') + else '' + end < @@ -136,14 +147,14 @@ WOK - #{@css.html_seg} + #{stylesheet.css_head_seg} #{@vz.js_head} #{@vz.js_top} - #{head_banner.concordance_navigation_band('pdf')} + #{top_band} #{toc} -
+
#{@doc_details}

Word index links are to html versions of the text the segmented version followed by the scroll (single document) version.
[For segmented text references [T1], [T2] or [T3] appearing without a link, indicates that the word appears in a title (or subtitle) of the text (that is identifiable by the appended object citation number).]

(The word listing/index is Case sensitive: Capitalized words appear before lower case)

@@ -185,11 +196,11 @@ WOK def initialize(particulars) @particulars=particulars begin - @vz=SiSU_Env::Get_init.instance.skin + @vz=SiSU_Env::GetInit.instance.skin @env,@md,@dal_array=particulars.env,particulars.md,particulars.dal_array - @file=SiSU_Env::SiSU_file.new(@md) + @file=SiSU_Env::FileOp.new(@md) @freq=Hash.new(0) - @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern + @dp=@@dp ||=SiSU_Env::InfoEnv.new.digest.pattern @rxp_lv1=/^#{Mx[:lv_o]}1:/ #fix @rxp_lv # Mx[:lv_o] @rxp_lv2=/^#{Mx[:lv_o]}2:/ #fix @rxp_lv # Mx[:lv_o] @rxp_lv3=/^#{Mx[:lv_o]}3:/ #fix @rxp_lv # Mx[:lv_o] @@ -203,7 +214,7 @@ WOK @alph=SiSU_i18n::Alphabet.new(@md.opt.lng).hash_arrays @alphlst=SiSU_i18n::Alphabet.new(@md.opt.lng).hash_strings @rgx_scanlist=%r{#{Mx[:fa_italics_o]}[#{@alphlst[:l]}#{@alphlst[:u]}0-9"\s]{2,12}#{Mx[:fa_italics_c]}|#{Mx[:fa_bold_o]}[#{@alphlst[:l]}#{@alphlst[:u]}0-9"\s]{2,12}#{Mx[:fa_bold_c]}|(?:https?|file)://\S+|<\S+?>|[#{@alphlst[:l]}#{@alphlst[:u]}]+|\w+}mi - rescue; SiSU_Errors::Info_error.new($!,$@,@md.opt.cmd,@md.fns).error + rescue; SiSU_Errors::InfoError.new($!,$@,@md.opt.cmd,@md.fns).error end end def songsheet @@ -211,7 +222,7 @@ WOK FileUtils::mkdir_p(@file.output_path.html_concordance.dir) unless FileTest.directory?(@file.output_path.html_concordance.dir) @file_concordance=File.open(@file.place_file.html_concordance.dir,'w') map_para - rescue; SiSU_Errors::Info_error.new($!,$@,@md.opt.cmd,@md.fns).error + rescue; SiSU_Errors::InfoError.new($!,$@,@md.opt.cmd,@md.fns).error ensure @file_concordance.close end @@ -238,61 +249,63 @@ WOK @dal_array.each do |line| if defined? line.ocn \ and line.ocn.to_s =~/\d/ - if line.is =~/heading/ \ - and line.ln==4 + if (line.is ==:heading \ + || line.is ==:heading_insert) \ + && line.ln==4 @seg=line.name end ocn=line.ocn.to_s if ocn =~/\d+/ \ and ocn !~/^0$/ - line.obj.gsub!(/#{@rxp_excluded1}/,' ') + line.obj=line.obj.gsub(/#{@rxp_excluded1}/,' ') line.obj=line.obj.split(@rgx_splitlist).join(' ') #%take in word or other match for word in line.obj.scan(@rgx_scanlist) #%take in word or other match if word =~ /^([#{@alphlst[:l]}])/ firstletter=$1 flu=firstletter.tr(@alphlst[:l],@alphlst[:u]) - word.gsub!(/^#{firstletter}/,flu ) + word=word.gsub(/^#{firstletter}/,flu ) end - word.gsub!(/#{Mx[:lnk_o]}|#{Mx[:lnk_c]}|#{Mx[:url_o]}|#{Mx[:url_c]}/,'') - word.gsub!(/#{Mx[:fa_o]}\S+?#{Mx[:fa_o_c]}/,'') - word.gsub!(/#{Mx[:fa_c_o]}\S+?#{Mx[:fa_c]}/,'') - word.gsub!(/#{Mx[:gl_o]}#[a-z]+#{Mx[:gl_c]}/,'') - word.gsub!(/#{Mx[:gl_o]}#[0-9]+#{Mx[:gl_c]}/,'') - word.gsub!(/[0-9a-f]{10,}/,' ') if word =~/[0-9]/ - word.gsub!(/#{Mx[:br_line]}/,' ') - word.gsub!(/^ +/,'') - word.gsub!(/^\S$/,'') + word=word.gsub(/#{Mx[:lnk_o]}|#{Mx[:lnk_c]}|#{Mx[:url_o]}|#{Mx[:url_c]}/,''). + gsub(/#{Mx[:fa_o]}\S+?#{Mx[:fa_o_c]}/,''). + gsub(/#{Mx[:fa_c_o]}\S+?#{Mx[:fa_c]}/,''). + gsub(/#{Mx[:gl_o]}#[a-z]+#{Mx[:gl_c]}/,''). + gsub(/#{Mx[:gl_o]}#[0-9]+#{Mx[:gl_c]}/,'') + word=word.gsub(/[0-9a-f]{10,}/,' ') if word =~/[0-9]/ + word=word.gsub(/#{Mx[:br_line]}/,' '). + gsub(/^ +/,''). + gsub(/^\S$/,'') word=nil if word.empty? word=nil if word =~@rxp_excluded0 #watch word=nil if word =~/^\S$/ if word - word.gsub!(/#{Mx[:br_nl]}|#{Mx[:br_line]}/,' ') - word.gsub!(/#{Mx[:fa_o]}[a-z]{1,7}#{Mx[:fa_o_c]}|#{Mx[:fa_c_o]}[a-z]{1,7}#{Mx[:fa_c]}/,'') - word.gsub!(/#{Mx[:en_a_o]}(?:\d|[*+])*|#{Mx[:en_b_o]}(?:\d|[*+])*|#{Mx[:en_a_c]}|#{Mx[:en_b_c]}/mi,'') - word.gsub!(/#{Mx[:fa_o]}\S+?#{Mx[:fa_o_c]}/,''); word.gsub!(/#{Mx[:fa_c_o]}\S+?#{Mx[:fa_c]}/,'') - word.gsub!(/<\/?\S+?>/,'') - word.gsub!(/^\@+/,'') - word.strip! - word.gsub!(/#{Mx[:tc_p]}.+/,'') - word.gsub!(/[\.,;:"]$/,'') - word.gsub!(/["]/,'') - word.gsub!(/^\s*[\(]/,'') - word.gsub!(/[\(]\s*$/,'') - word.gsub!(/^(?:See|e\.?g\.?).+/,'') - word.gsub!(/^\s*[.,;:]\s*/,'') - word.strip! - word.gsub!(/^\(?[a-zA-Z]\)$/,'') - word.gsub!(/^\d+(st|nd|rd|th)$/,'') - word.gsub!(/^(\d+\.?)+$/, '') - word.gsub!(/#{Mx[:mk_o]}|#{Mx[:mk_c]}/,'') - word.gsub!(/:name#\S+/,'') - word.gsub!(/^\S$/,'') + word=word.gsub(/#{Mx[:br_nl]}|#{Mx[:br_line]}/,' '). + gsub(/#{Mx[:fa_o]}[a-z]{1,7}#{Mx[:fa_o_c]}|#{Mx[:fa_c_o]}[a-z]{1,7}#{Mx[:fa_c]}/,''). + gsub(/#{Mx[:en_a_o]}(?:\d|[*+])*|#{Mx[:en_b_o]}(?:\d|[*+])*|#{Mx[:en_a_c]}|#{Mx[:en_b_c]}/mi,''). + gsub(/#{Mx[:fa_o]}\S+?#{Mx[:fa_o_c]}/,''). + gsub(/#{Mx[:fa_c_o]}\S+?#{Mx[:fa_c]}/,''). + gsub(/<\/?\S+?>/,''). + gsub(/^\@+/,''). + strip. + gsub(/#{Mx[:tc_p]}.+/,''). + gsub(/[\.,;:"]$/,''). + gsub(/["]/,''). + gsub(/^\s*[\(]/,''). + gsub(/[\(]\s*$/,''). + gsub(/^(?:See|e\.?g\.?).+/,''). + gsub(/^\s*[.,;:]\s*/,''). + strip. + gsub(/^\(?[a-zA-Z]\)$/,''). + gsub(/^\d+(st|nd|rd|th)$/,''). + gsub(/^(\d+\.?)+$/, ''). + gsub(/#{Mx[:mk_o]}|#{Mx[:mk_c]}/,''). + gsub(/:name#\S+/,''). + gsub(/^\S$/,'') word=nil if word =~/^\S$/ word=nil if word =~/^\s*$/ #watch if word unless word =~/[A-Z][A-Z]/ \ or word =~/\w+\s\w+/ - word.capitalize! + word=word.capitalize end @freq[word] +=1 @word_map[word] ||= [] @@ -315,9 +328,9 @@ WOK end scr='Full Text scroll: doc#  ' seg='' - head=SiSU_Concordance::Source::Doc_title.new(@particulars).create - head.gsub!(/#{Xx[:html_relative2]}/m,@file.path_rel_links.html_seg_2) - head.gsub!(/#{Xx[:html_relative1]}/m,@file.path_rel_links.html_seg_1) + head=SiSU_Concordance::Source::DocTitle.new(@particulars).create + head=head.gsub(/#{Xx[:html_relative2]}/m,@file.path_rel_links.html_seg_2). + gsub(/#{Xx[:html_relative1]}/m,@file.path_rel_links.html_seg_1) @file_concordance << head @file_concordance << '

' alph=@alph[:u] -- cgit v1.2.3