diff options
Diffstat (limited to 'org/harvest.org')
-rw-r--r-- | org/harvest.org | 1454 |
1 files changed, 1454 insertions, 0 deletions
diff --git a/org/harvest.org b/org/harvest.org new file mode 100644 index 00000000..ccc55123 --- /dev/null +++ b/org/harvest.org @@ -0,0 +1,1454 @@ +-*- mode: org -*- +#+TITLE: sisu harvest +#+DESCRIPTION: documents - structuring, various output representations & search +#+FILETAGS: :sisu:harvest: +#+AUTHOR: Ralph Amissah +#+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]] +#+COPYRIGHT: Copyright (C) 2015 - 2021 Ralph Amissah +#+LANGUAGE: en +#+STARTUP: content hideblocks hidestars noindent entitiespretty +#+OPTIONS: H:3 num:nil toc:t \n:nil @:t ::t |:t ^:nil _:nil -:t f:t *:t <:t +#+PROPERTY: header-args :exports code +#+PROPERTY: header-args+ :noweb yes +#+PROPERTY: header-args+ :eval no +#+PROPERTY: header-args+ :results no +#+PROPERTY: header-args+ :cache no +#+PROPERTY: header-args+ :padline no + +* harvest +** html_harvest.rb + +#+BEGIN_SRC ruby :tangle "../lib/sisu/html_harvest.rb" +# <<sisu_document_header>> +module SiSU_Harvest + @@the_idx_topics,@@the_idx_authors={},{} + class Source + require_relative 'hub_options' # hub_options.rb + require_relative 'html_harvest_topics' # html_harvest_topics.rb + require_relative 'html_harvest_authors' # html_harvest_authors.rb + require_relative 'se' # se.rb + include SiSU_Env + def initialize(opt) + @opt=opt + @env=SiSU_Env::InfoEnv.new + end + def read + begin + harvest_pth=@env.path.webserv + '/' + @opt.base_stub + FileUtils::mkdir_p(harvest_pth) unless FileTest.directory?(harvest_pth) + cases(@opt,@env) + rescue + ensure + SiSU_Env::CreateSite.new(@opt).cp_css + end + end + def help + puts <<WOK + harvest --harvest extracts document index metadata + +WOK + end + def css(opt) + require_relative 'css' # css.rb + css=SiSU_Style::CSS.new + fn_css=SiSU_Env::CSS_Default.new + style=File.new("#{@env.path.pwd}/#{fn_css.harvest}",'w') + style << css.harvest + style.close + end + def cases(opt,env) + case opt.selections.str.inspect + when/--harvest/i + css(opt) if @opt.act[:maintenance][:set]==:on + 
SiSU_HarvestAuthors::Songsheet.new(opt,env).songsheet + SiSU_HarvestTopics::Songsheet.new(opt,env).songsheet + if @opt.act[:rsync][:set]==:on + require_relative 'remote' # remote.rb + SiSU_Remote::Put.new(opt).rsync_harvest + end + else + help + end + end + end +end +#+END_SRC + +** topics +*** html_harvest_topics.rb + +#+BEGIN_SRC ruby :tangle "../lib/sisu/html_harvest_topics.rb" +# <<sisu_document_header>> +module SiSU_HarvestTopics + require_relative 'html_harvest_author_format' # html_harvest_author_format.rb + require_relative 'html_parts' # html_parts.rb + class Songsheet + @@the_idx_topics={} + def initialize(opt,env) + @opt,@env=opt,env + @file_list=opt.files + end + def songsheet + idx_array={} + @opt.f_pths.each do |y| + lang_hash_file_array={} + name=y[:f] + filename=y[:pth] + '/' + y[:f] + File.open(filename,'r') do |file| + file.each_line("\n\n") do |line| + if line =~/^@(?:title|creator|classify):(?:\s|$)/m + lang_hash_file_array[y[:lng_is]] ||= [] + lang_hash_file_array[y[:lng_is]] << line + elsif line =~/^@\S+?:(?:\s|$)/m \ + or line =~/^(?:\s*\n|\s*$|%+ )/ + else break + end + end + end + lang_hash_file_array.each_pair do |lang,a| + idx_array[lang] ||=[] + idx_array=SiSU_HarvestTopics::Harvest.new( + @opt, + @env, + a, + filename, + name, + idx_array, + lang + ).extract_harvest + end + end + the_hash=SiSU_HarvestTopics::Index.new( + @opt, + @env, + idx_array, + @@the_idx_topics + ).song + SiSU_HarvestTopics::OutputIndex.new( + @opt, + the_hash + ).html_print.html_songsheet + end + end + class Mix + def spaces + Ax[:spaces] + end + end + class Harvest + def initialize(opt,env,data,filename,name,idx_array,lang) + @opt, @env,@data,@filename,@name,@idx_array,@lang= + opt,env, data, filename, name, idx_array, lang + end + def extract_harvest + data, filename, name, idx_array, lang= + @data,@filename,@name,@idx_array,@lang + @idx_lst=@title=@subtitle=@fulltitle=@author=@author_format=nil + rgx={} + rgx[:author]=/^@creator:(?:[ ]+|.+?:author:[ 
]+)(.+?)(?:\||\n)/m + rgx[:title]=/^@title:[ ]+(.+)/ + rgx[:subtitle]=/^@title:.+?:subtitle:[ ]+(.+?)\n/m + rgx[:idx]=/^@classify:.+?:topic_register:[ ]+(.+?)(?:\n\n|\n\s+:\S|\n%)/m + data.each do |para| + if para=~ rgx[:idx] + @idx_list=(rgx[:idx].match(para)[1]).split(/\s*\n\s*/).join + end + if para=~ rgx[:title] + @title=rgx[:title].match(para)[1] + end + if para=~ rgx[:subtitle] + @subtitle=rgx[:subtitle].match(para)[1] + end + if para=~ rgx[:author] + @author_format=rgx[:author].match(para)[1] + end + break if @title && @subtitle && @author && @idx_lst + end + @fulltitle=@subtitle ? (@title + ' - ' + @subtitle) : @title + if @title \ + and @author_format \ + and @idx_list + creator=SiSU_FormatAuthor::Author.new(@author_format.strip).author_details + @authors,@authorship=creator[:authors],creator[:authorship] + file=if name=~/~[a-z]{2,3}\.ss[mt]$/ + name.sub(/~[a-z]{2,3}\.ss[mt]$/,'') + else + name.sub(/\.ss[mt]$/,'') + end + page=if @env.output_dir_structure.by? == :language + "#{lang}/sisu_manifest.html" + else + "sisu_manifest.#{lang}.html" + end + idx_array[lang] <<=if @idx_list =~/;/ + g=@idx_list.scan(/[^;]+/) + g.each.map do |i| + i=i.strip + { + filename: filename, + file: file, + rough_idx: i, + title: @fulltitle, + author: creator, + page: page, + lang: lang + } + end + else { + filename: filename, + file: file, + rough_idx: @idx_list, + title: @fulltitle, + author: creator, + page: page, + lang: lang, + } + end + else + if (@opt.act[:verbose_plus][:set]==:on \ + || @opt.act[:maintenance][:set]==:on) + p "missing required field in #{@filename} - [title]: <#{@title}>; [author]: <#{@author_format}>; [idx]: <#{@idx_list}>" + end + end + idx_array[lang]=idx_array[lang].flatten + idx_array + end + end + class Index < Mix + def initialize(opt,env,idx_array,the_idx) + @opt, @env,@idx_array,@the_idx= + opt,env, idx_array, the_idx + @@the_idx_topics=@the_idx + end + def song + the_idx=construct_book_topic_keys + construct_book_topic_hash(the_idx) + end + def 
capital(txt) + txt_a=txt.scan(/\S+/) + tx='' + txt_a.each do |t| + tx += t[0].chr.capitalize + t[1,txt.length] + ' ' + end + tx.strip + end + def capital_(txt) + txt[0].chr.capitalize + txt[1,txt.length] + end + def contents(idx,lang) + names='' + idx[:author][:last_first_format_a].each do |n| + s=n.sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_') + names=if @env.output_dir_structure.by? == :language + names += %{<a href="authors.html##{s}">#{n}</a>, } + else + names += %{<a href="authors.#{lang}.html##{s}">#{n}</a>, } + end + end + { + filename: idx[:filename], + file: idx[:file], + author: names, + title: idx[:title], + page: idx[:page] + } + end + def capital_(txt) + txt[0].chr.capitalize + txt[1,txt.length] + end + def key_create(c,alt) + x=nil + x=if c.length==6 + c[0].to_s + '|' + + capital(c[1][0].to_s) + '|' + + capital(c[2][0].to_s) + '|' + + capital(c[3][0].to_s) + '|' + + capital(alt.to_s) + elsif c.length==5 + c[0].to_s + '|' + + capital(c[1][0].to_s) + '|' + + capital(c[2][0].to_s) + '|' + + capital(alt.to_s) + elsif c.length==4 + c[0].to_s + '|' + + capital(c[1][0].to_s) + '|' + + capital(alt.to_s) + elsif c.length==3 + c[0].to_s + '|' + + capital(alt.to_s) + end + end + def construct_book_topic_keys + idx_array=@idx_array + @idx_a=[] + @the_a=[] + idx_array.each_pair do |lang,idx_arr| + @@the_idx_topics[lang] ||= {} + idx_arr.each do |idx| + if idx[:rough_idx] + idx_lst=idx[:rough_idx].scan(/[^:]+/) + else + puts "no topic register in: << #{idx[:filename]} >>" + next + end + idx_a=[] + idx_lst.each do |c| + idx_a << c.scan(/[^|\n]+/m) + end + idx_a << contents(idx,lang) + @idx_a << [lang] + idx_a + end + end + @idx_a.each do |c| + if c.length > 1 \ + and c.is_a?(Array) + if c[2].is_a?(Hash) + c[1].each do |alt| + v=key_create(c,alt) + @the_a << [v, c[2]] if v + end + end + end + if c.length > 2 \ + and c.is_a?(Array) + if c[3].is_a?(Hash) + c[2].each do |alt| + v=key_create(c,alt) + @the_a << [v, c[3]] if v + end + end + end + if c.length > 3 \ + and 
c.is_a?(Array) + if c[4].is_a?(Hash) + c[3].each do |alt| + v=key_create(c,alt) + @the_a << [v, c[4]] if v + end + end + end + if c.length > 4 \ + and c.is_a?(Array) + if c[5].is_a?(Hash) + c[4].each do |alt| + v=key_create(c,alt) + @the_a << [v, c[5]] if v + end + end + end + if c.length > 5 \ + and c.is_a?(Array) + if c[6].is_a?(Hash) + c[5].each do |alt| + v=key_create(c,alt) + @the_a << [v, c[6]] if v + end + end + end + end + @the_a.sort_by { |x| x[0] } #; y.each {|z| puts z} + end + def construct_book_topic_hash(t) + @the_h={} + t.each do |z| + x=z[0].scan(/[^|]+/) + depth=x.length + extract=(depth-1) + k=case extract + when 4 + { x[0] => { x[1] => { x[2] => { x[3] => { x[4] => z[1] } } } } } + when 3 + { x[0] => { x[1] => { x[2] => { x[3] => z[1] } } } } + when 2 + { x[0] => { x[1] => { x[2] => z[1] } } } + when 1 + { x[0] => { x[1] => z[1] } } + when 0 + { x[0] => z[1] } + end + if extract >= 0 + k.each_pair do |x0,y0| + if extract == 0 + @the_h[x0] ||={ md: [] } + @the_h[x0][:md] << y0 + else + @the_h[x0] ||={} + end + #puts spaces*0 + x0 + if extract >= 1 + y0.each_pair do |x1,y1| + if extract == 1 + @the_h[x0][x1] ||={ md: [] } + @the_h[x0][x1][:md] << y1 + else + @the_h[x0][x1] ||={} + end + #puts spaces*1 + x1 + if extract >= 2 + y1.each_pair do |x2,y2| + if extract == 2 + @the_h[x0][x1][x2] ||={ md: [] } + @the_h[x0][x1][x2][:md] << y2 + else + @the_h[x0][x1][x2] ||={} + end + #puts spaces*2 + x2 + if extract >= 3 + y2.each_pair do |x3,y3| + if extract == 3 + @the_h[x0][x1][x2][x3] ||={ md: [] } + @the_h[x0][x1][x2][x3][:md] << y3 + else + @the_h[x0][x1][x2][x3] ||={} + end + #puts spaces*3 + x3 + if extract == 4 + y3.each_pair do |x4,y4| + if extract == 4 + @the_h[x0][x1][x2][x3][x4] ||={ md: [] } + @the_h[x0][x1][x2][x3][x4][:md] << y4 + else + @the_h[x0][x1][x2][x3][x4] ||={} + end + #puts spaces*4 + x4 + if extract == 5 + y4.each_pair do |x5,y5| + if extract == 5 + @the_h[x0][x1][x2][x3][x4][x5] ||={ md: [] } + @the_h[x0][x1][x2][x3][x4][x5][:md] 
<< y5 + end + #puts spaces*5 + x5 + end + end + end + end + end + end + end + end + end + end + end + end + end + #@the_h.each_pair { |x,y| p x; p y } + @the_h + end + def traverse_base + @the_h.each_pair do |x0,y0| + puts spaces*0 + x0 if x0.is_a?(String) + if y0.is_a?(Hash) + y0.each_pair do |x1,y1| + puts spaces*1 + x1 if x1.is_a?(String) + if y1.is_a?(Hash) + y1.each_pair do |x2,y2| + puts spaces*2 + x2 if x2.is_a?(String) + if y2.is_a?(Hash) + y2.each_pair do |x3,y3| + puts spaces*3 + x3 if x3.is_a?(String) + if y3.is_a?(Hash) + y3.each_pair do |x4,y4| + puts spaces*4 + x4 if x4.is_a?(String) + if y4.is_a?(Hash) + y4.each_pair do |x5,y5| + puts spaces*5 + x5 if x5.is_a?(String) + end + end + end + end + end + end + end + end + end + end + end + end + def traverse + @the_h.each_pair do |x0,y0| + puts spaces*0 + x0 if x0.is_a?(String) + if y0.is_a?(Hash) + if y0.has_key?(:md) + y0[:md].each { |x| puts spaces*5 + x[:title] } + end + y0.each_pair do |x1,y1| + puts spaces*1 + x1 if x1.is_a?(String) + if y1.is_a?(Hash) + if y1.has_key?(:md) + y1[:md].each { |x| puts spaces*5 + x[:title] } + end + y1.each_pair do |x2,y2| + puts spaces*2 + x2 if x2.is_a?(String) + if y2.is_a?(Hash) + if y2.has_key?(:md) + y2[:md].each { |x| puts spaces*5 + x[:title] } + end + y2.each_pair do |x3,y3| + puts spaces*3 + x3 if x3.is_a?(String) + if y3.is_a?(Hash) + if y3.has_key?(:md) + y3[:md].each { |x| puts spaces*5 + x[:title] } + end + y3.each_pair do |x4,y4| + puts spaces*4 + x4 if x4.is_a?(String) + if y4.is_a?(Hash) + if y4.has_key?(:md) + y4[:md].each { |x| puts spaces*5 + x[:title] } + end + y4.each_pair do |x5,y5| + puts spaces*5 + x4 if x4.is_a?(String) + end + end + end + end + end + end + end + end + end + end + end + end + end + class OutputIndex < Mix + require_relative 'i18n' # i18n.rb + def initialize(opt,the_idx) + @opt,@the_idx=opt,the_idx + @env=SiSU_Env::InfoEnv.new + @rc=SiSU_Env::GetInit.new.sisu_yaml.rc + @alphabet_list=%W[9 A B C D E F G H I J K L M N O P Q R S T 
U V W X Y Z] + @alph=@alphabet_list.dup + @letter=@alph.shift + end + def html_file_open + @the_idx.keys.each do |lng| + @output ||={} + @output[lng] ||={} + harvest_pth,file='','' + if @env.output_dir_structure.by? == :language + harvest_pth=@env.path.webserv + '/' \ + + @opt.base_stub + '/' \ + + lng + '/' \ + + 'manifest' + file=harvest_pth + '/' + 'topics.html' + elsif @env.output_dir_structure.by? == :filetype + harvest_pth=@env.path.webserv + '/' \ + + @opt.base_stub + '/' \ + + 'manifest' + file=harvest_pth + '/' + 'topics.' + lng + '.html' + elsif @env.output_dir_structure.by? == :filename + harvest_pth=@env.path.webserv + '/' \ + + @opt.base_stub + file=harvest_pth + '/' + 'topics.' + lng + '.html' + end + FileUtils::mkdir_p(harvest_pth) \ + unless FileTest.directory?(harvest_pth) + fileinfo=(@opt.act[:verbose][:set]==:on \ + || @opt.act[:verbose_plus][:set]==:on \ + || @opt.act[:urls_selected][:set]==:on \ + || @opt.act[:maintenance][:set]==:on) \ + ? ("file://#{file}") + : '' + SiSU_Screen::Ansi.new( + @opt.act[:color_state][:set], + "harvest topics(#{@opt.files.length} files)", + fileinfo + ).dark_grey_title_hi unless @opt.act[:quiet][:set]==:on + @output[lng][:html]=File.new(file,'w') + if @opt.act[:maintenance][:set]==:on + @output[lng][:html_mnt]=File.new("#{@env.path.pwd}/topics.html",'w') + end + end + end + def html_file_close + @the_idx.keys.each do |lng| + @output[lng][:html].close + @output[lng][:html_mnt].close if @output[lng][:html_mnt].is_a?(File) + end + end + def html_print + def html_songsheet + #traverse + html_file_open + html_head + html_alph + html_body_traverse + html_tail + html_file_close + end + def html_body_traverse + @the_idx.each_pair do |x0,y0| + lng=x0 + if x0.is_a?(String) + #do_string_name(lng,'lev0',x0) + #puts spaces*0 + x0 + end + if y0.is_a?(Hash) + if y0.has_key?(:md) + y0[:md].each do |x| + #do_hash(lng,attrib,x) #lv==0 ? 
+ #puts spaces*5 + x[:title] + end + end + y0.each_pair do |x1,y1| + if x1.is_a?(String) + do_string_name(lng,'lev0',x1) + #puts spaces*1 + x1 + end + if y1.is_a?(Hash) + if y1.has_key?(:md) + y1[:md].each do |x| + do_hash(lng,0,x) + #puts spaces*5 + x[:title] + end + end + y1.each_pair do |x2,y2| + if x2.is_a?(String) + do_string(lng,'lev1',x2) + #puts spaces*2 + x2 + end + if y2.is_a?(Hash) + if y2.has_key?(:md) + y2[:md].each do |x| + do_hash(lng,1,x) + #puts spaces*5 + x[:title] + end + end + y2.each_pair do |x3,y3| + if x3.is_a?(String) + do_string(lng,'lev2',x3) + #puts spaces*3 + x3 + end + if y3.is_a?(Hash) + if y3.has_key?(:md) + y3[:md].each do |x| + do_hash(lng,2,x) + #puts spaces*5 + x[:title] + end + end + y3.each_pair do |x4,y4| + if x4.is_a?(String) + do_string(lng,'lev3',x4) + #puts spaces*4 + x4 + end + if y4.is_a?(Hash) + if y4.has_key?(:md) + y4[:md].each do |x| + do_hash(lng,3,x) + #puts spaces*5 + x[:title] + end + end + y4.each_pair do |x5,y5| + if x5.is_a?(String) + do_string(lng,'lev4',x5) + #puts spaces*5 + x5 + end + end + end + end + end + end + end + end + end + end + end + end + end + def html_head_adjust(lng,type='') + css_path,authors='','' + if @env.output_dir_structure.by? == :language + css_path=(type !~/maintenance/) \ + ? '../../_sisu/css/harvest.css' + : 'harvest.css' + authors='authors.html' + elsif @env.output_dir_structure.by? == :filetype + css_path=(type !~/maintenance/) \ + ? '../_sisu/css/harvest.css' + : 'harvest.css' + authors="authors.#{lng}.html" + elsif @env.output_dir_structure.by? == :filename + css_path=(type !~/maintenance/) \ + ? './_sisu/css/harvest.css' + : 'harvest.css' + authors="authors.#{lng}.html" + end + ln=SiSU_i18n::Languages.new.language.list + harvest_languages='' + @the_idx.keys.each do |lg| + if @env.output_dir_structure.by? == :language + harvest_pth="../../#{lg}/manifest" + file=harvest_pth + '/' + 'topics.html' + elsif @env.output_dir_structure.by? == :filetype + harvest_pth='.' 
+ file=harvest_pth + '/' + 'topics.' + lg + '.html' + elsif @env.output_dir_structure.by? == :filename + harvest_pth='.' + file=harvest_pth + '/topics.' + lg + '.html' + end + l=ln[lg][:t] + harvest_languages += + %{<a href="#{file}">#{l}</a> } + end + sv=SiSU_Env::InfoVersion.instance.get_version + if @env.output_dir_structure.by? == :language + home_pth='../..' + output_structure_by='(output organised by language & filetype)' + elsif @env.output_dir_structure.by? == :filetype + home_pth='..' + output_structure_by='(output organised by filetype)' + elsif @env.output_dir_structure.by? == :filename + home_pth='.' + output_structure_by='(output organised by filename)' + else + home_pth='.' + output_structure_by='(output organised by ?)' + end + <<WOK +<!DOCTYPE html> +<html> +<head> +<meta charset="utf-8"> +<title>SiSU Metadata Harvest - Topics</title> +<meta http-equiv="Content-Type" content="text/html;charset=utf-8" /> +<meta name="dc.title" content= "SiSU metadata harvest, Topics - SiSU information Structuring Universe, Structured information Serialised Units" /> +<meta name="dc.subject" content= "document structuring, ebook, publishing, PDF, LaTeX, XML, ODF, SQL, postgresql, sqlite, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, granular search, digital library" /> +<meta name="generator" content="#{sv.project} #{sv.version} of #{sv.date_stamp} (n*x and Ruby!)" /> +<link rel="generator" href="http://www.jus.uio.no/sisu/SiSU" /> +<link href="#{css_path}" rel="stylesheet"> +<link rel="shortcut icon" href="../_sisu/image/rb7.ico" /> +</head> +<body lang="en" xml:lang="en"> +<a name="top" id="top"></a> +<a name="up" id="up"></a> +<a name="start" id="start"></a> +<h1>SiSU Metadata Harvest - Topics #{output_structure_by}</h1> +<p>[<a href="#{home_pth}/index.html"> HOME </a>] also see <a href="#{authors}">SiSU Metadata Harvest - Authors</a></p> +<p>#{@env.widget_static.search_form}</p> +<hr /> +<p 
class="tiny">#{harvest_languages}</p> +<hr /> +WOK + end + def html_head + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] \ + << html_head_adjust(lng,'maintenance') \ + if @opt.act[:maintenance][:set]==:on + @output[lng][:html] << html_head_adjust(lng) + end + end + def html_alph + a=[] + a << '<p>' + @alph.each do |x| + a << ((x =~/[0-9]/) \ + ? '' + : %{<a href="##{x}">#{x}</a>, }) + end + a=a.join + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] << a \ + if @opt.act[:maintenance][:set]==:on + @output[lng][:html] << a + end + end + def html_tail + a =<<WOK +<hr /> +<a name="bottom" id="bottom"></a> +<a name="down" id="down"></a> +<a name="end" id="end"></a> +<a name="finish" id="finish"></a> +<a name="stop" id="stop"></a> +<a name="credits"></a> +#{SiSU_Proj_HTML::Bits.new.credits_sisu} +</body> +</html> +WOK + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] << a \ + if @output[lng][:html_mnt].is_a?(File) + @output[lng][:html] << a + end + end + def do_html(lng,html) + @output[lng][:html] << html + end + def do_html_maintenance(lng,html) + @output[lng][:html_mnt] << html \ + if @output[lng][:html_mnt].is_a?(File) + end + def do_string(lng,attrib,string) + html=%{<p class="#{attrib}">#{string}</p>} + do_html(lng,html) + do_html_maintenance(lng,html) \ + if @output[lng][:html_mnt].is_a?(File) + end + def do_string_default(lng,attrib,string) + html=%{<p class="#{attrib}">#{string}</p>} + do_html(lng,html) + end + def do_string_maintenance(lng,attrib,string) + html=%{<p class="#{attrib}">#{string}</p>} + do_html_maintenance(lng,html) \ + if @output[lng][:html_mnt].is_a?(File) + end + def do_string_name(lng,attrib,string) + f=/^(\S)/.match(string)[1] + if @lng != lng + @alph=@alphabet_list.dup + @letter=@alph.shift + @lng = lng + end + if @letter < f + while @letter < f + if @alph.length > 0 + @letter=@alph.shift + if @output[lng][:html_mnt].is_a?(File) + @output[lng][:html_mnt] \ + << %{\n<p class="letter"><a name="#{@letter}">#{@letter}</a></p><p 
class="book_index_lev1"><a name="#{@letter.downcase}"></a></p>} + end + @output[lng][:html] \ + << %{\n<p class="letter"><a name="#{@letter}">#{@letter}</a></p><p class="book_index_lev1"><a name="#{@letter.downcase}"></a></p>} + else break + end + end + end + name=string.strip.gsub(/\s+/,'_') + html=%{<p class="#{attrib}"><a name="#{name}">#{string}</a></p>} + do_html(lng,html) + do_html_maintenance(lng,html) \ + if @output[lng][:html_mnt].is_a?(File) + end + def do_array(lng,lv,array) + lv+=1 + array.each do |b| + do_case(lng,lv,b) + end + end + def do_hash_md(lng,attrib,hash) + lang_code_insert=SiSU_Env::FilenameLanguageCodeInsert.new(@opt,lng).language_code_insert + manifest_at=if @env.output_dir_structure.by? == :language + hash[:file] + Sfx[:html] + elsif @env.output_dir_structure.by? == :filetype + hash[:file] + lang_code_insert + Sfx[:html] + elsif @env.output_dir_structure.by? == :filename + "./#{hash[:file]}/#{hash[:page]}" + else '' #error + end + html=%{<a href="#{manifest_at}">#{hash[:title]}</a> - #{hash[:author]}} + do_string_default(lng,attrib,html) + end + def do_hash_md_maintenance(lng,attrib,hash) + if @output[lng][:html_mnt].is_a?(File) #should not be run for presentation output + html=%{[<a href="#{hash[:file]}.sst">src</a>] <a href="file://#{@env.path.output}/#{hash[:file]}/#{hash[:page]}">#{hash[:title]}</a> - #{hash[:author]}} + do_string_maintenance(lng,attrib,html) + end + end + def do_hash(lng,lv,hash) + lv+=1 + key=[] + hash.each_key do |m| + if m == :md + do_case(lng,lv,hash[m]) + elsif m != :title \ + and m != :author \ + and m != :filename \ + and m != :file \ + and m != :rough_idx \ + and m != :page + key << m + elsif m == :title + do_hash_md(lng,'work',hash) + do_hash_md_maintenance(lng,'work',hash) + end + end + if key.length > 0 + key.sort.each do |m| + attrib="lev#{lv}" + lv==0 ? 
do_string_name(lng,attrib,m) : do_string(lng,attrib,m) + do_case(lng,lv,hash[m]) + end + end + end + def do_case(lng,lv,a) + case a + when String + attrib="lev#{lv}" + if a=~/S/ + lv==0 ? do_string_name(lng,attrib,a) : do_string(lng,attrib,a) + end + when Array + do_array(lng,lv,a) + when Hash + do_hash(lng,lv,a) + end + end + #def html_body + # the_idx=@the_idx + # the_idx.each_pair do |lng,lng_array| + # lng_array.sort.each do |a| + # do_case(lng,-1,a) + # end + # end + #end + self + end + end +end +__END__ +terms -|_ t{tl1} -|_ {fa}[fa]{filenames and other details} + | |_ {tl2} -|_ {fa}[fa]{filenames and other details} + | | |_{tl3} -|_ {fa}[fa]{filenames and other details} + | | | |_{tl4} - {fa}[fa]{filenames and other details} + | | | | + | | | |_{tl4a} - {fa}[fa]{filenames and other details} + | | | | + | | | |_{tl4b} - {fa}[fa]{filenames and other details} + | | | | + | | | |_ ... + | | | + | | |_{tl3a} - {fa}[fa]{filenames and other details} + | | + | |_{tl2a} - {fa}[fa]{filenames and other details} + | + |_ t{tl1a} -|_ {fa}[fa]{filenames and other details} + |_ ... 
+#+END_SRC + +** authors +*** html_harvest_authors.rb + +#+BEGIN_SRC ruby :tangle "../lib/sisu/html_harvest_authors.rb" +# <<sisu_document_header>> +module SiSU_HarvestAuthors + require_relative 'html_harvest_author_format' # html_harvest_author_format.rb + require_relative 'html_parts' # html_parts.rb + class Songsheet + @@the_idx_authors={} + def initialize(opt,env) + @opt,@env=opt,env + @file_list=opt.files + end + def songsheet + idx_array={} + @opt.f_pths.each do |y| + lang_hash_file_array={} + name=y[:f] + filename=y[:pth] + '/' + y[:f] + File.open(filename,'r') do |file| + file.each_line("\n\n") do |line| + if line =~/^@(?:title|creator|date):(?:\s|$)/m + lang_hash_file_array[y[:lng_is]] ||= [] + lang_hash_file_array[y[:lng_is]] << line + elsif line =~/^@\S+?:(?:\s|$)/m \ + or line =~/^(?:\s*\n|%+ )/ + else break + end + end + end + lang_hash_file_array.each_pair do |lang,a| + idx_array[lang] ||= [] + idx_array=SiSU_HarvestAuthors::Harvest.new( + @opt, + @env, + a, + filename, + name, + idx_array, + lang + ).extract_harvest + end + end + the_idx=SiSU_HarvestAuthors::Index.new( + idx_array, + @@the_idx_authors + ).construct_book_author_index + SiSU_HarvestAuthors::OutputIndex.new( + @opt, + the_idx + ).html_print.html_songsheet + end + end + class Harvest + def initialize(opt,env,data,filename,name,idx_array,lang) + @opt, @env,@data,@filename,@name,@idx_array,@lang= + opt,env, data, filename, name, idx_array, lang + end + def extract_harvest + data, filename, name, idx_array, lang = + @data,@filename,@name,@idx_array,@lang + @title=@subtitle=@fulltitle=@author=@author_format=@date=nil + @authors=[] + rgx={} + rgx[:author]=/^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m + rgx[:title]=/^@title:[ ]+(.+)/ + rgx[:subtitle]=/^@title:.+?:subtitle:[ ]+(.+?)\n/m + rgx[:date]=/^@date:(?:[ ]+|.+?:published:[ ]+)(\d{4})/m + data.each do |para| + if para=~ rgx[:title] + @title=rgx[:title].match(para)[1] + end + if para=~ rgx[:subtitle] + 
@subtitle=rgx[:subtitle].match(para)[1] + end + if para=~ rgx[:author] + @author_format=rgx[:author].match(para)[1] + end + if para=~ rgx[:date] + @date=rgx[:date].match(para)[1] + end + break if @title && @subtitle && @author && @date + end + @fulltitle=@subtitle \ + ? (@title + ' - ' + @subtitle) + : @title + if @title \ + and @author_format + creator=SiSU_FormatAuthor::Author.new(@author_format.strip).author_details + @authors,@authorship=creator[:authors],creator[:authorship] + file=if name=~/~[a-z]{2,3}\.ss[mt]$/ + name.sub(/~[a-z]{2,3}\.ss[mt]$/,'') + else + name.sub(/\.ss[mt]$/,'') + end + page=if @env.output_dir_structure.by? == :language + "#{lang}/sisu_manifest.html" + else + "sisu_manifest.#{lang}.html" + end + idx_array[lang] <<= { + filename: filename, + file: file, + date: @date, + title: @fulltitle, + author: creator, + page: page, + lang: lang + } + else + #p "missing author field: #{@filename} title: #{@title}; author: #{@author_format}" + end + idx_array[lang]=idx_array[lang].flatten + idx_array + end + end + class Index + def initialize(idx_array,the_idx) + @idx_array,@the_idx=idx_array,the_idx + @@the_idx_authors=@the_idx + end + def capital(txt) + txt[0].chr.capitalize + txt[1,txt.length] + end + def construct_book_author_index + idx_array=@idx_array + idx_array.each_pair do |lang,idx_arr| + @@the_idx_authors[lang] ||= {} + idx_arr.each do |idx| + idx[:author][:last_first_format_a].each do |author| + author=author.strip + if @@the_idx_authors[lang][author].is_a?(NilClass) + @@the_idx_authors[lang][author]={ md: [] } + end + @@the_idx_authors[lang][author][:md] << { + filename: idx[:filename], + file: idx[:file], + author: idx[:author], + title: idx[:title], + date: idx[:date], + page: idx[:page], + lang: idx[:lang] + } + end + end + end + @the_idx=@@the_idx_authors + end + end + class OutputIndex + require_relative 'i18n' # i18n.rb + def initialize(opt,the_idx) + @opt,@the_idx=opt,the_idx + @env=SiSU_Env::InfoEnv.new + 
@rc=SiSU_Env::GetInit.new.sisu_yaml.rc + @alphabet_list=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z] + @alph=@alphabet_list.dup + @letter=@alph.shift + end + def html_file_open + @the_idx.keys.each do |lng| + @output ||={} + @output[lng] ||={} + harvest_pth,file='','' + if @env.output_dir_structure.by? == :language + harvest_pth=@env.path.webserv + '/' \ + + @opt.base_stub + '/' \ + + lng + '/' \ + + 'manifest' + file="#{harvest_pth}/authors.html" + elsif @env.output_dir_structure.by? == :filetype + harvest_pth=@env.path.webserv + '/' \ + + @opt.base_stub + '/' \ + + 'manifest' + file="#{harvest_pth}/authors.#{lng}.html" + elsif @env.output_dir_structure.by? == :filename + harvest_pth=@env.path.webserv + '/' \ + + @opt.base_stub + file="#{harvest_pth}/authors.#{lng}.html" + end + FileUtils::mkdir_p(harvest_pth) \ + unless FileTest.directory?(harvest_pth) + fileinfo=(@opt.act[:verbose][:set]==:on \ + || @opt.act[:verbose_plus][:set]==:on \ + || @opt.act[:urls_selected][:set]==:on \ + || @opt.act[:maintenance][:set]==:on) \ + ? ("file://#{file}") : '' + SiSU_Screen::Ansi.new( + @opt.act[:color_state][:set], + "harvest authors (#{@opt.files.length} files)", + fileinfo + ).dark_grey_title_hi unless @opt.act[:quiet][:set]==:on + @output[lng][:html]=File.new(file,'w') + end + end + def html_file_close + @the_idx.keys.each do |lng| + @output[lng][:html].close + @output[lng][:html_mnt].close \ + if @output[lng][:html_mnt].is_a?(File) + end + end + def html_print + def html_songsheet + html_file_open + html_head + html_alph + html_body + html_tail + html_file_close + end + def html_head_adjust(lng,type='') + css_path,topics='','' + if @env.output_dir_structure.by? == :language + css_path=(type !~/maintenance/) \ + ? '../../_sisu/css/harvest.css' + : 'harvest.css' + topics='topics.html' + elsif @env.output_dir_structure.by? == :filetype + css_path=(type !~/maintenance/) \ + ? 
'../_sisu/css/harvest.css' + : 'harvest.css' + topics="topics.#{lng}.html" + elsif @env.output_dir_structure.by? == :filename + css_path=(type !~/maintenance/) \ + ? './_sisu/css/harvest.css' + : 'harvest.css' + topics="topics.#{lng}.html" + end + ln=SiSU_i18n::Languages.new.language.list + harvest_languages='' + @the_idx.keys.each do |lg| + if @env.output_dir_structure.by? == :language + harvest_pth="../../#{lg}/manifest" + file="#{harvest_pth}/authors.html" + elsif @env.output_dir_structure.by? == :filetype + harvest_pth='.' + file="#{harvest_pth}/authors.#{lg}.html" + elsif @env.output_dir_structure.by? == :filename + harvest_pth='.' + file="#{harvest_pth}/authors.#{lg}.html" + end + l=ln[lg][:t] + harvest_languages += + %{<a href="#{file}">#{l}</a> } + end + sv=SiSU_Env::InfoVersion.instance.get_version + if @env.output_dir_structure.by? == :language + home_pth='../..' + output_structure_by= + '(output organised by language & filetype)' + elsif @env.output_dir_structure.by? == :filetype + home_pth='..' + output_structure_by= + '(output organised by filetype)' + elsif @env.output_dir_structure.by? == :filename + home_pth='.' + output_structure_by= + '(output organised by filename)' + else + home_pth='.' 
+ output_structure_by='(output organised by ?)' + end + <<WOK +<!DOCTYPE html> +<html> +<head> +<meta charset="utf-8"> +<title>SiSU Metadata Harvest - Authors</title> +<meta http-equiv="Content-Type" content="text/html;charset=utf-8" /> +<meta name="dc.title" content= "SiSU metadata harvest, Authors - SiSU information Structuring Universe, Structured information Serialised Units" /> +<meta name="dc.subject" content= "document structuring, ebook, publishing, PDF, LaTeX, XML, ODF, SQL, postgresql, sqlite, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, granular search, digital library" /> +<meta name="generator" content="#{sv.project} #{sv.version} of #{sv.date_stamp} (n*x and Ruby!)" /> +<link rel="generator" href="http://www.jus.uio.no/sisu/SiSU" /> +<link href="#{css_path}" rel="stylesheet" > +<link rel="shortcut icon" href="../_sisu/image/rb7.ico" /> +</head> +<body lang="en" xml:lang="en"> +<a name="top" id="top"></a> +<a name="up" id="up"></a> +<a name="start" id="start"></a> +<h1>SiSU Metadata Harvest - Authors #{output_structure_by}</h1> +<p>[<a href="#{home_pth}/index.html"> HOME </a>] also see <a href="#{topics}">SiSU Metadata Harvest - Topics</a></p> +<p>#{@env.widget_static.search_form}</p> +<hr /> +<p class="tiny">#{harvest_languages}</p> +<hr /> +WOK + end + def html_head + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] \ + << html_head_adjust(lng,'maintenance') \ + if @opt.act[:maintenance][:set]==:on + @output[lng][:html] \ + << html_head_adjust(lng) + end + end + def html_alph + a=[] + a << '<p>' + @alph.each do |x| + a << ((x =~/[0-9]/) \ + ? 
'' + : %{<a href="##{x}">#{x}</a>, }) + end + a=a.join + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] << a \ + if @opt.act[:maintenance][:set]==:on + @output[lng][:html] << a + end + end + def html_tail + a =<<WOK +<hr /> +<a name="bottom" id="bottom"></a> +<a name="down" id="down"></a> +<a name="end" id="end"></a> +<a name="finish" id="finish"></a> +<a name="stop" id="stop"></a> +<a name="credits"></a> +#{SiSU_Proj_HTML::Bits.new.credits_sisu} +</body> +</html> +WOK + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] << a \ + if @output[lng][:html_mnt].is_a?(File) + @output[lng][:html] << a + end + end + def do_html(lng,html) + @output[lng][:html_mnt] << html \ + if @output[lng][:html_mnt].is_a?(File) + @output[lng][:html] << html + end + def do_string_name(lng,attrib,string) + f=/^(\S)/.match(string[0])[1] + if @lng != lng + @alph=@alphabet_list.dup + @letter=@alph.shift + @lng = lng + end + if @letter < f + while @letter < f + if @alph.length > 0 + @letter=@alph.shift + if @output[lng][:html_mnt].is_a?(File) + @output[lng][:html_mnt] \ + << %{\n<p class="letter"><a name="#{@letter}"></p>#{@letter}</a><p class="book_index_lev1"><a name="#{@letter.downcase}"></a></p>} + end + @output[lng][:html] \ + << %{\n<p class="letter"><a name="#{@letter}">#{@letter}</a></p><p class="book_index_lev1"><a name="#{@letter.downcase}"></a></p>} + else break + end + end + end + end + def html_body + the_idx=@the_idx + the_idx.each_pair do |lng,lng_array| + lng_array.sort.each do |a| + do_string_name(lng,'',a) + name=a[0].sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_') + x = %{<p class="author"><a name="#{name}">#{a[0]}</a></p>} + if @output[lng][:html_mnt].is_a?(File) + @output[lng][:html_mnt] << x + end + @output[lng][:html] << x + lang_code_insert=SiSU_Env::FilenameLanguageCodeInsert.new(@opt,lng).language_code_insert + works=[] + a[1][:md].each do |i| + manifest_at=if @env.output_dir_structure.by? == :language + i[:file] + Sfx[:html] + elsif @env.output_dir_structure.by? 
== :filetype + i[:file] + lang_code_insert + Sfx[:html] + elsif @env.output_dir_structure.by? == :filename + './' + i[:file] + '/' + i[:page] + else '' #error + end + work=[ + "#{i[:date]} #{i[:title]}", + %{<p class="publication">#{i[:date]} <a href="#{manifest_at}">#{i[:title]}</a>, #{i[:author][:authors_s]}</p>} + ] + works<<=(@output[lng][:html_mnt].is_a?(File)) \ + ? (work.concat([%{<p class="publication">[<a href="#{i[:file]}.sst">src</a>] #{i[:date]} <a href="file://#{manifest_at}">#{i[:title]}</a>, #{i[:author][:authors_s]} -- [<a href="#{i[:file]}.sst">#{i[:file]}.sst</a>]</p>}])) + : work + end + works.sort_by {|y| y[0]}.each do |z| + @output[lng][:html] << z[1] + @output[lng][:html_mnt] << z[2] \ + if @output[lng][:html_mnt].is_a?(File) + end + end + end + end + self + end + def screen_print + def cycle + the_idx=@the_idx + the_idx.sort.each do |a| + puts a[0] + a[1][:md].each do |x| + puts "\t" + x[:file] + end + end + end + self + end + end +end +__END__ +#+END_SRC + +*** html_harvest_author_format.rb + +#+BEGIN_SRC ruby :tangle "../lib/sisu/html_harvest_author_format.rb" +# <<sisu_document_header>> +module SiSU_FormatAuthor + class Author + def initialize(author_param) + @author_param=author_param + end + def author_details + @authors,@author_array=[],[] + authors=@author_param.scan(/[^;]+/) + authors.each do |a| + a=a.strip + if a =~/"(.+?)"/ + @authors << { the: $1 } + @author_array << $1.upcase + else #if a =~/,/ + x=a.scan(/[^,]+/) + x[0]=x[0].strip + x[1]=x[1].strip if x[1] + if x.length==1 + @authors << { the: x[0] } + @author_array << x[0].upcase + elsif x.length==2 + @authors << { the: x[0], others: x[1] } + @author_array << "#{x[0].upcase}, #{x[1]}" + else #p x.length + end + end + end + l = @authors.length + authors_string='' + @authors.each_with_index do |a,i| + authors_string += if a[:others] + if (l - i) > 1 + "#{a[:others]} #{a[:the]}, " + else + "#{a[:others]} #{a[:the]}" + end + else + if (l - i) > 2 + "#{a[:the]}, " + else + 
"#{a[:the]}" + end + end + end + { + last_first_a: authors, + last_first_format_a: @author_array, + authors_h: @authors, + authors_s: authors_string, + authors_param: @author_param + } + end + end +end +__END__ +#+END_SRC + +* document header + +#+NAME: sisu_document_header +#+BEGIN_SRC text +encoding: utf-8 +- Name: SiSU + + - Description: documents, structuring, processing, publishing, search + harvest + + - Author: Ralph Amissah + <ralph.amissah@gmail.com> + + - Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2019, + 2020, 2021, Ralph Amissah, + All Rights Reserved. + + - License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see <http://www.gnu.org/licenses/>. 

    If you have an Internet connection, the latest version of the GPL should be
    available at these locations:
    <http://www.fsf.org/licensing/licenses/gpl.html>
    <http://www.gnu.org/licenses/gpl.html>

    <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>

  - SiSU uses:
    - Standard SiSU markup syntax,
    - Standard SiSU meta-markup syntax, and the
    - Standard SiSU object citation numbering and system

  - Homepages:
    <http://www.sisudoc.org>

  - Git
    <https://git.sisudoc.org/projects/>
    <https://git.sisudoc.org/projects/?p=software/sisu.git;a=summary>
    <https://git.sisudoc.org/projects/?p=markup/sisu-markup-samples.git;a=summary>
#+END_SRC