aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/sisu/v0/param.rb
diff options
context:
space:
mode:
authorRalph Amissah <ralph@amissah.com>2008-12-02 23:54:23 -0500
committerRalph Amissah <ralph@amissah.com>2008-12-02 23:54:23 -0500
commit0e6fc15ada3c5d9a86b227163f35a54993b32529 (patch)
tree90ac98f2dadf8a2731fac4921fb5d9263eeedeb9 /lib/sisu/v0/param.rb
parentsha256 for 0.69.4 (diff)
sisu harvest, introduce module along with header syntax addition & modification
* sisu markup, additional header and new format rule: * @creator: / @author: header field, introduced author name format rules for more usable metadata harvesting: surname comma other names, additional authors separated by semi-colon * param added meta-tag, @topic_register: formatting topic levels are separated from sub-levels by a colon, a semi-colon separates main topics if there are multiple topics at lowest sub-level, a pipe can be used to create multiple headings * harvest module, harvests metadata from document set currently extracts: (i) authors and their writings from document set; (ii) topics and associated writings from document set (topics use topic_register header). harvest (when run against documents common to a directory of a site) extracts metadata and organises the documents on a site by author and topic information provided (there is a new "topic_register" header, with formatting rules similar to those of the book index), results are placed in [output_path]/sisu_site_metadata. sisu --harvest *.sst * by author (see change in param @creator: / @author: header field) * by topic / subject index (see addition in param of @topic_register: header field) initially there should be example samples here: http://www.jus.uio.no/sisu/sisu_site_metadata/harvest_authors.html http://www.jus.uio.no/sisu/sisu_site_metadata/harvest_topics.html together with update markup source files The authors and their writings list will be made to take on a more bibliographical form, with the use of additional fields as required.
(concept example, suitable for medium sized sites [to remove size constraint: implement SQL equivalent]) make feature more robust * css, for harvest output added * remote placement of sisu_site_metadata (output produced by metadata harvest) * sisu markup, update document samples accordingly * tidy copyright marks in program headers, remove repetition of dates [version bump because formatting rule introduced to author / creator header - where new site metadata harvest feature is used, (at present changes should not be noticed except when using metadata harvest)]
Diffstat (limited to 'lib/sisu/v0/param.rb')
-rw-r--r--lib/sisu/v0/param.rb45
1 files changed, 39 insertions, 6 deletions
diff --git a/lib/sisu/v0/param.rb b/lib/sisu/v0/param.rb
index 6ab30e86..2829abe1 100644
--- a/lib/sisu/v0/param.rb
+++ b/lib/sisu/v0/param.rb
@@ -14,8 +14,7 @@
SiSU, a framework for document structuring, publishing and search
- Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
- 2007, 2008 Ralph Amissah
+ Copyright (C) Ralph Amissah
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
@@ -119,19 +118,20 @@ module SiSU_Param
@doc={ :lv=>[] }
@doc[:fns],@doc[:fnb],@doc[:scr_suffix]='','',''
@@publisher='SiSU scribe'
- attr_accessor :cmd,:mod,:env,:fn,:fns,:fnb,:fnn,:fnt,:fnl,:flv,:fnz,:fnstex,:ocn,:sfx_src,:sfx,:pdf,:file_type,:dir_out,:dir_tex,:dir_lout,:txt_path,:site_skin,:sisu,:sisu_version,:ruby_version,:title,:dc_title,:html_title,:subtitle,:subtitle_tex,:creator_home,:dc_creator,:translator,:illustrator,:prepared_by,:digitized_by,:dc_subject,:dc_description,:dc_publisher,:dc_contributor,:dc_date,:dc_date_created,:dc_date_issued,:dc_date_available,:dc_date_valid,:dc_date_modified,:date_scheme,:date_created_scheme,:date_issued_scheme,:date_available_scheme,:date_valid_scheme,:date_modified_scheme,:dc_type,:dc_format,:dc_identifier,:dc_source,:dc_language,:language_original,:dc_relation,:dc_coverage,:dc_rights,:keywords,:comments,:abstract,:cls_loc,:cls_dewey,:cls_pg,:cls_isbn,:papersize,:papersize_array,:toc,:lv1,:lv2,:lv3,:lv4,:lv5,:lv6,:pagenew,:pagebreak,:num_top,:toc_lev_limit,:flag_endnotes,:flag_auto_endnotes,:flag_separate_endnotes,:flag_separate_endnotes_make,:flag_auto_heading_num,:markup,:markup_instruction,:markup_version,:markup_declared,:make_bold,:make_italic,:flag_tables,:vocabulary,:doc_skin,:doc_css,:yaml,:lnk,:prefix_a,:prefix_b,:suffix,:information,:contact,:icon,:image,:ad_url,:ad_png,:ad_alt,:ad_began,:flag_promo,:promo,:ad_home,:stmp,:stmpd,:sc_filename,:sc_number,:sc_date,:sc_time,:sc_info,:yamladdr,:locale,:wc_lines,:wc_words,:wc_bytes,:file_encoding,:file_size,:user,:home,:hostname,:pwd,:firstseg,:programs,:creator_copymark,:lang,:en,:dgst,:dgst_skin,:generated,:tags,:tag_array,:concord_make,:seg_names,:seg_autoname_safe,:set_header_title,:set_heading_top,:set_heading_seg,:heading_seg_first,:heading_seg_first_flag,:base_program,:man_section,:man_name,:man_synopsis,:ec,:opt,:sem_tag,:book_idx,:doc_cont_idx
+ attr_accessor :cmd,:mod,:env,:fn,:fns,:fnb,:fnn,:fnt,:fnl,:flv,:fnz,:fnstex,:ocn,:sfx_src,:sfx,:pdf,:file_type,:dir_out,:dir_tex,:dir_lout,:txt_path,:site_skin,:sisu,:sisu_version,:ruby_version,:title,:dc_title,:html_title,:subtitle,:subtitle_tex,:creator_home,:dc_creator,:authors,:authorship,:translator,:illustrator,:prepared_by,:digitized_by,:dc_subject,:dc_description,:dc_publisher,:dc_contributor,:dc_date,:dc_date_created,:dc_date_issued,:dc_date_available,:dc_date_valid,:dc_date_modified,:date_scheme,:date_created_scheme,:date_issued_scheme,:date_available_scheme,:date_valid_scheme,:date_modified_scheme,:dc_type,:dc_format,:dc_identifier,:dc_source,:dc_language,:language_original,:dc_relation,:dc_coverage,:dc_rights,:keywords,:comments,:abstract,:cls_loc,:cls_dewey,:cls_pg,:cls_isbn,:papersize,:papersize_array,:toc,:lv1,:lv2,:lv3,:lv4,:lv5,:lv6,:pagenew,:pagebreak,:num_top,:toc_lev_limit,:flag_endnotes,:flag_auto_endnotes,:flag_separate_endnotes,:flag_separate_endnotes_make,:flag_auto_heading_num,:markup,:markup_instruction,:markup_version,:markup_declared,:make_bold,:make_italic,:flag_tables,:vocabulary,:doc_skin,:doc_css,:yaml,:lnk,:prefix_a,:prefix_b,:suffix,:information,:contact,:icon,:image,:ad_url,:ad_png,:ad_alt,:ad_began,:flag_promo,:promo,:ad_home,:stmp,:stmpd,:sc_filename,:sc_number,:sc_date,:sc_time,:sc_info,:yamladdr,:locale,:wc_lines,:wc_words,:wc_bytes,:file_encoding,:file_size,:user,:home,:hostname,:pwd,:firstseg,:programs,:creator_copymark,:lang,:en,:dgst,:dgst_skin,:generated,:tags,:tag_array,:concord_make,:seg_names,:seg_autoname_safe,:set_header_title,:set_heading_top,:set_heading_seg,:heading_seg_first,:heading_seg_first_flag,:base_program,:man_section,:man_name,:man_synopsis,:ec,:opt,:sem_tag,:book_idx,:topic_register,:original_publication_details
def initialize(fns_array,opt)
- @env=@fn=@fns=@fnb=@fnn=@fnt=@fnl=@flv=@fnz=@fnstex=@ocn=@sfx_src=@sfx=@pdf=@file_type=@dir_out=@dir_tex=@dir_lout=@txt_path=@flag_endnotes=@flag_auto_endnotes=@flag_separate_endnotes=@flag_separate_endnotes_make=@site_skin=@sisu=@sisu_version=@ruby_version=@title=@dc_title=@html_title=@subtitle=@subtitle_tex=@creator_home=@dc_creator=@translator=@illustrator=@prepared_by=@digitized_by=@dc_subject=@dc_description=@dc_publisher=@dc_contributor=@dc_date=@dc_date_created=@dc_date_issued=@dc_date_available=@dc_date_valid=@dc_date_modified=@date_scheme=@date_created_scheme=@date_issued_scheme=@date_available_scheme=@date_valid_scheme=@date_modified_scheme=@dc_type=@dc_format=@dc_identifier=@dc_source=@dc_language=@language_original=@dc_relation=@dc_coverage=@dc_rights=@keywords=@comments=@abstract=@cls_loc=@cls_dewey=@cls_pg=@cls_isbn=@papersize=@toc=@lv1=@lv2=@lv3=@lv4=@lv5=@lv6=@pagenew=@pagebreak=@num_top=@toc_lev_limit=@flag_auto_heading_num=@make_bold=@make_italic=@flag_tables=@vocabulary=@doc_skin=@doc_css=@yaml=@lnk=@prefix_a=@prefix_b=@suffix=@information=@contact=@icon=@ad_url=@ad_png=@ad_alt=@ad_began=@promo=@ad_home=@stmp=@stmpd=@sc_filename=@sc_number=@sc_date=@sc_time=@sc_info=@yamladdr=@locale=@wc_lines=@wc_words=@wc_bytes=@file_encoding=@file_size=@firstseg=@programs=@creator_copymark=@lang=@en=@dgst=@dgst_skin=@generated=@heading_seg_first=@base_program=@man_synopsis=@doc_cont_idx=nil
+ @env=@fn=@fns=@fnb=@fnn=@fnt=@fnl=@flv=@fnz=@fnstex=@ocn=@sfx_src=@sfx=@pdf=@file_type=@dir_out=@dir_tex=@dir_lout=@txt_path=@flag_endnotes=@flag_auto_endnotes=@flag_separate_endnotes=@flag_separate_endnotes_make=@site_skin=@sisu=@sisu_version=@ruby_version=@title=@dc_title=@html_title=@subtitle=@subtitle_tex=@creator_home=@dc_creator=@translator=@illustrator=@prepared_by=@digitized_by=@dc_subject=@dc_description=@dc_publisher=@dc_contributor=@dc_date=@dc_date_created=@dc_date_issued=@dc_date_available=@dc_date_valid=@dc_date_modified=@date_scheme=@date_created_scheme=@date_issued_scheme=@date_available_scheme=@date_valid_scheme=@date_modified_scheme=@dc_type=@dc_format=@dc_identifier=@dc_source=@dc_language=@language_original=@dc_relation=@dc_coverage=@dc_rights=@keywords=@comments=@abstract=@cls_loc=@cls_dewey=@cls_pg=@cls_isbn=@papersize=@toc=@lv1=@lv2=@lv3=@lv4=@lv5=@lv6=@pagenew=@pagebreak=@num_top=@toc_lev_limit=@flag_auto_heading_num=@make_bold=@make_italic=@flag_tables=@vocabulary=@doc_skin=@doc_css=@yaml=@lnk=@prefix_a=@prefix_b=@suffix=@information=@contact=@icon=@ad_url=@ad_png=@ad_alt=@ad_began=@promo=@ad_home=@stmp=@stmpd=@sc_filename=@sc_number=@sc_date=@sc_time=@sc_info=@yamladdr=@locale=@wc_lines=@wc_words=@wc_bytes=@file_encoding=@file_size=@firstseg=@programs=@creator_copymark=@lang=@en=@dgst=@dgst_skin=@generated=@heading_seg_first=@base_program=@man_synopsis=@topic_register=@original_publication_details=nil
@man_section=1
@man_name='man page "name/whatis" information not provided, set in header @man: name=[whatis information]'
@data,@fns,@cmd,@mod,@opt=fns_array,opt.fns,opt.cmd,opt.mod,opt #@data used as data
@flag_tables,@set_header_title,@set_heading_top,@set_heading_seg,@heading_seg_first_flag,@flag_promo,@book_idx=false,false,false,false,false,false,false
@seg_autoname_safe=true
@sem_tag=false
- @markup_instruction,@markup_declared,@image='','','' #check which other values should be set to empty rather than nil
+ @authorship,@markup_instruction,@markup_declared,@image='','','','' #check which other values should be set to empty rather than nil
@markup=@markup_instruction #use @markup_instruction
@doc,@fn,@make_italic,@make_bold,@tag_hash,@ec={},{},{},{},{},{},{}
@flv,@lang,@seg_names,@tags,@tag_array,@tag_a,@ec[:image],@ec[:audio],@ec[:multimedia]=Array.new(9){[]}
+ @authors=[]
@papersize_array=[]
@rgx_image=/(?:^|[^_\\])\{\s*(\S+?\.(?:png|jpg|gif))/
@rgx_audio=/\{\s*(\S+?\.(?:mp3|ogg))/
@@ -291,6 +291,38 @@ module SiSU_Param
else @dc_creator=/(?:0~|@)(?:creator|author)-?:?\s+(.+?)$/m.match(para)[1]
end
@dc_creator.strip!
+ authors=@dc_creator.scan(/[^;]+/)
+ authors.each do |a|
+ if a =~/"(.+?)"/
+ @authors << { :the => $1 }
+ else #if a =~/,/
+ x=a.scan(/[^,]+/)
+ if x.length == 1
+ @authors << { :the => x[0].strip }
+ elsif x.length == 2
+ @authors << { :the => x[0].strip, :others => x[1].strip }
+ else #p x.length
+ end
+ end
+ end
+ l = @authors.length
+ authorship=''
+ @authors.each_with_index do |a,i|
+ authorship += if a[:others]
+ if (l - i) > 1
+ "#{a[:others].strip} #{a[:the].strip}, "
+ else
+ "#{a[:others].strip} #{a[:the].strip}"
+ end
+ else
+ if (l - i) > 2
+ "#{a[:the].strip}, "
+ else
+ "#{a[:the].strip}"
+ end
+ end
+ end
+ @authorship=@dc_creator=authorship
when /^(?:0~(?:translator|translated_by)|@(?:translator|translated_by):)\s+(.+?)$/m #% metainfo
@translator=$1
when /^(?:0~(?:illustrator|illustrated_by)|@(?:illustrator|illustrated_by):)\s+(.+?)$/m #% metainfo
@@ -517,8 +549,9 @@ module SiSU_Param
end
when /^(?:0~suffix|@suffix:)\s+(.+?)$/m; @suffix=$1 #% metainfo
when /^(?:0~information|@information:)\s+(.+?)$/m; @information=$1 #% metainfo
- when /^(?:0~doc_cont(?:ent)?_in?de?x|@doc_cont(?:ent)?_in?de?x:)\s+(.+?)$/m; @doc_cont_idx=$1 #% metainfo, similar syntax to book index, leave out the ={} i.e. use equivalent of ={(.+?)}
+ when /^(?:0~topic_register|@topic_register:)\s+(.+?)$/m; @topic_register=$1 #% metainfo, similar syntax to book index, leave out the ={} i.e. use equivalent of ={(.+?)}
when /^(?:0~contact|@contact:)\s+(.+?)$/m; @contact=$1 #% metainfo
+ when /^(?:0~original_publication|@original_publication:)\s+(.+?)$/m; @original_publication=$1 #% details of original publication
when /^(?:0~icon|@icon:)\s+(.+?)$/m; @icon=$1 #% processing
when /^(?:0~promo|@promo:)\s+(.+?)$/m
@flag_promo=true