From 2cd369570683630bcabeb1eb55c82cc75fcf015b Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Fri, 15 Feb 2008 02:28:57 +0000 Subject: refinement to semantic tag regex matching --- lib/sisu/v0/shared_sem.rb | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/sisu/v0/shared_sem.rb b/lib/sisu/v0/shared_sem.rb index da989999..0c9e485c 100644 --- a/lib/sisu/v0/shared_sem.rb +++ b/lib/sisu/v0/shared_sem.rb @@ -65,19 +65,20 @@ module SiSU_sem @para=para end def rgx + def exclude + /^<:code>/ + end def each_csc - /[a-z]+[:;]\{|\}[:;][a-z]+/ + /\b[a-z]+[:;]\{|\}[:;][a-z]+\b/ end def each_c - /[a-z]+:\{|\}:[a-z]+/ + /\b[a-z]+:\{|\}:[a-z]+\b/ end def each_sc - /[a-z]+;\{|\};[a-z]+/ + /\b[a-z]+;\{|\};[a-z]+\b/ end def pair_csc /(([a-z]+)(?::\{(.+?)\}:\2|;\{(.+?)\};\2))/ - #/(([a-z]+);\{(.+?)\};\2)/ - #/(([a-z]+)([:;])\{(.+?)\}\3\2)/ end def pair_c /(([a-z]+):\{(.+?)\}:\2)/ @@ -92,7 +93,9 @@ module SiSU_sem end def rm def sem_marker_parts - @para.gsub!(rgx.each_csc,'') + unless @para =~ rgx.exclude + @para.gsub!(rgx.each_csc,'') + end @para end def sem_marker_added_extra_parts -- cgit v1.2.3 From daf2b7fe5f1479cecbe0c7283b34e47324d10e1b Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 16 Feb 2008 00:33:34 +0000 Subject: markup tag match refinement --- lib/sisu/v0/shared_sem.rb | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/sisu/v0/shared_sem.rb b/lib/sisu/v0/shared_sem.rb index 0c9e485c..f6303ee0 100644 --- a/lib/sisu/v0/shared_sem.rb +++ b/lib/sisu/v0/shared_sem.rb @@ -69,25 +69,36 @@ module SiSU_sem /^<:code>/ end def each_csc - /\b[a-z]+[:;]\{|\}[:;][a-z]+\b/ + /\b[a-z]+[:;]\{|\}[:;][a-z]+\b/m + #/\b(?:[a-z]+(?:[_.][a-z]+)+|[a-z]+)[:;]\{|\}[:;](?:[a-z]+(?:[_.][a-z]+)+|[a-z]+)\b/m end def each_c - /\b[a-z]+:\{|\}:[a-z]+\b/ + /\b[a-z]+:\{|\}:[a-z]+\b/m + #/\b(?:[a-z]+(?:[_.][a-z]+)+|[a-z]+):\{|\}:(?:[a-z]+(?:[_.][a-z]+)+|[a-z]+)\b/m end def each_sc - /\b[a-z]+;\{|\};[a-z]+\b/ + /\b[a-z]+;\{|\};[a-z]+\b/m + #/\b(?:[a-z]+(?:[_.][a-z]+)+|[a-z]+);\{|\};(?:[a-z]+(?:[_.][a-z]+)+|[a-z]+)\b/m end def pair_csc - /(([a-z]+)(?::\{(.+?)\}:\2|;\{(.+?)\};\2))/ + /(([a-z]+)(?::\{(.+?)\}:\2|;\{(.+?)\};\2))/m + #/(([a-z]+(?:[_.][a-z]+)+|[a-z]+)(?::\{(.+?)\}:\2|;\{(.+?)\};\2))/m end def pair_c - /(([a-z]+):\{(.+?)\}:\2)/ + /(([a-z]+):\{(.+?)\}:\2)/m + #/(([a-z]+(?:[_.][a-z]+)+|[a-z]+):\{(.+?)\}:\2)/m end def pair_sc - /(([a-z]+);\{.+?\};\2)/ + /(([a-z]+);\{.+?\};\2)/m + #/(([a-z]+(?:[_.][a-z]+)+|[a-z]+);\{.+?\};\2)/m end def whole_csc_ae - /(([a-z]+)(?::\[(.+?)\]:\2|;\[(.+?)\];\2))/ + /(([a-z]+)(?::\[(.+?)\]:\2|;\[(.+?)\];\2))/m + #/(([a-z]+(?:[_.][a-z]+)+|[a-z]+)(?::\[(.+?)\]:\2|;\[(.+?)\];\2))/m + end + def each_csc_ae + /\b[a-z]+[:;]\[|\][:;][a-z]+\b/m + #/\b(?:[a-z]+(?:[_.][a-z]+)+|[a-z]+)[:;]\[|\][:;](?:[a-z]+(?:[_.][a-z]+)+|[a-z]+)\b/m end self end @@ -99,7 +110,11 @@ module SiSU_sem @para end def sem_marker_added_extra_parts - @para.gsub!(rgx.whole_csc_ae,'') + unless @para =~ rgx.exclude + @para.gsub!(rgx.whole_csc_ae,'') + if para =~rgx.each_csc_ae; puts "WARNING semantic taggin error: #@para" + end + end @para end def all -- cgit v1.2.3