Development

Changeset 6883

You must first sign up to be able to contribute.

Changeset 6883

Show
Ignore:
Timestamp:
01/02/08 10:43:33 (1 year ago)
Author:
Carl.Vondrick
Message:

sfLucene: refactored highlighting system

  • highlights XML-based data (XML, HTML, XHTML) using the DOM (closes #2728)
  • lays the architectural groundwork for true Zend query highlighting (refs #2729)
  • customizable highlight markers
  • unit tests for all of the above

enjoy!

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • plugins/sfLucenePlugin/branches/1.1/CHANGELOG

    r6808 r6883  
    1919  * Added own sfEventDispatcher to each sfLucene instance 
    2020  * BC: sfLuceneCriteria constructor now requires sfLucene instance 
     21  * Refactored highlighting system 
    2122 
    2223Version 0.1.1 Beta 
  • plugins/sfLucenePlugin/branches/1.1/lib/filter/sfLuceneHighlightFilter.class.php

    r6816 r6883  
    100100    try 
    101101    { 
    102       if (!$this->highlight()) 
    103       { 
    104         // highlighting did not occur, so remove notice 
    105         $this->removeNotice(); 
    106       } 
     102      $this->highlight(); 
    107103    } 
    108104    catch (Exception $e) 
     
    122118  { 
    123119    $terms = $this->getContext()->getRequest()->getParameter($this->getParameter('highlight_qs')); 
    124     $terms = $this->prepareTerms($terms); 
    125120 
    126121    // attempt to highlight from sfLucene 
    127     if (count($terms)) 
    128     { 
     122    if ($terms) 
     123    { 
     124      $terms = $this->prepareTerms($terms); 
     125 
     126      $this->doHighlight($terms); 
    129127      $this->addNotice($terms); 
    130128      $this->addCss(); 
    131       $this->doHighlight($terms); 
    132129 
    133130      return true; 
     
    153150            $terms = $this->prepareTerms($matches[1]); 
    154151 
     152            $this->doHighlight($terms); 
    155153            $this->addNotice($terms, $value['name']); 
    156154            $this->addCss(); 
    157             $this->doHighlight($terms); 
    158155 
    159156            // stop looking for referers now. 
     
    165162    } 
    166163 
    167     // we failed to do anything, so return false 
     164    $this->removeNotice(); 
    168165    return false; 
    169166  } 
     
    177174 
    178175    // configure highlighter 
    179     $lighter = new sfLuceneHighlighter($content); 
     176    $lighter = new sfLuceneHighlighterXHTML($content); 
    180177    $lighter->addKeywords($terms); 
    181     $lighter->addHighlighters($this->getParameter('highlight_strings')); 
    182     $lighter->hasBody(true); 
    183  
    184     $this->getContext()->getResponse()->setContent($lighter->highlight()); 
     178 
     179    $this->getContext()->getResponse()->setContent($lighter->highlight()->export()); 
    185180  } 
    186181 
     
    203198      $this->getContext()->getResponse()->setContent(substr($content, 0, $pos) . $css . substr($content, $pos)); 
    204199    } 
    205   } 
    206  
    207   /** 
    208    * Prepares terms by exploding them out 
    209    */ 
    210   protected function prepareTerms($terms) 
    211   { 
    212     $terms = preg_split('/\W+/', trim($terms), -1, PREG_SPLIT_NO_EMPTY); 
    213  
    214     $terms = array_unique($terms); 
    215  
    216     return $terms; 
    217200  } 
    218201 
     
    234217    $content = $this->getContext()->getResponse()->getContent(); 
    235218 
    236     $term_string = implode($terms, ', '); 
     219    $term_string = ''; 
     220 
     221    foreach ($terms as $term) 
     222    { 
     223      $term_string .= $term->getHighlighter()->highlight($term->getName()) . ', '; 
     224    } 
     225 
     226    $term_string = substr($term_string, 0, -2); 
    237227 
    238228    $route = $route = $this->getContext()->getRouting()->getCurrentInternalUri(); 
     
    256246  } 
    257247 
     248  protected function prepareTerms($terms) 
     249  { 
     250    $highlighters = sfLuceneHighlighterMarkerSprint::generate($this->getParameter('highlight_strings')); 
     251 
     252    return sfLuceneHighlighterKeywordNamedInsensitive::explode($highlighters, mb_strtolower($terms)); 
     253  } 
     254 
    258255  /** 
    259256   * Helper function to do translations 
  • plugins/sfLucenePlugin/branches/1.1/lib/helper/sfLuceneHelper.php

    r6652 r6883  
    3030} 
    3131 
    32 function highlight_result_text($text, $query, $size = 200, $highlighter = '<strong class="highlight">%s</strong>') 
     32function highlight_result_text($text, $query, $size = 200, $sprint = '<strong class="highlight">%s</strong>') 
    3333{ 
    34   $h = new sfLuceneHighlighter($text); 
    35   $h->addKeywordSlug($query); 
    36   $h->addHighlighter($highlighter); 
    37   $h->hasBody(false); 
    38   $h->densityCrop($size); 
    39   return $h->highlight(); 
     34  $highlighter = new sfLuceneHighlighterString($text); 
     35 
     36  $marker = new sfLuceneHighlighterMarkerSprint($sprint); 
     37  $harness = new sfLuceneHighlighterMarkerHarness(array($marker)); 
     38 
     39  $keywords = sfLuceneHighlighterKeywordNamedInsensitive::explode($harness, $query); 
     40 
     41  $highlighter->addKeywords($keywords); 
     42  $highlighter->strip()->crop($size); 
     43 
     44  return $highlighter->highlight()->export(); 
    4045} 
    4146 
    42 function highlight_keywords($text, $keywords, $highlighter = '<strong class="highlight">%s</strong>') 
     47function highlight_keywords($text, $keywords, $sprint = '<strong class="highlight">%s</strong>') 
    4348{ 
    44   $h = new sfLuceneHighlighter($text); 
    45   $h->addKeywordSlug($keywords); 
    46   $h->addHighlighter($highlighter); 
    47   $h->hasBody(false); 
     49  $highlighter = new sfLuceneHighlighterXHTMLPart($text); 
    4850 
    49   return $h->highlight(); 
     51  $marker = new sfLuceneHighlighterMarkerSprint($sprint); 
     52  $harness = new sfLuceneHighlighterMarkerHarness(array($marker)); 
     53 
     54  $keywords = sfLuceneHighlighterKeywordNamedInsensitive::explode($harness, $keywords); 
     55 
     56  $highlighter->addKeywords($keywords); 
     57 
     58  return $highlighter->highlight()->export(); 
    5059} 
    5160 
  • plugins/sfLucenePlugin/branches/1.1/test/unit/filter/sfLuceneHighlightFilterTest.php

    r6735 r6883  
    1717require dirname(__FILE__) . '/../../bootstrap/unit.php'; 
    1818 
    19 $t = new lime_test(21, new lime_output_color()); 
     19$t = new lime_test(17, new lime_output_color()); 
    2020 
    2121$chain = new sfFilterChain(); 
     
    4444$request->setParameter('h', 'test'); 
    4545 
    46 $response->setContent('Hello'); 
    47 try { 
    48   $highlight->execute($chain); 
    49   $t->fail('highlighter rejects content without a body'); 
    50 } catch (sfException $e) { 
    51   $t->pass('highlighter rejects content without a body'); 
    52 
    53  
    54 $response->setContent('<body>Hello'); 
    55 try { 
    56   $highlight->execute($chain); 
    57   $t->fail('highlighter rejects content without a body ending tag'); 
    58 } catch (sfException $e) { 
    59   $t->pass('highlighter rejects content without a body ending tag'); 
    60 
    61  
    62 $response->setContent('Hello</body>'); 
    63 try { 
    64   $highlight->execute($chain); 
    65   $t->fail('highlighter rejects content without a body starting tag'); 
    66 } catch (sfException $e) { 
    67   $t->pass('highlighter rejects content without a body starting tag'); 
    68 
    69  
    70 $response->setContent('<body>Hello</body>'); 
     46$response->setContent('<html><body>Hello</body></html>'); 
    7147try { 
    7248  $highlight->execute($chain); 
     
    7652} 
    7753 
    78 $response->setContent('<body>2 > 1</body>'); 
    79 try { 
    80   $highlight->execute($chain); 
    81   $t->fail('highlighter rejects content with a carat mismatch'); 
    82 } catch (sfException $e) { 
    83   $t->pass('highlighter rejects content with a carat mismatch'); 
    84 
    85  
    86 $response->setContent('<body>I am <b>cool</b>!</body>'); 
     54$response->setContent('<html><body>I am <b>cool</b>!</body></html>'); 
    8755try { 
    8856  $highlight->execute($chain); 
     
    9462$t->diag('testing highlighting'); 
    9563 
    96 $response->setContent('<body>highlight the keyword</body>'); 
     64$response->setContent('<html><body>highlight the keyword</body></html>'); 
    9765$request->setParameter('h', 'keyword'); 
    9866$highlight->execute($chain); 
    99 $t->is($response->getContent(), '<body>highlight the <highlighted>keyword</highlighted></body>', 'highlighter highlights a single keyword'); 
     67$t->is($response->getContent(), "<?xml version=\"1.0\"?>\n<html><body>highlight the <highlighted>keyword</highlighted></body></html>\n", 'highlighter highlights a single keyword'); 
    10068 
    101 $response->setContent('<body>highlight the keyword</body>'); 
    102 $request->setParameter('h', 'highlight keyword'); 
     69$response->setContent('<html><body>highlight the keyword yay!</body></html>'); 
     70$request->setParameter('h', 'highlight KEYWORD'); 
    10371$highlight->execute($chain); 
    104 $t->is($response->getContent(), '<body><highlighted>highlight</highlighted> the <highlighted2>keyword</highlighted2></body>', 'highlighter highlights multiple keywords'); 
     72$t->is($response->getContent(), "<?xml version=\"1.0\"?>\n<html><body><highlighted>highlight</highlighted> the <highlighted2>keyword</highlighted2> yay!</body></html>\n", 'highlighter highlights multiple keywords'); 
    10573 
    106 $response->setContent('<body>~notice~ keyword</body>'); 
     74$response->setContent('<html><body>~notice~ keyword</body></html>'); 
    10775$request->setParameter('h', 'keyword'); 
    10876$highlight->execute($chain); 
    10977$t->like($response->getContent(), '#<body><keywords><highlighted>keyword</highlighted></keywords><remove>~remove~</remove>#', 'highlighter adds notice string'); 
    11078 
    111 $response->setContent('<head></head><body>keyword</body>'); 
     79$response->setContent('<html><head><title>foobar</title></head><body>keyword</body></html>'); 
    11280$highlight->execute($chain); 
    11381$t->like($response->getContent(), '#<link .*?href=".*?/search\.css".*?/>\n</head>#', 'highlighter adds search stylesheet'); 
    11482 
    115 $response->setContent('<head></head><body>~notice~ google search test</body>'); 
     83$response->setContent('<html><head><title>foobar</title></head><body>~notice~ google search test</body></html>'); 
    11684$request->getParameterHolder()->remove('h'); 
    11785$_SERVER['HTTP_REFERER'] = 'http://www.google.com/search?num=50&hl=en&safe=off&q=google&btnG=Search'; 
     
    11987 
    12088$t->like($response->getContent(), '#<highlighted>google</highlighted> search test#', 'highlighter highlights results from Google'); 
    121 $t->like($response->getContent(), '#<from><highlighted>Google</highlighted></from><keywords><highlighted>google</highlighted></keywords><remove>~remove~</remove>#', 'highlighter adds correct notice for results from Google'); 
     89$t->like($response->getContent(), '#<from>Google</from><keywords><highlighted>google</highlighted></keywords><remove>~remove~</remove>#', 'highlighter adds correct notice for results from Google'); 
    12290$t->like($response->getContent(), '#<link .*?href=".*?/search\.css".*?/>\n</head>#', 'highlighter adds search stylesheet for results from Google'); 
    12391 
     
    165133configure_i18n(); 
    166134 
    167 $response->setContent('<body>highlight the keyword</body>'); 
     135$response->setContent('<html><body>highlight the keyword</body></html>'); 
    168136$request->setParameter('h', 'keyword'); 
    169137$highlight->execute($chain); 
    170 $t->is($response->getContent(), '<body>highlight the <highlighted>keyword</highlighted></body>', 'highlighter highlights a single keyword with i18n'); 
     138 
     139$t->is($response->getContent(), "<?xml version=\"1.0\"?>\n<html><body>highlight the <highlighted>keyword</highlighted></body></html>\n", 'highlighter highlights a single keyword with i18n'); 
  • plugins/sfLucenePlugin/branches/1.1/test/unit/helper/sfLuceneHelperTest.php

    r6652 r6883  
    7676$t->is(highlight_result_text('Hello.  This is a pretty <em class="thing">awesome</em> thing to be talking about.', 'thing talking'), 'Hello.  This is a pretty awesome <strong class="highlight">thing</strong> to be <strong class="highlight">talking</strong> about.', 'highlight_result_text() highlights text and strips out HTML'); 
    7777 
    78 $t->is(highlight_result_text('Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. This is a pretty <em class="thing">awesome</em> thing to be talking about.  Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. ', 'thing talking', 50), '...This is a pretty awesome <strong class="highlight">thing</strong> to be <strong class="highlight">talking</strong> about....', 'highlight_result_text() highlights and truncates text'); 
     78$t->is(highlight_result_text('Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. This is a pretty <em class="thing">awesome</em> thing to be talking about.  Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. Foo bar. ', 'thing talking', 50), '...is is a pretty awesome <strong class="highlight">thing</strong> to be <strong class="highlight">talking</strong> about....', 'highlight_result_text() highlights and truncates text'); 
    7979 
    8080$t->is(highlight_keywords('Hello.  This is a pretty <em class="thing">awesome</em> thing to be talking about.', 'thing talking'), 'Hello.  This is a pretty <em class="thing">awesome</em> <strong class="highlight">thing</strong> to be <strong class="highlight">talking</strong> about.', 'highlight_kewyords() highlights text');