Ниже приведён мой код поисковой системы для сайта. Сейчас поиск работает только по полям ProductName и ProductNumber. Я не знаю, что нужно изменить, чтобы поиск выполнялся и по всему полю ProductDescription.
Вот файл Search.php:
/**
 * Lucene index handle; assigned in the constructor from
 * Zend_Search_Lucene::open() or Zend_Search_Lucene::create().
 */
protected $_index;
/**
 * URLs already requested by _indexUrl() on this instance; checked before
 * each fetch so that the same page is not downloaded twice.
 *
 * @var array
 */
protected $_indexed = array();
/**
 * HTTP client used to download the pages that get indexed.
 *
 * @var Zend_Http_Client
 */
protected $_httpClient;
/**
 * Opens the Lucene index stored in "<document root>/../tmp/search", creating
 * it when it cannot be opened, and prepares the HTTP client and the default
 * text analyzer.
 *
 * The default analyzer is set to the UTF-8 case-insensitive analyzer so that
 * indexing and querying tokenize text the same way.
 */
public function __construct()
{
    $indexDir = $_SERVER['DOCUMENT_ROOT'] . '/../tmp/search';

    // realpath() returns false for a directory that does not exist yet.
    // Keep the unresolved path in that case so that create() below still
    // receives a usable location instead of `false`.
    $resolvedDir = realpath($indexDir);
    if ($resolvedDir !== false) {
        $indexDir = $resolvedDir;
    }

    try {
        $this->_index = Zend_Search_Lucene::open($indexDir);
    } catch (Zend_Search_Lucene_Exception $e) {
        // No readable index at that location: start a fresh one.
        $this->_index = Zend_Search_Lucene::create($indexDir);
    }

    $this->_httpClient = new Zend_Http_Client();
    $this->_httpClient->setConfig(array('timeout' => 10));

    Zend_Search_Lucene_Analysis_Analyzer::setDefault(
        new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive()
    );
}
/**
 * Indexes one URL or a list of URLs.
 *
 * @param string|array $url A single URL, or an array whose elements are each
 *                          handed to _indexUrl(). Any other type is ignored.
 * @return void
 */
public function indexUrl($url)
{
    // Treat a single URL as a one-element list so both cases share one loop.
    if (is_string($url)) {
        $url = array($url);
    }

    if (!is_array($url)) {
        return;
    }

    foreach ($url as $singleUrl) {
        $this->_indexUrl($singleUrl);
    }
}
/**
 * Starts indexing at the site root ("<host name>/").
 *
 * @return void
 */
public function indexWholePage()
{
    $this->_indexUrl($this->_getHostName() . '/');
}
/**
 * Downloads a page, follows its same-site links recursively, and stores the
 * page in the Lucene index.
 *
 * A page is re-indexed only when its content changed: the MD5 of the body is
 * stored with the document, and an existing document for the same URL with an
 * identical MD5 is left untouched. Outdated documents for the URL are deleted
 * before the new one is added.
 *
 * @param string $url Absolute URL of the page to fetch.
 * @return void
 */
protected function _indexUrl($url)
{
    // Each URL is fetched at most once per instance.
    if (in_array($url, $this->_indexed, true)) {
        return;
    }

    $log = Zend_Registry::get('Zend_Log');
    $log->log($url, Zend_Log::NOTICE);

    $this->_httpClient->setUri($url);
    $response = $this->_httpClient->request();

    // Mark as visited before recursing so that link cycles terminate.
    $this->_indexed[] = $url;

    if (!$response->isSuccessful()) {
        return;
    }

    $body = $response->getBody();
    $bodyHash = md5($body);
    $doc = Zend_Search_Lucene_Document_Html::loadHTML($body, true);

    $hostName = $this->_getHostName();
    foreach ($doc->getLinks() as $link) {
        if (!$this->_isValidPageLink($link)) {
            continue;
        }

        // _isValidPageLink() accepts both relative links and absolute links
        // that already start with the host name. Only the relative ones need
        // the host prepended; prefixing an absolute link would produce
        // "http://hosthttp://host/...".
        $target = (strpos($link, $hostName) === 0) ? $link : $hostName . $link;

        if (!in_array($target, $this->_indexed, true)) {
            $this->_indexUrl($target);
        }
    }

    // Look up documents previously stored for this exact URL.
    $term = new Zend_Search_Lucene_Index_Term($url, 'url');
    $query = new Zend_Search_Lucene_Search_Query_Term($term);
    foreach ($this->_index->find($query) as $hit) {
        if ($hit->md5 == $bodyHash) {
            // Unchanged since the last run: keep the existing document.
            return;
        }
        $this->_index->delete($hit->id);
    }

    $doc->addField(Zend_Search_Lucene_Field::Keyword('url', $url));
    $doc->addField(Zend_Search_Lucene_Field::UnIndexed('md5', $bodyHash));
    $this->_index->addDocument($doc);

    $log->log('done', Zend_Log::NOTICE);
}
/**
 * Runs a query against the Lucene index.
 *
 * @param mixed $query Passed unchanged to the index's find() method.
 * @return mixed Whatever the index's find() method returns.
 */
public function search($query)
{
    $hits = $this->_index->find($query);

    return $hits;
}
/**
 * Placeholder: the body is empty, so calling this method currently does
 * nothing.
 *
 * NOTE(review): presumably intended to remove the search index — confirm the
 * intended behaviour before relying on it.
 */
public function deleteIndex()
{
}
/**
 * Builds the base URL (scheme, host and, when needed, port) of the current
 * request from $_SERVER.
 *
 * @return string e.g. "http://example.com" or "https://example.com:8443";
 *                no trailing slash.
 */
protected function _getHostName()
{
    $host = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : '';

    // $_SERVER['HTTPS'] is non-empty for HTTPS requests; some servers set it
    // to "off" for plain HTTP, and an empty value also means plain HTTP.
    $isHttps = !empty($_SERVER['HTTPS']) && $_SERVER['HTTPS'] !== "off";
    $proto = $isHttps ? 'https' : 'http';
    $defaultPort = $isHttps ? 443 : 80;

    $port = isset($_SERVER['SERVER_PORT']) ? $_SERVER['SERVER_PORT'] : 80;

    $uri = $proto . '://' . $host;

    // The Host header already includes a non-default port ("example.com:8080"),
    // so only append the server port when the host does not carry one;
    // otherwise the result would be "example.com:8080:8080".
    $hostHasPort = (bool) preg_match('/:\d+$/', $host);
    if (!$hostHasPort && $defaultPort != $port) {
        $uri .= ':' . $port;
    }

    return $uri;
}
/**
 * Decides whether a link found on a page should be crawled.
 *
 * A link qualifies when it starts with this site's host name, with "/" or
 * with "?", and does not end in one of the file extensions
 * jpg, gif, png, pdf, doc or xls (case-insensitive).
 *
 * @param string $url Link as found in the page.
 * @return bool True when the link should be followed.
 */
protected function _isValidPageLink($url)
{
    $base = $this->_getHostName();
    $firstChar = substr($url, 0, 1);

    $pointsToThisSite = substr($url, 0, strlen($base)) == $base
        || $firstChar == '/'
        || $firstChar == '?';

    if (!$pointsToThisSite) {
        return false;
    }

    // Links to images and documents are not pages; skip them.
    return !@preg_match('#^(.+)\.(jpg|gif|png|pdf|doc|xls)$#i', $url);
}
А вот PHP-шаблон, который выводит результаты поиска. Примеры работы с Lucene, которые мне удалось найти, совершенно не похожи на то, что используется здесь. Я впервые работаю с Zend Framework.
<?php /* Search form; the submitted query is echoed back HTML-escaped. */ ?>
<form method="get" action="/search.html" class="searchForm" enctype="application/x-www-form-urlencoded" id="searchForm">
<fieldset>
<input type="text" id="search_text" name="q" value="<?php echo $this->escape($this->query) ?>"><br>
<input type="submit" value="search" id="search" name="search">
</fieldset>
</form>
<h1>Search results</h1>
<?php if (empty($this->searchString)): ?>
<p><strong>Please write text of minimal length of <?php echo $this->escape($this->minimumLength) ?></strong></p>
<?php else: ?>
<?php if (count($this->products)): ?>
<?php foreach ($this->products as $product): ?>
<?php /* Product page URL: /<permalink of name>,<product id>,<category id>,p.html */ ?>
<?php $link = '/' . $this->permalink($product->product_name) . ',' . $product->product_id . ',' . $product->category_id . ',p.html'; ?>
<div class="productlist clearfix">
<a href="<?= $this->escape($link) ?>" class="clearfix">
<div class="txt">
<?php /* Name and number are plain text, so they are escaped before output. */ ?>
<h2><?= $this->escape($product->product_name) ?><?php if (strlen($product->product_number) > 2): ?><small> [ <?= $this->escape($product->product_number) ?> ]</small><?php endif; ?></h2>
<?php /* NOTE(review): product_intro2 is printed unescaped, presumably because it holds trusted HTML markup; confirm. */ ?>
<p><?= stripslashes($product->product_intro2) ?></p>
</div>
<div class="pic">
<?php if ($product->has_media): ?>
<?php echo $this->thumb($product->media_src, 110, 110) ?>
<?php endif; ?>
<p style="text-align: center;">More</p>
</div>
</a>
</div>
<hr/>
<?php endforeach; ?>
<?php else: ?>
<p>0 product was found</p>
<?php endif; ?>
<div style="clear: both;">
<?php /* Echoes the products collection itself (string conversion); NOTE(review): presumably renders pagination controls; confirm. */ ?>
<?php echo $this->products; ?>
</div><?php endif ?>
Задача ещё не решена.
Других решений пока нет …