Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 92 |
|
0.00% |
0 / 13 |
CRAP | |
0.00% |
0 / 1 |
| ElasticSearch | |
0.00% |
0 / 92 |
|
0.00% |
0 / 13 |
992 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
| run | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
| readTopics | |
0.00% |
0 / 23 |
|
0.00% |
0 / 1 |
20 | |||
| readServices | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
12 | |||
| readLocations | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
12 | |||
| readAuthorities | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
12 | |||
| getConnection | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
6 | |||
| getIndex | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
12 | |||
| setHost | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| setPort | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| setTransport | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| setAlias | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
| dropOldIndex | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
30 | |||
| 1 | <?php |
| 2 | |
| 3 | /** |
| 4 | * @package Zmsdldb |
| 5 | * @copyright BerlinOnline Stadtportal GmbH & Co. KG |
| 6 | **/ |
| 7 | |
| 8 | namespace BO\Zmsdldb\Indexer; |
| 9 | |
| 10 | use BO\Zmsdldb\FileAccess; |
| 11 | |
/**
 * Index DLDB data into ElasticSearch
 *
 * Reads topics, services, locations and authorities through FileAccess and
 * writes them as documents into a freshly named index
 * (ES_INDEX_PREFIX followed by the current date formatted with ES_INDEX_DATE).
 * The index can afterwards be published with setAlias() and superseded
 * indices removed with dropOldIndex().
 */
class ElasticSearch
{
    const ES_INDEX_PREFIX = 'dldb-';

    const ES_INDEX_DATE = 'Ymd-His';

    /**
     * Locales for which services, locations and authorities are indexed
     *
     * @var String[] $localeList
     */
    protected $localeList = array(
        'de',
        'en'
    );

    /**
     * Access to DLDB files
     *
     * @var FileAccess $dldb
     */
    protected $dldb;

    /**
     * hostname for ES
     *
     * @var String $host
     */
    protected $host = 'localhost';

    /**
     * port for ES
     *
     * @var String $port
     */
    protected $port = '9200';

    /**
     * transport method for ES
     *
     * @var String $transport
     */
    protected $transport = 'Http';

    /**
     * The client used to talk to elastic search.
     *
     * @var \Elastica\Client
     */
    protected $connection;

    /**
     * Index from elastic search
     *
     * @var \Elastica\Index $index
     */
    protected $index;

    /**
     * Due to backward compatibility, the first parameter has two possible meanings:
     * a directory to load all DLDB files from, or the path of the locations file.
     *
     * @param String $importOrLocationFile
     *            import directory or locations file
     * @param String $servicesFile
     *            (optional) only passed on if the first parameter is a file
     *
     * @throws \Exception if the first parameter is neither a directory nor a file
     */
    public function __construct($importOrLocationFile, $servicesFile = null)
    {
        if (is_dir($importOrLocationFile)) {
            $this->dldb = new FileAccess();
            $this->dldb->loadFromPath($importOrLocationFile);
        } elseif (is_file($importOrLocationFile)) {
            $this->dldb = new FileAccess($importOrLocationFile, $servicesFile);
        } else {
            throw new \Exception("Invalid import parameters for ElasticSearch indexer");
        }
    }

    /**
     * Index topics, services, locations and authorities (in this order)
     *
     * @return self
     */
    public function run()
    {
        $this->readTopics();
        $this->readServices();
        $this->readLocations();
        $this->readAuthorities();
        return $this;
    }

    /**
     * Index all topics into the type "topic" and the navigation links of
     * linked topics into the type "links"
     *
     * @return \Elastica\Document[] the topic documents (link documents are not included)
     */
    protected function readTopics()
    {
        $esTypeTopic = $this->getIndex()->getType('topic');
        $esTypeLinks = $this->getIndex()->getType('links');
        $docs = array();
        $links = array();
        foreach ($this->dldb->fromTopic()->fetchList() as $topic) {
            // topic ids always get the "de" prefix, topics are not read per locale
            $docs[] = new \Elastica\Document('de' . $topic['id'], $topic);
            if ($topic->isLinked()) {
                $topicLink = array(
                    "rank" => 0,
                    "link" => "/" . $topic["path"] . "/",
                    "name" => $topic['name'],
                    // NOTE(review): key spelling kept as is, it is part of the stored document
                    "hightlight" => 0,
                    "meta" => array(
                        "keywords" => $topic['meta']['keywords'],
                        "titles" => $topic['meta']['titles']
                    )
                );
                $links[] = new \Elastica\Document($topicLink['link'], $topicLink);
                foreach ($topic['links'] as $subLink) {
                    $links[] = new \Elastica\Document($subLink['link'], $subLink);
                }
            }
        }
        $this->storeDocuments($esTypeTopic, $docs);
        $this->storeDocuments($esTypeLinks, $links);
        return $docs;
    }

    /**
     * Index the services of every locale into the type "service"
     *
     * @return \Elastica\Document[] the service documents
     */
    protected function readServices()
    {
        $esType = $this->getIndex()->getType('service');
        $docs = array();
        foreach ($this->localeList as $locale) {
            foreach ($this->dldb->fromService($locale)->fetchList() as $service) {
                // the locale prefix keeps ids of different locales apart
                $docs[] = new \Elastica\Document($locale . $service['id'], $service);
            }
        }
        $this->storeDocuments($esType, $docs);
        return $docs;
    }

    /**
     * Index the locations of every locale into the type "location"
     *
     * @return \Elastica\Document[] the location documents
     */
    protected function readLocations()
    {
        $esType = $this->getIndex()->getType('location');
        $docs = array();
        foreach ($this->localeList as $locale) {
            foreach ($this->dldb->fromLocation($locale)->fetchList() as $location) {
                // the locale prefix keeps ids of different locales apart
                $docs[] = new \Elastica\Document($locale . $location['id'], $location);
            }
        }
        $this->storeDocuments($esType, $docs);
        return $docs;
    }

    /**
     * Index the authorities of every locale into the type "authority"
     *
     * Unlike services and locations, authorities are read with fetchSource().
     *
     * @return \Elastica\Document[] the authority documents
     */
    protected function readAuthorities()
    {
        $esType = $this->getIndex()->getType('authority');
        $docs = array();
        foreach ($this->localeList as $locale) {
            foreach ($this->dldb->fromAuthority($locale)->fetchSource() as $authority) {
                // the locale prefix keeps ids of different locales apart
                $docs[] = new \Elastica\Document($locale . $authority['id'], $authority);
            }
        }
        $this->storeDocuments($esType, $docs);
        return $docs;
    }

    /**
     * Add documents to a type, skipping the request if there is nothing to add
     *
     * Elastica rejects an empty document list with an exception, which would
     * abort the whole indexing run, e.g. if no topic is linked.
     *
     * @param \Elastica\Type $esType
     * @param \Elastica\Document[] $docs
     *
     * @return bool true if the documents were sent, false if the list was empty
     */
    protected function storeDocuments($esType, array $docs)
    {
        if (0 === count($docs)) {
            return false;
        }
        $esType->addDocuments($docs);
        return true;
    }

    /**
     * Create the client on first use, later calls return the same instance
     *
     * Changes via setHost(), setPort() or setTransport() have no effect once
     * the client was created.
     *
     * @return \Elastica\Client
     */
    protected function getConnection()
    {
        if (null === $this->connection) {
            $this->connection = new \Elastica\Client(array(
                'host' => $this->host,
                'port' => $this->port,
                'transport' => $this->transport
            ));
        }
        return $this->connection;
    }

    /**
     * Get the index for this run, creating it with the settings from
     * ElasticSearch_Index.json if it does not exist yet
     *
     * The index name contains the date of the first call, the index is
     * cached afterwards.
     *
     * @return \Elastica\Index
     *
     * @throws \RuntimeException if the index settings can not be loaded
     */
    protected function getIndex()
    {
        if (null === $this->index) {
            $indexName = self::ES_INDEX_PREFIX . date(self::ES_INDEX_DATE);
            $index = $this->getConnection()->getIndex($indexName);
            if (! $index->exists()) {
                $index->create($this->loadIndexSettings());
            }
            // cache only after a successful creation, so a failure is not hidden on the next call
            $this->index = $index;
        }
        return $this->index;
    }

    /**
     * Read and decode the index settings stored next to this class
     *
     * @return array
     *
     * @throws \RuntimeException if the file is not readable or does not contain a JSON object/array
     */
    protected function loadIndexSettings()
    {
        $settingsFile = __DIR__ . DIRECTORY_SEPARATOR . 'ElasticSearch_Index.json';
        if (! is_readable($settingsFile)) {
            throw new \RuntimeException("Could not read ElasticSearch index settings: " . $settingsFile);
        }
        $json = file_get_contents($settingsFile);
        if (false === $json) {
            throw new \RuntimeException("Could not read ElasticSearch index settings: " . $settingsFile);
        }
        $indexSettings = json_decode($json, true);
        if (! is_array($indexSettings)) {
            throw new \RuntimeException("Invalid JSON in ElasticSearch index settings: " . $settingsFile);
        }
        return $indexSettings;
    }

    /**
     *
     * @param String $host
     *            hostname for ES
     *
     * @return self
     */
    public function setHost($host)
    {
        $this->host = $host;
        return $this;
    }

    /**
     *
     * @param String $port
     *            port for ES
     *
     * @return self
     */
    public function setPort($port)
    {
        $this->port = $port;
        return $this;
    }

    /**
     *
     * @param String $transport
     *            transport method for ES
     *
     * @return self
     */
    public function setTransport($transport)
    {
        $this->transport = $transport;
        return $this;
    }

    /**
     * refresh index and add alias
     *
     * @param String $alias
     *            alias name, passed to addAlias() with the second argument set to true
     *
     * @return self
     */
    public function setAlias($alias)
    {
        $index = $this->getIndex();
        $index->refresh();
        $index->addAlias($alias, true);
        return $this;
    }

    /**
     * Drop all old indice with the prefix ES_INDEX_PREFIX and no alias
     *
     * The index of the current run is never dropped.
     *
     * @return self
     */
    public function dropOldIndex()
    {
        $client = $this->getConnection();
        $indexList = $client->getStatus()->getIndexNames();
        $currentIndex = $this->getIndex()->getName();
        foreach ($indexList as $index) {
            $indexName = (string) $index;
            if ($currentIndex === $indexName || 0 !== strpos($indexName, self::ES_INDEX_PREFIX)) {
                continue;
            }
            $candidateIndex = $client->getIndex($indexName);
            // an index which still has an alias is in use and has to stay
            if (! $candidateIndex->getStatus()->getAliases()) {
                $candidateIndex->delete();
            }
        }
        return $this;
    }
}