Commit b18b4bc1 authored by Valery Sizov

Merge branch 'elastic_tools' into 'master'

More advanced elastic indexer

https://gitlab.com/gitlab-com/operations/issues/56

See merge request !152
parents 798e6ccf a23edfed
# Per-project record of indexing progress. The repository indexing rake task
# looks one up with `IndexStatus.find_or_create_by(project: project)` and
# stores `last_commit` and `indexed_at` on it.
class IndexStatus < ActiveRecord::Base
belongs_to :project
# NOTE(review): the two `validates` lines below look like the before/after
# pair of a diff hunk (the second adds `presence: true`) — confirm that only
# the second one is meant to be in effect.
validates :project_id, uniqueness: true
validates :project_id, uniqueness: true, presence: true
end
class CreateIndexStatuses < ActiveRecord::Migration
def change
create_table :index_statuses do |t|
t.integer :project_id
t.integer :project_id, null: false
t.datetime :indexed_at
t.text :note
t.string :last_commit
......@@ -9,6 +9,6 @@ class CreateIndexStatuses < ActiveRecord::Migration
t.timestamps null: false
end
add_index :index_statuses, :project_id
add_index :index_statuses, :project_id, unique: true
end
end
......@@ -447,7 +447,7 @@ ActiveRecord::Schema.define(version: 20160129075828) do
add_index "identities", ["user_id"], name: "index_identities_on_user_id", using: :btree
create_table "index_statuses", force: :cascade do |t|
t.integer "project_id"
t.integer "project_id", null: false
t.datetime "indexed_at"
t.text "note"
t.string "last_commit"
......@@ -455,7 +455,7 @@ ActiveRecord::Schema.define(version: 20160129075828) do
t.datetime "updated_at", null: false
end
add_index "index_statuses", ["project_id"], name: "index_index_statuses_on_project_id", using: :btree
add_index "index_statuses", ["project_id"], name: "index_index_statuses_on_project_id", unique: true, using: :btree
create_table "issues", force: :cascade do |t|
t.string "title"
......
......@@ -114,15 +114,19 @@ sudo gitlab-rake gitlab:elastic:index_repositories
bundle exec rake gitlab:elastic:index_repositories RAILS_ENV=production
```
If you want to run several tasks in parallel (probably in separate terminal windows) you can pass parameters `ID_FROM` and `ID_TO` like this:
If you want to run several tasks in parallel (probably in separate terminal
windows) you can provide the `ID_FROM` and `ID_TO` parameters:
```
ID_FROM=1001 ID_TO=2000 sudo gitlab-rake gitlab:elastic:index_repositories
```
Both parameters are optional. Keep in mind also that this task will skip repositories (and certain commits) you have already indexed. It stores the last commit SHA of every indexed repository in the database.
As an example, if you have 3000 of repositories and you want to run tree separate indexer your commands would be like:
Both parameters are optional. Keep in mind that this task will skip repositories
(and certain commits) that have already been indexed. It stores the last commit
SHA of every indexed repository in the database. As an example, if you have
3,000 repositories and you want to run three separate indexing tasks, you might
run:
```
ID_TO=1000 sudo gitlab-rake gitlab:elastic:index_repositories
......@@ -139,7 +143,9 @@ sudo gitlab-rake gitlab:elastic:index_wikis
# Installations from source
bundle exec rake gitlab:elastic:index_wikis RAILS_ENV=production
```
Wiki indexer also supports `ID_FROM` and `ID_TO` parameters if you want to limit a project set.
The wiki indexer also supports the `ID_FROM` and `ID_TO` parameters if you want
to limit a project set.
To index all database entities:
......@@ -213,27 +219,31 @@ time drop.
curl -XPOST 'http://localhost:9200/_forcemerge?max_num_segments=5'
```
To minimize a downtime of search feature we recommend next sequence of actions:
1. Configure elastic search in gitlab.yml or gitlab.rb for omnibus installations but
do not enable it, just set a host and port.
To minimize downtime of the search feature, we recommend the following:
1. Create empty indexes. Run
```
# Omnibus installations
sudo gitlab-rake gitlab:elastic:create_empty_indexes
1. Configure Elasticsearch in `gitlab.yml` (or `gitlab.rb` for Omnibus
installations), but do not enable it yet; just set the host and port.
# Installations from source
bundle exec rake gitlab:elastic:create_empty_indexes
```
1. Create empty indexes:
1. Index all repositories by `gitlab:elastic:index_repositories` rake task(see above). Probably you will do it in parallel
```
# Omnibus installations
sudo gitlab-rake gitlab:elastic:create_empty_indexes
1. Enable elasticsearch and restart GitLab application.
# Installations from source
bundle exec rake gitlab:elastic:create_empty_indexes
```
1. Run indexers for database, wikis and repositories. By running reposotory indexer twise you will be sure that eveything is indexed because some commits could be pushed while you performed initial indexing. And don't worry, repository indexer will skip repositories and commits that are already indexed, in other words, it will be much faster this time.
1. Index all repositories using the `gitlab:elastic:index_repositories` Rake
task (see above). You'll probably want to do this in parallel.
1. Enable Elasticsearch and restart GitLab.
1. Run the indexers for the database, wikis, and repositories. Running the
repository indexer a second time ensures that everything is indexed, because
commits may have been pushed while the initial indexing was in progress. The
repository indexer skips repositories and commits that are already indexed,
so this run will be much shorter than the first.
[ee-109]: https://gitlab.com/gitlab-org/gitlab-ee/merge_requests/109 "Elasticsearch Merge Request"
[elasticsearch]: https://www.elastic.co/products/elasticsearch "Elasticsearch website"
......
......@@ -11,9 +11,9 @@ namespace :gitlab do
puts "Indexing #{project.name_with_namespace} (ID=#{project.id})..."
index_status = IndexStatus.find_or_create_by(project: project)
heads_sha = project.repository.commit.sha
head_sha = project.repository.commit.sha
if index_status.last_commit == heads_sha
if index_status.last_commit == head_sha
puts "Skipped".yellow
next
end
......@@ -24,7 +24,7 @@ namespace :gitlab do
# During indexing the new commits can be pushed,
# the last_commit parameter only indicates that at least this commit is in index
index_status.update(last_commit: heads_sha, indexed_at: DateTime.now)
index_status.update(last_commit: head_sha, indexed_at: DateTime.now)
puts "Done!".green
rescue StandardError => e
puts "#{e.message}, trace - #{e.backtrace}"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment