Commit 59eb9ad2 authored by Andreas Brandl's avatar Andreas Brandl

Use plain SQL schema

Relates to https://gitlab.com/gitlab-org/gitlab/issues/29465
parent 27b57558
---
title: Convert schema to plain SQL using structure.sql
merge_request: 22808
author:
type: other
......@@ -144,7 +144,7 @@ module Gitlab
# Use SQL instead of Active Record's schema dumper when creating the database.
# This is necessary if your schema can't be completely dumped by the schema dumper,
# like if you have constraints or database-specific column types
# config.active_record.schema_format = :sql
config.active_record.schema_format = :sql
# Configure webpack
config.webpack.config_file = "config/webpack.config.js"
......
......@@ -31,7 +31,7 @@ MSG
DATABASE_APPROVED_LABEL = 'database::approved'
non_geo_db_schema_updated = !git.modified_files.grep(%r{\Adb/schema\.rb}).empty?
non_geo_db_schema_updated = !git.modified_files.grep(%r{\Adb/structure\.sql}).empty?
geo_db_schema_updated = !git.modified_files.grep(%r{\Aee/db/geo/schema\.rb}).empty?
non_geo_migration_created = !git.added_files.grep(%r{\A(db/(post_)?migrate)/}).empty?
......@@ -40,7 +40,7 @@ geo_migration_created = !git.added_files.grep(%r{\Aee/db/geo/(post_)?migrate/}).
format_str = gitlab_danger.ci? ? SCHEMA_NOT_UPDATED_MESSAGE_FULL : SCHEMA_NOT_UPDATED_MESSAGE_SHORT
if non_geo_migration_created && !non_geo_db_schema_updated
warn format(format_str, migrations: 'migrations', schema: gitlab_danger.html_link("db/schema.rb"))
warn format(format_str, migrations: 'migrations', schema: gitlab_danger.html_link("db/structure.sql"))
end
if geo_migration_created && !geo_db_schema_updated
......
......@@ -81,7 +81,7 @@ the following preparations into account.
#### Preparation when adding migrations
- Ensure `db/schema.rb` is updated.
- Ensure `db/structure.sql` is updated.
- Make migrations reversible by using the `change` method or include a `down` method when using `up`.
- Include either a rollback procedure or describe how to rollback changes.
- Add the output of the migration(s) to the MR description.
......@@ -137,7 +137,7 @@ the following preparations into account.
- [Check indexes are present for foreign keys](migration_style_guide.md#adding-foreign-key-constraints)
- Ensure that migrations execute in a transaction or only contain
concurrent index/foreign key helpers (with transactions disabled)
- Check consistency with `db/schema.rb` and that migrations are [reversible](migration_style_guide.md#reversibility)
- Check consistency with `db/structure.sql` and that migrations are [reversible](migration_style_guide.md#reversibility)
- Check queries timing (If any): Queries executed in a migration
need to fit comfortably within `15s` - preferably much less than that - on GitLab.com.
- For column removals, make sure the column has been [ignored in a previous release](what_requires_downtime.md#dropping-columns)
......
......@@ -12,7 +12,7 @@ extra security. The Omnibus reconfigure script contains commands that give
write access to the `git` user only where needed.
For example, the `git` user is allowed to write in the `log/` directory, in
`public/uploads`, and they are allowed to rewrite the `db/schema.rb` file.
`public/uploads`, and they are allowed to rewrite the `db/structure.sql` file.
In other cases, the reconfigure script tricks GitLab into not trying to write a
file. For instance, GitLab will generate a `.secret` file if it cannot find one
......
......@@ -18,7 +18,7 @@ users. We will discuss each component below.
The PostgreSQL database holds all metadata for projects, issues, merge
requests, users, etc. The schema is managed by the Rails application
[db/schema.rb](https://gitlab.com/gitlab-org/gitlab/blob/master/db/schema.rb).
[db/structure.sql](https://gitlab.com/gitlab-org/gitlab/blob/master/db/structure.sql).
GitLab Web/API servers and Sidekiq nodes talk directly to the database via a
Rails object relational model (ORM). Most SQL queries are accessed via this
......
......@@ -31,7 +31,7 @@ current version with `cat VERSION`).
cd /home/git/gitlab
sudo -u git -H git fetch --all
sudo -u git -H git checkout -- Gemfile.lock db/schema.rb locale
sudo -u git -H git checkout -- Gemfile.lock db/structure.sql locale
sudo -u git -H git checkout LATEST_TAG -b LATEST_TAG
```
......
......@@ -165,7 +165,7 @@ sudo make prefix=/usr/local install
cd /home/git/gitlab
sudo -u git -H git fetch --all --prune
sudo -u git -H git checkout -- db/schema.rb # local changes will be restored automatically
sudo -u git -H git checkout -- db/structure.sql # local changes will be restored automatically
sudo -u git -H git checkout -- locale
```
......
# frozen_string_literal: true
module Gitlab
module Database
class SchemaCleaner
attr_reader :original_schema
def initialize(original_schema)
@original_schema = original_schema
end
def clean(io)
structure = original_schema.dup
# Postgres compat fix for PG 9.6 (which doesn't support (AS datatype) syntax for sequences)
structure.gsub!(/CREATE SEQUENCE [^.]+\.\S+\n(\s+AS integer\n)/) { |m| m.gsub(Regexp.last_match[1], '') }
# Also a PG 9.6 compatibility fix, see below.
structure.gsub!(/^CREATE EXTENSION IF NOT EXISTS plpgsql.*/, '')
structure.gsub!(/^COMMENT ON EXTENSION.*/, '')
# Remove noise
structure.gsub!(/^SET.+/, '')
structure.gsub!(/^SELECT pg_catalog\.set_config\('search_path'.+/, '')
structure.gsub!(/^--.*/, "\n")
structure.gsub!(/\n{3,}/, "\n\n")
io << "SET search_path=public;\n\n"
# Adding plpgsql explicitly is again a compatibility fix for PG 9.6
# In more recent versions of pg_dump, the extension isn't explicitly dumped anymore.
# We use PG 9.6 still on CI and for schema checks - here this is still the case.
io << <<~SQL.strip
CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog;
SQL
io << structure
nil
end
end
end
end
......@@ -4,7 +4,7 @@
# ActiveRecord access. We rely on the initial values being true or false to
# determine whether to define a predicate method because for a newly-added
# column that has not been migrated yet, there is no way to determine the
# column type without parsing db/schema.rb.
# column type without parsing db/structure.sql.
module Gitlab
class FakeApplicationSettings < OpenStruct
include ApplicationSettingImplementation
......
......@@ -50,7 +50,7 @@ namespace :gitlab do
else
# Add post-migrate paths to ensure we mark all migrations as up
Gitlab::Database.add_post_migrate_path_to_rails(force: true)
Rake::Task['db:schema:load'].invoke
Rake::Task['db:structure:load'].invoke
Rake::Task['db:seed_fu'].invoke
end
end
......@@ -78,5 +78,20 @@ namespace :gitlab do
else
task :setup_ee
end
desc 'This adjusts and cleans db/structure.sql - it runs after db:structure:dump'
task :clean_structure_sql do
structure_file = 'db/structure.sql'
schema = File.read(structure_file)
File.open(structure_file, 'wb+') do |io|
Gitlab::Database::SchemaCleaner.new(schema).clean(io)
end
end
# Inform Rake that gitlab:schema:fix_structure_sql should be run every time rake db:structure:dump is run
Rake::Task['db:structure:dump'].enhance do
Rake::Task['gitlab:db:clean_structure_sql'].invoke
end
end
end
#!/bin/sh
schema_changed() {
if [ ! -z "$(git diff --name-only -- db/schema.rb)" ]; then
printf "db/schema.rb after rake db:migrate:reset is different from one in the repository"
if [ ! -z "$(git diff --name-only -- db/structure.sql)" ]; then
printf "db/structure.sql after rake db:migrate:reset is different from one in the repository"
printf "The diff is as follows:\n"
diff=$(git diff -p --binary -- db/schema.rb)
diff=$(git diff -p --binary -- db/structure.sql)
printf "%s" "$diff"
exit 1
else
printf "db/schema.rb after rake db:migrate:reset matches one in the repository"
printf "db/structure.sql after rake db:migrate:reset matches one in the repository"
fi
}
......
......@@ -20,7 +20,7 @@ function setup_db_user_only() {
function setup_db() {
setup_db_user_only
bundle exec rake db:drop db:create db:schema:load db:migrate
bundle exec rake db:drop db:create db:structure:load db:migrate
bundle exec rake gitlab:db:setup_ee
}
......
SET statement_timeout = 0;
SET lock_timeout = 0;
SET idle_in_transaction_session_timeout = 0;
SET client_encoding = 'UTF8';
SET standard_conforming_strings = on;
SELECT pg_catalog.set_config('search_path', '', false);
SET check_function_bodies = false;
SET xmloption = content;
SET client_min_messages = warning;
SET row_security = off;
--
-- Name: pg_trgm; Type: EXTENSION; Schema: -; Owner: -
--
CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public;
--
-- Name: EXTENSION pg_trgm; Type: COMMENT; Schema: -; Owner: -
--
COMMENT ON EXTENSION pg_trgm IS 'text similarity measurement and index searching based on trigrams';
SET default_tablespace = '';
SET default_with_oids = false;
--
-- Name: abuse_reports; Type: TABLE; Schema: public; Owner: -
--
CREATE TABLE public.abuse_reports (
id integer NOT NULL,
reporter_id integer,
user_id integer,
message text,
created_at timestamp without time zone,
updated_at timestamp without time zone,
message_html text,
cached_markdown_version integer
);
--
-- Name: abuse_reports_id_seq; Type: SEQUENCE; Schema: public; Owner: -
--
CREATE SEQUENCE public.abuse_reports_id_seq
AS integer
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
--
-- Name: abuse_reports id; Type: DEFAULT; Schema: public; Owner: -
--
ALTER TABLE ONLY public.abuse_reports ALTER COLUMN id SET DEFAULT nextval('public.abuse_reports_id_seq'::regclass);
--
--
-- Name: abuse_reports abuse_reports_pkey; Type: CONSTRAINT; Schema: public; Owner: -
--
ALTER TABLE ONLY public.abuse_reports
ADD CONSTRAINT abuse_reports_pkey PRIMARY KEY (id);
--
-- Name: index_abuse_reports_on_user_id; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX index_abuse_reports_on_user_id ON public.abuse_reports USING btree (user_id);
INSERT INTO "schema_migrations" (version) VALUES
('20200305121159'),
('20200306095654'),
('20200306160521'),
('20200306170211'),
('20200306170321'),
('20200306170531'),
('20200309140540'),
('20200309195209'),
('20200309195710'),
('20200310132654'),
('20200310135823');
SET search_path=public;
CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog;
CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public;
CREATE TABLE public.abuse_reports (
id integer NOT NULL,
reporter_id integer,
user_id integer,
message text,
created_at timestamp without time zone,
updated_at timestamp without time zone,
message_html text,
cached_markdown_version integer
);
CREATE SEQUENCE public.abuse_reports_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
ALTER TABLE ONLY public.abuse_reports ALTER COLUMN id SET DEFAULT nextval('public.abuse_reports_id_seq'::regclass);
ALTER TABLE ONLY public.abuse_reports
ADD CONSTRAINT abuse_reports_pkey PRIMARY KEY (id);
CREATE INDEX index_abuse_reports_on_user_id ON public.abuse_reports USING btree (user_id);
INSERT INTO "schema_migrations" (version) VALUES
('20200305121159'),
('20200306095654'),
('20200306160521'),
('20200306170211'),
('20200306170321'),
('20200306170531'),
('20200309140540'),
('20200309195209'),
('20200309195710'),
('20200310132654'),
('20200310135823');
......@@ -229,6 +229,7 @@ describe Gitlab::Danger::Helper do
'ee/FOO_VERSION' | :unknown
'db/schema.rb' | :database
'db/structure.sql' | :database
'db/migrate/foo' | :database
'db/post_migrate/foo' | :database
'ee/db/migrate/foo' | :database
......
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::Database::SchemaCleaner do
let(:example_schema) { fixture_file(File.join('gitlab', 'database', 'structure_example.sql')) }
let(:io) { StringIO.new }
subject do
described_class.new(example_schema).clean(io)
io.string
end
it 'removes comments on extensions' do
expect(subject).not_to include('COMMENT ON EXTENSION')
end
it 'includes the plpgsql extension' do
expect(subject).to include('CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog;')
end
it 'sets the search_path' do
expect(subject.split("\n").first).to eq('SET search_path=public;')
end
it 'cleans up the full schema as expected (blackbox test with example)' do
expected_schema = fixture_file(File.join('gitlab', 'database', 'structure_example_cleaned.sql'))
expect(subject).to eq(expected_schema)
end
end
......@@ -2,27 +2,29 @@
require 'spec_helper'
# Check consistency of db/schema.rb version, migrations' timestamps, and the latest migration timestamp
# Check consistency of db/structure.sql version, migrations' timestamps, and the latest migration timestamp
# stored in the database's schema_migrations table.
describe ActiveRecord::Schema, schema: :latest do
let(:latest_migration_timestamp) do
let(:all_migrations) do
migrations_paths = %w[db/migrate db/post_migrate]
.map { |path| Rails.root.join(*path, '*') }
migrations = Dir[*migrations_paths]
migrations.map { |migration| File.basename(migration).split('_').first.to_i }.max
migrations.map { |migration| File.basename(migration).split('_').first.to_i }.sort
end
it '> schema version equals last migration timestamp' do
defined_schema_version = File.open(Rails.root.join('db', 'schema.rb')) do |file|
file.find { |line| line =~ /ActiveRecord::Schema.define/ }
end.match(/(\d{4}_\d{2}_\d{2}_\d{6})/)[0].to_i
expect(defined_schema_version).to eq(latest_migration_timestamp)
let(:latest_migration_timestamp) do
all_migrations.max
end
it '> schema version should equal the latest migration timestamp stored in schema_migrations table' do
expect(latest_migration_timestamp).to eq(ActiveRecord::Migrator.current_version.to_i)
end
it 'the schema_migrations table contains all schema versions' do
versions = ActiveRecord::Base.connection.execute('SELECT version FROM schema_migrations ORDER BY version').map { |m| Integer(m['version']) }
expect(versions).to eq(all_migrations)
end
end
......@@ -23,11 +23,7 @@ describe 'gitlab:app namespace rake task' do
end
before(:all) do
Rake.application.rake_require 'tasks/gitlab/helpers'
Rake.application.rake_require 'tasks/gitlab/backup'
Rake.application.rake_require 'tasks/gitlab/shell'
Rake.application.rake_require 'tasks/gitlab/db'
Rake.application.rake_require 'tasks/cache'
Rails.application.load_tasks
# empty task as env is already loaded
Rake::Task.define_task :environment
......
......@@ -16,7 +16,7 @@ describe 'gitlab:db namespace rake task' do
before do
# Stub out db tasks
allow(Rake::Task['db:migrate']).to receive(:invoke).and_return(true)
allow(Rake::Task['db:schema:load']).to receive(:invoke).and_return(true)
allow(Rake::Task['db:structure:load']).to receive(:invoke).and_return(true)
allow(Rake::Task['db:seed_fu']).to receive(:invoke).and_return(true)
end
......@@ -24,14 +24,14 @@ describe 'gitlab:db namespace rake task' do
it 'invokes db:migrate when schema has already been loaded' do
allow(ActiveRecord::Base.connection).to receive(:tables).and_return(%w[table1 table2])
expect(Rake::Task['db:migrate']).to receive(:invoke)
expect(Rake::Task['db:schema:load']).not_to receive(:invoke)
expect(Rake::Task['db:structure:load']).not_to receive(:invoke)
expect(Rake::Task['db:seed_fu']).not_to receive(:invoke)
expect { run_rake_task('gitlab:db:configure') }.not_to raise_error
end
it 'invokes db:shema:load and db:seed_fu when schema is not loaded' do
allow(ActiveRecord::Base.connection).to receive(:tables).and_return([])
expect(Rake::Task['db:schema:load']).to receive(:invoke)
expect(Rake::Task['db:structure:load']).to receive(:invoke)
expect(Rake::Task['db:seed_fu']).to receive(:invoke)
expect(Rake::Task['db:migrate']).not_to receive(:invoke)
expect { run_rake_task('gitlab:db:configure') }.not_to raise_error
......@@ -39,7 +39,7 @@ describe 'gitlab:db namespace rake task' do
it 'invokes db:shema:load and db:seed_fu when there is only a single table present' do
allow(ActiveRecord::Base.connection).to receive(:tables).and_return(['default'])
expect(Rake::Task['db:schema:load']).to receive(:invoke)
expect(Rake::Task['db:structure:load']).to receive(:invoke)
expect(Rake::Task['db:seed_fu']).to receive(:invoke)
expect(Rake::Task['db:migrate']).not_to receive(:invoke)
expect { run_rake_task('gitlab:db:configure') }.not_to raise_error
......@@ -48,7 +48,7 @@ describe 'gitlab:db namespace rake task' do
it 'does not invoke any other rake tasks during an error' do
allow(ActiveRecord::Base).to receive(:connection).and_raise(RuntimeError, 'error')
expect(Rake::Task['db:migrate']).not_to receive(:invoke)
expect(Rake::Task['db:schema:load']).not_to receive(:invoke)
expect(Rake::Task['db:structure:load']).not_to receive(:invoke)
expect(Rake::Task['db:seed_fu']).not_to receive(:invoke)
expect { run_rake_task('gitlab:db:configure') }.to raise_error(RuntimeError, 'error')
# unstub connection so that the database cleaner still works
......@@ -57,8 +57,8 @@ describe 'gitlab:db namespace rake task' do
it 'does not invoke seed after a failed schema_load' do
allow(ActiveRecord::Base.connection).to receive(:tables).and_return([])
allow(Rake::Task['db:schema:load']).to receive(:invoke).and_raise(RuntimeError, 'error')
expect(Rake::Task['db:schema:load']).to receive(:invoke)
allow(Rake::Task['db:structure:load']).to receive(:invoke).and_raise(RuntimeError, 'error')
expect(Rake::Task['db:structure:load']).to receive(:invoke)
expect(Rake::Task['db:seed_fu']).not_to receive(:invoke)
expect(Rake::Task['db:migrate']).not_to receive(:invoke)
expect { run_rake_task('gitlab:db:configure') }.to raise_error(RuntimeError, 'error')
......@@ -79,7 +79,7 @@ describe 'gitlab:db namespace rake task' do
it 'adds post deployment migrations before schema load if the schema is not already loaded' do
allow(ActiveRecord::Base.connection).to receive(:tables).and_return([])
expect(Gitlab::Database).to receive(:add_post_migrate_path_to_rails).and_call_original
expect(Rake::Task['db:schema:load']).to receive(:invoke)
expect(Rake::Task['db:structure:load']).to receive(:invoke)
expect(Rake::Task['db:seed_fu']).to receive(:invoke)
expect(Rake::Task['db:migrate']).not_to receive(:invoke)
expect { run_rake_task('gitlab:db:configure') }.not_to raise_error
......@@ -90,7 +90,7 @@ describe 'gitlab:db namespace rake task' do
allow(ActiveRecord::Base.connection).to receive(:tables).and_return(%w[table1 table2])
expect(Rake::Task['db:migrate']).to receive(:invoke)
expect(Gitlab::Database).not_to receive(:add_post_migrate_path_to_rails)
expect(Rake::Task['db:schema:load']).not_to receive(:invoke)
expect(Rake::Task['db:structure:load']).not_to receive(:invoke)
expect(Rake::Task['db:seed_fu']).not_to receive(:invoke)
expect { run_rake_task('gitlab:db:configure') }.not_to raise_error
expect(rails_paths['db/migrate'].include?(File.join(Rails.root, 'db', 'post_migrate'))).to be(false)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment