# Specs for Gitlab::Git::Storage::CircuitBreaker (circuit_breaker_spec.rb).
require 'spec_helper'

describe Gitlab::Git::Storage::CircuitBreaker, clean_gitlab_redis_shared_state: true, broken_storage: true do
  # Fixtures shared by every example in this file.
  # Name of the storage the breaker guards; nested contexts override this.
  let(:storage_name) { 'default' }
  # The circuit breaker under test, built for the storage/host pair above.
  let(:circuit_breaker) { described_class.new(storage_name, hostname) }
  let(:hostname) { Gitlab::Environment.hostname }
  # Redis hash key that the helper methods below read from and write to.
  let(:cache_key) { "storage_accessible:#{storage_name}:#{hostname}" }

  before do
    # Override test-settings for the circuitbreaker with something more realistic
    # for these specs. Three storages are configured:
    #   'default' - points at the test repositories path
    #   'broken'  - points at a path named 'non-existent-repositories'
    #   'nopath'  - has no path at all (nil)
    stub_storage_settings('default' => {
                            'path' => TestEnv.repos_path
                          },
                          'broken' => {
                            'path' => 'tmp/tests/non-existent-repositories'
                          },
                          'nopath' => { 'path' => nil }
                         )
  end

  # Reads one field of the circuit breaker's Redis hash (stored at `cache_key`).
  #
  # name - the hash field to fetch
  #
  # Returns the first element of the HMGET reply for that field.
  def value_from_redis(name)
    fields = Gitlab::Git::Storage.redis.with { |redis| redis.hmget(cache_key, name) }
    fields.first
  end

  # Writes one field of the circuit breaker's Redis hash (stored at
  # `cache_key`) and registers that key, with score 0, in the sorted set of
  # known keys.
  #
  # name  - the hash field to write (e.g. :failure_count)
  # value - the value to store in that field
  #
  # Returns whatever HMSET replies. The examples only call this for its side
  # effect, so the reply is no longer passed through `.first` (that was a
  # leftover from the getter).
  def set_in_redis(name, value)
    Gitlab::Git::Storage.redis.with do |redis|
      redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, cache_key)
      redis.hmset(cache_key, name, value)
    end
  end

  # Class-level reset: after it runs, the breaker's Redis hash must be gone.
  describe '.reset_all!' do
    it 'clears all entries from redis' do
      set_in_redis(:failure_count, 10)

      described_class.reset_all!

      # NOTE(review): this relies on `exists` returning false for a missing
      # key; newer redis-rb releases return an Integer count here (0 is truthy
      # in Ruby) and provide `exists?` instead. Confirm against the bundled
      # redis-rb version.
      key_exists = Gitlab::Git::Storage.redis.with { |redis| redis.exists(cache_key) }

      expect(key_exists).to be_falsey
    end

    it 'does not break when there are no keys in redis' do
      expect { described_class.reset_all! }.not_to raise_error
    end
  end

  # Factory method: one breaker per storage name, and a broken breaker for
  # storages that are unknown or have no path configured.
  describe '.for_storage' do
    it 'only builds a single circuitbreaker per storage' do
      expect(described_class).to receive(:new).once.and_call_original

      breaker = described_class.for_storage('default')

      expect(breaker).to be_a(described_class)
      # The second lookup must return the same breaker without calling `new` again.
      expect(described_class.for_storage('default')).to eq(breaker)
    end

    it 'returns a broken circuit breaker for an unknown storage' do
      expect(described_class.for_storage('unknown').circuit_broken?).to be_truthy
    end

    it 'returns a broken circuit breaker when the path is not set' do
      # 'nopath' is stubbed with a nil path in the top-level `before`.
      expect(described_class.for_storage('nopath').circuit_broken?).to be_truthy
    end
  end

  # The constructor exposes the hostname, storage name and storage path.
  describe '#initialize' do
    it 'assigns the settings' do
      expect(circuit_breaker.hostname).to eq(hostname)
      expect(circuit_breaker.storage).to eq('default')
      expect(circuit_breaker.storage_path).to eq(TestEnv.repos_path)
    end
  end

  # Each reader on the breaker must return the matching application setting.
  context 'circuitbreaker settings' do
    before do
      # Every setting is stubbed with a different value (0..5), so a reader can
      # only pass its example by returning its own setting.
      stub_application_setting(circuitbreaker_failure_count_threshold: 0,
                               circuitbreaker_failure_wait_time: 1,
                               circuitbreaker_failure_reset_time: 2,
                               circuitbreaker_storage_timeout: 3,
                               circuitbreaker_access_retries: 4,
                               circuitbreaker_backoff_threshold: 5)
    end

    describe '#failure_count_threshold' do
      it 'reads the value from settings' do
        expect(circuit_breaker.failure_count_threshold).to eq(0)
      end
    end

    describe '#failure_wait_time' do
      it 'reads the value from settings' do
        expect(circuit_breaker.failure_wait_time).to eq(1)
      end
    end

    describe '#failure_reset_time' do
      it 'reads the value from settings' do
        expect(circuit_breaker.failure_reset_time).to eq(2)
      end
    end

    describe '#storage_timeout' do
      it 'reads the value from settings' do
        expect(circuit_breaker.storage_timeout).to eq(3)
      end
    end

    describe '#access_retries' do
      it 'reads the value from settings' do
        expect(circuit_breaker.access_retries).to eq(4)
      end
    end

    describe '#backoff_threshold' do
      it 'reads the value from settings' do
        expect(circuit_breaker.backoff_threshold).to eq(5)
      end
    end
  end

  # `#perform` wraps a block with the storage accessibility check. These
  # examples seed the failure state through Redis and assert on what
  # `perform` raises, returns and records.
  describe '#perform' do
    it 'raises the correct exception when the circuit is open' do
      set_in_redis(:last_failure, 1.day.ago.to_f)
      set_in_redis(:failure_count, 999)

      expect { |b| circuit_breaker.perform(&b) }
        .to raise_error do |exception|
        expect(exception).to be_kind_of(Gitlab::Git::Storage::CircuitOpen)
        expect(exception.retry_after).to eq(1800)
      end
    end

    it 'raises the correct exception when backing off' do
      # Time is frozen so the "1 second ago" failure keeps the same age
      # for the duration of the example.
      Timecop.freeze do
        set_in_redis(:last_failure, 1.second.ago.to_f)
        set_in_redis(:failure_count, 90)

        expect { |b| circuit_breaker.perform(&b) }
          .to raise_error do |exception|
          expect(exception).to be_kind_of(Gitlab::Git::Storage::Failing)
          expect(exception.retry_after).to eq(30)
        end
      end
    end

    it 'yields the block' do
      expect { |b| circuit_breaker.perform(&b) }
        .to yield_control
    end

    it 'checks if the storage is available' do
      expect(circuit_breaker).to receive(:check_storage_accessible!)
                                   .and_call_original

      circuit_breaker.perform { 'hello world' }
    end

    it 'returns the value of the block' do
      result = circuit_breaker.perform { 'return value' }

      expect(result).to eq('return value')
    end

    it 'raises possible errors' do
      # Errors raised inside the block must propagate to the caller unchanged.
      expect { circuit_breaker.perform { raise Rugged::OSError, 'Broken' } }
        .to raise_error(Rugged::OSError)
    end

    it 'tracks that the storage was accessible' do
      set_in_redis(:failure_count, 10)
      set_in_redis(:last_failure, Time.now.to_f)

      circuit_breaker.perform { '' }

      # A successful perform resets the recorded failure state, both in Redis
      # and as seen through the breaker's readers.
      expect(value_from_redis(:failure_count).to_i).to eq(0)
      expect(value_from_redis(:last_failure)).to be_empty
      expect(circuit_breaker.failure_count).to eq(0)
      expect(circuit_breaker.last_failure).to be_nil
    end

    it 'maintains known storage keys' do
      Timecop.freeze do
        # Insert an old key to expire
        old_entry = Time.now.to_i - 3.days.to_i
        Gitlab::Git::Storage.redis.with do |redis|
          redis.zadd(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, old_entry, 'to_be_removed')
        end

        circuit_breaker.perform { '' }

        known_keys = Gitlab::Git::Storage.redis.with do |redis|
          redis.zrange(Gitlab::Git::Storage::REDIS_KNOWN_KEYS, 0, -1)
        end

        # Only the current breaker's key may remain; the stale entry is gone.
        expect(known_keys).to contain_exactly(cache_key)
      end
    end

    it 'only performs the accessibility check once' do
      expect(Gitlab::Git::Storage::ForkedStorageCheck)
        .to receive(:storage_available?).once.and_call_original

      2.times { circuit_breaker.perform { '' } }
    end

    it 'calls the check with the correct arguments' do
      stub_application_setting(circuitbreaker_storage_timeout: 30,
                               circuitbreaker_access_retries: 3)

      # Expected arguments: storage path, then the stubbed timeout and retries.
      expect(Gitlab::Git::Storage::ForkedStorageCheck)
        .to receive(:storage_available?).with(TestEnv.repos_path, 30, 3)
              .and_call_original

      circuit_breaker.perform { '' }
    end

    context 'with the feature disabled' do
      before do
        stub_feature_flags(git_storage_circuit_breaker: false)
      end

      it 'returns the block without checking accessibility' do
        expect(circuit_breaker).not_to receive(:check_storage_accessible!)

        result = circuit_breaker.perform { 'hello' }

        expect(result).to eq('hello')
      end

      it 'allows enabling the feature using an ENV var' do
        # With the feature flag off, the ENV var alone must turn the check on.
        stub_env('GIT_STORAGE_CIRCUIT_BREAKER', 'true')
        expect(circuit_breaker).to receive(:check_storage_accessible!)

        result = circuit_breaker.perform { 'hello' }

        expect(result).to eq('hello')
      end
    end

    context 'the storage is not available' do
      # 'broken' is stubbed in the top-level `before` with a path named
      # 'non-existent-repositories'.
      let(:storage_name) { 'broken' }

      it 'raises the correct exception' do
        expect(circuit_breaker).to receive(:track_storage_inaccessible)

        expect { circuit_breaker.perform { '' } }
          .to raise_error do |exception|
          expect(exception).to be_kind_of(Gitlab::Git::Storage::Inaccessible)
          expect(exception.retry_after).to eq(30)
        end
      end

      it 'tracks that the storage was inaccessible' do
        Timecop.freeze do
          expect { circuit_breaker.perform { '' } }.to raise_error(Gitlab::Git::Storage::Inaccessible)

          expect(value_from_redis(:failure_count).to_i).to eq(1)
          expect(value_from_redis(:last_failure)).not_to be_empty
          expect(circuit_breaker.failure_count).to eq(1)
          expect(circuit_breaker.last_failure).to be_within(1.second).of(Time.now)
        end
      end
    end
  end

  # `#circuit_broken?` as a function of the failure state stored in Redis.
  describe '#circuit_broken?' do
    it 'is working when there is no last failure' do
      set_in_redis(:last_failure, nil)
      set_in_redis(:failure_count, 0)

      expect(circuit_breaker.circuit_broken?).to be_falsey
    end

    it 'is broken when there are too many failures' do
      set_in_redis(:last_failure, 1.day.ago.to_f)
      set_in_redis(:failure_count, 200)

      expect(circuit_breaker.circuit_broken?).to be_truthy
    end
  end

  # `#backing_off?` as a function of how recent the last failure was.
  describe '#backing_off?' do
    it 'is true when there was a recent failure' do
      # Time is frozen so the seeded failure stays exactly one second old.
      Timecop.freeze do
        set_in_redis(:last_failure, 1.second.ago.to_f)
        set_in_redis(:failure_count, 90)

        expect(circuit_breaker.backing_off?).to be_truthy
      end
    end

    context 'the `failure_wait_time` is set to 0' do
      before do
        stub_application_setting(circuitbreaker_failure_wait_time: 0)
      end

      it 'is working even when there are failures' do
        Timecop.freeze do
          # Even a failure recorded "right now" must not trigger a back-off
          # when the wait time is 0.
          set_in_redis(:last_failure, 0.seconds.ago.to_f)
          set_in_redis(:failure_count, 90)

          expect(circuit_breaker.backing_off?).to be_falsey
        end
      end
    end
  end

  # `#last_failure` turns the timestamp stored in Redis back into a Time.
  describe '#last_failure' do
    it 'returns the last failure time' do
      failed_at = Time.parse('2017-05-26 17:52:30')
      # Stored as integer epoch seconds; the reader must return an equal Time.
      set_in_redis(:last_failure, failed_at.to_i)

      expect(circuit_breaker.last_failure).to eq(failed_at)
    end
  end

  # `#failure_count` returns the counter stored in Redis as an Integer.
  describe '#failure_count' do
    it 'returns the failure count' do
      stored_count = 7
      set_in_redis(:failure_count, stored_count)

      expect(circuit_breaker.failure_count).to eq(stored_count)
    end
  end
end