Commit 1d6b538f authored by Rémy Coutable's avatar Rémy Coutable

Merge branch 'jv-integrate-workhorse' into 'master'

Vendor gitlab-workhorse into the main repository

See merge request gitlab-org/gitlab!48690
parents b53c6d07 f7895ad5
@@ -109,3 +109,4 @@ include:
- local: .gitlab/ci/releases.gitlab-ci.yml
- local: .gitlab/ci/notify.gitlab-ci.yml
- local: .gitlab/ci/dast.gitlab-ci.yml
- local: .gitlab/ci/workhorse.gitlab-ci.yml
workhorse:
image: golang:1.14
stage: test
needs: []
script:
- rm .git/hooks/post-checkout
- git checkout .
- scripts/update-workhorse check
- make -C workhorse
@@ -32,6 +32,7 @@ AllCops:
- 'builds/**/*'
- 'plugins/**/*'
- 'file_hooks/**/*'
- 'workhorse/**/*'
CacheRootDirectory: tmp
MaxFilesInCache: 18000
@@ -139,4 +139,12 @@ def warn_or_fail_commits(failed_linters, default_to_fail: true)
end
end
# As part of https://gitlab.com/groups/gitlab-org/-/epics/4826 we are
# vendoring workhorse commits from the stand-alone gitlab-workhorse
# repo. There is no point in linting commits that we want to vendor as
# is.
def workhorse_changes?
git.diff.any? { |file| file.path.start_with?('workhorse/') }
end
lint_commits(git.commits) unless workhorse_changes?
#!/bin/sh
set -e
WORKHORSE_DIR=workhorse/
WORKHORSE_REF="v$(cat GITLAB_WORKHORSE_VERSION)"
if [ $# -gt 1 ] || ([ $# = 1 ] && [ x$1 != xcheck ]); then
echo "Usage: update-workhorse [check]"
exit 1
fi
clean="$(git status --porcelain)"
if [ -n "$clean" ] ; then
echo 'error: working directory is not clean:'
echo "$clean"
exit 1
fi
git fetch https://gitlab.com/gitlab-org/gitlab-workhorse.git "$WORKHORSE_REF"
git rm -rf --quiet -- "$WORKHORSE_DIR"
git read-tree --prefix="$WORKHORSE_DIR" -u FETCH_HEAD
status="$(git status --porcelain)"
if [ x$1 = xcheck ]; then
if [ -n "$status" ]; then
cat <<MSG
error: $WORKHORSE_DIR does not match $WORKHORSE_REF
During the transition period of https://gitlab.com/groups/gitlab-org/-/epics/4826,
the workhorse/ directory in this repository is read-only. To make changes:
1. Submit a MR to https://gitlab.com/gitlab-org/gitlab-workhorse
2. Once your MR is merged, have a new gitlab-workhorse tag made
by a maintainer
3. Update the GITLAB_WORKHORSE_VERSION file in this repository
4. Run scripts/update-workhorse to update the workhorse/ directory
MSG
exit 1
fi
exit 0
fi
if [ -z "$status" ]; then
echo "warn: $WORKHORSE_DIR is already up to date, exiting without commit"
exit 0
fi
tree=$(git write-tree)
msg="Update vendored workhorse to $WORKHORSE_REF"
commit=$(git commit-tree -p HEAD -p FETCH_HEAD^{commit} -m "$msg" "$tree")
git update-ref HEAD "$commit"
git log -1
testdata/data
testdata/scratch
testdata/public
/gitlab-workhorse
/gitlab-resize-image
/gitlab-zip-cat
/gitlab-zip-metadata
/_build
coverage.html
/*.toml
workflow:
rules: &workflow_rules
# For merge requests, create a pipeline.
- if: '$CI_MERGE_REQUEST_IID'
# For `master` branch, create a pipeline (this includes on schedules, pushes, merges, etc.).
- if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH'
# For tags, create a pipeline.
- if: '$CI_COMMIT_TAG'
# For stable branches, create a pipeline.
- if: '$CI_COMMIT_BRANCH =~ /^[\d-]+-stable$/'
default:
image: golang:1.13
tags:
- gitlab-org
# Disable DIND for SAST because we need to execute a before_script in the gosec-sast job
variables:
SAST_DISABLE_DIND: "true"
verify:
script:
- make verify
changelog:
script:
- _support/check_changelog.sh
rules:
- if: '$CI_MERGE_REQUEST_IID'
.test:
services:
- name: registry.gitlab.com/gitlab-org/build/cng/gitaly:latest
# Disable the hooks so we don't have to stub the GitLab API
command: ["/usr/bin/env", "GITALY_TESTING_NO_GIT_HOOKS=1", "/scripts/process-wrapper"]
alias: gitaly
variables:
GITALY_ADDRESS: "tcp://gitaly:8075"
script:
- go version
- apt-get update && apt-get -y install libimage-exiftool-perl
- make test
test using go 1.13:
extends: .test
image: golang:1.13
test using go 1.14:
extends: .test
image: golang:1.14
test:release:
rules:
- if: '$CI_COMMIT_TAG'
script:
- git describe --exact-match
include:
- template: Security/SAST.gitlab-ci.yml
- template: Security/Dependency-Scanning.gitlab-ci.yml
- template: Security/Secret-Detection.gitlab-ci.yml
gosec-sast:
before_script:
- apk add make
- make install
rules: *workflow_rules
gemnasium-dependency_scanning:
rules: *workflow_rules
secret_detection:
rules: *workflow_rules
code_navigation:
image: golang:latest
allow_failure: true
script:
- go get github.com/sourcegraph/lsif-go/cmd/lsif-go
- lsif-go
artifacts:
reports:
lsif: dump.lsif
* @jacobvosmaer-gitlab @nick.thomas @nolith @patrickbajao
## Contributing
Thank you for your interest in contributing to this GitLab project! We welcome
all contributions. By participating in this project, you agree to abide by the
[code of conduct](#code-of-conduct).
## Contributor license agreement
By submitting code as an individual you agree to the [individual contributor
license agreement][individual-agreement].
By submitting code as an entity you agree to the [corporate contributor license
agreement][corporate-agreement].
## Code of conduct
As contributors and maintainers of this project, we pledge to respect all people
who contribute through reporting issues, posting feature requests, updating
documentation, submitting pull requests or patches, and other activities.
We are committed to making participation in this project a harassment-free
experience for everyone, regardless of level of experience, gender, gender
identity and expression, sexual orientation, disability, personal appearance,
body size, race, ethnicity, age, or religion.
Examples of unacceptable behavior by participants include the use of sexual
language or imagery, derogatory comments or personal attacks, trolling, public
or private harassment, insults, or other unprofessional conduct.
Project maintainers have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct. Project maintainers who do not follow the
Code of Conduct may be removed from the project team.
This code of conduct applies both within project spaces and in public spaces
when an individual is representing the project or its community.
Instances of abusive, harassing, or otherwise unacceptable behavior can be
reported by emailing contact@gitlab.com.
This Code of Conduct is adapted from the [Contributor Covenant][contributor-covenant], version 1.1.0,
available at [http://contributor-covenant.org/version/1/1/0/](http://contributor-covenant.org/version/1/1/0/).
[contributor-covenant]: http://contributor-covenant.org
[individual-agreement]: https://docs.gitlab.com/ee/legal/individual_contributor_license_agreement.html
[corporate-agreement]: https://docs.gitlab.com/ee/legal/corporate_contributor_license_agreement.html
The MIT License (MIT)
Copyright (c) 2015-2017 GitLab B.V.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
PREFIX=/usr/local
PKG := gitlab.com/gitlab-org/gitlab-workhorse
BUILD_DIR ?= $(CURDIR)
TARGET_DIR ?= $(BUILD_DIR)/_build
TARGET_SETUP := $(TARGET_DIR)/.ok
BIN_BUILD_DIR := $(TARGET_DIR)/bin
COVERAGE_DIR := $(TARGET_DIR)/cover
VERSION_STRING := $(shell git describe)
ifeq ($(strip $(VERSION_STRING)),)
VERSION_STRING := v$(shell cat VERSION)
endif
BUILD_TIME := $(shell date -u +%Y%m%d.%H%M%S)
GOBUILD := go build -ldflags "-X main.Version=$(VERSION_STRING) -X main.BuildTime=$(BUILD_TIME)"
EXE_ALL := gitlab-resize-image gitlab-zip-cat gitlab-zip-metadata gitlab-workhorse
INSTALL := install
BUILD_TAGS := tracer_static tracer_static_jaeger continuous_profiler_stackdriver
MINIMUM_SUPPORTED_GO_VERSION := 1.11
export GOBIN := $(TARGET_DIR)/bin
export PATH := $(GOBIN):$(PATH)
export GOPROXY ?= https://proxy.golang.org
export GO111MODULE=on
LOCAL_GO_FILES = $(shell find . -type f -name '*.go' | grep -v -e /_ -e /testdata/ -e '^\./\.')
define message
@echo "### $(1)"
endef
.NOTPARALLEL:
.PHONY: all
all: clean-build $(EXE_ALL)
$(TARGET_SETUP):
$(call message,"Setting up target directory")
rm -rf "$(TARGET_DIR)"
mkdir -p "$(TARGET_DIR)"
touch "$(TARGET_SETUP)"
gitlab-resize-image: $(TARGET_SETUP) $(shell find cmd/gitlab-resize-image/ -name '*.go')
$(call message,Building $@)
$(GOBUILD) -tags "$(BUILD_TAGS)" -o $(BUILD_DIR)/$@ $(PKG)/cmd/$@
gitlab-zip-cat: $(TARGET_SETUP) $(shell find cmd/gitlab-zip-cat/ -name '*.go')
$(call message,Building $@)
$(GOBUILD) -tags "$(BUILD_TAGS)" -o $(BUILD_DIR)/$@ $(PKG)/cmd/$@
gitlab-zip-metadata: $(TARGET_SETUP) $(shell find cmd/gitlab-zip-metadata/ -name '*.go')
$(call message,Building $@)
$(GOBUILD) -tags "$(BUILD_TAGS)" -o $(BUILD_DIR)/$@ $(PKG)/cmd/$@
gitlab-workhorse: $(TARGET_SETUP) $(shell find . -name '*.go' | grep -v '^\./_')
$(call message,Building $@)
$(GOBUILD) -tags "$(BUILD_TAGS)" -o $(BUILD_DIR)/$@ $(PKG)
.PHONY: install
install: $(EXE_ALL)
$(call message,$@)
mkdir -p $(DESTDIR)$(PREFIX)/bin/
cd $(BUILD_DIR) && $(INSTALL) $(EXE_ALL) $(DESTDIR)$(PREFIX)/bin/
.PHONY: test
test: $(TARGET_SETUP) prepare-tests
$(call message,$@)
@go test -tags "$(BUILD_TAGS)" ./...
@echo SUCCESS
.PHONY: coverage
coverage: $(TARGET_SETUP) prepare-tests
$(call message,$@)
@go test -tags "$(BUILD_TAGS)" -cover -coverprofile=test.coverage ./...
go tool cover -html=test.coverage -o coverage.html
rm -f test.coverage
.PHONY: clean
clean: clean-workhorse clean-build
$(call message,$@)
rm -rf testdata/data testdata/scratch
.PHONY: clean-workhorse
clean-workhorse:
$(call message,$@)
rm -f $(EXE_ALL)
.PHONY: check-version
check-version:
@test -n "$(VERSION)" || (echo "VERSION not set." ; exit 1)
.PHONY: tag
tag: check-version
$(call message,$@)
sh _support/tag.sh "$(VERSION)"
.PHONY: signed_tag
signed_tag: check-version
$(call message,$@)
TAG_OPTS=-s sh _support/tag.sh "$(VERSION)"
.PHONY: clean-build
clean-build:
$(call message,$@)
rm -rf $(TARGET_DIR)
.PHONY: prepare-tests
prepare-tests: testdata/data/group/test.git $(EXE_ALL)
prepare-tests: testdata/scratch
testdata/data/group/test.git:
$(call message,$@)
git clone --quiet --bare https://gitlab.com/gitlab-org/gitlab-test.git $@
testdata/scratch:
mkdir -p testdata/scratch
.PHONY: verify
verify: lint vet detect-context detect-assert check-formatting staticcheck deps-check
.PHONY: lint
lint: $(TARGET_SETUP)
$(call message,Verify: $@)
go install golang.org/x/lint/golint
@_support/lint.sh ./...
.PHONY: vet
vet: $(TARGET_SETUP)
$(call message,Verify: $@)
@go vet ./...
.PHONY: detect-context
detect-context: $(TARGET_SETUP)
$(call message,Verify: $@)
_support/detect-context.sh
.PHONY: detect-assert
detect-assert:
$(call message,Verify: $@)
_support/detect-assert.sh
.PHONY: check-formatting
check-formatting: $(TARGET_SETUP) install-goimports
$(call message,Verify: $@)
@_support/validate-formatting.sh $(LOCAL_GO_FILES)
# Megacheck will tailor some responses given a minimum Go version, so pass that through the CLI
# Additionally, megacheck will not return failure exit codes unless explicitly told to via the
# `-simple.exit-non-zero` `-unused.exit-non-zero` and `-staticcheck.exit-non-zero` flags
.PHONY: staticcheck
staticcheck: $(TARGET_SETUP)
$(call message,Verify: $@)
go install honnef.co/go/tools/cmd/staticcheck
@ $(GOBIN)/staticcheck -go $(MINIMUM_SUPPORTED_GO_VERSION) ./...
# In addition to fixing imports, goimports also formats your code in the same style as gofmt
# so it can be used as a replacement.
.PHONY: fmt
fmt: $(TARGET_SETUP) install-goimports
$(call message,$@)
@goimports -w -local $(PKG) -l $(LOCAL_GO_FILES)
.PHONY: goimports
install-goimports: $(TARGET_SETUP)
$(call message,$@)
go install golang.org/x/tools/cmd/goimports
.PHONY: deps-check
deps-check:
go mod tidy
@if git diff --quiet --exit-code -- go.mod go.sum; then \
echo "go.mod and go.sum are ok"; \
else \
echo ""; \
echo "go.mod and go.sum are modified, please commit them";\
exit 1; \
fi;
# GitLab-Workhorse development process
## Maintainers
GitLab-Workhorse has the following maintainers:
- Nick Thomas `@nick.thomas`
- Jacob Vosmaer `@jacobvosmaer-gitlab`
- Alessio Caiazza `@nolith`
This list is defined at https://about.gitlab.com/team/.
## Changelog
GitLab-Workhorse keeps a changelog which is generated when a new release
is created. The changelog is generated from entries that are included on each
merge request. To generate an entry on your branch run:
`_support/changelog "Change descriptions"`.
After the merge request is created, the ID of the merge request needs to be set
in the generated file. If you already know the merge request ID, run:
`_support/changelog -m <ID> "Change descriptions"`.
Any new merge request must contain either a new entry or a justification in the
merge request description why no changelog entry is needed.
## Merging and reviewing contributions
Contributions must be reviewed by at least one Workhorse maintainer.
The final merge must be performed by a maintainer.
## Releases
New versions of Workhorse can be released by one of the Workhorse
maintainers. The release process is:
- pick a release branch. For x.y.0, use `master`. For all other
versions (x.y.1, x.y.2, etc.), use `x-y-stable`. Also see [below](#versioning)
- run `make tag VERSION=x.y.z` or `make signed_tag VERSION=x.y.z` on the release branch. This will
compile the changelog, bump the VERSION file, and make a tag matching it.
- push the branch and the tag to gitlab.com
- the new version will only be deployed to `gitlab.com` if [`GITLAB_WORKHORSE_VERSION`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/GITLAB_WORKHORSE_VERSION) is updated accordingly;
if applicable, please remind the person who originally asked for a new release to make this change
(the MR should include a link back to the [version tag](https://gitlab.com/gitlab-org/gitlab-workhorse/-/tags) and a copy of the changelog)
## Security releases
Workhorse is included in the packages we create for GitLab, and each version of
GitLab specifies the version of Workhorse it uses in the `GITLAB_WORKHORSE_VERSION`
file, so security fixes in Workhorse are tightly coupled to the [general security release](https://about.gitlab.com/handbook/engineering/workflow/#security-issues)
workflow, with some elaborations to account for the changes happening across two
repositories. In particular, the Workhorse maintainer takes responsibility for
creating new patch versions of Workhorse that can be used in the security
release.
As security fixes are backported to three releases in addition to master, and
changes need to happen across two repositories, up to eight merge requests and
four Workhorse releases can be required to fix a security issue in Workhorse.
This is a lot of overhead, so in general, it is better to fix security issues
without changing Workhorse. Where changes **are** necessary, this section
documents the necessary steps.
If you're working on a security fix in Workhorse, you need two sets of merge
requests:
* The fix itself, in the `gitlab-org/security/gitlab-workhorse` repository
* A merge request to change the version of workhorse included in the GitLab
security release, in the `gitlab-org/security/gitlab` repository.
If the Workhorse maintainer isn't also a GitLab maintainer, reviews will need to
be split across several people. If changes to GitLab **code** are required in
addition to the change of Workhorse version, they both happen in the same merge
request.
Start by creating a single merge request targeting `master` in Workhorse. Ensure
you include a changelog! If code changes are needed in GitLab as well, create a
GitLab merge request targeting `master` at this point, but don't worry about the
`GITLAB_WORKHORSE_VERSION` file yet.
Once the changes have passed review, the Workhorse maintainer will determine the
new versions of Workhorse that will be needed, and communicate that to the
author. To do this, examine the `GITLAB_WORKHORSE_VERSION` file on each GitLab
stable branch; for instance, if the security release consisted of GitLab
versions `12.10.1`, `12.9.2`, `12.8.3`, and `12.7.4`, we would see the following:
```
gitlab$ git fetch security master 12-10-stable-ee 12-9-stable-ee 12-8-stable-ee 12-7-stable-ee
gitlab$ git show refs/remotes/security/master:GITLAB_WORKHORSE_VERSION
8.30.1
gitlab$ git show refs/remotes/security/12-10-stable-ee:GITLAB_WORKHORSE_VERSION
8.30.1
gitlab$ git show refs/remotes/security/12-9-stable-ee:GITLAB_WORKHORSE_VERSION
8.25.2
gitlab$ git show refs/remotes/security/12-8-stable-ee:GITLAB_WORKHORSE_VERSION
8.21.2
gitlab$ git show refs/remotes/security/12-7-stable-ee:GITLAB_WORKHORSE_VERSION
8.21.2
```
In this example, there are three distinct Workhorse stable branches to be
concerned with, plus Workhorse master: `8-30-stable`, `8-25-stable`, and
`8-21-stable`, and we can predict that we are going to need to create Workhorse
releases `8.30.2`, `8.25.3`, and `8.21.3`.
The author needs to create a merge request targeting each Workhorse stable
branch, and verify that the fix works once backported. They also need to create
(or update, if they already exist) GitLab merge requests, setting the
`GITLAB_WORKHORSE_VERSION` file to the predicted workhorse version, and assign
all the MRs back to the appropriate maintainer(s). The pipeline for the GitLab
MRs will fail until the Workhorse releases have been tagged; you can use the
`=workhorse_branch_name` syntax in the `GITLAB_WORKHORSE_VERSION` file to verify
that the MRs interact as expected, if necessary.
Once all involved maintainers are happy with the overall change, the Workhorse
maintainer will merge each of the Workhorse MRs and generate new Workhorse
releases from the stable branches. The tags will be present on the `security`
mirror and `dev.gitlab.org` **only** at this point.
Once the Workhorse tags exist, the GitLab maintainer ensures that all the GitLab
MRs are green and assigns those MRs on to the release bot.
The release managers merge the GitLab MRs, tag GitLab releases that reference
the new Workhorse tags, and release them in the usual way.
Once the security release is done, the Workhorse maintainer is responsible for
syncing the changes to the `gitlab-org/gitlab-workhorse` repository. Push the
changes to `master`, the new tags, and all the changes to the stable branches.
This process is quite involved, very manual, and extremely error-prone; work is
ongoing on automating it.
## Versioning
Workhorse uses a variation of SemVer. We don't use "normal" SemVer
because we have to be able to integrate into GitLab stable branches.
A version has the format MAJOR.MINOR.PATCH.
- Major and minor releases are tagged on the `master` branch
- If the change is backwards compatible, increment the MINOR counter
- If the change breaks compatibility, increment MAJOR and set MINOR to `0`
- Patch release tags must be made on stable branches
- Only make a patch release when targeting a GitLab stable branch
This means that tags that end in `.0` (e.g. `8.5.0`) must always be on
the master branch, and tags that end in anything other than `.0` (e.g.
`8.5.2`) must always be on a stable branch.
> The reason we do this is that SemVer suggests something like a
> refactoring constitutes a "patch release", while the GitLab stable
> branch quality standards do not allow for back-porting refactorings
> into a stable branch.
# GitLab Workhorse
GitLab Workhorse is a smart reverse proxy for GitLab. It handles
"large" HTTP requests such as file downloads, file uploads, Git
push/pull and Git archive downloads.
Workhorse itself is not a feature, but there are [several features in
GitLab](doc/architecture/gitlab_features.md) that would not work efficiently without Workhorse.
## Documentation
Workhorse documentation is available in the [`doc` folder of this repository](doc/).
* Architectural overview
* [GitLab features that rely on Workhorse](doc/architecture/gitlab_features.md)
* [Websocket channel support](doc/architecture/channel.md)
* Operating Workhorse
* [Source installation](doc/operations/install.md)
* [Workhorse configuration](doc/operations/configuration.md)
* [Contributing](CONTRIBUTING.md)
* [Adding new features](doc/development/new_features.md)
* [Testing your code](doc/development/tests.md)
## License
This code is distributed under the MIT license, see the [LICENSE](LICENSE) file.
#!/usr/bin/env ruby
#
# Generate a changelog entry file in the correct location.
#
# Automatically stages the file and amends the previous commit if the `--amend`
# argument is used.
#
# Stolen from gitlab-org/gitaly, lifted from gitlab-org/gitlab-ce
require 'optparse'
require 'yaml'
Options = Struct.new(
:amend,
:author,
:dry_run,
:force,
:merge_request,
:title,
:type
)
INVALID_TYPE = -1
class ChangelogOptionParser
Type = Struct.new(:name, :description)
TYPES = [
Type.new('added', 'New feature'),
Type.new('fixed', 'Bug fix'),
Type.new('changed', 'Feature change'),
Type.new('deprecated', 'New deprecation'),
Type.new('removed', 'Feature removal'),
Type.new('security', 'Security fix'),
Type.new('performance', 'Performance improvement'),
Type.new('other', 'Other')
].freeze
TYPES_OFFSET = 1
class << self
def parse(argv)
options = Options.new
parser = OptionParser.new do |opts|
opts.banner = "Usage: #{__FILE__} [options] [title]\n\n"
# Note: We do not provide a shorthand for this in order to match the `git
# commit` interface
opts.on('--amend', 'Amend the previous commit') do |value|
options.amend = value
end
opts.on('-f', '--force', 'Overwrite an existing entry') do |value|
options.force = value
end
opts.on('-m', '--merge-request [integer]', Integer, 'Merge Request ID') do |value|
options.merge_request = value
end
opts.on('-n', '--dry-run', "Don't actually write anything, just print") do |value|
options.dry_run = value
end
opts.on('-u', '--git-username', 'Use Git user.name configuration as the author') do |value|
options.author = git_user_name if value
end
opts.on('-t', '--type [string]', String, "The category of the change, valid options are: #{TYPES.map(&:name).join(', ')}") do |value|
options.type = parse_type(value)
end
opts.on('-h', '--help', 'Print help message') do
$stdout.puts opts
exit
end
end
parser.parse!(argv)
# Title is everything that remains, but let's clean it up a bit
options.title = argv.join(' ').strip.squeeze(' ').tr("\r\n", '')
options
end
def read_type
read_type_message
type = TYPES[$stdin.getc.to_i - TYPES_OFFSET]
assert_valid_type!(type)
type.name
end
private
def parse_type(name)
type_found = TYPES.find do |type|
type.name == name
end
type_found ? type_found.name : INVALID_TYPE
end
def read_type_message
$stdout.puts "\n>> Please specify the index for the category of your change:"
TYPES.each_with_index do |type, index|
$stdout.puts "#{index + TYPES_OFFSET}. #{type.description}"
end
$stdout.print "\n?> "
end
def assert_valid_type!(type)
unless type
$stderr.puts "Invalid category index, please select an index between 1 and #{TYPES.length}"
exit 1
end
end
def git_user_name
%x{git config user.name}.strip
end
end
end
class ChangelogEntry
attr_reader :options
def initialize(options)
@options = options
assert_feature_branch!
assert_title!
assert_new_file!
# Read type from $stdin unless is already set
options.type ||= ChangelogOptionParser.read_type
assert_valid_type!
$stdout.puts "\e[32mcreate\e[0m #{file_path}"
$stdout.puts contents
unless options.dry_run
write
amend_commit if options.amend
end
end
private
def contents
yaml_content = YAML.dump(
'title' => title,
'merge_request' => options.merge_request,
'author' => options.author,
'type' => options.type
)
remove_trailing_whitespace(yaml_content)
end
def write
File.write(file_path, contents)
end
def amend_commit
%x{git add #{file_path}}
exec("git commit --amend")
end
def fail_with(message)
$stderr.puts "\e[31merror\e[0m #{message}"
exit 1
end
def assert_feature_branch!
return unless branch_name == 'master'
fail_with "Create a branch first!"
end
def assert_new_file!
return unless File.exist?(file_path)
return if options.force
fail_with "#{file_path} already exists! Use `--force` to overwrite."
end
def assert_title!
return if options.title.length > 0 || options.amend
fail_with "Provide a title for the changelog entry or use `--amend`" \
" to use the title from the previous commit."
end
def assert_valid_type!
return unless options.type && options.type == INVALID_TYPE
fail_with 'Invalid category given!'
end
def title
if options.title.empty?
last_commit_subject
else
options.title
end
end
def last_commit_subject
%x{git log --format="%s" -1}.strip
end
def file_path
File.join(
unreleased_path,
branch_name.gsub(/[^\w-]/, '-') << '.yml'
)
end
def unreleased_path
path = File.join('changelogs', 'unreleased')
path = File.join('ee', path) if ee?
path
end
def ee?
@ee ||= File.exist?(File.expand_path('../CHANGELOG-EE.md', __dir__))
end
def branch_name
@branch_name ||= %x{git symbolic-ref --short HEAD}.strip
end
def remove_trailing_whitespace(yaml_content)
yaml_content.gsub(/ +$/, '')
end
end
if $0 == __FILE__
options = ChangelogOptionParser.parse(ARGV)
ChangelogEntry.new(options)
end
# vim: ft=ruby
#!/bin/sh
set -e
# we skip the changelog check if the merge request title ends with "NO CHANGELOG"
if echo "$CI_MERGE_REQUEST_TITLE" | grep -q ' NO CHANGELOG$'; then
echo "Changelog not needed"
exit 0
fi
target=${CI_MERGE_REQUEST_TARGET_BRANCH_NAME:-master}
if git diff --name-only "origin/$target" | grep -q '^changelogs/' ; then
echo "Changelog included"
else
echo "Please add a changelog running '_support/changelog'"
echo "or disable this check adding 'NO CHANGELOG' at the end of the merge request title"
echo "/title $CI_MERGE_REQUEST_TITLE NO CHANGELOG"
exit 1
fi
#!/bin/sh
git grep 'testify/assert"' | \
grep -e '^[^:]*\.go' | \
awk '{
print "error: please use testify/require instead of testify/assert"
print
exit 1
}'
#!/bin/sh
git grep 'context.\(Background\|TODO\)' | \
grep -v -e '^[^:]*_test\.go:' -v -e "lint:allow context.Background" -e '^vendor/' -e '^_support/' -e '^cmd/[^:]*/main.go' | \
grep -e '^[^:]*\.go' | \
awk '{
print "Found disallowed use of context.Background or TODO"
print
exit 1
}'
package main
import (
"fmt"
"net/http"
"os"
"gitlab.com/gitlab-org/labkit/log"
)
func main() {
if len(os.Args) == 1 {
fmt.Fprintf(os.Stderr, "Usage: %s /path/to/test-repo.git\n", os.Args[0])
os.Exit(1)
}
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, `{"RepoPath":"%s","ArchivePath":"%s"}`, os.Args[1], r.URL.Path)
})
log.Fatal(http.ListenAndServe("localhost:8080", nil))
}
#!/usr/bin/env ruby
# Generates the changelog from the yaml entries in changelogs/unreleased
#
# Lifted from gitlab-org/gitaly
require 'yaml'
require 'fileutils'
class ChangelogEntry
attr_reader :title, :merge_request, :type, :author
def initialize(file_path)
yaml = YAML.safe_load(File.read(file_path))
@title = yaml['title']
@merge_request = yaml['merge_request']
@type = yaml['type']
@author = yaml['author']
end
def to_s
str = ""
str << "- #{title}\n"
str << " https://gitlab.com/gitlab-org/gitlab-workhorse/-/merge_requests/#{merge_request}\n"
str << " Contributed by #{author}\n" if author
str
end
end
ROOT_DIR = File.expand_path('../..', __FILE__)
UNRELEASED_ENTRIES = File.join(ROOT_DIR, 'changelogs', 'unreleased')
CHANGELOG_FILE = File.join(ROOT_DIR, 'CHANGELOG')
def main(version)
entries = []
Dir["#{UNRELEASED_ENTRIES}/*.yml"].each do |yml|
entries << ChangelogEntry.new(yml)
FileUtils.rm(yml)
end
sections = []
types = entries.map(&:type).uniq.sort
types.each do |type|
text = ''
text << "### #{type.capitalize}\n"
entries.each do |e|
next unless e.type == type
text << e.to_s
end
sections << text
end
sections << '- No changes.' if sections.empty?
new_version_entry = ["## v#{version}\n\n", sections.join("\n"), "\n"].join
current_changelog = File.read(CHANGELOG_FILE).lines
header = current_changelog.shift(2)
new_changelog = [header, new_version_entry, current_changelog.join]
File.write(CHANGELOG_FILE, new_changelog.join)
end
unless ARGV.count == 1
warn "Usage: #{$0} VERSION"
warn "Specify version as x.y.z"
abort
end
main(ARGV.first)
#!/bin/sh
# Unfortunately, workhorse fails many lint checks which we currently ignore
LINT_RESULT=$(golint "$@"|grep -Ev 'should have|should be|use ALL_CAPS in Go names')
if [ -n "${LINT_RESULT}" ]; then
echo >&2 "Formatting or imports need fixing: 'make fmt'"
echo ">>${LINT_RESULT}<<"
exit 1
fi
set -e
main() {
version=$1
set_version
changelog
git commit VERSION -m "Update VERSION to $version"
tag_name="v${version}"
git tag $TAG_OPTS -m "Version ${version}" -a ${tag_name}
git show ${tag_name}
cat <<'EOF'
Remember to push your tag now, either to gitlab.com (for a
normal release) or dev.gitlab.org (for a security release).
EOF
}
set_version() {
if ! echo "${version}" | grep -q '^[0-9]\+\.[0-9]\+\.[0-9]\+$' ; then
echo "Invalid VERSION: ${version}"
exit 1
fi
if git tag --list | grep -q "^v${version}$" ; then
echo "Tag already exists for ${version}"
exit 1
fi
echo "$version" > VERSION
}
changelog() {
_support/generate_changelog "$version"
git commit CHANGELOG changelogs/unreleased --file - <<EOF
Update CHANGELOG for ${version}
[ci skip]
EOF
}
main "$@"
#!/bin/sh
IMPORT_RESULT=$(goimports -e -local "gitlab.com/gitlab-org/gitlab-workhorse" -l "$@")
if [ -n "${IMPORT_RESULT}" ]; then
echo >&2 "Formatting or imports need fixing: 'make fmt'"
echo "${IMPORT_RESULT}"
exit 1
fi
package main
import (
"context"
"fmt"
"net/http"
"net/http/httptest"
"regexp"
"testing"
"gitlab.com/gitlab-org/labkit/correlation"
"github.com/dgrijalva/jwt-go"
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/api"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/helper"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/secret"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/testhelper"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/upstream/roundtripper"
)
func okHandler(w http.ResponseWriter, _ *http.Request, _ *api.Response) {
w.WriteHeader(201)
fmt.Fprint(w, "{\"status\":\"ok\"}")
}
func runPreAuthorizeHandler(t *testing.T, ts *httptest.Server, suffix string, url *regexp.Regexp, apiResponse interface{}, returnCode, expectedCode int) *httptest.ResponseRecorder {
if ts == nil {
ts = testAuthServer(t, url, nil, returnCode, apiResponse)
defer ts.Close()
}
// Create http request
ctx := correlation.ContextWithCorrelation(context.Background(), "12345678")
httpRequest, err := http.NewRequestWithContext(ctx, "GET", "/address", nil)
require.NoError(t, err)
parsedURL := helper.URLMustParse(ts.URL)
testhelper.ConfigureSecret()
a := api.NewAPI(parsedURL, "123", roundtripper.NewTestBackendRoundTripper(parsedURL))
response := httptest.NewRecorder()
a.PreAuthorizeHandler(okHandler, suffix).ServeHTTP(response, httpRequest)
require.Equal(t, expectedCode, response.Code)
return response
}
func TestPreAuthorizeHappyPath(t *testing.T) {
runPreAuthorizeHandler(
t, nil, "/authorize",
regexp.MustCompile(`/authorize\z`),
&api.Response{},
200, 201)
}
func TestPreAuthorizeSuffix(t *testing.T) {
runPreAuthorizeHandler(
t, nil, "/different-authorize",
regexp.MustCompile(`/authorize\z`),
&api.Response{},
200, 404)
}
func TestPreAuthorizeJsonFailure(t *testing.T) {
runPreAuthorizeHandler(
t, nil, "/authorize",
regexp.MustCompile(`/authorize\z`),
"not-json",
200, 500)
}
func TestPreAuthorizeContentTypeFailure(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
_, err := w.Write([]byte(`{"hello":"world"}`))
require.NoError(t, err, "write auth response")
}))
defer ts.Close()
runPreAuthorizeHandler(
t, ts, "/authorize",
regexp.MustCompile(`/authorize\z`),
"",
200, 200)
}
func TestPreAuthorizeRedirect(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
http.Redirect(w, r, "/", http.StatusMovedPermanently)
}))
defer ts.Close()
runPreAuthorizeHandler(t, ts, "/willredirect",
regexp.MustCompile(`/willredirect\z`),
"",
301, 301)
}
func TestPreAuthorizeJWT(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
token, err := jwt.Parse(r.Header.Get(secret.RequestHeader), func(token *jwt.Token) (interface{}, error) {
// Don't forget to validate the alg is what you expect:
if _, ok := token.Method.(*jwt.SigningMethodHMAC); !ok {
return nil, fmt.Errorf("Unexpected signing method: %v", token.Header["alg"])
}
testhelper.ConfigureSecret()
secretBytes, err := secret.Bytes()
if err != nil {
return nil, fmt.Errorf("read secret from file: %v", err)
}
return secretBytes, nil
})
require.NoError(t, err, "decode token")
claims, ok := token.Claims.(jwt.MapClaims)
require.True(t, ok, "claims cast")
require.True(t, token.Valid, "JWT token valid")
require.Equal(t, "gitlab-workhorse", claims["iss"], "JWT token issuer")
w.Header().Set("Content-Type", api.ResponseContentType)
_, err = w.Write([]byte(`{"hello":"world"}`))
require.NoError(t, err, "write auth response")
}))
defer ts.Close()
runPreAuthorizeHandler(
t, ts, "/authorize",
regexp.MustCompile(`/authorize\z`),
"",
200, 201)
}
package main
import (
"fmt"
"net/url"
)
func parseAuthBackend(authBackend string) (*url.URL, error) {
backendURL, err := url.Parse(authBackend)
if err != nil {
return nil, err
}
if backendURL.Host == "" {
backendURL, err = url.Parse("http://" + authBackend)
if err != nil {
return nil, err
}
}
if backendURL.Scheme != "http" {
return nil, fmt.Errorf("invalid scheme, only 'http' is allowed: %q", authBackend)
}
if backendURL.Host == "" {
return nil, fmt.Errorf("missing host in %q", authBackend)
}
return backendURL, nil
}
package main
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestParseAuthBackendFailure(t *testing.T) {
failures := []string{
"",
"ftp://localhost",
"https://example.com",
}
for _, example := range failures {
t.Run(example, func(t *testing.T) {
_, err := parseAuthBackend(example)
require.Error(t, err)
})
}
}
func TestParseAuthBackend(t *testing.T) {
successes := []struct{ input, host, scheme string }{
{"http://localhost:8080", "localhost:8080", "http"},
{"localhost:3000", "localhost:3000", "http"},
{"http://localhost", "localhost", "http"},
{"localhost", "localhost", "http"},
}
for _, example := range successes {
t.Run(example.input, func(t *testing.T) {
result, err := parseAuthBackend(example.input)
require.NoError(t, err)
require.Equal(t, example.host, result.Host, "host")
require.Equal(t, example.scheme, result.Scheme, "scheme")
})
}
}
package main
import (
"net/http"
"net/http/httptest"
"net/url"
"regexp"
"testing"
"github.com/gorilla/websocket"
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/config"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/helper"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/testhelper"
)
const cablePath = "/-/cable"
func TestSingleBackend(t *testing.T) {
cableServerConns, cableBackendServer := startCableServer()
defer cableBackendServer.Close()
config := newUpstreamWithCableConfig(cableBackendServer.URL, "")
workhorse := startWorkhorseServerWithConfig(config)
defer workhorse.Close()
cableURL := websocketURL(workhorse.URL, cablePath)
client, _, err := dialWebsocket(cableURL, nil)
require.NoError(t, err)
defer client.Close()
server := (<-cableServerConns).conn
defer server.Close()
require.NoError(t, say(client, "hello"))
requireReadMessage(t, server, websocket.TextMessage, "hello")
require.NoError(t, say(server, "world"))
requireReadMessage(t, client, websocket.TextMessage, "world")
}
func TestSeparateCableBackend(t *testing.T) {
authBackendServer := testhelper.TestServerWithHandler(regexp.MustCompile(`.`), http.HandlerFunc(http.NotFound))
defer authBackendServer.Close()
cableServerConns, cableBackendServer := startCableServer()
defer cableBackendServer.Close()
config := newUpstreamWithCableConfig(authBackendServer.URL, cableBackendServer.URL)
workhorse := startWorkhorseServerWithConfig(config)
defer workhorse.Close()
cableURL := websocketURL(workhorse.URL, cablePath)
client, _, err := dialWebsocket(cableURL, nil)
require.NoError(t, err)
defer client.Close()
server := (<-cableServerConns).conn
defer server.Close()
require.NoError(t, say(client, "hello"))
requireReadMessage(t, server, websocket.TextMessage, "hello")
require.NoError(t, say(server, "world"))
requireReadMessage(t, client, websocket.TextMessage, "world")
}
func startCableServer() (chan connWithReq, *httptest.Server) {
upgrader := &websocket.Upgrader{}
connCh := make(chan connWithReq, 1)
server := testhelper.TestServerWithHandler(regexp.MustCompile(cablePath), webSocketHandler(upgrader, connCh))
return connCh, server
}
func newUpstreamWithCableConfig(authBackend string, cableBackend string) *config.Config {
var cableBackendURL *url.URL
if cableBackend != "" {
cableBackendURL = helper.URLMustParse(cableBackend)
}
return &config.Config{
Version: "123",
DocumentRoot: testDocumentRoot,
Backend: helper.URLMustParse(authBackend),
CableBackend: cableBackendURL,
}
}
package main
import (
"bytes"
"encoding/pem"
"fmt"
"net"
"net/http"
"net/http/httptest"
"net/url"
"path"
"strings"
"testing"
"time"
"github.com/gorilla/websocket"
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/labkit/log"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/api"
)
var (
envTerminalPath = fmt.Sprintf("%s/-/environments/1/terminal.ws", testProject)
jobTerminalPath = fmt.Sprintf("%s/-/jobs/1/terminal.ws", testProject)
servicesProxyWSPath = fmt.Sprintf("%s/-/jobs/1/proxy.ws", testProject)
)
type connWithReq struct {
conn *websocket.Conn
req *http.Request
}
func TestChannelHappyPath(t *testing.T) {
tests := []struct {
name string
channelPath string
}{
{"environments", envTerminalPath},
{"jobs", jobTerminalPath},
{"services", servicesProxyWSPath},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
serverConns, clientURL, close := wireupChannel(t, test.channelPath, nil, "channel.k8s.io")
defer close()
client, _, err := dialWebsocket(clientURL, nil, "terminal.gitlab.com")
require.NoError(t, err)
server := (<-serverConns).conn
defer server.Close()
message := "test message"
// channel.k8s.io: server writes to channel 1, STDOUT
require.NoError(t, say(server, "\x01"+message))
requireReadMessage(t, client, websocket.BinaryMessage, message)
require.NoError(t, say(client, message))
// channel.k8s.io: client writes get put on channel 0, STDIN
requireReadMessage(t, server, websocket.BinaryMessage, "\x00"+message)
// Closing the client should send an EOT signal to the server's STDIN
client.Close()
requireReadMessage(t, server, websocket.BinaryMessage, "\x00\x04")
})
}
}
func TestChannelBadTLS(t *testing.T) {
_, clientURL, close := wireupChannel(t, envTerminalPath, badCA, "channel.k8s.io")
defer close()
_, _, err := dialWebsocket(clientURL, nil, "terminal.gitlab.com")
require.Equal(t, websocket.ErrBadHandshake, err, "unexpected error %v", err)
}
func TestChannelSessionTimeout(t *testing.T) {
serverConns, clientURL, close := wireupChannel(t, envTerminalPath, timeout, "channel.k8s.io")
defer close()
client, _, err := dialWebsocket(clientURL, nil, "terminal.gitlab.com")
require.NoError(t, err)
sc := <-serverConns
defer sc.conn.Close()
client.SetReadDeadline(time.Now().Add(time.Duration(2) * time.Second))
_, _, err = client.ReadMessage()
require.True(t, websocket.IsCloseError(err, websocket.CloseAbnormalClosure), "Client connection was not closed, got %v", err)
}
func TestChannelProxyForwardsHeadersFromUpstream(t *testing.T) {
hdr := make(http.Header)
hdr.Set("Random-Header", "Value")
serverConns, clientURL, close := wireupChannel(t, envTerminalPath, setHeader(hdr), "channel.k8s.io")
defer close()
client, _, err := dialWebsocket(clientURL, nil, "terminal.gitlab.com")
require.NoError(t, err)
defer client.Close()
sc := <-serverConns
defer sc.conn.Close()
require.Equal(t, "Value", sc.req.Header.Get("Random-Header"), "Header specified by upstream not sent to remote")
}
func TestChannelProxyForwardsXForwardedForFromClient(t *testing.T) {
serverConns, clientURL, close := wireupChannel(t, envTerminalPath, nil, "channel.k8s.io")
defer close()
hdr := make(http.Header)
hdr.Set("X-Forwarded-For", "127.0.0.2")
client, _, err := dialWebsocket(clientURL, hdr, "terminal.gitlab.com")
require.NoError(t, err)
defer client.Close()
clientIP, _, err := net.SplitHostPort(client.LocalAddr().String())
require.NoError(t, err)
sc := <-serverConns
defer sc.conn.Close()
require.Equal(t, "127.0.0.2, "+clientIP, sc.req.Header.Get("X-Forwarded-For"), "X-Forwarded-For from client not sent to remote")
}
func wireupChannel(t *testing.T, channelPath string, modifier func(*api.Response), subprotocols ...string) (chan connWithReq, string, func()) {
serverConns, remote := startWebsocketServer(subprotocols...)
authResponse := channelOkBody(remote, nil, subprotocols...)
if modifier != nil {
modifier(authResponse)
}
upstream := testAuthServer(t, nil, nil, 200, authResponse)
workhorse := startWorkhorseServer(upstream.URL)
return serverConns, websocketURL(workhorse.URL, channelPath), func() {
workhorse.Close()
upstream.Close()
remote.Close()
}
}
func startWebsocketServer(subprotocols ...string) (chan connWithReq, *httptest.Server) {
upgrader := &websocket.Upgrader{Subprotocols: subprotocols}
connCh := make(chan connWithReq, 1)
server := httptest.NewTLSServer(webSocketHandler(upgrader, connCh))
return connCh, server
}
func webSocketHandler(upgrader *websocket.Upgrader, connCh chan connWithReq) http.HandlerFunc {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
logEntry := log.WithFields(log.Fields{
"method": r.Method,
"url": r.URL,
"headers": r.Header,
})
logEntry.Info("WEBSOCKET")
conn, err := upgrader.Upgrade(w, r, nil)
if err != nil {
logEntry.WithError(err).Error("WEBSOCKET Upgrade failed")
return
}
connCh <- connWithReq{conn, r}
// The connection has been hijacked so it's OK to end here
})
}
func channelOkBody(remote *httptest.Server, header http.Header, subprotocols ...string) *api.Response {
out := &api.Response{
Channel: &api.ChannelSettings{
Url: websocketURL(remote.URL),
Header: header,
Subprotocols: subprotocols,
MaxSessionTime: 0,
},
}
if len(remote.TLS.Certificates) > 0 {
data := bytes.NewBuffer(nil)
pem.Encode(data, &pem.Block{Type: "CERTIFICATE", Bytes: remote.TLS.Certificates[0].Certificate[0]})
out.Channel.CAPem = data.String()
}
return out
}
func badCA(authResponse *api.Response) {
authResponse.Channel.CAPem = "Bad CA"
}
func timeout(authResponse *api.Response) {
authResponse.Channel.MaxSessionTime = 1
}
func setHeader(hdr http.Header) func(*api.Response) {
return func(authResponse *api.Response) {
authResponse.Channel.Header = hdr
}
}
func dialWebsocket(url string, header http.Header, subprotocols ...string) (*websocket.Conn, *http.Response, error) {
dialer := &websocket.Dialer{
Subprotocols: subprotocols,
}
return dialer.Dial(url, header)
}
func websocketURL(httpURL string, suffix ...string) string {
url, err := url.Parse(httpURL)
if err != nil {
panic(err)
}
switch url.Scheme {
case "http":
url.Scheme = "ws"
case "https":
url.Scheme = "wss"
default:
panic("Unknown scheme: " + url.Scheme)
}
url.Path = path.Join(url.Path, strings.Join(suffix, "/"))
return url.String()
}
func say(conn *websocket.Conn, message string) error {
return conn.WriteMessage(websocket.TextMessage, []byte(message))
}
func requireReadMessage(t *testing.T, conn *websocket.Conn, expectedMessageType int, expectedData string) {
messageType, data, err := conn.ReadMessage()
require.NoError(t, err)
require.Equal(t, expectedMessageType, messageType, "message type")
require.Equal(t, expectedData, string(data), "message data")
}
package main
import (
"fmt"
"image"
"os"
"strconv"
"github.com/disintegration/imaging"
)
func main() {
if err := _main(); err != nil {
fmt.Fprintf(os.Stderr, "%s: fatal: %v\n", os.Args[0], err)
os.Exit(1)
}
}
func _main() error {
widthParam := os.Getenv("GL_RESIZE_IMAGE_WIDTH")
requestedWidth, err := strconv.Atoi(widthParam)
if err != nil {
return fmt.Errorf("GL_RESIZE_IMAGE_WIDTH: %w", err)
}
src, formatName, err := image.Decode(os.Stdin)
if err != nil {
return fmt.Errorf("decode: %w", err)
}
imagingFormat, err := imaging.FormatFromExtension(formatName)
if err != nil {
return fmt.Errorf("find imaging format: %w", err)
}
image := imaging.Resize(src, requestedWidth, 0, imaging.Lanczos)
return imaging.Encode(os.Stdout, image, imagingFormat)
}
package main
import (
"archive/zip"
"context"
"errors"
"flag"
"fmt"
"io"
"os"
"gitlab.com/gitlab-org/labkit/mask"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/zipartifacts"
)
const progName = "gitlab-zip-cat"
var Version = "unknown"
var printVersion = flag.Bool("version", false, "Print version and exit")
func main() {
flag.Parse()
version := fmt.Sprintf("%s %s", progName, Version)
if *printVersion {
fmt.Println(version)
os.Exit(0)
}
archivePath := os.Getenv("ARCHIVE_PATH")
encodedFileName := os.Getenv("ENCODED_FILE_NAME")
if len(os.Args) != 1 || archivePath == "" || encodedFileName == "" {
fmt.Fprintf(os.Stderr, "Usage: %s\n", progName)
fmt.Fprintf(os.Stderr, "Env: ARCHIVE_PATH=https://path.to/archive.zip or /path/to/archive.zip\n")
fmt.Fprintf(os.Stderr, "Env: ENCODED_FILE_NAME=base64-encoded-file-name\n")
os.Exit(1)
}
scrubbedArchivePath := mask.URL(archivePath)
fileName, err := zipartifacts.DecodeFileEntry(encodedFileName)
if err != nil {
fatalError(fmt.Errorf("decode entry %q", encodedFileName), err)
}
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
archive, err := zipartifacts.OpenArchive(ctx, archivePath)
if err != nil {
fatalError(errors.New("open archive"), err)
}
file := findFileInZip(fileName, archive)
if file == nil {
fatalError(fmt.Errorf("find %q in %q: not found", fileName, scrubbedArchivePath), zipartifacts.ErrorCode[zipartifacts.CodeEntryNotFound])
}
// Start decompressing the file
reader, err := file.Open()
if err != nil {
fatalError(fmt.Errorf("open %q in %q", fileName, scrubbedArchivePath), err)
}
defer reader.Close()
if _, err := fmt.Printf("%d\n", file.UncompressedSize64); err != nil {
fatalError(fmt.Errorf("write file size invalid"), err)
}
if _, err := io.Copy(os.Stdout, reader); err != nil {
fatalError(fmt.Errorf("write %q from %q to stdout", fileName, scrubbedArchivePath), err)
}
}
func findFileInZip(fileName string, archive *zip.Reader) *zip.File {
for _, file := range archive.File {
if file.Name == fileName {
return file
}
}
return nil
}
func fatalError(contextErr error, statusErr error) {
code := zipartifacts.ExitCodeByError(statusErr)
fmt.Fprintf(os.Stderr, "%s error: %v - %v, code: %d\n", progName, statusErr, contextErr, code)
if code > 0 {
os.Exit(code)
} else {
os.Exit(1)
}
}
package limit
import (
"errors"
"io"
"sync/atomic"
)
var ErrLimitExceeded = errors.New("reader limit exceeded")
const megabyte = 1 << 20
// LimitedReaderAt supports running a callback in case of reaching a read limit
// (bytes), and allows using a smaller limit than a defined offset for a read.
type LimitedReaderAt struct {
read int64
limit int64
parent io.ReaderAt
limitFunc func(int64)
}
func (r *LimitedReaderAt) ReadAt(p []byte, off int64) (int, error) {
if max := r.limit - r.read; int64(len(p)) > max {
p = p[0:max]
}
n, err := r.parent.ReadAt(p, off)
atomic.AddInt64(&r.read, int64(n))
if r.read >= r.limit {
r.limitFunc(r.read)
return n, ErrLimitExceeded
}
return n, err
}
func NewLimitedReaderAt(reader io.ReaderAt, limit int64, limitFunc func(int64)) io.ReaderAt {
return &LimitedReaderAt{parent: reader, limit: limit, limitFunc: limitFunc}
}
// SizeToLimit tries to determine an appropriate limit in bytes for an archive of
// a given size. If the size is 1 gigabyte or less we always limit a reader
// to 100 megabytes, otherwise the limit is 10% of the given size.
func SizeToLimit(size int64) int64 {
if size <= 1024*megabyte {
return 100 * megabyte
}
return size / 10
}
package limit
import (
"strings"
"testing"
"github.com/stretchr/testify/require"
)
func TestReadAt(t *testing.T) {
t.Run("when limit has not been reached", func(t *testing.T) {
r := strings.NewReader("some string to read")
buf := make([]byte, 11)
reader := NewLimitedReaderAt(r, 32, func(n int64) {
require.Zero(t, n)
})
p, err := reader.ReadAt(buf, 0)
require.NoError(t, err)
require.Equal(t, 11, p)
require.Equal(t, "some string", string(buf))
})
t.Run("when read limit is exceeded", func(t *testing.T) {
r := strings.NewReader("some string to read")
buf := make([]byte, 11)
reader := NewLimitedReaderAt(r, 9, func(n int64) {
require.Equal(t, 9, int(n))
})
p, err := reader.ReadAt(buf, 0)
require.Error(t, err)
require.Equal(t, 9, p)
require.Equal(t, "some stri\x00\x00", string(buf))
})
t.Run("when offset is higher than a limit", func(t *testing.T) {
r := strings.NewReader("some string to read")
buf := make([]byte, 4)
reader := NewLimitedReaderAt(r, 5, func(n int64) {
require.Zero(t, n)
})
p, err := reader.ReadAt(buf, 15)
require.NoError(t, err)
require.Equal(t, 4, p)
require.Equal(t, "read", string(buf))
})
t.Run("when a read starts at the limit", func(t *testing.T) {
r := strings.NewReader("some string to read")
buf := make([]byte, 11)
reader := NewLimitedReaderAt(r, 10, func(n int64) {
require.Equal(t, 10, int(n))
})
reader.ReadAt(buf, 0)
p, err := reader.ReadAt(buf, 0)
require.EqualError(t, err, ErrLimitExceeded.Error())
require.Equal(t, 0, p)
require.Equal(t, "some strin\x00", string(buf))
})
}
func TestSizeToLimit(t *testing.T) {
tests := []struct {
size int64
limit int64
name string
}{
{size: 1, limit: 104857600, name: "1b to 100mb"},
{size: 100, limit: 104857600, name: "100b to 100mb"},
{size: 104857600, limit: 104857600, name: "100mb to 100mb"},
{size: 1073741824, limit: 104857600, name: "1gb to 100mb"},
{size: 10737418240, limit: 1073741824, name: "10gb to 1gb"},
{size: 53687091200, limit: 5368709120, name: "50gb to 5gb"},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
require.Equal(t, test.limit, SizeToLimit(test.size))
})
}
}
package main
import (
"context"
"flag"
"fmt"
"io"
"os"
"gitlab.com/gitlab-org/gitlab-workhorse/cmd/gitlab-zip-metadata/limit"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/zipartifacts"
)
const progName = "gitlab-zip-metadata"
var Version = "unknown"
var printVersion = flag.Bool("version", false, "Print version and exit")
func main() {
flag.Parse()
version := fmt.Sprintf("%s %s", progName, Version)
if *printVersion {
fmt.Println(version)
os.Exit(0)
}
if len(os.Args) != 2 {
fmt.Fprintf(os.Stderr, "Usage: %s FILE.ZIP\n", progName)
os.Exit(1)
}
readerFunc := func(reader io.ReaderAt, size int64) io.ReaderAt {
readLimit := limit.SizeToLimit(size)
return limit.NewLimitedReaderAt(reader, readLimit, func(read int64) {
fmt.Fprintf(os.Stderr, "%s: zip archive limit exceeded after reading %d bytes\n", progName, read)
fatalError(zipartifacts.ErrorCode[zipartifacts.CodeLimitsReached])
})
}
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
archive, err := zipartifacts.OpenArchiveWithReaderFunc(ctx, os.Args[1], readerFunc)
if err != nil {
fatalError(err)
}
if err := zipartifacts.GenerateZipMetadata(os.Stdout, archive); err != nil {
fatalError(err)
}
}
func fatalError(err error) {
code := zipartifacts.ExitCodeByError(err)
fmt.Fprintf(os.Stderr, "%s error: %v, code: %d\n", progName, err, code)
if code > 0 {
os.Exit(code)
} else {
os.Exit(1)
}
}
[redis]
URL = "unix:/home/git/gitlab/redis/redis.socket"
[object_storage]
provider = "AWS" # Allowed options: AWS, AzureRM
[object_storage.s3]
aws_access_key_id = "YOUR AWS ACCESS KEY"
aws_secret_access_key = "YOUR AWS SECRET ACCESS KEY"
[object_storage.azurerm]
azure_storage_account_name = "YOUR ACCOUNT NAME"
azure_storage_access_key = "YOUR ACCOUNT KEY"
[image_resizer]
max_scaler_procs = 4 # Recommendation: CPUs / 2
max_filesize = 250000
package main
import (
"flag"
"io"
"io/ioutil"
"net/url"
"os"
"testing"
"time"
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/config"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/queueing"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/upstream"
)
func TestConfigFile(t *testing.T) {
f, err := ioutil.TempFile("", "workhorse-config-test")
require.NoError(t, err)
defer os.Remove(f.Name())
data := `
[redis]
password = "redis password"
[object_storage]
provider = "test provider"
[image_resizer]
max_scaler_procs = 123
`
_, err = io.WriteString(f, data)
require.NoError(t, err)
require.NoError(t, f.Close())
_, cfg, err := buildConfig("test", []string{"-config", f.Name()})
require.NoError(t, err, "build config")
// These are integration tests: we want to see that each section in the
// config file ends up in the config struct. We do not test all the
// fields in each section; that should happen in the tests of the
// internal/config package.
require.Equal(t, "redis password", cfg.Redis.Password)
require.Equal(t, "test provider", cfg.ObjectStorageCredentials.Provider)
require.Equal(t, uint32(123), cfg.ImageResizerConfig.MaxScalerProcs, "image resizer max_scaler_procs")
}
func TestConfigErrorHelp(t *testing.T) {
for _, f := range []string{"-h", "-help"} {
t.Run(f, func(t *testing.T) {
_, _, err := buildConfig("test", []string{f})
require.Equal(t, alreadyPrintedError{flag.ErrHelp}, err)
})
}
}
func TestConfigError(t *testing.T) {
for _, arg := range []string{"-foobar", "foobar"} {
t.Run(arg, func(t *testing.T) {
_, _, err := buildConfig("test", []string{arg})
require.Error(t, err)
require.IsType(t, alreadyPrintedError{}, err)
})
}
}
func TestConfigDefaults(t *testing.T) {
boot, cfg, err := buildConfig("test", nil)
require.NoError(t, err, "build config")
expectedBoot := &bootConfig{
secretPath: "./.gitlab_workhorse_secret",
listenAddr: "localhost:8181",
listenNetwork: "tcp",
logFormat: "text",
}
require.Equal(t, expectedBoot, boot)
expectedCfg := &config.Config{
Backend: upstream.DefaultBackend,
CableBackend: upstream.DefaultBackend,
Version: "(unknown version)",
DocumentRoot: "public",
ProxyHeadersTimeout: 5 * time.Minute,
APIQueueTimeout: queueing.DefaultTimeout,
APICILongPollingDuration: 50 * time.Nanosecond, // TODO this is meant to be 50*time.Second but it has been wrong for ages
ImageResizerConfig: config.DefaultImageResizerConfig,
}
require.Equal(t, expectedCfg, cfg)
}
func TestConfigFlagParsing(t *testing.T) {
backendURL, err := url.Parse("http://localhost:1234")
require.NoError(t, err)
cableURL, err := url.Parse("http://localhost:5678")
require.NoError(t, err)
args := []string{
"-version",
"-secretPath", "secret path",
"-listenAddr", "listen addr",
"-listenNetwork", "listen network",
"-listenUmask", "123",
"-pprofListenAddr", "pprof listen addr",
"-prometheusListenAddr", "prometheus listen addr",
"-logFile", "log file",
"-logFormat", "log format",
"-documentRoot", "document root",
"-developmentMode",
"-authBackend", backendURL.String(),
"-authSocket", "auth socket",
"-cableBackend", cableURL.String(),
"-cableSocket", "cable socket",
"-proxyHeadersTimeout", "10m",
"-apiLimit", "234",
"-apiQueueLimit", "345",
"-apiQueueDuration", "123s",
"-apiCiLongPollingDuration", "234s",
"-propagateCorrelationID",
}
boot, cfg, err := buildConfig("test", args)
require.NoError(t, err, "build config")
expectedBoot := &bootConfig{
secretPath: "secret path",
listenAddr: "listen addr",
listenNetwork: "listen network",
listenUmask: 123,
pprofListenAddr: "pprof listen addr",
prometheusListenAddr: "prometheus listen addr",
logFile: "log file",
logFormat: "log format",
printVersion: true,
}
require.Equal(t, expectedBoot, boot)
expectedCfg := &config.Config{
DocumentRoot: "document root",
DevelopmentMode: true,
Backend: backendURL,
Socket: "auth socket",
CableBackend: cableURL,
CableSocket: "cable socket",
Version: "(unknown version)",
ProxyHeadersTimeout: 10 * time.Minute,
APILimit: 234,
APIQueueLimit: 345,
APIQueueTimeout: 123 * time.Second,
APICILongPollingDuration: 234 * time.Second,
PropagateCorrelationID: true,
ImageResizerConfig: config.DefaultImageResizerConfig,
}
require.Equal(t, expectedCfg, cfg)
}
# Websocket channel support
In some cases, GitLab can provide in-browser terminal access to an
environment (which is a running server or container, onto which a
project has been deployed), or even access to services running in CI
through a WebSocket. Workhorse manages the WebSocket upgrade and the
long-lived connection between the browser and the websocket server,
which frees up GitLab to process other requests.
This document outlines the architecture of these connections.
## Introduction to WebSockets
A websocket is an "upgraded" HTTP/1.1 request. Its purpose is to
permit bidirectional communication between a client and a server.
**Websockets are not HTTP**. Clients can send messages (known as
frames) to the server at any time, and vice-versa. Client messages
are not necessarily requests, and server messages are not necessarily
responses. WebSocket URLs have schemes like `ws://` (unencrypted) or
`wss://` (TLS-secured).
When requesting an upgrade to WebSocket, the browser sends an HTTP/1.1
request that looks like this:
```
GET /path.ws HTTP/1.1
Connection: upgrade
Upgrade: websocket
Sec-WebSocket-Protocol: terminal.gitlab.com
# More headers, including security measures
```
At this point, the connection is still HTTP, so this is a request and
the server can send a normal HTTP response, including `404 Not Found`,
`500 Internal Server Error`, etc.
If the server decides to permit the upgrade, it will send an HTTP
`101 Switching Protocols` response. From this point, the connection
is no longer HTTP. It is a WebSocket and frames, not HTTP requests,
will flow over it. The connection will persist until the client or
server closes the connection.
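Purely for illustration, a successful upgrade response looks roughly like
this (the `Sec-WebSocket-Accept` value is derived from the key the client sent):
```
HTTP/1.1 101 Switching Protocols
Connection: upgrade
Upgrade: websocket
Sec-WebSocket-Protocol: terminal.gitlab.com
Sec-WebSocket-Accept: <digest derived from the client's Sec-WebSocket-Key>
```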
In addition to the subprotocol, individual websocket frames may
also specify a message type - examples include `BinaryMessage`,
`TextMessage`, `Ping`, `Pong` or `Close`. Only binary frames can
contain arbitrary data - other frames are expected to be valid
UTF-8 strings, in addition to any subprotocol expectations.
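As a minimal sketch of how these frame types surface in Go, using the
`github.com/gorilla/websocket` package that Workhorse depends on (see its
`go.mod`): `conn` is assumed to be an already-upgraded connection, and with
this library control frames (`Ping`, `Pong`, `Close`) are delivered to
dedicated handlers rather than returned by `ReadMessage`.
```go
package channeldocs

import (
	"log"
	"unicode/utf8"

	"github.com/gorilla/websocket"
)

// readFrames is an illustrative read loop, not Workhorse's actual code.
// ReadMessage only ever returns data frames: BinaryMessage or TextMessage.
func readFrames(conn *websocket.Conn) {
	for {
		messageType, data, err := conn.ReadMessage()
		if err != nil {
			// A Close frame or a broken connection surfaces here as an error.
			log.Printf("connection closed: %v", err)
			return
		}
		switch messageType {
		case websocket.BinaryMessage:
			log.Printf("binary frame: %d bytes of arbitrary data", len(data))
		case websocket.TextMessage:
			// Text frames must be valid UTF-8 per the WebSocket spec.
			log.Printf("text frame, valid UTF-8: %v", utf8.Valid(data))
		}
	}
}
```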
## Browser to Workhorse
Using the terminal as an example, GitLab serves a JavaScript terminal
emulator to the browser on a URL like
`https://gitlab.com/group/project/-/environments/1/terminal`.
This opens a websocket connection to, e.g.,
`wss://gitlab.com/group/project/-/environments/1/terminal.ws`.
This endpoint doesn't exist in GitLab - only in Workhorse.
When receiving the connection, Workhorse first checks that the
client is authorized to access the requested terminal. It does
this by performing a "preauthentication" request to GitLab.
If the client has the appropriate permissions and the terminal
exists, GitLab responds with a successful response that includes
details of the terminal that the client should be connected to.
Otherwise, it returns an appropriate HTTP error response.
Errors are passed back to the client as HTTP responses, but if
GitLab returns valid terminal details to Workhorse, it will
connect to the specified terminal, upgrade the browser to a
WebSocket, and proxy between the two connections for as long
as the browser's credentials are valid. Workhorse will also
send regular `PingMessage` control frames to the browser, to
keep intervening proxies from terminating the connection
while the browser is present.
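A sketch of that keep-alive behaviour, again using `gorilla/websocket`; the
interval and write deadline below are assumptions, not Workhorse's actual values:
```go
package channeldocs

import (
	"time"

	"github.com/gorilla/websocket"
)

// keepAlive periodically sends Ping control frames so that intermediate
// proxies do not drop a connection that otherwise looks idle.
func keepAlive(conn *websocket.Conn, interval time.Duration, stop <-chan struct{}) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			deadline := time.Now().Add(10 * time.Second)
			if err := conn.WriteControl(websocket.PingMessage, nil, deadline); err != nil {
				return // the proxying loop will notice the broken connection
			}
		case <-stop:
			return
		}
	}
}
```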
The browser must request an upgrade with a specific subprotocol:
### `terminal.gitlab.com`
This subprotocol considers `TextMessage` frames to be invalid.
Control frames, such as `PingMessage` or `CloseMessage`, have
their usual meanings.
`BinaryMessage` frames sent from the browser to the server are
arbitrary text input.
`BinaryMessage` frames sent from the server to the browser are
arbitrary text output.
These frames are expected to contain ANSI text control codes
and may be in any encoding.
### `base64.terminal.gitlab.com`
This subprotocol considers `BinaryMessage` frames to be invalid.
Control frames, such as `PingMessage` or `CloseMessage`, have
their usual meanings.
`TextMessage` frames sent from the browser to the server are
base64-encoded arbitrary text input (so the server must
base64-decode them before inputting them).
`TextMessage` frames sent from the server to the browser are
base64-encoded arbitrary text output (so the browser must
base64-decode them before outputting them).
In their base64-encoded form, these frames are expected to
contain ANSI terminal control codes, and may be in any encoding.
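A hypothetical helper pair showing the encoding work this subprotocol implies
(standard base64 encoding is assumed here):
```go
package channeldocs

import (
	"encoding/base64"
	"fmt"
)

// decodeTerminalInput handles a base64.terminal.gitlab.com TextMessage from
// the browser: the payload is base64-encoded terminal input.
func decodeTerminalInput(framePayload []byte) ([]byte, error) {
	decoded, err := base64.StdEncoding.DecodeString(string(framePayload))
	if err != nil {
		return nil, fmt.Errorf("invalid base64 terminal input: %w", err)
	}
	return decoded, nil
}

// encodeTerminalOutput is the reverse direction: raw terminal output is
// base64-encoded before being sent to the browser as a TextMessage frame.
func encodeTerminalOutput(output []byte) []byte {
	return []byte(base64.StdEncoding.EncodeToString(output))
}
```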
## Workhorse to GitLab
Using again the terminal as an example, before upgrading the browser,
Workhorse sends a normal HTTP request to GitLab on a URL like
`https://gitlab.com/group/project/environments/1/terminal.ws/authorize`.
This returns a JSON response containing details of where the
terminal can be found, and how to connect it. In particular,
the following details are returned in case of success:
* WebSocket URL to **connect** to, e.g.: `wss://example.com/terminals/1.ws?tty=1`
* WebSocket subprotocols to support, e.g.: `["channel.k8s.io"]`
* Headers to send, e.g.: `Authorization: Token xxyyz..`
* Certificate authority to verify `wss` connections with (optional)
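The authoritative definition of this response lives in Workhorse's
`internal/api` package; purely as an illustration, the returned details could
be modelled along these lines (the struct and JSON field names here are
hypothetical, not the actual API):
```go
package channeldocs

import "net/http"

// channelDetails is an illustrative model of the authorize response,
// not the actual Workhorse API type.
type channelDetails struct {
	URL          string      `json:"url"`          // e.g. wss://example.com/terminals/1.ws?tty=1
	Subprotocols []string    `json:"subprotocols"` // e.g. ["channel.k8s.io"]
	Header       http.Header `json:"header"`       // e.g. Authorization: Token xxyyz..
	CAPem        string      `json:"ca_pem"`       // optional CA to verify wss connections
}
```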
Workhorse periodically re-checks this endpoint, and if it gets an
error response, or the details of the terminal change, it will
terminate the websocket session.
## Workhorse to the WebSocket server
In GitLab, environments or CI jobs may have a deployment service (e.g.,
`KubernetesService`) associated with them. This service knows
where the terminals or the service for an environment may be found, and these
details are returned to Workhorse by GitLab.
These URLs are *also* WebSocket URLs, and GitLab tells Workhorse
which subprotocols to speak over the connection, along with any
authentication details required by the remote end.
Before upgrading the browser's connection to a websocket,
Workhorse opens an HTTP client connection, according to the
details given to it by GitLab, and attempts to upgrade
that connection to a websocket. If it fails, an error
response is sent to the browser; otherwise, the browser is
also upgraded.
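A rough sketch of that client-side upgrade with `gorilla/websocket`; the URL,
subprotocol, and token are the placeholder values from the example above:
```go
package channeldocs

import (
	"net/http"

	"github.com/gorilla/websocket"
)

// dialChannel sketches how the upstream WebSocket could be opened using the
// details returned by the authorize endpoint. All values are placeholders.
func dialChannel() (*websocket.Conn, error) {
	dialer := websocket.Dialer{
		Subprotocols: []string{"channel.k8s.io"},
	}
	header := http.Header{}
	header.Set("Authorization", "Token xxyyz..")

	conn, _, err := dialer.Dial("wss://example.com/terminals/1.ws?tty=1", header)
	if err != nil {
		// On failure, an error response goes back to the browser instead of an upgrade.
		return nil, err
	}
	return conn, nil
}
```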
Workhorse now has two websocket connections, albeit with
differing subprotocols. It decodes incoming frames from the
browser, re-encodes them to the channel's subprotocol, and
sends them to the channel. Similarly, it decodes incoming
frames from the channel, re-encodes them to the browser's
subprotocol, and sends them to the browser.
When either connection closes or enters an error state,
Workhorse detects the error and closes the other connection,
terminating the channel session. If the browser is the
connection that has disconnected, Workhorse will send an ANSI
`End of Transmission` control code (the `0x04` byte) to the
channel, encoded according to the appropriate subprotocol.
Workhorse will automatically reply to any websocket ping frame
sent by the channel, to avoid being disconnected.
Currently, Workhorse only supports the following subprotocols.
Supporting new deployment services will require new subprotocols
to be supported:
### `channel.k8s.io`
Used by Kubernetes, this subprotocol defines a simple multiplexed
channel.
Control frames have their usual meanings. `TextMessage` frames are
invalid. `BinaryMessage` frames represent I/O to a specific file
descriptor.
The first byte of each `BinaryMessage` frame represents the file
descriptor (fd) number, as a `uint8` (so the value `0x00` corresponds
to fd 0, `STDIN`, while `0x01` corresponds to fd 1, `STDOUT`).
The remaining bytes represent arbitrary data. For frames received
from the server, they are bytes that have been received from that
fd. For frames sent to the server, they are bytes that should be
written to that fd.
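As a small, hypothetical illustration of that framing:
```go
package channeldocs

import "errors"

// parseK8sFrame splits a channel.k8s.io BinaryMessage payload into the file
// descriptor number (first byte) and the data read from / destined for it.
func parseK8sFrame(payload []byte) (fd uint8, data []byte, err error) {
	if len(payload) == 0 {
		return 0, nil, errors.New("empty channel.k8s.io frame")
	}
	// For example, []byte{0x01, 'h', 'i'} means the bytes "hi" came from fd 1 (STDOUT).
	return payload[0], payload[1:], nil
}
```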
### `base64.channel.k8s.io`
Also used by Kubernetes, this subprotocol defines a similar multiplexed
channel to `channel.k8s.io`. The main differences are:
* `TextMessage` frames are valid, rather than `BinaryMessage` frames.
* The first byte of each `TextMessage` frame represents the file
  descriptor as a numeric UTF-8 character (so the character `U+0030`,
  or "0", is fd 0, `STDIN`).
* The remaining bytes represent base64-encoded arbitrary data.
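And a matching hypothetical helper for the base64 variant, where the fd is an
ASCII digit and the payload is base64-encoded:
```go
package channeldocs

import (
	"encoding/base64"
	"fmt"
)

// encodeBase64K8sFrame builds a base64.channel.k8s.io TextMessage payload:
// one ASCII digit for the fd (0-9 assumed here), followed by base64-encoded data.
func encodeBase64K8sFrame(fd int, data []byte) []byte {
	return []byte(fmt.Sprintf("%d%s", fd, base64.StdEncoding.EncodeToString(data)))
}

// encodeBase64K8sFrame(0, []byte("ls\n")) returns "0bHMK", i.e. input for fd 0 (STDIN).
```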
# Features that rely on Workhorse
Workhorse itself is not a feature, but there are several features in
GitLab that would not work efficiently without Workhorse.
To put the efficiency benefit in context, consider that in 2020Q3 on
GitLab.com [we see][thanos] Rails application threads using on average
about 200MB of RSS vs about 200KB for Workhorse goroutines.
Examples of features that rely on Workhorse:
## 1. `git clone` and `git push` over HTTP
Git clone, pull and push are slow because they transfer large amounts
of data and because each is CPU intensive on the GitLab side. Without
Workhorse, HTTP access to Git repositories would compete with regular
web access to the application, requiring us to run way more Rails
application servers.
## 2. CI runner long polling
GitLab CI runners fetch new CI jobs by polling the GitLab server.
Workhorse acts as a kind of "waiting room" where CI runners can sit
and wait for new CI jobs. Because of Go's efficiency we can fit a lot
of runners in the waiting room at little cost. Without this waiting
room mechanism we would have to add a lot more Rails server capacity.
## 3. File uploads and downloads
File uploads and downloads may be slow either because the file is
large or because the user's connection is slow. Workhorse can handle
the slow part for Rails. This improves the efficiency of features such
as CI artifacts, package repositories, LFS objects, etc.
## 4. Websocket proxying
Features such as the web terminal require a long lived connection
between the user's web browser and a container inside GitLab that is
not directly accessible from the internet. Dedicating a Rails
application thread to proxying such a connection would cost much more
memory than it costs to have Workhorse look after it.
## Quick facts (how does Workhorse work)
- Workhorse can handle some requests without involving Rails at all:
for example, JavaScript files and CSS files are served straight
from disk.
- Workhorse can modify responses sent by Rails: for example if you use
`send_file` in Rails then GitLab Workhorse will open the file on
disk and send its contents as the response body to the client.
- Workhorse can take over requests after asking permission from Rails.
Example: handling `git clone`.
- Workhorse can modify requests before passing them to Rails. Example:
when handling a Git LFS upload Workhorse first asks permission from
Rails, then it stores the request body in a tempfile, then it sends
a modified request containing the tempfile path to Rails.
- Workhorse can manage long-lived WebSocket connections for Rails.
Example: handling the terminal websocket for environments.
- Workhorse does not connect to PostgreSQL, only to Rails and (optionally) Redis.
- We assume that all requests that reach Workhorse pass through an
upstream proxy such as NGINX or Apache first.
- Workhorse does not accept HTTPS connections.
- Workhorse does not clean up idle client connections.
- We assume that all requests to Rails pass through Workhorse.
For more information see ['A brief history of GitLab Workhorse'][brief-history-blog].
[thanos]: https://thanos-query.ops.gitlab.net/graph?g0.range_input=1h&g0.max_source_resolution=0s&g0.expr=sum(ruby_process_resident_memory_bytes%7Bapp%3D%22webservice%22%2Cenv%3D%22gprd%22%2Crelease%3D%22gitlab%22%7D)%20%2F%20sum(puma_max_threads%7Bapp%3D%22webservice%22%2Cenv%3D%22gprd%22%2Crelease%3D%22gitlab%22%7D)&g0.tab=1&g1.range_input=1h&g1.max_source_resolution=0s&g1.expr=sum(go_memstats_sys_bytes%7Bapp%3D%22webservice%22%2Cenv%3D%22gprd%22%2Crelease%3D%22gitlab%22%7D)%2Fsum(go_goroutines%7Bapp%3D%22webservice%22%2Cenv%3D%22gprd%22%2Crelease%3D%22gitlab%22%7D)&g1.tab=1
[brief-history-blog]: https://about.gitlab.com/2016/04/12/a-brief-history-of-gitlab-workhorse/
This file was moved to [`architecture/channel.md`](doc/architecture/channel.md).
## Adding new features to Workhorse
GitLab Workhorse is a smart reverse proxy for GitLab. It handles
"long" HTTP requests such as file downloads, file uploads, Git
push/pull and Git archive downloads.
Workhorse itself is not a feature, but there are [several features in GitLab](https://gitlab.com/gitlab-org/gitlab-workhorse/-/blob/master/doc/architecture/gitlab_features.md) that would not work efficiently without Workhorse.
At a first glance, it may look like Workhorse is just a pipeline for processing HTTP streams so that you can reduce the amount of logic in your Ruby on Rails controller, but there are good reasons to avoid treating it like that.
Engineers embarking on the quest of offloading a feature to Workhorse often find that the effort required is much higher than they originally anticipated, in part because of the new programming language (only a few engineers at GitLab are Go developers), and in part because of Workhorse's demanding requirements: Workhorse is stateless, memory and disk usage must be kept under tight control, and the request should not be slowed down in the process.
## Can I add a new feature to Workhorse?
We suggest following this route only if absolutely necessary and no other options are available.
Splitting a feature between the Rails code-base and Workhorse is deliberately choosing to introduce technical debt. It adds complexity to the system and coupling between the two components.
* Building features using Workhorse has a considerable complexity cost, so you should prefer designs based on Rails requests and Sidekiq jobs.
* Even when using Rails+Sidekiq is "more work" than using Rails+Workhorse, Rails+Sidekiq is easier to maintain in the long term because Workhorse is unique to GitLab while Rails+Sidekiq is an industry standard.
* For "global" behaviors around web requests consider using a Rack middleware instead of Workhorse.
* Generally speaking, we should only use Rails+Workhorse if the HTTP client expects behavior that is not reasonable to implement in Rails, like "long" requests.
## What is a "long" request?
There is one order of magnitude between Workhorse and Puma RAM usage. Keeping a connection open for longer than a few milliseconds is a problem because of the amount of RAM it monopolizes once it reaches the Ruby on Rails controller.
So far we identified two classes of "long" requests: data transfers and HTTP long polling.
`git push`, `git pull`, uploading or downloading an artifact, the CI runner waiting for a new job are all good examples of long requests.
With the rise of cloud-native installations, Workhorse's feature-set was extended to add object storage direct-upload, to get rid of the shared Network File System (NFS) drives.
In 2020 @nolith presented at FOSDEM a talk titled [_Speed up the monolith. Building a smart reverse proxy in Go_](https://archive.fosdem.org/2020/schedule/event/speedupmonolith/).
You can watch the recording for more details on the history of Workhorse and the NFS removal.
[Uploads development documentation](https://docs.gitlab.com/ee/development/uploads.html)
contains the most common use-cases for adding a new type of upload and may answer all of your questions.
If you still think we should add a new feature to Workhorse, please open an issue explaining **what you want to implement** and **why it can't be implemented in our Ruby code-base**. Workhorse maintainers will be happy to help you assess the situation.
# Testing your code
Run the tests with:
```
make clean test
```
## Coverage / what to test
Each feature in GitLab Workhorse should have an integration test that
verifies that the feature 'kicks in' on the right requests and leaves
other requests unaffected. It is better to also have package-level tests
for specific behavior but the high-level integration tests should have
the first priority during development.
It is OK if a feature is only covered by integration tests.
# Workhorse configuration
For historical reasons, Workhorse uses a mixture of command line flags, a configuration file and environment variables.
All new configuration options that get added to Workhorse should go into the configuration file.
## CLI options
```
gitlab-workhorse [OPTIONS]
Options:
-apiCiLongPollingDuration duration
Long polling duration for job requesting for runners (default 50s - enabled) (default 50ns)
-apiLimit uint
Number of API requests allowed at single time
-apiQueueDuration duration
Maximum queueing duration of requests (default 30s)
-apiQueueLimit uint
Number of API requests allowed to be queued
-authBackend string
Authentication/authorization backend (default "http://localhost:8080")
-authSocket string
Optional: Unix domain socket to dial authBackend at
-cableBackend string
Optional: ActionCable backend (default authBackend)
-cableSocket string
Optional: Unix domain socket to dial cableBackend at (default authSocket)
-config string
TOML file to load config from
-developmentMode
Allow the assets to be served from Rails app
-documentRoot string
Path to static files content (default "public")
-listenAddr string
Listen address for HTTP server (default "localhost:8181")
-listenNetwork string
Listen 'network' (tcp, tcp4, tcp6, unix) (default "tcp")
-listenUmask int
Umask for Unix socket
-logFile string
Log file location
-logFormat string
Log format to use defaults to text (text, json, structured, none) (default "text")
-pprofListenAddr string
pprof listening address, e.g. 'localhost:6060'
-prometheusListenAddr string
Prometheus listening address, e.g. 'localhost:9229'
-proxyHeadersTimeout duration
How long to wait for response headers when proxying the request (default 5m0s)
-secretPath string
File with secret key to authenticate with authBackend (default "./.gitlab_workhorse_secret")
-version
Print version and exit
```
The 'auth backend' refers to the GitLab Rails application. The name is
a holdover from when GitLab Workhorse only handled Git push/pull over
HTTP.
GitLab Workhorse can listen on either a TCP or a Unix domain socket. It
can also open a second TCP listening socket with the Go
[net/http/pprof profiler server](http://golang.org/pkg/net/http/pprof/).
GitLab Workhorse can listen on Redis events (currently only `builds/register`
for runners). This requires you to pass a valid TOML config file via the
`-config` flag.
For regular setups this only requires a `[redis]` section with the `URL` of
your Redis instance (replacing the string with your actual socket or host);
see the example in the Redis section below.
## Redis
GitLab Workhorse integrates with Redis to do long polling for CI build
requests. This is configured via two things:
- Redis settings in the TOML config file
- The `-apiCiLongPollingDuration` command line flag to control polling
behavior for CI build requests
It is OK to enable Redis in the config file but to leave CI polling
disabled; this just results in an idle Redis pubsub connection. The
opposite is not possible: CI long polling requires a correct Redis
configuration.
Below we discuss the options for the `[redis]` section in the config
file.
```
[redis]
URL = "unix:///var/run/gitlab/redis.sock"
Password = "my_awesome_password"
Sentinel = [ "tcp://sentinel1:23456", "tcp://sentinel2:23456" ]
SentinelMaster = "mymaster"
```
- `URL` takes a string in the format `unix://path/to/redis.sock` or
`tcp://host:port`.
- `Password` is only required if your Redis instance is password-protected.
- `Sentinel` is used if you are using Sentinel.
*NOTE*: If both `Sentinel` and `URL` are given, only `Sentinel` will be used.
Optional fields are as follows:
```
[redis]
DB = 0
ReadTimeout = "1s"
KeepAlivePeriod = "5m"
MaxIdle = 1
MaxActive = 1
```
- `DB` is the Database to connect to. Defaults to `0`
- `ReadTimeout` is how long a redis read-command can take. Defaults to `1s`
- `KeepAlivePeriod` is how long the redis connection is to be kept alive without anything flowing through it. Defaults to `5m`
- `MaxIdle` is how many idle connections can be in the redis-pool at once. Defaults to 1
- `MaxActive` is how many connections the pool can keep. Defaults to 1
## Relative URL support
If you are mounting GitLab at a relative URL, e.g.
`example.com/gitlab`, then you should also use this relative URL in
the `authBackend` setting:
```
gitlab-workhorse -authBackend http://localhost:8080/gitlab
```
## Interaction of authBackend and authSocket
The interaction between `authBackend` and `authSocket` can be a bit
confusing. It comes down to: if `authSocket` is set it overrides the
_host_ part of `authBackend` but not the relative path.
In table form:
|authBackend|authSocket|Workhorse connects to?|Rails relative URL|
|---|---|---|---|
|unset|unset|`localhost:8080`|`/`|
|`http://localhost:3000`|unset|`localhost:3000`|`/`|
|`http://localhost:3000/gitlab`|unset|`localhost:3000`|`/gitlab`|
|unset|`/path/to/socket`|`/path/to/socket`|`/`|
|`http://localhost:3000`|`/path/to/socket`|`/path/to/socket`|`/`|
|`http://localhost:3000/gitlab`|`/path/to/socket`|`/path/to/socket`|`/gitlab`|
The same applies to `cableBackend` and `cableSocket`.
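The table above can be read as a small resolution rule; the following sketch
(a hypothetical helper, not Workhorse's actual code) expresses it in Go:
```go
package configdocs

import "strings"

// effectiveTarget illustrates the table above: authSocket, when set, overrides
// the host part of authBackend, while the relative URL still comes from
// authBackend. Defaults match the flag defaults (localhost:8080, "/").
func effectiveTarget(authBackend, authSocket string) (connectTo, relativeURL string) {
	connectTo, relativeURL = "localhost:8080", "/"
	if authBackend != "" {
		// Split "http://host[/path]" into its host and path parts.
		rest := strings.TrimPrefix(authBackend, "http://")
		if i := strings.Index(rest, "/"); i >= 0 {
			connectTo, relativeURL = rest[:i], rest[i:]
		} else {
			connectTo = rest
		}
	}
	if authSocket != "" {
		connectTo = authSocket // the socket overrides the host, not the path
	}
	return connectTo, relativeURL
}
```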
## Error tracking
GitLab-Workhorse supports remote error tracking with
[Sentry](https://sentry.io). To enable this feature set the
`GITLAB_WORKHORSE_SENTRY_DSN` environment variable.
You can also set the `GITLAB_WORKHORSE_SENTRY_ENVIRONMENT` environment variable to
use the Sentry environment functionality to separate staging, production and
development.
Omnibus (`/etc/gitlab/gitlab.rb`):
```
gitlab_workhorse['env'] = {
  'GITLAB_WORKHORSE_SENTRY_DSN' => 'https://foobar',
  'GITLAB_WORKHORSE_SENTRY_ENVIRONMENT' => 'production'
}
```
Source installations (`/etc/default/gitlab`):
```
export GITLAB_WORKHORSE_SENTRY_DSN='https://foobar'
export GITLAB_WORKHORSE_SENTRY_ENVIRONMENT='production'
```
## Distributed Tracing
Workhorse supports distributed tracing through [LabKit][] using [OpenTracing APIs](https://opentracing.io).
By default, no tracing implementation is linked into the binary, but different OpenTracing providers can be linked in using [build tags][build-tags]/[build constraints][build-tags]. This can be done by setting the `BUILD_TAGS` make variable.
For more details of the supported providers, see LabKit, but as an example, for Jaeger tracing support, include the tags: `BUILD_TAGS="tracer_static tracer_static_jaeger"`.
```shell
make BUILD_TAGS="tracer_static tracer_static_jaeger"
```
Once Workhorse is compiled with an OpenTracing provider, tracing is configured via the `GITLAB_TRACING` environment variable.
For example:
```shell
GITLAB_TRACING=opentracing://jaeger ./gitlab-workhorse
```
## Continuous Profiling
Workhorse supports continuous profiling through [LabKit][] using [Stackdriver Profiler](https://cloud.google.com/profiler).
By default, the Stackdriver Profiler implementation is linked in the binary using [build tags][build-tags], though it's not
required and can be skipped.
For example:
```shell
make BUILD_TAGS=""
```
Once Workhorse is compiled with Continuous Profiling, the profiler configuration can be set via `GITLAB_CONTINUOUS_PROFILING`
environment variable.
For example:
```shell
GITLAB_CONTINUOUS_PROFILING="stackdriver?service=workhorse&service_version=1.0.1&project_id=test-123" ./gitlab-workhorse
```
For more information, see the [LabKit monitoring docs](https://gitlab.com/gitlab-org/labkit/-/blob/master/monitoring/doc.go).
[LabKit]: https://gitlab.com/gitlab-org/labkit/
[build-tags]: https://golang.org/pkg/go/build/#hdr-Build_Constraints
# Installation
To install GitLab Workhorse you need [Go 1.13 or
newer](https://golang.org/dl) and [GNU
Make](https://www.gnu.org/software/make/).
To install into `/usr/local/bin` run `make install`.
```
make install
```
To install into `/foo/bin` set the PREFIX variable.
```
make install PREFIX=/foo
```
On some operating systems, such as FreeBSD, you may have to use
`gmake` instead of `make`.
*NOTE*: Some features depend on build tags; make sure to check
[Workhorse configuration](doc/operations/configuration.md) to enable them.
## Run time dependencies
### Exiftool
Workhorse uses [exiftool](https://www.sno.phy.queensu.ca/~phil/exiftool/) for
removing EXIF data (which may contain sensitive information) from uploaded
images. If you installed GitLab:
- Using the Omnibus package, you're all set.
  *NOTE*: If you are using CentOS Minimal, you may need to install the `perl`
  package: `yum install perl`
- From source, make sure `exiftool` is installed:
```sh
# Debian/Ubuntu
sudo apt-get install libimage-exiftool-perl
# RHEL/CentOS
sudo yum install perl-Image-ExifTool
```
module gitlab.com/gitlab-org/gitlab-workhorse
go 1.13
require (
github.com/Azure/azure-storage-blob-go v0.10.0
github.com/BurntSushi/toml v0.3.1
github.com/FZambia/sentinel v1.0.0
github.com/alecthomas/chroma v0.7.3
github.com/aws/aws-sdk-go v1.31.13
github.com/certifi/gocertifi v0.0.0-20200922220541-2c3bb06c6054 // indirect
github.com/dgrijalva/jwt-go v3.2.0+incompatible
github.com/disintegration/imaging v1.6.2
github.com/getsentry/raven-go v0.2.0
github.com/golang/gddo v0.0.0-20190419222130-af0f2af80721
github.com/golang/protobuf v1.4.2
github.com/gomodule/redigo v2.0.0+incompatible
github.com/gorilla/websocket v1.4.0
github.com/grpc-ecosystem/go-grpc-middleware v1.0.0
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0
github.com/johannesboyne/gofakes3 v0.0.0-20200510090907-02d71f533bec
github.com/jpillora/backoff v0.0.0-20170918002102-8eab2debe79d
github.com/mitchellh/copystructure v1.0.0
github.com/prometheus/client_golang v1.0.0
github.com/rafaeljusto/redigomock v0.0.0-20190202135759-257e089e14a1
github.com/sebest/xff v0.0.0-20160910043805-6c115e0ffa35
github.com/shabbyrobe/gocovmerge v0.0.0-20190829150210-3e036491d500 // indirect
github.com/sirupsen/logrus v1.7.0
github.com/smartystreets/goconvey v1.6.4
github.com/stretchr/testify v1.6.1
gitlab.com/gitlab-org/gitaly v1.74.0
gitlab.com/gitlab-org/labkit v1.0.0
gocloud.dev v0.20.0
golang.org/x/lint v0.0.0-20200302205851-738671d3881b
golang.org/x/net v0.0.0-20200602114024-627f9648deb9
golang.org/x/tools v0.0.0-20200608174601-1b747fd94509
google.golang.org/grpc v1.29.1
honnef.co/go/tools v0.0.1-2020.1.5
)
// go get tries to enforce semantic version compatibility via module paths.
// We can't upgrade to Gitaly v13.x.x from v1.x.x without using a manual override.
// See https://gitlab.com/gitlab-org/gitaly/-/issues/3177 for more details.
replace gitlab.com/gitlab-org/gitaly => gitlab.com/gitlab-org/gitaly v1.87.1-0.20201001041716-3f5e218def93
package api
import (
"fmt"
"net/http"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/helper"
)
// Prevent internal API responses intended for gitlab-workhorse from
// leaking to the end user
func Block(h http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
rw := &blocker{rw: w, r: r}
defer rw.flush()
h.ServeHTTP(rw, r)
})
}
type blocker struct {
rw http.ResponseWriter
r *http.Request
hijacked bool
status int
}
func (b *blocker) Header() http.Header {
return b.rw.Header()
}
func (b *blocker) Write(data []byte) (int, error) {
if b.status == 0 {
b.WriteHeader(http.StatusOK)
}
if b.hijacked {
return len(data), nil
}
return b.rw.Write(data)
}
// WriteHeader records only the first status written. If the response carries
// the internal API content type, it is replaced with a 500 error and the rest
// of the body is suppressed, so internal API payloads never reach the end user.
func (b *blocker) WriteHeader(status int) {
if b.status != 0 {
return
}
if helper.IsContentType(ResponseContentType, b.Header().Get("Content-Type")) {
b.status = 500
b.Header().Del("Content-Length")
b.hijacked = true
helper.Fail500(b.rw, b.r, fmt.Errorf("api.blocker: forbidden content-type: %q", ResponseContentType))
return
}
b.status = status
b.rw.WriteHeader(b.status)
}
// flush makes sure a status line is sent even if the wrapped handler never
// wrote anything.
func (b *blocker) flush() {
b.WriteHeader(http.StatusOK)
}
package api
import (
"io/ioutil"
"net/http"
"net/http/httptest"
"testing"
"github.com/stretchr/testify/require"
)
func TestBlocker(t *testing.T) {
upstreamResponse := "hello world"
testCases := []struct {
desc string
contentType string
out string
}{
{
desc: "blocked",
contentType: ResponseContentType,
out: "Internal server error\n",
},
{
desc: "pass",
contentType: "text/plain",
out: upstreamResponse,
},
}
for _, tc := range testCases {
t.Run(tc.desc, func(t *testing.T) {
r, err := http.NewRequest("GET", "/foo", nil)
require.NoError(t, err)
rw := httptest.NewRecorder()
bl := &blocker{rw: rw, r: r}
bl.Header().Set("Content-Type", tc.contentType)
upstreamBody := []byte(upstreamResponse)
n, err := bl.Write(upstreamBody)
require.NoError(t, err)
require.Equal(t, len(upstreamBody), n, "bytes written")
rw.Flush()
body := rw.Result().Body
data, err := ioutil.ReadAll(body)
require.NoError(t, err)
require.NoError(t, body.Close())
require.Equal(t, tc.out, string(data))
})
}
}
package artifacts
import (
"os"
"testing"
"gitlab.com/gitlab-org/labkit/log"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/testhelper"
)
func TestMain(m *testing.M) {
if err := testhelper.BuildExecutables(); err != nil {
log.WithError(err).Fatal()
}
os.Exit(m.Run())
}
package artifacts
import "strings"
// taken from mime/multipart/writer.go
var quoteEscaper = strings.NewReplacer("\\", "\\\\", `"`, "\\\"")
func escapeQuotes(s string) string {
return quoteEscaper.Replace(s)
}
package git
// For cosmetic purposes in Sentry
type copyError struct{ error }