From e117f61d4bdbf033c2ef2d7b9542feaffe40a2e3 Mon Sep 17 00:00:00 2001 From: Mustafa Bayar Date: Wed, 15 Oct 2025 11:44:05 +0300 Subject: [PATCH 1/4] test --- .../experiments/mb-tran/config.yml | 164 +++++++++ .../experiments/mb-tran/k6-benchmark.js | 300 ++++++++++++++++ .../benchmarking/experiments/mb-tran/plot.py | 337 ++++++++++++++++++ .../experiments/mb-tran/profile-gitaly.sh | 103 ++++++ .../roles/benchmark/vars/main.yml | 4 +- 5 files changed, 906 insertions(+), 2 deletions(-) create mode 100644 _support/benchmarking/experiments/mb-tran/config.yml create mode 100644 _support/benchmarking/experiments/mb-tran/k6-benchmark.js create mode 100644 _support/benchmarking/experiments/mb-tran/plot.py create mode 100755 _support/benchmarking/experiments/mb-tran/profile-gitaly.sh diff --git a/_support/benchmarking/experiments/mb-tran/config.yml b/_support/benchmarking/experiments/mb-tran/config.yml new file mode 100644 index 00000000000..4ee1b8700c9 --- /dev/null +++ b/_support/benchmarking/experiments/mb-tran/config.yml @@ -0,0 +1,164 @@ +--- +project: "gitaly-benchmark-0150d6cf" +benchmark_region: "us-central1" +benchmark_zone: "us-central1-a" + +# Enable to use regional persistent disk https://cloud.google.com/compute/docs/disks/regional-persistent-disk +# Regional PD is supported on only E2, N1, N2, and N2D machine type VMs. +use_regional_disk: false +# One of these zones must be the zone for benchmark_zone +regional_disk_replica_zones: ["us-central1-a", "us-central1-b"] + +# The image to use for the client node and all Gitaly nodes. +os_image: "ubuntu-os-cloud/ubuntu-2204-lts" + +# Configuration parameters for the sole client instance. +client: + machine_type: "n2d-standard-4" + boot_disk_size: 20 + boot_disk_type: "pd-balanced" + # The client clones Gitaly in order to consume the protobuf definitions. + # This revision should be set to the latest of the gitaly_revision values + # specified for the gitaly_instances below. + gitaly_revision: HEAD + +# Configuration parameters for a collection of N Gitaly nodes. Benchmarks will execute for each Gitaly +# node. +gitaly_instances: + # Try to use a short name, otherwise we'll exceed the GCP resource name length of 63 characters + - name: "test" + machine_type: "n2d-standard-16" + boot_disk_type: "pd-balanced" + boot_disk_size: 20 + gitaly_revision: HEAD + disk_size: 100 + disk_type: "pd-balanced" + # ==== Standard Filesystem settings ==== + # Btrfs setup + # filesystem: "btrfs" # Options: ext4, xfs, btrfs + # fs_mount_opts: "noatime,compress=zstd,space_cache=v2,ssd,discard=async" + # fs_format_opts: "-f -n 16k" + # XFS setup + # filesystem: "xfs" # Options: ext4, xfs, btrfs + # fs_mount_opts: "defaults,discard" + # fs_format_opts: "-f" + # ext4 setup + filesystem: "ext4" # Options: ext4, xfs, btrfs + fs_mount_opts: "defaults,discard" + fs_format_opts: "" + + # These are manually templated and don't translate directly to config.toml entries. + config: + # Whether transactions should be enabled. + transactions: true + environment: + # Arbitrary environment variables. Note that SNAPSHOT_DRIVER doesn't actually do anything; it + # just serves as an example. 
+ SNAPSHOT_DRIVER: deepclone + - name: "base" + machine_type: "n2d-standard-16" + boot_disk_type: "pd-balanced" + boot_disk_size: 20 + gitaly_revision: HEAD + disk_size: 100 + disk_type: "pd-balanced" + # ==== Standard Filesystem settings ==== + # Btrfs setup + # filesystem: "btrfs" # Options: ext4, xfs, btrfs + # fs_mount_opts: "noatime,compress=zstd,space_cache=v2,ssd,discard=async" + # fs_format_opts: "-f -n 16k" + # XFS setup + # filesystem: "xfs" # Options: ext4, xfs, btrfs + # fs_mount_opts: "defaults,discard" + # fs_format_opts: "-f" + # ext4 setup + filesystem: "ext4" # Options: ext4, xfs, btrfs + fs_mount_opts: "defaults,discard" + fs_format_opts: "" + + # These are manually templated and don't translate directly to config.toml entries. + config: + # Whether transactions should be enabled. + transactions: false + environment: + # Arbitrary environment variables. Note that SNAPSHOT_DRIVER doesn't actually do anything; it + # just serves as an example. + SNAPSHOT_DRIVER: deepclone + +# A list of repositories to be cloned onto the repositories disk, along with test inputs to be used +# for RPC calls. This section of the configuration is re-serialised into JSON and provided to the +# K6 script. +# +# NOTE: you may wish to delete some of these entries, otherwise benchmarking setup may take a while +# to clone each repo. +repositories: + - name: git + # Whether this repository should be tested. This toggle is read by the K6 script. + include_in_test: true + # Which reference backend to use. This defines how the repository will be cloned. + reference_backend: files + remote: "https://gitlab.com/gitlab-org/git.git" + revision: "2462961280690837670d997bde64bd4ebf8ae66d" + # Test data to be used as RPC inputs. The K6 script will randomly choose out of these available + # inputs. 
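+    # With this patch's k6-benchmark.js, commits feed FindCommit, GetBlobs, GetTreeEntries
+    # and ListCommitsByOid; refs feed TreeEntry; files feed GetBlobs and TreeEntry; and
+    # directories feed GetTreeEntries.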
+ testdata: + commits: + - "fea9d18c534a445ef6e488d8ee711fa92fa0e6bd" + - "0a15bb634cf005a0266ee1108ac31aa75649a61c" + - "217e4a23d76fe95a0f6ab0f6159de2460db6fcd9" + refs: + - "refs/heads/master" + - "refs/heads/next" + - "refs/tags/v2.50.0" + files: + - "README.md" + - "t/lib-diff.sh" + - "packfile.c" + directories: + - "t" + - "Documentation" + - "git-gui" + - name: gitlab + include_in_test: true + reference_backend: files + remote: "https://gitlab.com/gitlab-org/gitlab.git" + revision: "8f3978675aa4df643cff5a01a8e1896ae754685a" + testdata: + commits: + - "875ffb690e25eb8c98797b5641c6c16c71454b73" + - "3074e43761003e2566ea604053fe4988774d2896" + - "9ace97496c56335c5739c226853b468afd962830" + refs: + - "refs/heads/master" + - "refs/tags/v18.1.5-ee" + - "refs/tags/v17.11.6-ee" + files: + - "README.md" + - "lib/gitaly/server.rb" + - "ee/app/graphql/resolvers/epics_resolver.rb" + directories: + - "app" + - "rubocop" + - "qa" + - name: gitaly + include_in_test: true + reference_backend: files + remote: "https://gitlab.com/gitlab-org/gitaly.git" + revision: "4d78df8cb5c6b3abfef5530830dba6c67d9d4c53" + testdata: + commits: + - "765d81272feb53bcc0c50199183b4514c5ef7a73" + - "25965387d1a0a91d226912649180f38c04d89a36" + - "552d12d94dd24ad8dff93856e77a08b6a96f1d3e" + refs: + - "refs/heads/master" + - "refs/tags/v18.1.5" + - "refs/tags/v17.11.7" + files: + - "README.md" + - "internal/gitaly/rangediff/range_diff_test.go" + - "proto/go/gitalypb/blob.pb.go" + directories: + - "internal" + - "proto" + - "internal/gitaly/service/raft" diff --git a/_support/benchmarking/experiments/mb-tran/k6-benchmark.js b/_support/benchmarking/experiments/mb-tran/k6-benchmark.js new file mode 100644 index 00000000000..264d4ffde33 --- /dev/null +++ b/_support/benchmarking/experiments/mb-tran/k6-benchmark.js @@ -0,0 +1,300 @@ +import { Client, Stream, StatusOK } from 'k6/net/grpc' +import encoding from 'k6/encoding' +import { check } from 'k6' +import exec from 'k6/x/exec' + +// Consume the environment variables we set in the Ansible task. 
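+// GITALY_ADDRESS is the gRPC address of the Gitaly node under test; GITALY_PROTO_DIR points
+// at Gitaly's .proto definitions (loaded below via client.load); RUN_NAME identifies this
+// benchmark run and is used for the setup-completion sentinel; WORKLOAD_DURATION is a k6
+// duration string used by the constant-arrival-rate scenarios.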
+const gitalyAddress = __ENV.GITALY_ADDRESS +const gitalyProtoDir = __ENV.GITALY_PROTO_DIR +const runName = __ENV.RUN_NAME +const workloadDuration = __ENV.WORKLOAD_DURATION + + +// optionsStatic returns a test scenario where constant load is offered to Gitaly +const optionsStatic = () => { + const SCENARIO_DEFAULTS = { + executor: 'constant-arrival-rate', + duration: workloadDuration, + timeUnit: '1s', + gracefulStop: '0s', + preAllocatedVUs: 40 + } + + return { + scenarios: { + findCommit: { ...SCENARIO_DEFAULTS, rate: 200, exec: 'findCommit' }, + getBlobs: { ...SCENARIO_DEFAULTS, rate: 200, exec: 'getBlobs' }, + getTreeEntries: { ...SCENARIO_DEFAULTS, rate: 200, exec: 'getTreeEntries' }, + treeEntry: { ...SCENARIO_DEFAULTS, rate: 100, exec: 'treeEntry' }, + listCommitsByOid: { ...SCENARIO_DEFAULTS, rate: 200, exec: 'listCommitsByOid' } + // writeAndDeleteRefs: { ...SCENARIO_DEFAULTS, rate: 100, exec: 'writeAndDeleteRefs' } + }, + setupTimeout: '5m' + } +} + +// optionsRamping returns a test scenario where a ramping workload is offered to Gitaly +const optionsRamping = () => { + const SCENARIO_DEFAULTS = { + executor: 'ramping-arrival-rate', + timeUnit: '1s', + preAllocatedVUs: 40 + } + + const stages_read = [{target: 50, duration: '100s'}, {target: 100, duration: '50s'}, {target: 200, duration: '100s'}, {target: 50, duration: '50s'}] + // const stages_write = [{target: 25, duration: '100s'}, {target: 50, duration: '50s'}, {target: 100, duration: '100s'}, {target: 25, duration: '50s'}] + + return { + scenarios: { + findCommit: { + ...SCENARIO_DEFAULTS, + stages: stages_read, + exec: 'findCommit' + }, + getBlobs: { + ...SCENARIO_DEFAULTS, + stages: stages_read, + exec: 'getBlobs' + }, + getTreeEntries: { + ...SCENARIO_DEFAULTS, + stages: stages_read, + exec: 'getTreeEntries' + }, + treeEntry: { + ...SCENARIO_DEFAULTS, + stages: stages_read, + exec: 'treeEntry' + }, + listCommitsByOid: { + ...SCENARIO_DEFAULTS, + stages: stages_read, + exec: 'listCommitsByOid' + } + // writeAndDeleteRefs: { + // ...SCENARIO_DEFAULTS, + // stages: stages_write, + // exec: 'writeAndDeleteRefs' + // } + }, + setupTimeout: '5m' + } + +} + +export const options = optionsRamping() + +const repos = JSON.parse(open("/opt/benchmark-gitaly/repositories.json")); + +const selectTestRepo = () => { + const active = repos.filter(r => r.include_in_test); + const repo = active[Math.floor(Math.random() * active.length)]; + + return { + repository: { + storageName: 'default', + relativePath: `${repo.name}`, + glRepository: repo.name, // irrelevant but mandatory + glProjectPath: `foo/bar/${repo.name}`, // irrelevant but mandatory + }, + commit: repo.testdata.commits[Math.floor(Math.random() * repo.testdata.commits.length)], + ref: repo.testdata.refs[Math.floor(Math.random() * repo.testdata.refs.length)], + file: repo.testdata.files[Math.floor(Math.random() * repo.testdata.files.length)], + directory: repo.testdata.directories[Math.floor(Math.random() * repo.testdata.directories.length)], + } +} + +const generateRandom = () => Math.random().toString(36).substring(2, 15) + Math.random().toString(23).substring(2, 5) + +export function setup () { + const setupCompletionSentinel = `/tmp/${runName}-setup-complete` + // Signal to Ansible that setup is complete, in a very hacky way. 
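+  // The Ansible playbook is assumed to wait for this sentinel file before it starts the
+  // profiling step; teardown() removes the file again once the workload has finished.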
+ exec.command('touch', [setupCompletionSentinel]) + + return { + setupCompletionSentinel + } +} + +export function teardown (context) { + exec.command('rm', [context.setupCompletionSentinel]) +} + +const client = new Client() +// k6 provides no easy way to list directory contents. +client.load([gitalyProtoDir], 'commit.proto', 'blob.proto', 'ref.proto', 'repository.proto') + +export function findCommit () { + client.connect(gitalyAddress, { + plaintext: true + }) + + const testRepo = selectTestRepo(); + const req = { + repository: testRepo.repository, + revision: encoding.b64encode(testRepo.commit) + } + + const res = client.invoke('gitaly.CommitService/FindCommit', req) + check(res, { + 'FindCommit - StatusOK': r => r && r.status === StatusOK + }) + + client.close() +} + +export function getBlobs () { + client.connect(gitalyAddress, { + plaintext: true + }) + + const testRepo = selectTestRepo(); + const req = { + repository: testRepo.repository, + revision_paths: [ + { + revision: testRepo.commit, + path: encoding.b64encode(testRepo.file) + } + ], + limit: -1 + } + + const stream = new Stream(client, 'gitaly.BlobService/GetBlobs') + stream.on('data', data => { + check(data, { + 'GetBlobs - data present in response': r => r && r.data + }) + }) + + stream.on('end', function () { + client.close() + }) + + stream.on('error', function(err) { + console.error(err) + }) + + stream.write(req) +} + +export function getTreeEntries () { + client.connect(gitalyAddress, { + plaintext: true + }) + + const testRepo = selectTestRepo(); + const req = { + repository: testRepo.repository, + revision: encoding.b64encode(testRepo.commit), + path: encoding.b64encode(testRepo.directory) + } + + const stream = new Stream(client, 'gitaly.CommitService/GetTreeEntries') + stream.on('data', data => { + check(data, { + 'GetTreeEntries - entries present in response': r => r && r.entries + }) + }) + + stream.on('end', function () { + client.close() + }) + + stream.on('error', function(err) { + console.error(err) + }) + + stream.write(req) +} + +export function treeEntry () { + client.connect(gitalyAddress, { + plaintext: true + }) + + const testRepo = selectTestRepo(); + const req = { + repository: testRepo.repository, + revision: encoding.b64encode(testRepo.ref), + path: encoding.b64encode(testRepo.file) + } + + const stream = new Stream(client, 'gitaly.CommitService/TreeEntry') + stream.on('data', data => { + check(data, { + 'TreeEntry - data present in response': r => r && r.data + }) + }) + + stream.on('end', function () { + client.close() + }) + + stream.on('error', function(err) { + console.error(err) + }) + + stream.write(req) +} + +export function listCommitsByOid () { + client.connect(gitalyAddress, { + plaintext: true + }) + + const testRepo = selectTestRepo(); + const req = { + repository: testRepo.repository, + oid: [testRepo.commit] + } + + const stream = new Stream(client, 'gitaly.CommitService/ListCommitsByOid') + stream.on('data', data => { + check(data, { + 'ListCommitsByOid - commits present in response': r => r && r.commits + }) + }) + + stream.on('end', function () { + client.close() + }) + + stream.on('error', function(err) { + console.error(err) + }) + + stream.write(req) +} + +export function writeAndDeleteRefs () { + client.connect(gitalyAddress, { + plaintext: true + }) + + const testRepo = selectTestRepo(); + const generatedRef = 'refs/test/' + generateRandom() + + const writeRefReq = { + repository: testRepo.repository, + ref: encoding.b64encode(generatedRef), + revision: 
encoding.b64encode(testRepo.commit) + } + + const writeRefRes = client.invoke('gitaly.RepositoryService/WriteRef', writeRefReq) + check(writeRefRes, { + 'WriteRef - StatusOK': r => r && r.status === StatusOK + }) + + const deleteRefsReq = { + repository: testRepo.repository, + refs: [encoding.b64encode(generatedRef)] + } + + const deleteRefsRes = client.invoke('gitaly.RefService/DeleteRefs', deleteRefsReq) + check(deleteRefsRes, { + 'DeleteRefs - StatusOK': r => r && r.status === StatusOK + }) + + client.close() +} diff --git a/_support/benchmarking/experiments/mb-tran/plot.py b/_support/benchmarking/experiments/mb-tran/plot.py new file mode 100644 index 00000000000..b5a7377e872 --- /dev/null +++ b/_support/benchmarking/experiments/mb-tran/plot.py @@ -0,0 +1,337 @@ +import pandas as pd +from plotnine import * +import sys +import json + +# Define custom color palette +custom_colors = [ + "#ffd700", + "#fa8775", + "#ffb14e", + "#ea5f94", + "#cd34b5", + "#9d02d7", + "#0000ff", +] + +def load(fname): + # The log file is a newline-separated collection of JSON objects, each of which can + # be nested and needs to be flattened. + df = pd.json_normalize(pd.Series(open(fname).readlines()).apply(json.loads)) + + # The time column is often used for aggregations. + df["time"] = pd.to_datetime(df["time"]) + return df + + +def stats_rpc_count(df, outdir): + df = df[df["grpc.request.glRepository"].str.len() > 0] + df = df[df["grpc.method"].str.len() > 0] + + df = ( + df.groupby(["time_interval", "grpc.request.glRepository", "grpc.method", "grpc.code"]) + .size() + .reset_index(name="request_count") + ) + + with open(f"{outdir}/rpc_count_by_repo.txt", "w") as f: + f.write(df.to_string(index=False)) + + p = ( + ggplot( + df, + aes( + x="time_interval", + y="request_count", + color="grpc.method", + shape="grpc.request.glRepository", + ), + ) + + geom_line() + + scale_x_datetime(date_labels="%H:%M:%S", date_breaks="5 seconds") + + theme_seaborn( + style="darkgrid", context="notebook", font="sans-serif", font_scale=1) + + theme( + axis_text_x=element_text(rotation=45, hjust=1), figure_size=(12, 8), dpi=200 + ) + + labs( + title="gRPC Request Count", + x="Time", + y="Count", + color="Method", + shape="Repository", + ) + + facet_grid("grpc.request.glRepository", "grpc.code") + ) + + p.save(f"{outdir}/rpc_count_by_repo.png") + + +def stats_rpc_latency(df, outdir): + df = df[df["grpc.request.glRepository"].str.len() > 0] + df = df[df["grpc.method"].str.len() > 0] + df = df[df["grpc.time_ms"].notna()] + + df = ( + df.groupby(["time_interval", "grpc.request.glRepository", "grpc.method", "grpc.code"])[ + "grpc.time_ms" + ] + .quantile(0.95) + .reset_index() + ) + with open(f"{outdir}/rpc_latency_by_repo.txt", "w") as f: + f.write(df.to_string(index=False)) + + p = ( + ggplot( + df, + aes( + x="time_interval", + y="grpc.time_ms", + color="grpc.method", + shape="grpc.request.glRepository", + ), + ) + + geom_line() + + scale_x_datetime(date_labels="%H:%M:%S", date_breaks="5 seconds") + + scale_y_continuous(limits=(0, 12000)) + + theme_seaborn( + style="darkgrid", context="notebook", font="sans-serif", font_scale=1) + + theme( + axis_text_x=element_text(rotation=45, hjust=1), figure_size=(12, 16), dpi=200 + ) + + labs( + title="gRPC Response Latency", + x="Time", + y="Latency (ms, p95)", + color="Method", + shape="Repository", + ) + + facet_grid("grpc.request.glRepository", "grpc.code") + ) + + p.save(f"{outdir}/rpc_latency_by_repo.png") + + +def stats_snapshot(df, outdir): + if "snapshot.duration_ms" not in df.columns: 
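+        # pd.json_normalize only creates this column when at least one log entry carried
+        # snapshot fields, so its absence means the log contains no snapshot events.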
+ print("No snapshot creation events found in the log") + return + + df = df[df["snapshot.duration_ms"].notna()] + df = df[df["grpc.request.glRepository"].notna()] + + df = ( + df.groupby(["time_interval", "grpc.request.glRepository"])[ + "snapshot.duration_ms" + ] + .quantile(0.95) + .reset_index() + ) + with open(f"{outdir}/snapshot_creation_latency_by_repo.txt", "w") as f: + f.write(df.to_string(index=False)) + + p = ( + ggplot( + df, + aes( + x="time_interval", + y="snapshot.duration_ms", + color="grpc.request.glRepository", + ), + ) + + geom_line() + + scale_x_datetime(date_labels="%H:%M:%S", date_breaks="5 seconds") + + theme_seaborn( + style="darkgrid", context="notebook", font="sans-serif", font_scale=1) + + theme( + axis_text_x=element_text(rotation=45, hjust=1), figure_size=(12, 8), dpi=200 + ) + + labs( + title="Snapshot Creation Latency", + x="Time", + y="Latency (ms, p95)", + color="Repository", + ) + ) + + p.save(f"{outdir}/snapshot_creation_latency_by_repo.png") + + +def analyze_snapshot_creation_rate(df, outdir): + if "snapshot.duration_ms" not in df.columns: + print("No snapshot creation events found in the log") + return + + # Filter for snapshot creation events only + snapshots = df[df["snapshot.duration_ms"].notna()] + + interval = "1s" # 1 second windows + snapshots = with_interval(snapshots, interval) + + metrics = [] + + # Group by both time_interval AND snapshot.exclusive + for time_window in snapshots["time_interval"].unique(): + for exclusive_value in snapshots["snapshot.exclusive"].unique(): + window_data = snapshots[ + (snapshots["time_interval"] == time_window) & + (snapshots["snapshot.exclusive"] == exclusive_value) + ] + + count = len(window_data) + print(f"There are {count} snapshots (exclusive={exclusive_value}) in window {time_window}") + + if count > 0: + creation_rate = count / pd.Timedelta(interval).total_seconds() + p95_latency = window_data["snapshot.duration_ms"].quantile(0.95) + else: + creation_rate = 0 + p95_latency = None + + # Calculate latency percentiles + metrics.append( + { + "time_interval": time_window, + "exclusive": exclusive_value, + "count": count, + "creation_rate_per_sec": creation_rate, + "p95_latency_ms": p95_latency, + } + ) + + metrics_df = pd.DataFrame(metrics) + + # Remove rows with no data for cleaner plotting + plot_data = metrics_df[metrics_df["p95_latency_ms"].notna()] + + # Plot :: Latency vs Creation Rate (Throughput) grouped by exclusive flag + p = ( + ggplot(plot_data, aes(x="creation_rate_per_sec", color="exclusive")) + + geom_point(aes(y="p95_latency_ms"), size=3, alpha=0.7) + + geom_smooth( + aes(y="p95_latency_ms"), method="lm", se=False, size=1 + ) + + scale_color_manual(values=custom_colors, name="Exclusive") + + theme_seaborn( + style="darkgrid", context="notebook", font="sans-serif", font_scale=1) + + theme(figure_size=(12, 7), dpi=200) + + labs( + title="Impact of Creation Rate on Snapshot P95 Duration in 1s interval", + subtitle="P95 latencies vs actual snapshot throughput, grouped by exclusive flag", + x="Creation Rate - Throughput (snapshots completed/second)", + y="Snapshot P95 Duration Latency (ms)", + ) + ) + p.save(f"{outdir}/latency_vs_creation_rate.png") + print(f"Saved: {outdir}/latency_vs_creation_rate.png") + + +def analyze_snapshot_duration_by_repository(df, outdir): + if "snapshot.duration_ms" not in df.columns: + print("No snapshot creation events found in the log") + return + + # Filter for snapshot events AND valid repository paths + snapshots = df[ + (df["snapshot.duration_ms"].notna()) + & 
(df["grpc.request.glProjectPath"].notna()) + ] + + # Get repository counts + repo_counts = snapshots["grpc.request.glProjectPath"].value_counts() + + # Plot :: Overlapping histograms of snapshot duration by repository, log scale on X axis + p = ( + ggplot( + snapshots, aes(x="snapshot.duration_ms", fill="grpc.request.glProjectPath") + ) + + geom_histogram(bins=30, alpha=0.5, position="identity") + + theme_seaborn( + style="darkgrid", context="notebook", font="sans-serif", font_scale=1) + + theme( + figure_size=(16, 10), + dpi=200, + legend_position="right", + legend_title=element_text(size=10, weight="bold"), + legend_text=element_text(size=8), + ) + + labs( + title="Snapshot Duration Distribution by Repository", + subtitle="Overlapping histograms show how snapshot duration varies across repositories", + x="Duration (ms) - Log Scale", + y="Count", + ) + + scale_x_log10() + + scale_fill_manual( + values=custom_colors * 3, name="Repository" + ) # *3 to ensure enough colors, however they will start repetiting + ) + p.save(f"{outdir}/snapshot_duration_by_repository.png") + print(f"\nSaved: {outdir}/snapshot_duration_by_repository.png") + +def analyze_snapshot_by_files_dirs(df, outdir): + if "snapshot.duration_ms" not in df.columns: + print("No snapshot creation events found in the log") + return + + # Filter for snapshot events AND valid repository paths + snapshots = df[ + (df["snapshot.duration_ms"].notna()) + & (df["grpc.request.glProjectPath"].notna()) + ] + + relevant_cols = ['snapshot.directory_count', 'snapshot.file_count', 'snapshot.duration_ms'] + + # Plot :: Scatter plot: dirs (x) vs files (y), colored by duration + p = ( + ggplot(snapshots, aes(x='snapshot.directory_count', y='snapshot.file_count', color='snapshot.duration_ms')) + + geom_point(size=3, alpha=0.6) + + scale_color_gradient2( + low=custom_colors[0], # yellow for fast + mid=custom_colors[3], # pink for medium + high=custom_colors[-1], # indigo for slow + midpoint=snapshots['snapshot.duration_ms'].median(), + name='Duration (ms)' + ) + + theme_seaborn( + style="darkgrid", context="notebook", font="sans-serif", font_scale=1) + + theme( + figure_size=(12, 10), + dpi=200, + legend_position='right' + ) + + labs( + title="Snapshot Duration By Files X Directories", + subtitle="Each dot represents a snapshot operation", + x="Directory Count", + y="File Count" + ) + + facet_wrap("grpc.request.glRepository", ncol=1) + ) + + p.save(f"{outdir}/snapshot_files_dirs_duration.png") + print(f"\nSaved: {outdir}/snapshot_files_dirs_duration.png") + +def with_interval(df, interval): + df["time_interval"] = df["time"].dt.floor(interval) + return df + + +if __name__ == "__main__": + if len(sys.argv) < 3: + print("Usage: plot.py ") + sys.exit(1) + + log_filename = sys.argv[1] + output_directory = sys.argv[2] + + df = load(log_filename) + + with_interval(df, "1s") + stats_snapshot(df, output_directory) + stats_rpc_latency(df, output_directory) + stats_rpc_count(df, output_directory) + analyze_snapshot_creation_rate(df, output_directory) + analyze_snapshot_duration_by_repository(df, output_directory) + analyze_snapshot_by_files_dirs(df, output_directory) diff --git a/_support/benchmarking/experiments/mb-tran/profile-gitaly.sh b/_support/benchmarking/experiments/mb-tran/profile-gitaly.sh new file mode 100755 index 00000000000..2a37ecf93cf --- /dev/null +++ b/_support/benchmarking/experiments/mb-tran/profile-gitaly.sh @@ -0,0 +1,103 @@ +#!/bin/sh +# +# profile-gitaly: Profile host with perf and libbpf-tools. +# Must be run as root. 
+# +# Mandatory arguments: +# -d : Number of seconds to profile for +# -g : Name of Git repository being used +# -o : Directory to write output to +# -r : Name of RPC being executed + +set -e + +usage() { + echo "Usage: $0 -d -o -r \ +-g " + exit 1 +} + +profile() { + # Profile on-CPU time for Gitaly and child processes + perf record --freq=99 -g --pid="$(pidof -s gitaly)" \ + --output="${gitaly_perf_data}" -- sleep "${seconds}" & + + # Profile on-CPU time for whole system + perf record --freq=97 -g --all-cpus \ + --output="${all_perf_data}" -- sleep "${seconds}" & + + # Profile off-CPU time for whole system (with filtering as a post-processing step) + min_stall_duration_us=1000 + offcpu_profile_raw_output_file="${out_dir}/offcpu_profile.raw.txt.gz" + bpftrace /usr/local/gitaly_offcpu_profiler/offcpu_profile.bt "${seconds}" "${min_stall_duration_us}" \ + | gzip > "${offcpu_profile_raw_output_file}" & + + wait +} + +generate_flamegraphs() { + gitaly_perf_txt="${out_dir}/gitaly-perf.txt.gz" + gitaly_perf_svg="${out_dir}/gitaly-perf.svg" + perf script --header --input="${gitaly_perf_data}" \ + | gzip > "${gitaly_perf_txt}" + zcat "${gitaly_perf_txt}" \ + | stackcollapse-perf --kernel \ + | flamegraph --hash --colors=perl > "${gitaly_perf_svg}" + + all_perf_txt="${out_dir}/all-perf.txt.gz" + all_perf_svg="${out_dir}/all-perf.svg" + perf script --header --input="${all_perf_data}" \ + | gzip > "${all_perf_txt}" + zcat "${all_perf_txt}" \ + | stackcollapse-perf --kernel \ + | flamegraph --hash --colors=perl > "${all_perf_svg}" + + /usr/local/gitaly_offcpu_profiler/offcpu_profile_postprocessing.sh "${offcpu_profile_raw_output_file}" +} + +main() { + if [ "$(id -u)" -ne 0 ]; then + echo "$0 must be run as root" >&2 + exit 1 + fi + + while getopts "hd:g:o:r:" arg; do + case "${arg}" in + d) seconds=${OPTARG} ;; + g) repo=${OPTARG} ;; + o) out_dir=${OPTARG} ;; + r) rpc=${OPTARG} ;; + h|*) usage ;; + esac + done + + if [ "${seconds}" -le 0 ] \ + || [ -z "${out_dir}" ] \ + || [ -z "${rpc}" ] \ + || [ -z "${repo}" ]; then + usage + fi + + if ! pidof gitaly > /dev/null; then + echo "Gitaly is not running, aborting" >&2 + exit 1 + fi + + # Ansible's minimal shell will may not include /usr/local/bin in $PATH + if ! printenv PATH | grep "/usr/local/bin" > /dev/null; then + export PATH="${PATH}:/usr/local/bin" + fi + + perf_tmp_dir=$(mktemp -d "/tmp/gitaly-perf-${repo}-${rpc}.XXXXXX") + gitaly_perf_data="${perf_tmp_dir}/gitaly-perf.out" + all_perf_data="${perf_tmp_dir}/all-perf.out" + + profile + + generate_flamegraphs + + chown -R git:git "${out_dir}" + rm -rf "${perf_tmp_dir}" +} + +main "$@" diff --git a/_support/benchmarking/roles/benchmark/vars/main.yml b/_support/benchmarking/roles/benchmark/vars/main.yml index f90aa2f05e1..a9f2439ee8b 100644 --- a/_support/benchmarking/roles/benchmark/vars/main.yml +++ b/_support/benchmarking/roles/benchmark/vars/main.yml @@ -2,6 +2,6 @@ profile: true clear_page_cache: true # Profiling and the workload will begin concurrently. 
-profile_duration: 60 +profile_duration: 300 workload_duration: "60s" -workload_wait_duration: 120 +workload_wait_duration: 360 -- GitLab From 3ffa0cc7d2d3fd34cbaed91b433b9db7e8505875 Mon Sep 17 00:00:00 2001 From: Mustafa Bayar Date: Mon, 27 Oct 2025 10:00:53 +0300 Subject: [PATCH 2/4] enable housekeeping --- internal/featureflag/ff_housekeeping_middleware.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/featureflag/ff_housekeeping_middleware.go b/internal/featureflag/ff_housekeeping_middleware.go index bbbd4771f3c..6979ba29668 100644 --- a/internal/featureflag/ff_housekeeping_middleware.go +++ b/internal/featureflag/ff_housekeeping_middleware.go @@ -5,5 +5,5 @@ var HousekeepingMiddleware = NewFeatureFlag( "housekeeping_middleware", "v18.1.0", "https://gitlab.com/gitlab-org/gitaly/-/issues/6761", - false, + true, ) -- GitLab From 0aade8b139e1cfe0090ad88294ecd7d296ccaa39 Mon Sep 17 00:00:00 2001 From: Mustafa Bayar Date: Mon, 27 Oct 2025 10:03:16 +0300 Subject: [PATCH 3/4] use commit hash --- .../benchmarking/experiments/mb-tran/config.yml | 10 +++++----- .../experiments/mb-tran/k6-benchmark.js | 16 ++++++++-------- internal/cli/gitaly/serve.go | 2 +- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/_support/benchmarking/experiments/mb-tran/config.yml b/_support/benchmarking/experiments/mb-tran/config.yml index 4ee1b8700c9..b42e5952837 100644 --- a/_support/benchmarking/experiments/mb-tran/config.yml +++ b/_support/benchmarking/experiments/mb-tran/config.yml @@ -20,7 +20,7 @@ client: # The client clones Gitaly in order to consume the protobuf definitions. # This revision should be set to the latest of the gitaly_revision values # specified for the gitaly_instances below. - gitaly_revision: HEAD + gitaly_revision: mb-tran # Configuration parameters for a collection of N Gitaly nodes. Benchmarks will execute for each Gitaly # node. @@ -30,7 +30,7 @@ gitaly_instances: machine_type: "n2d-standard-16" boot_disk_type: "pd-balanced" boot_disk_size: 20 - gitaly_revision: HEAD + gitaly_revision: mb-tran disk_size: 100 disk_type: "pd-balanced" # ==== Standard Filesystem settings ==== @@ -59,7 +59,7 @@ gitaly_instances: machine_type: "n2d-standard-16" boot_disk_type: "pd-balanced" boot_disk_size: 20 - gitaly_revision: HEAD + gitaly_revision: mb-tran disk_size: 100 disk_type: "pd-balanced" # ==== Standard Filesystem settings ==== @@ -96,7 +96,7 @@ repositories: # Whether this repository should be tested. This toggle is read by the K6 script. include_in_test: true # Which reference backend to use. This defines how the repository will be cloned. - reference_backend: files + reference_backend: reftable remote: "https://gitlab.com/gitlab-org/git.git" revision: "2462961280690837670d997bde64bd4ebf8ae66d" # Test data to be used as RPC inputs. 
The K6 script will randomly choose out of these available @@ -120,7 +120,7 @@ repositories: - "git-gui" - name: gitlab include_in_test: true - reference_backend: files + reference_backend: reftable remote: "https://gitlab.com/gitlab-org/gitlab.git" revision: "8f3978675aa4df643cff5a01a8e1896ae754685a" testdata: diff --git a/_support/benchmarking/experiments/mb-tran/k6-benchmark.js b/_support/benchmarking/experiments/mb-tran/k6-benchmark.js index 264d4ffde33..b2b966da6be 100644 --- a/_support/benchmarking/experiments/mb-tran/k6-benchmark.js +++ b/_support/benchmarking/experiments/mb-tran/k6-benchmark.js @@ -26,8 +26,8 @@ const optionsStatic = () => { getBlobs: { ...SCENARIO_DEFAULTS, rate: 200, exec: 'getBlobs' }, getTreeEntries: { ...SCENARIO_DEFAULTS, rate: 200, exec: 'getTreeEntries' }, treeEntry: { ...SCENARIO_DEFAULTS, rate: 100, exec: 'treeEntry' }, - listCommitsByOid: { ...SCENARIO_DEFAULTS, rate: 200, exec: 'listCommitsByOid' } - // writeAndDeleteRefs: { ...SCENARIO_DEFAULTS, rate: 100, exec: 'writeAndDeleteRefs' } + listCommitsByOid: { ...SCENARIO_DEFAULTS, rate: 200, exec: 'listCommitsByOid' }, + writeAndDeleteRefs: { ...SCENARIO_DEFAULTS, rate: 100, exec: 'writeAndDeleteRefs' } }, setupTimeout: '5m' } @@ -42,7 +42,7 @@ const optionsRamping = () => { } const stages_read = [{target: 50, duration: '100s'}, {target: 100, duration: '50s'}, {target: 200, duration: '100s'}, {target: 50, duration: '50s'}] - // const stages_write = [{target: 25, duration: '100s'}, {target: 50, duration: '50s'}, {target: 100, duration: '100s'}, {target: 25, duration: '50s'}] + const stages_write = [{target: 25, duration: '100s'}, {target: 50, duration: '50s'}, {target: 100, duration: '100s'}, {target: 25, duration: '50s'}] return { scenarios: { @@ -70,12 +70,12 @@ const optionsRamping = () => { ...SCENARIO_DEFAULTS, stages: stages_read, exec: 'listCommitsByOid' + }, + writeAndDeleteRefs: { + ...SCENARIO_DEFAULTS, + stages: stages_write, + exec: 'writeAndDeleteRefs' } - // writeAndDeleteRefs: { - // ...SCENARIO_DEFAULTS, - // stages: stages_write, - // exec: 'writeAndDeleteRefs' - // } }, setupTimeout: '5m' } diff --git a/internal/cli/gitaly/serve.go b/internal/cli/gitaly/serve.go index 87ad0ebe309..9af11358a16 100644 --- a/internal/cli/gitaly/serve.go +++ b/internal/cli/gitaly/serve.go @@ -577,7 +577,7 @@ func run(appCtx *cli.Command, cfg config.Cfg, logger log.Logger) error { housekeepingManager := housekeepingmgr.New(cfg.Prometheus, logger, transactionManager, node) prometheus.MustRegister(housekeepingManager) - housekeepingMiddleware := housekeepingmw.NewHousekeepingMiddleware(logger, protoregistry.GitalyProtoPreregistered, localrepoFactory, housekeepingManager, 20) + housekeepingMiddleware := housekeepingmw.NewHousekeepingMiddleware(logger, protoregistry.GitalyProtoPreregistered, localrepoFactory, housekeepingManager, 50) defer housekeepingMiddleware.WaitForWorkers() gitalyServerFactory := server.NewGitalyServerFactory( -- GitLab From dcea8392e94ca0488570d9ceb91892aec76594b5 Mon Sep 17 00:00:00 2001 From: Mustafa Bayar Date: Tue, 28 Oct 2025 13:19:12 +0300 Subject: [PATCH 4/4] reftable-test --- .../experiments/master/config.yml | 33 +- .../experiments/master/k6-benchmark.js | 333 +++++------------- .../experiments/mb-tran/config.yml | 137 +++++-- .../experiments/mb-tran/k6-benchmark.js | 274 +++++--------- .../roles/benchmark/vars/main.yml | 6 +- .../roles/gitaly/tasks/initialize.yml | 14 +- internal/cli/gitaly/serve.go | 2 +- 7 files changed, 327 insertions(+), 472 deletions(-) diff --git 
a/_support/benchmarking/experiments/master/config.yml b/_support/benchmarking/experiments/master/config.yml index 4972af8d9d5..214c9edbf45 100644 --- a/_support/benchmarking/experiments/master/config.yml +++ b/_support/benchmarking/experiments/master/config.yml @@ -26,7 +26,7 @@ client: # node. gitaly_instances: # Try to use a short name, otherwise we'll exceed the GCP resource name length of 63 characters - - name: "large" + - name: "baseline" machine_type: "n2d-standard-16" boot_disk_type: "pd-balanced" boot_disk_size: 20 @@ -38,7 +38,36 @@ gitaly_instances: # filesystem: "btrfs" # Options: ext4, xfs, btrfs # fs_mount_opts: "noatime,compress=zstd,space_cache=v2,ssd,discard=async" # fs_format_opts: "-f -n 16k" - # XFS setup + # XFS setup + # filesystem: "xfs" # Options: ext4, xfs, btrfs + # fs_mount_opts: "defaults,discard" + # fs_format_opts: "-f" + # ext4 setup + filesystem: "ext4" # Options: ext4, xfs, btrfs + fs_mount_opts: "defaults,discard" + fs_format_opts: "" + + # These are manually templated and don't translate directly to config.toml entries. + config: + # Whether transactions should be enabled. + transactions: false + environment: + # Arbitrary environment variables. Note that SNAPSHOT_DRIVER doesn't actually do anything; it + # just serves as an example. + SNAPSHOT_DRIVER: deepclone + - name: "transaction" + machine_type: "n2d-standard-16" + boot_disk_type: "pd-balanced" + boot_disk_size: 20 + gitaly_revision: HEAD + disk_size: 100 + disk_type: "pd-balanced" + # ==== Standard Filesystem settings ==== + # Btrfs setup + # filesystem: "btrfs" # Options: ext4, xfs, btrfs + # fs_mount_opts: "noatime,compress=zstd,space_cache=v2,ssd,discard=async" + # fs_format_opts: "-f -n 16k" + # XFS setup # filesystem: "xfs" # Options: ext4, xfs, btrfs # fs_mount_opts: "defaults,discard" # fs_format_opts: "-f" diff --git a/_support/benchmarking/experiments/master/k6-benchmark.js b/_support/benchmarking/experiments/master/k6-benchmark.js index 7863023961e..24fa3e03c57 100644 --- a/_support/benchmarking/experiments/master/k6-benchmark.js +++ b/_support/benchmarking/experiments/master/k6-benchmark.js @@ -9,76 +9,27 @@ const gitalyProtoDir = __ENV.GITALY_PROTO_DIR const runName = __ENV.RUN_NAME const workloadDuration = __ENV.WORKLOAD_DURATION - -// optionsStatic returns a test scenario where constant load is offered to Gitaly -const optionsStatic = () => { - const SCENARIO_DEFAULTS = { - executor: 'constant-arrival-rate', - duration: workloadDuration, - timeUnit: '1s', - gracefulStop: '0s', - preAllocatedVUs: 40 - } - - return { - scenarios: { - findCommit: { ...SCENARIO_DEFAULTS, rate: 200, exec: 'findCommit' }, - getBlobs: { ...SCENARIO_DEFAULTS, rate: 200, exec: 'getBlobs' }, - getTreeEntries: { ...SCENARIO_DEFAULTS, rate: 200, exec: 'getTreeEntries' }, - treeEntry: { ...SCENARIO_DEFAULTS, rate: 100, exec: 'treeEntry' }, - listCommitsByOid: { ...SCENARIO_DEFAULTS, rate: 200, exec: 'listCommitsByOid' }, - writeAndDeleteRefs: { ...SCENARIO_DEFAULTS, rate: 100, exec: 'writeAndDeleteRefs' } - }, - setupTimeout: '5m' - } -} +// Total reads: 18,104 req/m = 302 req/s ~= 500 req/s +// Total writes: 225 req/m = 4 req/s ~= 5 req/s // optionsRamping returns a test scenario where a ramping workload is offered to Gitaly const optionsRamping = () => { - const SCENARIO_DEFAULTS = { - executor: 'ramping-arrival-rate', - timeUnit: '1s', - preAllocatedVUs: 40 - } - - const stages_read = [{target: 50, duration: '20s'}, {target: 100, duration: '10s'}, {target: 200, duration: '20s'}, {target: 50, duration: '10s'}] - 
const stages_write = [{target: 25, duration: '20s'}, {target: 50, duration: '10s'}, {target: 100, duration: '20s'}, {target: 25, duration: '10s'}] - - return { - scenarios: { - findCommit: { - ...SCENARIO_DEFAULTS, - stages: stages_read, - exec: 'findCommit' - }, - getBlobs: { - ...SCENARIO_DEFAULTS, - stages: stages_read, - exec: 'getBlobs' - }, - getTreeEntries: { - ...SCENARIO_DEFAULTS, - stages: stages_read, - exec: 'getTreeEntries' - }, - treeEntry: { - ...SCENARIO_DEFAULTS, - stages: stages_read, - exec: 'treeEntry' - }, - listCommitsByOid: { - ...SCENARIO_DEFAULTS, - stages: stages_read, - exec: 'listCommitsByOid' - }, - writeAndDeleteRefs: { - ...SCENARIO_DEFAULTS, - stages: stages_write, - exec: 'writeAndDeleteRefs' - } - }, - setupTimeout: '5m' - } + const SCENARIO_DEFAULTS = { + executor: 'ramping-arrival-rate', + timeUnit: '1s', + preAllocatedVUs: 1000 + } + + const stages_read = [{target: 125, duration: '100s'}, {target: 250, duration: '50s'}, {target: 500, duration: '100s'}, {target: 250, duration: '50s'}] + const stages_write = [{target: 2, duration: '100s'}, {target: 4, duration: '50s'}, {target: 5, duration: '100s'}, {target: 4, duration: '50s'}] + + return { + scenarios: { + listRefs: { ...SCENARIO_DEFAULTS, stages: stages_read, exec: 'listRefs', tags: { rpc: 'ListRefs' } }, + deleteRefs: { ...SCENARIO_DEFAULTS, stages: stages_write, exec: 'deleteRefs', tags: { rpc: 'DeleteRefs' } }, + }, + setupTimeout: '15m' + } } @@ -87,214 +38,106 @@ export const options = optionsRamping() const repos = JSON.parse(open("/opt/benchmark-gitaly/repositories.json")); const selectTestRepo = () => { - const active = repos.filter(r => r.include_in_test); - const repo = active[Math.floor(Math.random() * active.length)]; - - return { - repository: { - storageName: 'default', - relativePath: `${repo.name}`, - glRepository: repo.name, // irrelevant but mandatory - glProjectPath: `foo/bar/${repo.name}`, // irrelevant but mandatory - }, - commit: repo.testdata.commits[Math.floor(Math.random() * repo.testdata.commits.length)], - ref: repo.testdata.refs[Math.floor(Math.random() * repo.testdata.refs.length)], - file: repo.testdata.files[Math.floor(Math.random() * repo.testdata.files.length)], - directory: repo.testdata.directories[Math.floor(Math.random() * repo.testdata.directories.length)], - } + const active = repos.filter(r => r.include_in_test); + const repo = active[Math.floor(Math.random() * active.length)]; + + return { + repository: { + storageName: 'default', + relativePath: `${repo.name}`, + glRepository: repo.name, // irrelevant but mandatory + glProjectPath: `foo/bar/${repo.name}`, // irrelevant but mandatory + }, + commit: repo.testdata.commits[Math.floor(Math.random() * repo.testdata.commits.length)], + ref: repo.testdata.refs[Math.floor(Math.random() * repo.testdata.refs.length)], + file: repo.testdata.files[Math.floor(Math.random() * repo.testdata.files.length)], + directory: repo.testdata.directories[Math.floor(Math.random() * repo.testdata.directories.length)], + } } const generateRandom = () => Math.random().toString(36).substring(2, 15) + Math.random().toString(23).substring(2, 5) export function setup () { - const setupCompletionSentinel = `/tmp/${runName}-setup-complete` - // Signal to Ansible that setup is complete, in a very hacky way. - exec.command('touch', [setupCompletionSentinel]) + const setupCompletionSentinel = `/tmp/${runName}-setup-complete` + // Signal to Ansible that setup is complete, in a very hacky way. 
+ exec.command('touch', [setupCompletionSentinel]) - return { - setupCompletionSentinel - } + return { + setupCompletionSentinel + } } export function teardown (context) { - exec.command('rm', [context.setupCompletionSentinel]) + exec.command('rm', [context.setupCompletionSentinel]) } const client = new Client() // k6 provides no easy way to list directory contents. client.load([gitalyProtoDir], 'commit.proto', 'blob.proto', 'ref.proto', 'repository.proto') -export function findCommit () { - client.connect(gitalyAddress, { - plaintext: true - }) - - const testRepo = selectTestRepo(); - const req = { - repository: testRepo.repository, - revision: encoding.b64encode(testRepo.commit) - } - - const res = client.invoke('gitaly.CommitService/FindCommit', req) - check(res, { - 'FindCommit - StatusOK': r => r && r.status === StatusOK - }) - - client.close() -} - -export function getBlobs () { - client.connect(gitalyAddress, { - plaintext: true - }) - - const testRepo = selectTestRepo(); - const req = { - repository: testRepo.repository, - revision_paths: [ - { - revision: testRepo.commit, - path: encoding.b64encode(testRepo.file) - } - ], - limit: -1 - } - const stream = new Stream(client, 'gitaly.BlobService/GetBlobs') - stream.on('data', data => { - check(data, { - 'GetBlobs - data present in response': r => r && r.data +export function listRefs () { + client.connect(gitalyAddress, { + plaintext: true }) - }) - - stream.on('end', function () { - client.close() - }) - - stream.on('error', function(err) { - console.error(err) - }) - - stream.write(req) -} - -export function getTreeEntries () { - client.connect(gitalyAddress, { - plaintext: true - }) - - const testRepo = selectTestRepo(); - const req = { - repository: testRepo.repository, - revision: encoding.b64encode(testRepo.commit), - path: encoding.b64encode(testRepo.directory) - } - const stream = new Stream(client, 'gitaly.CommitService/GetTreeEntries') - stream.on('data', data => { - check(data, { - 'GetTreeEntries - entries present in response': r => r && r.entries + const testRepo = selectTestRepo(); + const req = { + repository: testRepo.repository, + patterns: [ encoding.b64encode("refs/heads/*") ], + pagination_params: { + limit: 20, + }, + } + + const stream = new Stream(client, 'gitaly.RefService/ListRefs') + stream.on('data', data => { + check(data, { + 'ListRefs - references present in response': r => r && r.references + }) }) - }) - stream.on('end', function () { - client.close() - }) - - stream.on('error', function(err) { - console.error(err) - }) - - stream.write(req) -} - -export function treeEntry () { - client.connect(gitalyAddress, { - plaintext: true - }) - - const testRepo = selectTestRepo(); - const req = { - repository: testRepo.repository, - revision: encoding.b64encode(testRepo.ref), - path: encoding.b64encode(testRepo.file) - } - - const stream = new Stream(client, 'gitaly.CommitService/TreeEntry') - stream.on('data', data => { - check(data, { - 'TreeEntry - data present in response': r => r && r.data + stream.on('end', function () { + client.close() }) - }) - - stream.on('end', function () { - client.close() - }) - stream.on('error', function(err) { - console.error(err) - }) - - stream.write(req) -} - -export function listCommitsByOid () { - client.connect(gitalyAddress, { - plaintext: true - }) - - const testRepo = selectTestRepo(); - const req = { - repository: testRepo.repository, - oid: [testRepo.commit] - } - - const stream = new Stream(client, 'gitaly.CommitService/ListCommitsByOid') - stream.on('data', data => 
{ - check(data, { - 'ListCommitsByOid - commits present in response': r => r && r.commits + stream.on('error', function(err) { + console.error(err) }) - }) - - stream.on('end', function () { - client.close() - }) - stream.on('error', function(err) { - console.error(err) - }) - - stream.write(req) + stream.write(req) } -export function writeAndDeleteRefs () { - client.connect(gitalyAddress, { - plaintext: true - }) +export function deleteRefs () { + client.connect(gitalyAddress, { + plaintext: true + }) - const testRepo = selectTestRepo(); - const generatedRef = 'refs/test/' + generateRandom() + const testRepo = selectTestRepo(); - const writeRefReq = { - repository: testRepo.repository, - ref: encoding.b64encode(generatedRef), - revision: encoding.b64encode(testRepo.commit) - } + // Select a random ref from the first 100,000 refs + const refNum = Math.floor(Math.random() * 100000) + 1; + const refToDelete = `refs/heads/zzz-${refNum}`; - const writeRefRes = client.invoke('gitaly.RepositoryService/WriteRef', writeRefReq) - check(writeRefRes, { - 'WriteRef - StatusOK': r => r && r.status === StatusOK - }) + // Delete the ref using DeleteRefs + const deleteRefsReq = { + repository: testRepo.repository, + refs: [encoding.b64encode(refToDelete)] + } - const deleteRefsReq = { - repository: testRepo.repository, - refs: [encoding.b64encode(generatedRef)] - } + const deleteRefsRes = client.invoke('gitaly.RefService/DeleteRefs', deleteRefsReq) + check(deleteRefsRes, { + 'DeleteRefs - StatusOK': r => r && r.status === StatusOK, + }) - const deleteRefsRes = client.invoke('gitaly.RefService/DeleteRefs', deleteRefsReq) - check(deleteRefsRes, { - 'DeleteRefs - StatusOK': r => r && r.status === StatusOK - }) + const writeRefReq = { + repository: testRepo.repository, + ref: encoding.b64encode(refToDelete), + revision: encoding.b64encode(testRepo.commit) + } - client.close() + const writeRefRes = client.invoke('gitaly.RepositoryService/WriteRef', writeRefReq) + check(writeRefRes, { + 'WriteRefs - recreate StatusOK': r => r && r.status === StatusOK + }) } diff --git a/_support/benchmarking/experiments/mb-tran/config.yml b/_support/benchmarking/experiments/mb-tran/config.yml index b42e5952837..2c7d54a8df0 100644 --- a/_support/benchmarking/experiments/mb-tran/config.yml +++ b/_support/benchmarking/experiments/mb-tran/config.yml @@ -20,18 +20,18 @@ client: # The client clones Gitaly in order to consume the protobuf definitions. # This revision should be set to the latest of the gitaly_revision values # specified for the gitaly_instances below. - gitaly_revision: mb-tran + gitaly_revision: HEAD # Configuration parameters for a collection of N Gitaly nodes. Benchmarks will execute for each Gitaly # node. gitaly_instances: - # Try to use a short name, otherwise we'll exceed the GCP resource name length of 63 characters - - name: "test" + # Try to use a short name, otherwise we'll exceed the GCP resource name length of 63 characters + - name: "transactions" machine_type: "n2d-standard-16" boot_disk_type: "pd-balanced" boot_disk_size: 20 - gitaly_revision: mb-tran - disk_size: 100 + gitaly_revision: HEAD + disk_size: 200 disk_type: "pd-balanced" # ==== Standard Filesystem settings ==== # Btrfs setup @@ -55,12 +55,12 @@ gitaly_instances: # Arbitrary environment variables. Note that SNAPSHOT_DRIVER doesn't actually do anything; it # just serves as an example. 
SNAPSHOT_DRIVER: deepclone - - name: "base" + - name: "baseline" machine_type: "n2d-standard-16" boot_disk_type: "pd-balanced" boot_disk_size: 20 - gitaly_revision: mb-tran - disk_size: 100 + gitaly_revision: HEAD + disk_size: 200 disk_type: "pd-balanced" # ==== Standard Filesystem settings ==== # Btrfs setup @@ -84,7 +84,6 @@ gitaly_instances: # Arbitrary environment variables. Note that SNAPSHOT_DRIVER doesn't actually do anything; it # just serves as an example. SNAPSHOT_DRIVER: deepclone - # A list of repositories to be cloned onto the repositories disk, along with test inputs to be used # for RPC calls. This section of the configuration is re-serialised into JSON and provided to the # K6 script. @@ -92,11 +91,10 @@ gitaly_instances: # NOTE: you may wish to delete some of these entries, otherwise benchmarking setup may take a while # to clone each repo. repositories: + # Small repository with many loose objects + files backend (baseline) - name: git - # Whether this repository should be tested. This toggle is read by the K6 script. include_in_test: true - # Which reference backend to use. This defines how the repository will be cloned. - reference_backend: reftable + reference_backend: files remote: "https://gitlab.com/gitlab-org/git.git" revision: "2462961280690837670d997bde64bd4ebf8ae66d" # Test data to be used as RPC inputs. The K6 script will randomly choose out of these available @@ -118,28 +116,30 @@ repositories: - "t" - "Documentation" - "git-gui" - - name: gitlab - include_in_test: true - reference_backend: reftable - remote: "https://gitlab.com/gitlab-org/gitlab.git" - revision: "8f3978675aa4df643cff5a01a8e1896ae754685a" - testdata: - commits: - - "875ffb690e25eb8c98797b5641c6c16c71454b73" - - "3074e43761003e2566ea604053fe4988774d2896" - - "9ace97496c56335c5739c226853b468afd962830" - refs: - - "refs/heads/master" - - "refs/tags/v18.1.5-ee" - - "refs/tags/v17.11.6-ee" - files: - - "README.md" - - "lib/gitaly/server.rb" - - "ee/app/graphql/resolvers/epics_resolver.rb" - directories: - - "app" - - "rubocop" - - "qa" + # Small repository with many loose objects + reftable backend + # - name: git-reftable + # include_in_test: true + # reference_backend: reftable + # remote: "https://gitlab.com/gitlab-org/git.git" + # revision: "2462961280690837670d997bde64bd4ebf8ae66d" + # testdata: + # commits: + # - "fea9d18c534a445ef6e488d8ee711fa92fa0e6bd" + # - "0a15bb634cf005a0266ee1108ac31aa75649a61c" + # - "217e4a23d76fe95a0f6ab0f6159de2460db6fcd9" + # refs: + # - "refs/heads/master" + # - "refs/heads/next" + # - "refs/tags/v2.50.0" + # files: + # - "README.md" + # - "t/lib-diff.sh" + # - "packfile.c" + # directories: + # - "t" + # - "Documentation" + # - "git-gui" + # Medium repository with decent history and branches + files backend (baseline) - name: gitaly include_in_test: true reference_backend: files @@ -162,3 +162,72 @@ repositories: - "internal" - "proto" - "internal/gitaly/service/raft" + # Medium repository with decent history and branches + reftable backend + # - name: gitaly-reftable + # include_in_test: true + # reference_backend: reftable + # remote: "https://gitlab.com/gitlab-org/gitaly.git" + # revision: "4d78df8cb5c6b3abfef5530830dba6c67d9d4c53" + # testdata: + # commits: + # - "765d81272feb53bcc0c50199183b4514c5ef7a73" + # - "25965387d1a0a91d226912649180f38c04d89a36" + # - "552d12d94dd24ad8dff93856e77a08b6a96f1d3e" + # refs: + # - "refs/heads/master" + # - "refs/tags/v18.1.5" + # - "refs/tags/v17.11.7" + # files: + # - "README.md" + # - 
"internal/gitaly/rangediff/range_diff_test.go" + # - "proto/go/gitalypb/blob.pb.go" + # directories: + # - "internal" + # - "proto" + # - "internal/gitaly/service/raft" + # Large monorepo with deep history and large packfiles + files backend (baseline) + - name: gitlab + include_in_test: true + reference_backend: files + remote: "https://gitlab.com/gitlab-org/gitlab.git" + revision: "8f3978675aa4df643cff5a01a8e1896ae754685a" + testdata: + commits: + - "875ffb690e25eb8c98797b5641c6c16c71454b73" + - "3074e43761003e2566ea604053fe4988774d2896" + - "9ace97496c56335c5739c226853b468afd962830" + refs: + - "refs/heads/master" + - "refs/tags/v18.1.5-ee" + - "refs/tags/v17.11.6-ee" + files: + - "README.md" + - "lib/gitaly/server.rb" + - "ee/app/graphql/resolvers/epics_resolver.rb" + directories: + - "app" + - "rubocop" + - "qa" + # Large monorepo with deep history and large packfiles + reftable backend + # - name: gitlab-reftable + # include_in_test: true + # reference_backend: reftable + # remote: "https://gitlab.com/gitlab-org/gitlab.git" + # revision: "8f3978675aa4df643cff5a01a8e1896ae754685a" + # testdata: + # commits: + # - "875ffb690e25eb8c98797b5641c6c16c71454b73" + # - "3074e43761003e2566ea604053fe4988774d2896" + # - "9ace97496c56335c5739c226853b468afd962830" + # refs: + # - "refs/heads/master" + # - "refs/tags/v18.1.5-ee" + # - "refs/tags/v17.11.6-ee" + # files: + # - "README.md" + # - "lib/gitaly/server.rb" + # - "ee/app/graphql/resolvers/epics_resolver.rb" + # directories: + # - "app" + # - "rubocop" + # - "qa" diff --git a/_support/benchmarking/experiments/mb-tran/k6-benchmark.js b/_support/benchmarking/experiments/mb-tran/k6-benchmark.js index b2b966da6be..eb9cd337ac0 100644 --- a/_support/benchmarking/experiments/mb-tran/k6-benchmark.js +++ b/_support/benchmarking/experiments/mb-tran/k6-benchmark.js @@ -7,117 +7,67 @@ import exec from 'k6/x/exec' const gitalyAddress = __ENV.GITALY_ADDRESS const gitalyProtoDir = __ENV.GITALY_PROTO_DIR const runName = __ENV.RUN_NAME -const workloadDuration = __ENV.WORKLOAD_DURATION - - -// optionsStatic returns a test scenario where constant load is offered to Gitaly -const optionsStatic = () => { - const SCENARIO_DEFAULTS = { - executor: 'constant-arrival-rate', - duration: workloadDuration, - timeUnit: '1s', - gracefulStop: '0s', - preAllocatedVUs: 40 - } - - return { - scenarios: { - findCommit: { ...SCENARIO_DEFAULTS, rate: 200, exec: 'findCommit' }, - getBlobs: { ...SCENARIO_DEFAULTS, rate: 200, exec: 'getBlobs' }, - getTreeEntries: { ...SCENARIO_DEFAULTS, rate: 200, exec: 'getTreeEntries' }, - treeEntry: { ...SCENARIO_DEFAULTS, rate: 100, exec: 'treeEntry' }, - listCommitsByOid: { ...SCENARIO_DEFAULTS, rate: 200, exec: 'listCommitsByOid' }, - writeAndDeleteRefs: { ...SCENARIO_DEFAULTS, rate: 100, exec: 'writeAndDeleteRefs' } - }, - setupTimeout: '5m' - } -} - -// optionsRamping returns a test scenario where a ramping workload is offered to Gitaly -const optionsRamping = () => { - const SCENARIO_DEFAULTS = { - executor: 'ramping-arrival-rate', - timeUnit: '1s', - preAllocatedVUs: 40 - } - - const stages_read = [{target: 50, duration: '100s'}, {target: 100, duration: '50s'}, {target: 200, duration: '100s'}, {target: 50, duration: '50s'}] - const stages_write = [{target: 25, duration: '100s'}, {target: 50, duration: '50s'}, {target: 100, duration: '100s'}, {target: 25, duration: '50s'}] - - return { - scenarios: { - findCommit: { - ...SCENARIO_DEFAULTS, - stages: stages_read, - exec: 'findCommit' - }, - getBlobs: { - ...SCENARIO_DEFAULTS, - stages: 
stages_read, - exec: 'getBlobs' - }, - getTreeEntries: { - ...SCENARIO_DEFAULTS, - stages: stages_read, - exec: 'getTreeEntries' - }, - treeEntry: { - ...SCENARIO_DEFAULTS, - stages: stages_read, - exec: 'treeEntry' - }, - listCommitsByOid: { - ...SCENARIO_DEFAULTS, - stages: stages_read, - exec: 'listCommitsByOid' - }, - writeAndDeleteRefs: { - ...SCENARIO_DEFAULTS, - stages: stages_write, - exec: 'writeAndDeleteRefs' - } - }, - setupTimeout: '5m' - } +// optionsFlatLoad returns a test scenario with a flat, sustained load profile +const optionsFlatLoad = () => { + const SCENARIO_DEFAULTS = { + executor: 'constant-arrival-rate', + timeUnit: '1s', + preAllocatedVUs: 100, + duration: '180s' // 3 minutes of sustained load + } + + // Flat load profiles - sustained but not pushing to CPU saturation + const rate_read = 200 // 50 RPS for read operations + const rate_write = 25 // 10 RPS for write operations + + return { + scenarios: { + findCommit: {...SCENARIO_DEFAULTS, rate: rate_read, exec: 'findCommit', tags: { rpc: 'FindCommit' } }, + getBlobs: { ...SCENARIO_DEFAULTS, rate: rate_read, exec: 'getBlobs', tags: { rpc: 'GetBlobs' }, }, + listRefs: { ...SCENARIO_DEFAULTS, rate: rate_read, exec: 'listRefs', tags: { rpc: 'ListRefs' } }, + writeRefs: { ...SCENARIO_DEFAULTS, rate: rate_write, exec: 'writeRefs', tags: { rpc: 'WriteRef' } }, + }, + setupTimeout: '15m' + } } -export const options = optionsRamping() +export const options = optionsFlatLoad() const repos = JSON.parse(open("/opt/benchmark-gitaly/repositories.json")); const selectTestRepo = () => { - const active = repos.filter(r => r.include_in_test); - const repo = active[Math.floor(Math.random() * active.length)]; - - return { - repository: { - storageName: 'default', - relativePath: `${repo.name}`, - glRepository: repo.name, // irrelevant but mandatory - glProjectPath: `foo/bar/${repo.name}`, // irrelevant but mandatory - }, - commit: repo.testdata.commits[Math.floor(Math.random() * repo.testdata.commits.length)], - ref: repo.testdata.refs[Math.floor(Math.random() * repo.testdata.refs.length)], - file: repo.testdata.files[Math.floor(Math.random() * repo.testdata.files.length)], - directory: repo.testdata.directories[Math.floor(Math.random() * repo.testdata.directories.length)], - } + const active = repos.filter(r => r.include_in_test); + const repo = active[Math.floor(Math.random() * active.length)]; + + return { + repository: { + storageName: 'default', + relativePath: `${repo.name}`, + glRepository: repo.name, // irrelevant but mandatory + glProjectPath: `foo/bar/${repo.name}`, // irrelevant but mandatory + }, + commit: repo.testdata.commits[Math.floor(Math.random() * repo.testdata.commits.length)], + ref: repo.testdata.refs[Math.floor(Math.random() * repo.testdata.refs.length)], + file: repo.testdata.files[Math.floor(Math.random() * repo.testdata.files.length)], + directory: repo.testdata.directories[Math.floor(Math.random() * repo.testdata.directories.length)], + } } const generateRandom = () => Math.random().toString(36).substring(2, 15) + Math.random().toString(23).substring(2, 5) export function setup () { - const setupCompletionSentinel = `/tmp/${runName}-setup-complete` - // Signal to Ansible that setup is complete, in a very hacky way. - exec.command('touch', [setupCompletionSentinel]) + const setupCompletionSentinel = `/tmp/${runName}-setup-complete` + // Signal to Ansible that setup is complete, in a very hacky way. 
+ exec.command('touch', [setupCompletionSentinel]) - return { - setupCompletionSentinel - } + return { + setupCompletionSentinel + } } export function teardown (context) { - exec.command('rm', [context.setupCompletionSentinel]) + exec.command('rm', [context.setupCompletionSentinel]) } const client = new Client() @@ -178,96 +128,39 @@ export function getBlobs () { stream.write(req) } -export function getTreeEntries () { - client.connect(gitalyAddress, { - plaintext: true - }) - - const testRepo = selectTestRepo(); - const req = { - repository: testRepo.repository, - revision: encoding.b64encode(testRepo.commit), - path: encoding.b64encode(testRepo.directory) - } - - const stream = new Stream(client, 'gitaly.CommitService/GetTreeEntries') - stream.on('data', data => { - check(data, { - 'GetTreeEntries - entries present in response': r => r && r.entries +export function listRefs () { + client.connect(gitalyAddress, { + plaintext: true }) - }) - - stream.on('end', function () { - client.close() - }) - - stream.on('error', function(err) { - console.error(err) - }) - - stream.write(req) -} - -export function treeEntry () { - client.connect(gitalyAddress, { - plaintext: true - }) - - const testRepo = selectTestRepo(); - const req = { - repository: testRepo.repository, - revision: encoding.b64encode(testRepo.ref), - path: encoding.b64encode(testRepo.file) - } - const stream = new Stream(client, 'gitaly.CommitService/TreeEntry') - stream.on('data', data => { - check(data, { - 'TreeEntry - data present in response': r => r && r.data + const testRepo = selectTestRepo(); + const req = { + repository: testRepo.repository, + patterns: [ encoding.b64encode("refs/heads/*") ], + pagination_params: { + limit: 20, + }, + } + + const stream = new Stream(client, 'gitaly.RefService/ListRefs') + stream.on('data', data => { + check(data, { + 'ListRefs - references present in response': r => r && r.references + }) }) - }) - - stream.on('end', function () { - client.close() - }) - - stream.on('error', function(err) { - console.error(err) - }) - - stream.write(req) -} - -export function listCommitsByOid () { - client.connect(gitalyAddress, { - plaintext: true - }) - - const testRepo = selectTestRepo(); - const req = { - repository: testRepo.repository, - oid: [testRepo.commit] - } - const stream = new Stream(client, 'gitaly.CommitService/ListCommitsByOid') - stream.on('data', data => { - check(data, { - 'ListCommitsByOid - commits present in response': r => r && r.commits + stream.on('end', function () { + client.close() }) - }) - stream.on('end', function () { - client.close() - }) - - stream.on('error', function(err) { - console.error(err) - }) + stream.on('error', function(err) { + console.error(err) + }) - stream.write(req) + stream.write(req) } -export function writeAndDeleteRefs () { +export function writeRefs () { client.connect(gitalyAddress, { plaintext: true }) @@ -286,15 +179,24 @@ export function writeAndDeleteRefs () { 'WriteRef - StatusOK': r => r && r.status === StatusOK }) - const deleteRefsReq = { - repository: testRepo.repository, - refs: [encoding.b64encode(generatedRef)] - } - - const deleteRefsRes = client.invoke('gitaly.RefService/DeleteRefs', deleteRefsReq) - check(deleteRefsRes, { - 'DeleteRefs - StatusOK': r => r && r.status === StatusOK - }) - client.close() } + +// export function optimizeRepository () { +// client.connect(gitalyAddress, { +// plaintext: true +// }) + +// const testRepo = selectTestRepo(); + +// const optimizeRepReq = { +// repository: testRepo.repository, +// } + +// 
const optimizeRepRes = client.invoke('gitaly.RepositoryService/OptimizeRepository', optimizeRepReq)
+// check(optimizeRepRes, {
+//   'OptimizeRepository - StatusOK': r => r && r.status === StatusOK
+// })
+
+// client.close()
+// }
diff --git a/_support/benchmarking/roles/benchmark/vars/main.yml b/_support/benchmarking/roles/benchmark/vars/main.yml
index a9f2439ee8b..9251561fa92 100644
--- a/_support/benchmarking/roles/benchmark/vars/main.yml
+++ b/_support/benchmarking/roles/benchmark/vars/main.yml
@@ -2,6 +2,6 @@
 profile: true
 clear_page_cache: true
 # Profiling and the workload will begin concurrently.
-profile_duration: 300
-workload_duration: "60s"
-workload_wait_duration: 360
+profile_duration: 60
+workload_duration: "180s"
+workload_wait_duration: 300
diff --git a/_support/benchmarking/roles/gitaly/tasks/initialize.yml b/_support/benchmarking/roles/gitaly/tasks/initialize.yml
index c3075aa669a..e01b80f8ab8 100644
--- a/_support/benchmarking/roles/gitaly/tasks/initialize.yml
+++ b/_support/benchmarking/roles/gitaly/tasks/initialize.yml
@@ -67,10 +67,22 @@
     state: latest
   become: true
 
-- name: Clone repositories in parallel
+- name: Clone repositories in parallel and add extra refs
   shell: |
     if [ ! -d "{{ item.name }}" ]; then
      git clone --bare --ref-format={{ item.reference_backend }} {{ item.remote }} {{ item.name }}
+
+      COMMIT_SHA="$(git -C {{ item.name }} rev-parse HEAD)"
+      touch base_refs
+      for i in $(seq 1 1000); do
+        echo "create refs/refs/zzz-$i $COMMIT_SHA" >> base_refs
+      done
+      git -C {{ item.name }} update-ref --stdin < base_refs
+      if [[ "{{ item.reference_backend }}" = "files" ]]; then
+        git -C {{ item.name }} pack-refs --all --auto
+      fi
+
+      rm base_refs
     fi
   args:
     chdir: /mnt/git-repositories
diff --git a/internal/cli/gitaly/serve.go b/internal/cli/gitaly/serve.go
index 9af11358a16..87ad0ebe309 100644
--- a/internal/cli/gitaly/serve.go
+++ b/internal/cli/gitaly/serve.go
@@ -577,7 +577,7 @@ func run(appCtx *cli.Command, cfg config.Cfg, logger log.Logger) error {
 	housekeepingManager := housekeepingmgr.New(cfg.Prometheus, logger, transactionManager, node)
 	prometheus.MustRegister(housekeepingManager)
 
-	housekeepingMiddleware := housekeepingmw.NewHousekeepingMiddleware(logger, protoregistry.GitalyProtoPreregistered, localrepoFactory, housekeepingManager, 50)
+	housekeepingMiddleware := housekeepingmw.NewHousekeepingMiddleware(logger, protoregistry.GitalyProtoPreregistered, localrepoFactory, housekeepingManager, 20)
 	defer housekeepingMiddleware.WaitForWorkers()
 
 	gitalyServerFactory := server.NewGitalyServerFactory(
-- 
GitLab
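
Note on the hard-coded duration: the rewritten k6 options above fix the scenario
length at '180s' inside the script, while roles/benchmark/vars/main.yml still
carries workload_duration: "180s", so the two values can drift apart silently.
Below is a minimal sketch (not part of the patch) of deriving the k6 duration
from the WORKLOAD_DURATION environment variable that the previous version of the
script read. It assumes the benchmark role still exports that variable to the k6
process; the '180s' fallback is only an illustration.

    // Sketch: read the scenario duration from the environment instead of
    // hard-coding it, falling back to 180s when the variable is unset.
    const workloadDuration = __ENV.WORKLOAD_DURATION || '180s'

    const optionsFlatLoad = () => {
      const SCENARIO_DEFAULTS = {
        executor: 'constant-arrival-rate',
        timeUnit: '1s',
        preAllocatedVUs: 100,
        duration: workloadDuration // e.g. "180s" from roles/benchmark/vars/main.yml
      }

      const rate_read = 200 // requests per second for each read scenario
      const rate_write = 25 // requests per second for the write scenario

      return {
        scenarios: {
          findCommit: { ...SCENARIO_DEFAULTS, rate: rate_read, exec: 'findCommit', tags: { rpc: 'FindCommit' } },
          getBlobs: { ...SCENARIO_DEFAULTS, rate: rate_read, exec: 'getBlobs', tags: { rpc: 'GetBlobs' } },
          listRefs: { ...SCENARIO_DEFAULTS, rate: rate_read, exec: 'listRefs', tags: { rpc: 'ListRefs' } },
          writeRefs: { ...SCENARIO_DEFAULTS, rate: rate_write, exec: 'writeRefs', tags: { rpc: 'WriteRef' } },
        },
        setupTimeout: '15m'
      }
    }

Keeping the duration env-driven lets workload_duration in the Ansible vars remain
the single source of truth for how long both the workload and the profiler run.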