Skip to content

Commit d9b704e

Browse files
committed
Switch from aws-api to aws sdk v2 via interop
Bit of a bummer, but aws-api was copying entire objects to the heap, and we don't have enough heap for that. Our database backup is close to 1 GB. Abstracted s3 into its own namespace and protocol to make swapping aws-api back in easier in the future. Maybe they'll fix the issue, or maybe I was just using it wrong. See: cognitect-labs/aws-api#257
1 parent b8fd03e commit d9b704e

File tree

7 files changed

+327
-201
lines changed

7 files changed

+327
-201
lines changed

deps.edn

+2-5
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,8 @@
6262
raven-clj/raven-clj {:mvn/version "1.7.0"} ;; Sentry service interface
6363

6464
;; s3 for database backups at Exoscale Simple Object Store
65-
org.tcrawley/cognitect-http-client {:mvn/version "1.11.129"} ;; fix for jetty client version conflict
66-
com.cognitect.aws/api {:mvn/version "0.8.692"
67-
:exclusions [com.cognitect/http-client]}
68-
com.cognitect.aws/endpoints {:mvn/version "1.1.12.770"}
69-
com.cognitect.aws/s3 {:mvn/version "869.2.1687.0"}
65+
;; started with aws-api but it loads entire objects on the heap, and we don't have enough heap for that!
66+
software.amazon.awssdk/s3 {:mvn/version "2.28.5"}
7067

7168
;; reaching out to other services
7269
org.eclipse.jgit/org.eclipse.jgit.ssh.jsch {:mvn/version "6.10.0.202406032230-r"} ;; git with jsch

doc/adr/0021-Moving-To-Exoscale.adoc

+12
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,18 @@ tar --use-compress-program=zstd -xf dest.tar.zst
281281

282282
Our Lucene full-text database is quickly reconstituted from clojars at startup time, so no need to save a backup of it.
283283

284+
Our backup files are close to 1gb, and because we need to be cheap we have a small heap.
285+
The cognitect aws api, unfortunately, loads an entire file into memory.
286+
This gives us OutOfMemory exceptions.
287+
What to do?
288+
* I had a peek at https://github.com/grzm/awyeah-api and I think it uses byte buffers too.
289+
* Could try amazonica.
290+
* Could try AWS SDK through java interop.
291+
* Could spawn out to upload and download files.
292+
* Could handle this with raw HTTP requests
293+
294+
* I think I might try the AWS SDK next.
295+
284296
=== Packer or Cloud Init?
285297
We currently use packer to build our host image.
286298

ops/exoscale/deploy/resources/secrets.edn

+4-4
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
:builder-project #env! CIRCLE_BUILDER_PROJECT}
33
:s3 {;; note: we are using Exoscale, if we need to be more flexible in the future could
44
;; configure a :backups-provider
5-
:backups-bucket-region #env! EXO_BACKUPS_BUCKET_REGION ;; in exoscale these are called zones
6-
:backups-bucket-name #env! EXO_BACKUPS_BUCKET_NAME
7-
:backups-bucket-key #env! EXO_BACKUPS_BUCKET_KEY
8-
:backups-bucket-secret #env! EXO_BACKUPS_BUCKET_SECRET}
5+
:backups {:bucket-region #env! EXO_BACKUPS_BUCKET_REGION ;; in exoscale these are called zones
6+
:bucket-name #env! EXO_BACKUPS_BUCKET_NAME
7+
:bucket-key #env! EXO_BACKUPS_BUCKET_KEY
8+
:bucket-secret #env! EXO_BACKUPS_BUCKET_SECRET}}
99
:sentry {:dsn #env! SENTRY_DSN}}

src/cljdoc/config.clj

+1-4
Original file line numberDiff line numberDiff line change
@@ -96,10 +96,7 @@
9696
(-> config
9797
:secrets
9898
:s3
99-
(select-keys [:backups-bucket-region
100-
:backups-bucket-name
101-
:backups-bucket-key
102-
:backups-bucket-secret])))
99+
:backups))
103100

104101
(defn db-backup [config]
105102
(let [enabled? (enable-db-backup? config)]

src/cljdoc/s3.clj

+158
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
(ns cljdoc.s3
2+
(:require [cljdoc.server.log-init] ;; to quiet odd jetty DEBUG logging
3+
[clojure.java.io :as io])
4+
(:import (java.lang AutoCloseable)
5+
(software.amazon.awssdk.auth.credentials AwsBasicCredentials AwsCredentialsProvider StaticCredentialsProvider)
6+
(software.amazon.awssdk.core.sync RequestBody ResponseTransformer)
7+
(software.amazon.awssdk.regions Region)
8+
(software.amazon.awssdk.services.s3 S3Client)
9+
(software.amazon.awssdk.services.s3.model CopyObjectRequest DeleteObjectRequest GetObjectRequest ListObjectsV2Request ObjectCannedACL PutObjectRequest S3Object)))
10+
11+
(set! *warn-on-reflection* true)
12+
13+
(defprotocol IObjectStore
14+
"Use a protocol to make switching to a different implementation a bit easier.
15+
We are currently using the aws sdk, but only because aws-api currently blows our heap
16+
by loading entire objects into RAM.
17+
18+
Implementations are specific to our use case:
19+
- always contained to a single bucket
20+
- public-read acl
21+
- only expose data we care about
22+
- put and get at granularity of file only (no streams or strings, etc)"
23+
(list-objects [object-store])
24+
(put-object [object-store object-key from-file])
25+
(get-object [object-store object-key to-file])
26+
(delete-object [object-store object-key])
27+
(copy-object [object-store source-key dest-key]))
28+
29+
(defrecord AwsSdkObjectStore [^S3Client s3 opts]
  ;; IObjectStore implementation on top of the AWS SDK v2 S3Client.
  ;; `opts` supplies :bucket-name; every operation below targets that one bucket.
  ;; The record is AutoCloseable: closing it closes the wrapped S3Client.
  IObjectStore
  (list-objects [_]
    ;; A single ListObjectsV2 response only carries one page of keys, so iterate
    ;; through the paginator to return every object rather than a truncated listing.
    ;; When the bucket fits in one page this still issues exactly one request.
    (let [{:keys [bucket-name]} opts
          ^ListObjectsV2Request request (-> (ListObjectsV2Request/builder)
                                            (.bucket bucket-name)
                                            .build)]
      (->> (.listObjectsV2Paginator s3 request)
           .contents
           ;; only expose the object key; callers do not need the rest of S3Object
           (mapv (fn [^S3Object o] {:key (.key o)})))))

  (put-object [_ object-key from-file]
    ;; Uploads the file at `from-file` under `object-key` with a public-read ACL.
    ;; RequestBody/fromFile is handed the file itself rather than its bytes.
    (let [{:keys [bucket-name]} opts
          ^PutObjectRequest request (-> (PutObjectRequest/builder)
                                        (.bucket bucket-name)
                                        (.key object-key)
                                        (.acl ObjectCannedACL/PUBLIC_READ)
                                        .build)]
      (.putObject s3 request (RequestBody/fromFile (io/file from-file)))))

  (get-object [_ object-key to-file]
    ;; Downloads `object-key` into the file at `to-file`.
    ;; NOTE(review): the SDK's toFile transformer is documented to fail when the
    ;; destination already exists - confirm callers always pass a fresh path.
    (let [{:keys [bucket-name]} opts
          ^GetObjectRequest request (-> (GetObjectRequest/builder)
                                        (.bucket bucket-name)
                                        (.key object-key)
                                        .build)]
      (.getObject s3 request (ResponseTransformer/toFile (io/file to-file)))))

  (delete-object [_ object-key]
    ;; Deletes `object-key` from the bucket.
    (let [{:keys [bucket-name]} opts
          ^DeleteObjectRequest request (-> (DeleteObjectRequest/builder)
                                           (.bucket bucket-name)
                                           (.key object-key)
                                           .build)]
      (.deleteObject s3 request)))

  (copy-object [_ source-key dest-key]
    ;; Copy within the same bucket: source and destination bucket are both `bucket-name`.
    (let [{:keys [bucket-name]} opts
          ^CopyObjectRequest request (-> (CopyObjectRequest/builder)
                                         (.sourceBucket bucket-name)
                                         (.sourceKey source-key)
                                         (.destinationBucket bucket-name)
                                         (.destinationKey dest-key)
                                         .build)]
      (.copyObject s3 request)))

  AutoCloseable
  (close [_] (.close s3)))
71+
72+
(defn s3-exo-client
  "Builds an S3Client pointed at the Exoscale SOS endpoint for `bucket-region`,
  authenticating with the static `bucket-key` / `bucket-secret` pair."
  [{:keys [bucket-key bucket-secret bucket-region]}]
  (let [sos-url (format "https://sos-%s.exo.io" bucket-region)
        basic-creds (AwsBasicCredentials/create bucket-key bucket-secret)
        ^AwsCredentialsProvider static-provider (StaticCredentialsProvider/create basic-creds)]
    ;; doto (rather than threading) keeps the concrete builder type in hand for .build
    (.build
     (doto (S3Client/builder)
       ;; AWS SDK requires a region even though we are not using AWS services
       (.region Region/AWS_GLOBAL)
       (.endpointOverride (java.net.URI. sos-url))
       (.credentialsProvider static-provider)))))
80+
81+
(defn make-exo-object-store
  "Creates an AwsSdkObjectStore for Exoscale: builds the S3 client from `opts`
  and stores `opts` on the record next to it. The result is AutoCloseable."
  [opts]
  (->AwsSdkObjectStore (s3-exo-client opts) opts))
84+
85+
(comment
86+
(require '[cljdoc.config :as cfg])
87+
88+
;; assumes you've loaded up secrets to a working exo endpoint
89+
(def opts (cfg/db-backup (cfg/config)))
90+
91+
(:bucket-region opts)
92+
93+
(:bucket-name opts)
94+
95+
(spit "target/dummy-file.txt" "foobar")
96+
97+
(def object-store (make-exo-object-store opts))
98+
99+
(list-objects object-store)
100+
;; => [{:key "daily/cljdoc-db-2024-09-03_2024-09-03T20-22-00.tar.zst"}
101+
;; {:key "daily/cljdoc-db-2024-09-17_2024-09-17T18-01-44.tar.zst"}]
102+
103+
(put-object object-store "daily/dummy-file" "target/dummy-file.txt")
104+
;; => #object[software.amazon.awssdk.services.s3.model.PutObjectResponse 0x67c79dcd "PutObjectResponse(ETag=\"3858f62230ac3c915f300c664312c63f\")"]
105+
106+
(list-objects object-store)
107+
;; => [{:key "daily/cljdoc-db-2024-09-03_2024-09-03T20-22-00.tar.zst"}
108+
;; {:key "daily/cljdoc-db-2024-09-17_2024-09-17T18-01-44.tar.zst"}
109+
;; {:key "daily/dummy-file"}]
110+
111+
(get-object object-store "daily/dummy-file" "target/dummy-file.down.txt")
112+
;; => #object[software.amazon.awssdk.services.s3.model.GetObjectResponse 0x7e2790ce "GetObjectResponse(AcceptRanges=bytes, LastModified=2024-09-21T14:12:04Z, ContentLength=6, ETag=\"3858f62230ac3c915f300c664312c63f\", ContentType=text/plain, Metadata={})"]
113+
114+
(slurp "target/dummy-file.down.txt")
115+
;; => "foobar"
116+
117+
(delete-object object-store "daily/dummy-file")
118+
;; => #object[software.amazon.awssdk.services.s3.model.DeleteObjectResponse 0x4465db91 "DeleteObjectResponse()"]
119+
120+
(list-objects object-store)
121+
;; => [{:key "daily/cljdoc-db-2024-09-03_2024-09-03T20-22-00.tar.zst"}
122+
;; {:key "daily/cljdoc-db-2024-09-17_2024-09-17T18-01-44.tar.zst"}]
123+
124+
(put-object object-store "daily/dummy-file" "target/dummy-file.txt")
125+
;; => #object[software.amazon.awssdk.services.s3.model.PutObjectResponse 0x517e713b "PutObjectResponse(ETag=\"3858f62230ac3c915f300c664312c63f\")"]
126+
127+
(copy-object object-store "daily/dummy-file" "daily/dummy-file-copy")
128+
;; => #object[software.amazon.awssdk.services.s3.model.CopyObjectResponse 0x4488e7e1 "CopyObjectResponse(CopyObjectResult=CopyObjectResult(ETag=3858f62230ac3c915f300c664312c63f, LastModified=2024-09-21T14:17:12.018Z))"]
129+
130+
(list-objects object-store)
131+
;; => [{:key "daily/cljdoc-db-2024-09-03_2024-09-03T20-22-00.tar.zst"}
132+
;; {:key "daily/cljdoc-db-2024-09-17_2024-09-17T18-01-44.tar.zst"}
133+
;; {:key "daily/dummy-file"}
134+
;; {:key "daily/dummy-file-copy"}]
135+
136+
(get-object object-store "daily/dummy-file-copy" "target/dummy-file-copy.down.txt")
137+
;; => #object[software.amazon.awssdk.services.s3.model.GetObjectResponse 0x437659bf "GetObjectResponse(AcceptRanges=bytes, LastModified=2024-09-21T14:17:12Z, ContentLength=6, ETag=\"3858f62230ac3c915f300c664312c63f\", ContentType=text/plain, Metadata={})"]
138+
139+
(slurp "target/dummy-file-copy.down.txt")
140+
;; => "foobar"
141+
142+
(delete-object object-store "daily/dummy-file-copy")
143+
;; => #object[software.amazon.awssdk.services.s3.model.DeleteObjectResponse 0x7235fc92 "DeleteObjectResponse()"]
144+
145+
(delete-object object-store "daily/dummy-file")
146+
;; => #object[software.amazon.awssdk.services.s3.model.DeleteObjectResponse 0x2507c9f0 "DeleteObjectResponse()"]
147+
148+
(list-objects object-store)
149+
;; => [{:key "daily/cljdoc-db-2024-09-03_2024-09-03T20-22-00.tar.zst"}
150+
;; {:key "daily/cljdoc-db-2024-09-17_2024-09-17T18-01-44.tar.zst"}]
151+
152+
(.close object-store)
153+
154+
(list-objects object-store)
155+
;; => Execution error (IllegalStateException) at org.apache.http.util.Asserts/check (Asserts.java:34).
156+
;; Connection pool shut down
157+
158+
:eoc)

0 commit comments

Comments
 (0)