node.js aws-sdk S3:使用 listObjectsV2 列出所有键的最佳方法
声明:本页面是StackOverFlow热门问题的中英对照翻译,遵循CC BY-SA 4.0协议,如果您需要使用它,必须同样遵循CC BY-SA许可,注明原文地址和作者信息,同时你必须将它归于原作者(不是我):StackOverFlow
原文地址: http://stackoverflow.com/questions/42394429/
Warning: this content is provided under the CC BY-SA 4.0 license. You are free to use and share it, but you must attribute it to the original authors (not me):
StackOverFlow
aws-sdk S3: best way to list all keys with listObjectsV2
提问by eljefedelrodeodeljefe
With the v1 version of the `listObjects` API call, you would have done something like the following, taken from this SO answer.
使用listObjectsAPI 调用的 v1 版本,您可以从这个SO answer 中完成类似的操作。
// Accumulates one entry per response page: each element is that page's
// `Contents` array, not an individual key.
var allKeys = [];

/**
 * Lists a bucket with the v1 `listObjects` call, following the pagination
 * marker until the listing is no longer truncated. Every page's `Contents`
 * array is appended to `allKeys`.
 *
 * @param {string|undefined} marker - Key to start listing after; leave
 *   undefined for the first page.
 * @param {function(Error=): void} cb - Called once with no arguments when the
 *   listing is complete, or with the error as soon as a request fails.
 */
function listAllKeys(marker, cb) {
  s3.listObjects({Bucket: s3bucket, Marker: marker}, function (err, data) {
    if (err) {
      // Report the failure instead of dereferencing `data`, which is not
      // usable when the request failed.
      cb(err);
      return;
    }

    allKeys.push(data.Contents);

    if (!data.IsTruncated) {
      cb();
      return;
    }

    // S3 only includes NextMarker in the response when the request carried a
    // Delimiter. Without one, the documented way to continue is to use the
    // last key of the current page as the next marker.
    const lastEntry = data.Contents[data.Contents.length - 1];
    const nextMarker = data.NextMarker || (lastEntry && lastEntry.Key);

    if (!nextMarker) {
      // Nothing to continue from: re-requesting with the same marker would
      // fetch the same page forever.
      cb(new Error('listObjects response is truncated but provides no marker to continue from'));
      return;
    }

    listAllKeys(nextMarker, cb);
  });
}
What would be the equivalent using the `listObjectsV2` function?
listObjectsV2函数的等价物是什么?
回答by Giovanni Bruno
this is the best way to do that in my opinion:
在我看来,这是最好的方法:
const AWS = require('aws-sdk');
const s3 = new AWS.S3();
/**
 * Lists every object matched by `params`, following continuation tokens
 * until the response is no longer truncated.
 *
 * @param {Object} params - listObjectsV2 request parameters. The object is
 *   not modified; each follow-up request uses a copy carrying the token.
 * @param {Array} [out=[]] - Array that every page's `Contents` entries are
 *   appended to.
 * @returns {Promise<Array>} Resolves with `out` after the last page has been
 *   appended; rejects with the error of the first failing request.
 */
const listAllKeys = async (params, out = []) => {
  let request = params;
  for (;;) {
    // The SDK call already returns a promise, so there is no need to wrap it
    // in `new Promise`; a failure simply rejects the async function.
    const {Contents, IsTruncated, NextContinuationToken} = await s3.listObjectsV2(request).promise();
    out.push(...Contents);
    if (!IsTruncated) {
      return out;
    }
    // Copy instead of assigning onto `params`, so the caller's object does
    // not end up holding a stale continuation token.
    request = Object.assign({}, params, {ContinuationToken: NextContinuationToken});
  }
};
// Example: list everything in the bucket and print the result. A failure is
// printed through the same channel.
listAllKeys({ Bucket: 'bucket-name' })
  .then((objects) => console.log(objects))
  .catch((error) => console.log(error));
回答by notionquest
Here is the code to get the list of keys from a bucket.
这是从存储桶中获取键(key)列表的代码。
// Request parameters shared by every page request. listAllKeys() stores the
// continuation token on this object between pages.
const params = { Bucket: 'bucket-name' };

// Receives the key of every object as pages arrive.
const allKeys = [];

// Start the listing (listAllKeys is a hoisted function declaration).
listAllKeys();
/**
 * Requests one page of the listing described by the shared `params`, appends
 * the key of each returned object to the shared `allKeys`, and requests the
 * next page while the response is truncated. A request error is logged and
 * ends the listing.
 */
function listAllKeys() {
  s3.listObjectsV2(params, function (err, data) {
    if (err) {
      console.log(err, err.stack); // an error occurred
      return;
    }

    for (const entry of data.Contents) {
      allKeys.push(entry.Key);
    }

    if (!data.IsTruncated) {
      return;
    }

    // Carry the token on the shared params so the next call continues from
    // where this page ended.
    params.ContinuationToken = data.NextContinuationToken;
    console.log("get further list...");
    listAllKeys();
  });
}
回答by talawahtech
Building on previous answers, here is an approach that takes advantage of the `Prefix` parameter to make multiple calls to `s3.listObjectsV2()` in parallel.
基于先前的答案,这是一种利用Prefix参数并行多次调用 s3.listObjectsV2() 的方法。
This has led to 2-15x speedup for me depending on how evenly the keys are distributed and whether or not the code is running locally or on AWS.
根据密钥分布的均匀程度以及代码是在本地运行还是在 AWS 上运行,这对我来说使速度提高了 2-15 倍。
You should make sure that the prefixes cover the full range of possible prefixes for your bucket. The code below covers all "safe" characters but S3 supports a wider range of UTF-8 characters.
您应该确保前缀涵盖您存储桶的所有可能前缀。下面的代码涵盖了所有“安全”字符,但 S3 支持更广泛的 UTF-8 字符。
Note that this example uses async/await so ES2017/Node 8 is required. The example is a Node 8.10 Lambda function.
请注意,此示例使用 async/await,因此需要 ES2017/Node 8。该示例是一个 Node 8.10 Lambda 函数。
const AWS = require('aws-sdk');
const s3 = new AWS.S3();
exports.handler = async (event) => {
// Prefixes are used to fetch data in parallel.
const numbers = '0123456789'.split('');
const letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'.split('');
const special = "!-_'.*()".split(''); // "Safe" S3 special chars
const prefixes = [...numbers, ...letters, ...special];
// array of params used to call listObjectsV2 in parallel for each prefix above
const arrayOfParams = prefixes.map((prefix) => {
return { Bucket: 'YOUR-BUCKET-NAME', Prefix: prefix }
});
const allKeys = [];
await Promise.all(arrayOfParams.map(params => getAllKeys(params, allKeys)));
return allKeys.length;
};
/**
 * Collects the key of every object matched by `params`, requesting further
 * pages for as long as the response carries a NextContinuationToken.
 *
 * @param {Object} params - listObjectsV2 request parameters. Left untouched:
 *   follow-up requests use a copy that carries the continuation token.
 * @param {string[]} [allKeys=[]] - Array the keys are appended to.
 * @returns {Promise<string[]>} Resolves with `allKeys` after the last page.
 */
async function getAllKeys(params, allKeys = []) {
  let request = params;
  for (;;) {
    const response = await s3.listObjectsV2(request).promise();
    for (const obj of response.Contents) {
      allKeys.push(obj.Key);
    }
    if (!response.NextContinuationToken) {
      return allKeys;
    }
    // Build a new request rather than writing the token onto `params`;
    // otherwise the caller's object keeps the last token and a later listing
    // with it would resume from the final page instead of the beginning.
    request = Object.assign({}, params, { ContinuationToken: response.NextContinuationToken });
  }
}
Also, for completeness, here is a simpler, non-prefixed async/await version:
此外,为了完整起见,这里有一个更简单的、无前缀的 async/await 版本:
const AWS = require('aws-sdk');
const s3 = new AWS.S3();
exports.handler = async (event) => {
const allKeys = await getAllKeys({ Bucket: 'YOUR-BUCKET-NAME' });
return allKeys.length;
};
/**
 * Collects the key of every object matched by `params`, requesting further
 * pages for as long as the response carries a NextContinuationToken.
 *
 * @param {Object} params - listObjectsV2 request parameters. Left untouched:
 *   follow-up requests use a copy that carries the continuation token.
 * @param {string[]} [allKeys=[]] - Array the keys are appended to.
 * @returns {Promise<string[]>} Resolves with `allKeys` after the last page.
 */
async function getAllKeys(params, allKeys = []) {
  let request = params;
  for (;;) {
    const response = await s3.listObjectsV2(request).promise();
    for (const obj of response.Contents) {
      allKeys.push(obj.Key);
    }
    if (!response.NextContinuationToken) {
      return allKeys;
    }
    // Build a new request rather than writing the token onto `params`;
    // otherwise the caller's object keeps the last token and a later listing
    // with it would resume from the final page instead of the beginning.
    request = Object.assign({}, params, { ContinuationToken: response.NextContinuationToken });
  }
}
回答by Cully
I know this has been answered quite a few times, but I thought I'd venture my version. It's based on this answer, but with a few changes that seem worthwhile:
我知道这已经回答了很多次,但我想我会冒险我的版本。它基于此答案,但有一些似乎值得的更改:
- Takes `s3` as a parameter, instead of pulling it from the global context.
- It isn't necessary to return a `new Promise`. `s3.listObjectsV2().promise()` already returns a promise, so we can just piggyback on it.
- Concats the return values instead of passing them up the call stack as a parameter.
- Checks that `NextContinuationToken` actually has a value. If for some reason `IsTruncated` is true but there is no `NextContinuationToken`, then unless you check for that value the function will recurse forever. This situation can happen if `MaxKeys` is set to a value less than the total number of objects.
- 将 `s3` 作为参数传入,而不是从全局上下文中获取。
- 没有必要返回一个 `new Promise`。`s3.listObjectsV2().promise()` 已经返回一个 Promise,直接沿用即可。
- 连接(concat)各次返回值,而不是将其作为参数沿调用栈传递。
- 检查 `NextContinuationToken` 确实有值。如果由于某种原因 `IsTruncated` 为 true,但没有 `NextContinuationToken`,那么除非您检查该值,否则该函数将永远递归。如果 `MaxKeys` 设置为小于对象总数的值,就会发生这种情况。
/**
 * Lists every object matched by `params` using the given S3 client.
 *
 * @param {Object} s3 - Client exposing `listObjectsV2(params).promise()`.
 * @param {Object} params - Request parameters; copied, never modified.
 * @returns {Promise<Array>} Resolves with the `Contents` of all pages, in
 *   order. Paging stops when the response is not truncated or carries no
 *   NextContinuationToken.
 */
const listAllObjects = (s3, params) => {
  const currentPage = s3.listObjectsV2(params).promise();

  return currentPage.then((page) => {
    const hasMore = page.IsTruncated && page.NextContinuationToken;
    if (!hasMore) {
      return page.Contents;
    }

    // Continue from the token on a fresh copy of the parameters, then put
    // this page's entries in front of everything that follows.
    const nextParams = Object.assign({}, params, { ContinuationToken: page.NextContinuationToken });
    return listAllObjects(s3, nextParams).then((rest) => page.Contents.concat(rest));
  });
};
And here's a jest test of it:
这是针对它的 Jest 测试:
// Verifies that listAllObjects keeps following continuation tokens and
// returns the entries of every page in order.
test('Returns all results on multiple continuations', () => {
  expect.assertions(1);

  let callCount = 0;

  // Mock client: each call is counted and each response resolves after
  // 200 ms. The third call reports the final page; earlier calls report a
  // truncated page with a continuation token.
  const s3 = {
    listObjectsV2(params) {
      callCount += 1;
      return {
        promise: () => new Promise((resolve) => {
          setTimeout(() => {
            const isLastPage = callCount === 3;
            const page = isLastPage
              ? { Contents: [callCount], IsTruncated: false }
              : { Contents: [callCount], IsTruncated: true, NextContinuationToken: 'blah' };
            resolve(page);
          }, 200);
        }),
      };
    },
  };

  return listAllObjects(s3, {}).then((results) => {
    expect(results).toEqual([1, 2, 3]);
  });
});
回答by Quezler
I used one of the answers from here that I needed, and adapted it so the result is returned from inside the function instead of via an outside array passed as an argument. I'll just leave this here in case anyone finds it useful:
使用了我需要的here中的一个答案,并进行了调整,以便从内部返回结果,而不是通过作为参数传递的外部数组,我将把它留在这里,以防有人发现它有用:
// Request parameters passed to s3files(); only the bucket name is set.
const bucket = { Bucket: '<bucket name here>' };
...
...
// Print the key of every object returned by the listing.
s3files(bucket).then((objects) => {
  const keys = _.map(objects, (object) => object.Key);
  console.log(keys);
});
...
...
/**
 * Lists every object matched by `config`, following continuation tokens.
 *
 * @param {Object} config - listObjectsV2 request parameters. A copy is used
 *   for the requests, so the caller's object is not modified.
 * @returns {Promise<Array>} Resolves with the `Contents` entries of all
 *   pages, in order; rejects if any request fails.
 */
let s3files = async function (config) {
  // Work on a copy so the continuation token never leaks into `config`.
  const request = Object.assign({}, config);
  const objects = [];

  for (;;) {
    // Awaiting the SDK promise directly lets a request failure reject the
    // returned promise. Wrapping it in `new Promise(resolve => ...)` without
    // a reject path would leave the caller waiting forever.
    const response = await s3.listObjectsV2(request).promise();
    objects.push(...response.Contents);

    // Stop when there are no more pages, or when a truncated response has no
    // token to continue from (repeating the same request would never end).
    if (!response.IsTruncated || !response.NextContinuationToken) {
      return objects;
    }
    request.ContinuationToken = response.NextContinuationToken;
  }
};
回答by nkitku
https://stackoverflow.com/a/57540786/8784402
https://stackoverflow.com/a/57540786/8784402
const { S3 } = require('aws-sdk');
const s3 = new S3();
/**
 * Async generator that yields one listObjectsV2 response per page until a
 * response carries no NextContinuationToken.
 *
 * @param {Object} opts - listObjectsV2 request parameters. Not modified, so
 *   the same object can be handed to several iterators at once.
 * @yields {Object} The response of each page request.
 */
async function* listAllKeys(opts) {
  // Keep the pagination state on a private copy. Writing the token onto the
  // shared `opts` would let concurrently running iterators overwrite each
  // other's position.
  const request = { ...opts };
  do {
    const data = await s3.listObjectsV2(request).promise();
    request.ContinuationToken = data.NextContinuationToken;
    yield data;
  } while (request.ContinuationToken);
}
// Request parameters handed to listAllKeys(). Only `Bucket` is set; the
// commented entries are further request parameters left disabled here.
const opts = {
  Bucket: 'bucket-xyz', // required
  // ContinuationToken: 'STRING_VALUE',
  // Delimiter: 'STRING_VALUE',
  // EncodingType: url,
  // FetchOwner: true || false,
  // MaxKeys: 'NUMBER_VALUE',
  // Prefix: 'STRING_VALUE',
  // RequestPayer: requester,
  // StartAfter: 'STRING_VALUE'
};
/**
 * Demonstrates two ways of consuming the listAllKeys(opts) async generator:
 * a `for await` loop over every page, and pulling pages one at a time with
 * next().
 *
 * @returns {Promise<void>} Settles when both demonstrations have run;
 *   rejects if a page request fails.
 */
async function main() {
  // using for of await loop
  for await (const data of listAllKeys(opts)) {
    console.log(data.Contents);
  }

  // or lazy-load: each next() fetches at most one more page
  const keys = listAllKeys(opts);
  console.log(await keys.next());
  // e.g. {value: {…}, done: false}
  console.log(await keys.next());
  // e.g. {value: {…}, done: false}
  console.log(await keys.next());
  // e.g. {value: undefined, done: true}
}

// Do not leave the promise floating: report a failed listing and signal it
// through the exit code instead of producing an unhandled rejection.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
// Making Observable
/**
 * Adapts listAllKeys(opts) to an observer-style consumer.
 *
 * @param {Object} opts - Request parameters forwarded to listAllKeys.
 * @returns {function(Object): function(): boolean} A subscribe function. It
 *   takes an observer with `next`, `error` and `complete` callbacks, starts
 *   the listing, and returns a stop function. After stop is called, the loop
 *   ends once the next page has been delivered, and `complete` is invoked.
 */
const lister = opts => o => {
  let needMore = true;
  (async () => {
    try {
      for await (const data of listAllKeys(opts)) {
        o.next(data);
        if (!needMore) break;
      }
    } catch (err) {
      // Forward the failure to the observer; otherwise it would surface only
      // as an unhandled rejection and the observer would never be notified.
      o.error(err);
      return;
    }
    o.complete();
  })();
  return () => (needMore = false);
}
// Using Rxjs
const { Observable } = require('rxjs');
const { flatMap } = require('rxjs/operators')
/**
 * Wraps the page lister in an Observable, flattens every page into its
 * individual `Contents` entries, and logs each entry.
 *
 * @returns {Object} The value returned by `subscribe`.
 */
function listAll() {
  const pages$ = Observable.create(lister(opts));
  const objects$ = pages$.pipe(flatMap((page) => page.Contents));
  return objects$.subscribe(console.log);
}

listAll();
// Using Nodejs EventEmitter
const EventEmitter = require('events');
// Bridge the lister to a Node.js EventEmitter: every page is re-emitted as a
// 'next' event, a failure as 'error', and the end of the listing as
// 'complete'.
const _eve = new EventEmitter();
_eve.on('next', console.log);
// An EventEmitter throws when 'error' is emitted and nobody listens for it,
// so a listener must be registered before the observer below can report a
// failure.
_eve.on('error', console.error);

// Calling stop() asks the lister to end the listing early.
const stop = lister(opts)({
  next: v => _eve.emit('next', v),
  error: e => _eve.emit('error', e),
  complete: v => _eve.emit('complete', v)
});

