注意
本文档适用于 Ceph 开发版本。
Librados (C)
librados提供对 RADOS 服务的低级访问。有关 RADOS 的概述,请参阅架构.
示例:连接并写入对象
要使用 Librados,您需要实例化一个 rados_t 变量(集群句柄),并以指向它的指针为参数调用 rados_create():
/* Create the cluster handle; NULL is passed as the `id` argument. */
int err;
rados_t cluster;
err = rados_create(&cluster, NULL);
if (err < 0) {
/* librados reports failures as negative error codes, hence strerror(-err). */
fprintf(stderr, "%s: cannot create a cluster handle: %s\n", argv[0], strerror(-err));
exit(1);
}
然后,您需要配置 rados_t 以连接到您的集群:可以设置单个值(rados_conf_set())、使用配置文件(rados_conf_read_file())、使用命令行选项(rados_conf_parse_argv()),或者使用环境变量(rados_conf_parse_env()):
/* Configure the handle from a Ceph config file; this is done before rados_connect(). */
err = rados_conf_read_file(cluster, "/path/to/myceph.conf");
if (err < 0) {
fprintf(stderr, "%s: cannot read config file: %s\n", argv[0], strerror(-err));
exit(1);
}
一旦集群句柄配置完成,您就可以使用 rados_connect() 连接到集群:
/* Connect to the cluster using the configured handle. */
err = rados_connect(cluster);
if (err < 0) {
fprintf(stderr, "%s: cannot connect to cluster: %s\n", argv[0], strerror(-err));
exit(1);
}
然后,您使用 rados_ioctx_create() 打开一个“IO 上下文”(rados_ioctx_t):
/* Open an IO context on the pool named "mypool". */
rados_ioctx_t io;
char *poolname = "mypool";
err = rados_ioctx_create(cluster, poolname, &io);
if (err < 0) {
fprintf(stderr, "%s: cannot open rados pool %s: %s\n", argv[0], poolname, strerror(-err));
/* The cluster handle is already connected here, so shut it down before exiting. */
rados_shutdown(cluster);
exit(1);
}
注意,您尝试访问的池必须存在。
然后,您可以使用 RADOS 数据操作函数,例如使用 rados_write_full() 写入一个名为 greeting 的对象:
/* Write the 5 bytes "hello" as the full content of the object "greeting". */
err = rados_write_full(io, "greeting", "hello", 5);
if (err < 0) {
fprintf(stderr, "%s: cannot write pool %s: %s\n", argv[0], poolname, strerror(-err));
/* Clean up in reverse order of creation: IO context first, then the cluster handle. */
rados_ioctx_destroy(io);
rados_shutdown(cluster);
exit(1);
}
最后,您需要使用 rados_ioctx_destroy() 和 rados_shutdown() 关闭您的 IO 上下文以及与 RADOS 的连接:
/* Tear down: destroy the IO context, then shut down the cluster handle. */
rados_ioctx_destroy(io);
rados_shutdown(cluster);
异步 IO
当执行大量 IO 时,您通常不需要等待一个操作完成才能开始下一个操作。Librados 提供了若干操作的异步版本,例如 rados_aio_write()、rados_aio_append()、rados_aio_write_full() 和 rados_aio_read()。
对于每个操作,您必须首先通过调用 rados_aio_create_completion() 创建一个 rados_completion_t,它表示当操作安全或完成时要执行的操作。如果您不需要任何特殊的事情发生,您可以传递 NULL:
/* Create a completion with no callback argument and no callbacks (all NULL). */
rados_completion_t comp;
err = rados_aio_create_completion(NULL, NULL, NULL, &comp);
if (err < 0) {
fprintf(stderr, "%s: could not create aio completion: %s\n", argv[0], strerror(-err));
rados_ioctx_destroy(io);
rados_shutdown(cluster);
exit(1);
}
现在您可以调用任何 aio 操作,并等待它在所有副本的内存或磁盘上准备好:
/* Queue an asynchronous write of the 3 bytes "bar" at offset 0 of object "foo". */
err = rados_aio_write(io, "foo", comp, "bar", 3, 0);
if (err < 0) {
fprintf(stderr, "%s: could not schedule aio write: %s\n", argv[0], strerror(-err));
/* The completion was created above, so release it before tearing down. */
rados_aio_release(comp);
rados_ioctx_destroy(io);
rados_shutdown(cluster);
exit(1);
}
/* NOTE: rados_aio_wait_for_safe() is marked deprecated in the API listing on this page. */
rados_aio_wait_for_complete(comp); // in memory
rados_aio_wait_for_safe(comp); // on disk
最后,我们需要使用 rados_aio_release() 释放该完成所占用的内存:
/* Release the completion once it is no longer needed. */
rados_aio_release(comp);
您可以使用回调来告诉您的应用程序写入操作何时持久化,或者读取缓冲区何时已满。例如,如果您想要测量向几个对象追加操作时的每个操作的延迟,您可以安排几个写入操作,并将确认和提交时间存储在相应的回调中,然后使用rados_aio_flush()
等待它们全部完成
/* Timestamps recorded for a single asynchronous request. */
typedef struct {
struct timeval start; /* taken right before the request is scheduled */
struct timeval ack_end; /* written by ack_callback() */
struct timeval commit_end; /* written by commit_callback() */
} req_duration;
/*
 * Completion ("ack") callback: records the current time in the ack_end field
 * of the req_duration passed through `arg`. `comp` is not used.
 */
void ack_callback(rados_completion_t comp, void *arg) {
    struct timeval *stamp = &((req_duration *) arg)->ack_end;
    gettimeofday(stamp, NULL);
}
/*
 * Safe ("commit") callback: records the current time in the commit_end field
 * of the req_duration passed through `arg`. `comp` is not used.
 */
void commit_callback(rados_completion_t comp, void *arg) {
    struct timeval *stamp = &((req_duration *) arg)->commit_end;
    gettimeofday(stamp, NULL);
}
/*
 * Asynchronously append `data` (`len` bytes) to `num_writes` objects named
 * "foo0", "foo1", ... and print the ack and commit latency of each request.
 *
 * io         - IO context to issue the appends on
 * data, len  - buffer to append and its length in bytes
 * num_writes - number of append requests to schedule
 *
 * Returns 0 on success, or the negative error code of the first call that
 * failed. On failure, requests that were already scheduled are still waited
 * for and every completion created so far is released.
 */
int output_append_latency(rados_ioctx_t io, const char *data, size_t len, size_t num_writes) {
    /* A zero-length variable-length array is undefined behavior; nothing to do. */
    if (num_writes == 0) {
        return 0;
    }

    req_duration times[num_writes];
    rados_completion_t comps[num_writes];
    size_t created = 0; /* number of completions that must be released */
    int err = 0;

    for (size_t i = 0; i < num_writes; ++i) {
        gettimeofday(&times[i].start, NULL);
        err = rados_aio_create_completion((void *) &times[i], ack_callback, commit_callback, &comps[i]);
        if (err < 0) {
            fprintf(stderr, "Error creating rados completion: %s\n", strerror(-err));
            break;
        }
        created = i + 1;

        char obj_name[100];
        snprintf(obj_name, sizeof(obj_name), "foo%lu", (unsigned long) i);
        err = rados_aio_append(io, obj_name, comps[i], data, len);
        if (err < 0) {
            fprintf(stderr, "Error from rados_aio_append: %s\n", strerror(-err));
            break;
        }
    }

    /*
     * Wait until all requests finish *and* the callbacks complete. This is
     * done on the error path too: the callbacks write into `times`, which
     * lives on this function's stack, so we must not return while any
     * request is still in flight.
     */
    rados_aio_flush(io);

    if (err < 0) {
        for (size_t i = 0; i < created; ++i) {
            rados_aio_release(comps[i]);
        }
        return err;
    }

    /* the latencies can now be analyzed */
    printf("Request # | Ack latency (s) | Commit latency (s)\n");
    for (size_t i = 0; i < num_writes; ++i) {
        /* don't forget to free the completions */
        rados_aio_release(comps[i]);

        struct timeval ack_lat, commit_lat;
        timersub(&times[i].ack_end, &times[i].start, &ack_lat);
        timersub(&times[i].commit_end, &times[i].start, &commit_lat);
        /* Cast explicitly: the widths of time_t / suseconds_t vary by platform. */
        printf("%9lu | %8ld.%06ld | %10ld.%06ld\n", (unsigned long) i,
               (long) ack_lat.tv_sec, (long) ack_lat.tv_usec,
               (long) commit_lat.tv_sec, (long) commit_lat.tv_usec);
    }
    return 0;
}
注意,所有的 rados_completion_t 都必须使用 rados_aio_release() 释放,以避免内存泄漏。
API调用
Defines
- LIBRADOS_ALL_NSPACES
在调用 rados_nobjects_list_open() 之前,将其作为 nspace 参数传递给 rados_ioctx_set_namespace(),即可返回所有命名空间中的所有对象。
- struct obj_watch_t
#include <rados_types.h> list_watchers 返回的列表中的一项
- struct notify_ack_t
- struct notify_timeout_t
xattr 比较操作
用于比较对象上的 xattr 的操作,如果比较失败,则中止 rados_read_op 或 rados_write_op 事务。
操作标志
用于 rados_read_op_operate()、rados_write_op_operate()、rados_aio_read_op_operate() 和 rados_aio_write_op_operate() 的标志。有关详细信息,请参阅 librados.hpp。
- enum [anonymous]
Values:
- enumerator LIBRADOS_OPERATION_NOFLAG
- enumerator LIBRADOS_OPERATION_BALANCE_READS
- enumerator LIBRADOS_OPERATION_LOCALIZE_READS
- enumerator LIBRADOS_OPERATION_ORDER_READS_WRITES
- enumerator LIBRADOS_OPERATION_IGNORE_CACHE
- enumerator LIBRADOS_OPERATION_SKIPRWLOCKS
- enumerator LIBRADOS_OPERATION_IGNORE_OVERLAY
- enumerator LIBRADOS_OPERATION_FULL_TRY
- enumerator LIBRADOS_OPERATION_FULL_FORCE
- enumerator LIBRADOS_OPERATION_IGNORE_REDIRECT
- enumerator LIBRADOS_OPERATION_ORDERSNAP
- enumerator LIBRADOS_OPERATION_RETURNVEC
分配提示标志
用于 rados_write_op_alloc_hint2() 和 rados_set_alloc_hint2() 的标志,指示未来的 IO 模式。
- enum [anonymous]
Values:
- enumerator LIBRADOS_ALLOC_HINT_FLAG_SEQUENTIAL_WRITE
- enumerator LIBRADOS_ALLOC_HINT_FLAG_RANDOM_WRITE
- enumerator LIBRADOS_ALLOC_HINT_FLAG_SEQUENTIAL_READ
- enumerator LIBRADOS_ALLOC_HINT_FLAG_RANDOM_READ
- enumerator LIBRADOS_ALLOC_HINT_FLAG_APPEND_ONLY
- enumerator LIBRADOS_ALLOC_HINT_FLAG_IMMUTABLE
- enumerator LIBRADOS_ALLOC_HINT_FLAG_SHORTLIVED
- enumerator LIBRADOS_ALLOC_HINT_FLAG_LONGLIVED
- enumerator LIBRADOS_ALLOC_HINT_FLAG_COMPRESSIBLE
- enumerator LIBRADOS_ALLOC_HINT_FLAG_INCOMPRESSIBLE
- enumerator LIBRADOS_ALLOC_HINT_FLAG_LOG
异步 I/O
无阻塞地读取和写入对象。
- typedef void (*rados_callback_t)(rados_completion_t cb, void *arg)
异步操作的回调参数:
cb 是已完成的完成
arg 是提供给回调函数的应用程序定义的数据
- int rados_aio_create_completion(void *cb_arg, rados_callback_t cb_complete, rados_callback_t cb_safe, rados_completion_t *pc)
构造一个用于异步操作的完成
完成回调和安全回调分别对应于操作被确认和被提交。回调按接收顺序调用,因此安全回调可能在完成回调之前触发,反之亦然。这受 OSD 上日志记录的影响。
TODO: 在其他地方提供更完整的文档(在 RADOS 文档中?)
Note
读取操作只得到一个完成回调。
Note
BUG: 这应该检查 ENOMEM 而不是抛出异常
- 参数:
cb_arg-- 应用程序定义的数据传递给回调函数
cb_complete-- 当操作在所有副本的内存中时调用的函数
cb_safe-- 当操作在所有副本的稳定存储中时调用的函数
pc-- 完成等待的位置
- 返回:
0
- int rados_aio_create_completion2(void *cb_arg, rados_callback_t cb_complete, rados_completion_t *pc)
构造一个用于异步操作的完成
完成回调对应于操作被确认。
Note
BUG: 这应该检查 ENOMEM 而不是抛出异常
- 参数:
cb_arg-- 应用程序定义的数据传递给回调函数
cb_complete-- 当操作在所有副本上提交时调用的函数
pc-- 完成等待的位置
- 返回:
0
- int rados_aio_wait_for_complete(rados_completion_t c)
等待操作完成
这意味着它在所有副本的内存中。
Note
BUG: 这应该是 void
- 参数:
c-- 要等待的操作
- 返回:
0
- int rados_aio_wait_for_safe(rados_completion_t c) __attribute__((deprecated))
等待操作安全
这意味着它在所有副本的稳定存储中。
Note
BUG: 这应该是 void
- 参数:
c-- 要等待的操作
- 返回:
0
- int rados_aio_is_complete(rados_completion_t c)
异步操作是否完成?
警告
这并不表示完成回调已经完成
- 参数:
c-- 要检查的异步操作
- 返回:
检查 c 是否完成
- int rados_aio_is_safe(rados_completion_t c)
异步操作是否安全?
警告
这并不表示安全回调已经完成
- 参数:
c-- 要检查的异步操作
- 返回:
检查 c 是否安全
- int rados_aio_wait_for_complete_and_cb(rados_completion_t c)
等待操作完成和回调完成
这意味着它在所有副本的内存中并且可以读取。
Note
BUG: 这应该是 void
- 参数:
c-- 要等待的操作
- 返回:
0
- int rados_aio_wait_for_safe_and_cb(rados_completion_t c) __attribute__((deprecated))
等待操作安全和回调完成
这意味着它在所有副本的稳定存储中。
Note
BUG: 这应该是 void
- 参数:
c-- 要等待的操作
- 返回:
0
- int rados_aio_is_complete_and_cb(rados_completion_t c)
异步操作和回调是否完成
- 参数:
c-- 要检查的异步操作
- 返回:
检查 c 是否完成
- int rados_aio_is_safe_and_cb(rados_completion_t c)
异步操作是否安全并且回调已完成
- 参数:
c-- 要检查的异步操作
- 返回:
检查 c 是否安全
- int rados_aio_get_return_value(rados_completion_t c)
获取异步操作的返回值
返回值在操作完成或安全时设置,以先发生者为准。
Note
BUG: 当安全消息在完成消息之前收到时,完成回调可能永远不会被调用
- 参数:
c-- 要检查的异步操作
- Pre:
操作是安全或完成的
- 返回:
操作的返回值
- uint64_t rados_aio_get_version(rados_completion_t c)
获取异步操作目标的内部对象版本
返回值在操作完成或安全时设置,以先发生者为准。
Note
BUG: 当安全消息在完成消息之前收到时,完成回调可能永远不会被调用
- 参数:
c-- 要检查的异步操作
- Pre:
操作是安全或完成的
- 返回:
异步操作目标的版本号
- void rados_aio_release(rados_completion_t c)
释放完成
当您不再需要完成时,请调用此函数。如果操作没有被确认和提交,它可能不会立即释放。
- 参数:
c-- 要释放的完成
- int rados_aio_write(rados_ioctx_t io, const char *oid, rados_completion_t completion, const char *buf, size_t len, uint64_t off)
异步写入数据到对象
将写入排队并返回。完成回调的返回值在成功时为 0,在失败时为负错误代码。
- 参数:
io-- 执行写入的上下文
oid-- 对象名称
completion-- 写入安全且完成后要执行的操作
buf-- 要写入的数据
len-- 数据长度,以字节为单位
off-- 对象中开始写入的字节偏移量
- 返回:
成功时为 0,如果 io 上下文指定 snap_seq 不是 LIBRADOS_SNAP_HEAD,则为 -EROFS
- int rados_aio_append(rados_ioctx_t io, const char *oid, rados_completion_t completion, const char *buf, size_t len)
异步追加数据到对象
将追加排队并返回。
完成回调的返回值在成功时为 0,在失败时为负错误代码。
- 参数:
io-- the context to operate in
oid-- the name of the object
completion-- 追加安全且完成后要执行的操作
buf-- 要追加的数据
len-- buf 的长度(以字节为单位)
- 返回:
成功时为 0,如果 io 上下文指定 snap_seq 不是 LIBRADOS_SNAP_HEAD,则为 -EROFS
- int rados_aio_write_full(rados_ioctx_t io, const char *oid, rados_completion_t completion, const char *buf, size_t len)
异步写入整个对象
对象将被所提供的数据填充。如果对象已存在,它会被原子地截断然后写入。将 write_full 排队并返回。
完成回调的返回值在成功时为 0,在失败时为负错误代码。
- 参数:
io-- 执行写入的上下文
oid-- 对象名称
completion-- write_full 安全且完成后要执行的操作
buf-- 要写入的数据
len-- 数据长度,以字节为单位
- 返回:
成功时为 0,如果 io 上下文指定 snap_seq 不是 LIBRADOS_SNAP_HEAD,则为 -EROFS
- int rados_aio_writesame(rados_ioctx_t io, const char *oid, rados_completion_t completion, const char *buf, size_t data_len, size_t write_len, uint64_t off)
异步多次写入相同的缓冲区
将 writesame 排队并返回。
完成回调的返回值在成功时为 0,在失败时为负错误代码。
- 参数:
io-- 执行写入的上下文
oid-- 对象名称
completion-- writesame 安全且完成后要执行的操作
buf-- 要写入的数据
data_len-- 数据长度,以字节为单位
write_len-- 要写入的总字节数
off-- 对象中开始写入的字节偏移量
- 返回:
成功时为 0,如果 io 上下文指定 snap_seq 不是 LIBRADOS_SNAP_HEAD,则为 -EROFS
- int rados_aio_remove(rados_ioctx_t io, const char *oid, rados_completion_t completion)
异步删除对象
将删除排队并返回。
完成回调的返回值在成功时为 0,在失败时为负错误代码。
- 参数:
io-- the context to operate in
oid-- the name of the object
completion-- 删除安全且完成后要执行的操作
- 返回:
成功时为 0,如果 io 上下文指定 snap_seq 不是 LIBRADOS_SNAP_HEAD,则为 -EROFS
- int rados_aio_read(rados_ioctx_t io, const char *oid, rados_completion_t completion, char *buf, size_t len, uint64_t off)
异步从对象读取数据
io 上下文决定从哪个快照读取(如果已通过 rados_ioctx_snap_set_read() 设置了快照)。
完成的返回值在成功时为读取的字节数,在失败时为负错误代码。
Note
只会调用完成回调。
- 参数:
io-- 执行读取的上下文
oid-- 要读取的对象名称
completion-- 读取完成后要执行的操作
buf-- where to store the results
len-- 要读取的字节数
off-- 对象中要开始读取的偏移量
- 返回:
0 on success, negative error code on failure
- int rados_aio_flush(rados_ioctx_t io)
等待 io 上下文中的所有待处理写入操作都安全
这不等同于对所有写入完成调用 rados_aio_wait_for_safe(),因为此操作还会等待相关的回调完成。
Note
BUG: 始终返回 0,应该是 void 或接受超时
- 参数:
io-- 要刷新的上下文
- 返回:
0 on success, negative error code on failure
- int rados_aio_flush_async(rados_ioctx_t io, rados_completion_t completion)
安排一个回调,当当前所有待处理的 aio 写入操作都安全时触发。这是 rados_aio_flush() 的非阻塞版本。
- 参数:
io-- 要刷新的上下文
completion-- 写入操作安全时要执行的操作
- 返回:
0 on success, negative error code on failure
- int rados_aio_stat(rados_ioctx_t io, const char *o, rados_completion_t completion, uint64_t *psize, time_t *pmtime)
异步获取对象统计信息(大小/修改时间)
- 参数:
io-- ioctx
o-- object name
completion-- 统计信息完成时要执行的操作
psize-- where to store object size
pmtime-- where to store modification time
- 返回:
0 on success, negative error code on failure
- int rados_aio_stat2(rados_ioctx_t io, const char *o, rados_completion_t completion, uint64_t *psize, struct timespec *pmtime)
- int rados_aio_cmpext(rados_ioctx_t io, const char *o, rados_completion_t completion, const char *cmp_buf, size_t cmp_len, uint64_t off)
异步比较磁盘上的对象范围与缓冲区
- 参数:
io-- 执行比较的上下文
o-- 要比较的对象名称
completion-- 比较完成后要执行的操作
cmp_buf-- 包含要与对象内容进行比较的字节的缓冲区
cmp_len-- 要比较的长度,同时也是 cmp_buf 的大小(以字节为单位)
off-- 对象中开始比较的字节偏移量
- 返回:
成功时为 0,负错误代码失败,(-MAX_ERRNO - mismatch_off) 在不匹配的情况下
- int rados_aio_cancel(rados_ioctx_t io, rados_completion_t completion)
取消异步操作
- 参数:
io-- ioctx
completion-- 完成句柄
- 返回:
0 on success, negative error code on failure
- int rados_aio_exec(rados_ioctx_t io, const char *o, rados_completion_t completion, const char *cls, const char *method, const char *in_buf, size_t in_len, char *buf, size_t out_len)
异步在对象上执行 OSD 类方法
The OSD has a plugin mechanism for performing complicated operations on an object atomically. These plugins are called classes. This function allows librados users to call the custom methods. The input and output formats are defined by the class. Classes in ceph.git can be found in src/cls subdirectories
- 参数:
io-- the context in which to call the method
o-- 对象名称
completion-- 方法完成时要执行的操作
cls-- the name of the class
method-- the name of the method
in_buf-- where to find input
in_len-- length of in_buf in bytes
buf-- where to store output
out_len-- length of buf in bytes
- 返回:
0 on success, negative error code on failure
观察/通知
观察/通知是一种协议,有助于客户端之间进行通信。它可用于同步客户端状态。所需的所有内容是一个众所周知的对象名称(例如,rbd 使用图像的头部对象)。
观察者注册对对象的兴趣,并接收该对象上的所有通知。通知尝试与所有观察一个对象的客户端进行通信,并在通知器上阻塞,直到每个客户端响应或达到超时。
更多细节请参阅 rados_watch() 和 rados_notify()。
- typedef void (*rados_watchcb_t)(uint8_t opcode, uint64_t ver, void *arg)
当在观察对象上收到通知时激活的回调。
Note
BUG: opcode 是一个内部细节,不应该公开
Note
BUG: ver 未使用
- 参数 opcode:
未定义
- 参数 ver:
观察对象的版本
- 参数 arg:
应用程序特定数据
- typedef void (*rados_watchcb2_t)(void *arg, uint64_t notify_id, uint64_t handle, uint64_t notifier_id, void *data, size_t data_len)
当在观察对象上收到通知时激活的回调。
- 参数 arg:
通过 rados_watch2() 提供的不透明用户定义值
- 参数 notify_id:
通知事件的 ID
- 参数 handle:
我们正在通知的观察者句柄
- 参数 notifier_id:
通知的唯一客户端 ID
- 参数 data:
来自通知器的有效负载
- 参数 data_len:
有效负载缓冲区的长度
- typedef void (*rados_watcherrcb_t)(void *pre, uint64_t cookie, int err)
当遇到观察会话错误时激活的回调。这可能发生在对象在集群中的位置发生移动、而我们未能在新位置注册观察时,也可能发生在我们与对象所在 OSD 的连接中断、因而可能错过了通知事件时。
- 参数 pre:
通过 rados_watch2() 提供的不透明用户定义值
- 参数 cookie:
分配给观察会话的内部 ID
- 参数 err:
错误代码
- int rados_watch(rados_ioctx_t io, const char *o, uint64_t ver, uint64_t *cookie, rados_watchcb_t watchcb, void *arg) __attribute__((deprecated))
注册对对象的兴趣
观察操作将客户端注册为对对象通知感兴趣。OSD 跟踪持久化存储上的观察,因此它们通过正常的恢复过程在集群更改时得到保留。如果客户端失去对观察对象的 primary OSD 的连接,则观察将在 30 秒后删除。当建立新的连接或放置组切换 OSD 时,观察将自动重新建立。
Note
BUG: librados 应提供一种方法,使观察者能够注意到连接重置
Note
BUG: ver 参数不起作用,并且永远不会返回 -ERANGE(请参阅 URL tracker.ceph.com/issues/2592)
- 参数:
io-- the pool the object is in
o-- 要观察的对象
ver-- 预期的对象版本
cookie-- 存储分配给此观察的内部 ID
watchcb-- 当在此对象上收到通知时要执行的操作
arg-- 传递给 watchcb 调用时的应用程序定义数据
- 返回:
0 on success, negative error code on failure
- 返回:
-ERANGE if the version of the object is greater than ver
- int rados_watch2(rados_ioctx_t io, const char *o, uint64_t *cookie, rados_watchcb2_t watchcb, rados_watcherrcb_t watcherrcb, void *arg)
注册对对象的兴趣
观察操作将客户端注册为对对象通知感兴趣。OSD 跟踪持久化存储上的观察,因此它们通过正常的恢复过程在集群更改时得到保留。如果客户端失去对观察对象的 primary OSD 的连接,则观察将在配置的 osd_client_watch_timeout 秒后删除。当建立新的连接或放置组切换 OSD 时,观察将自动重新建立。
- 参数:
io-- the pool the object is in
o-- 要观察的对象
cookie-- 存储分配给此观察的内部 ID
watchcb-- 当在此对象上收到通知时要执行的操作
watcherrcb-- 观察会话遇到错误时要执行的操作
arg-- 传递给回调的不透明值
- 返回:
0 on success, negative error code on failure
- int rados_watch3(rados_ioctx_t io, const char *o, uint64_t *cookie, rados_watchcb2_t watchcb, rados_watcherrcb_t watcherrcb, uint32_t timeout, void *arg)
注册对对象的兴趣
A watch operation registers the client as being interested in notifications on an object. OSDs keep track of watches on persistent storage, so they are preserved across cluster changes by the normal recovery process. Watches are automatically reestablished when a new connection is made, or a placement group switches OSDs.
- 参数:
io-- the pool the object is in
o-- 要观察的对象
cookie-- 存储分配给此观察的内部 ID
watchcb-- 当在此对象上收到通知时要执行的操作
watcherrcb-- 观察会话遇到错误时要执行的操作
timeout -- how many seconds the connection will keep after disconnection
arg-- 传递给回调的不透明值
- 返回:
0 on success, negative error code on failure
- int rados_aio_watch(rados_ioctx_t io, const char *o, rados_completion_t completion, uint64_t *handle, rados_watchcb2_t watchcb, rados_watcherrcb_t watcherrcb, void *arg)
Asynchronous register an interest in an object
观察操作将客户端注册为对对象通知感兴趣。OSD 跟踪持久化存储上的观察,因此它们通过正常的恢复过程在集群更改时得到保留。如果客户端失去对观察对象的 primary OSD 的连接,则观察将在 30 秒后删除。当建立新的连接或放置组切换 OSD 时,观察将自动重新建立。
- 参数:
io-- the pool the object is in
o-- 要观察的对象
completion-- what to do when operation has been attempted
handle-- 存储分配给此观察的内部 ID
watchcb-- 当在此对象上收到通知时要执行的操作
watcherrcb-- 观察会话遇到错误时要执行的操作
arg-- 传递给回调的不透明值
- 返回:
0 on success, negative error code on failure
- int rados_aio_watch2(rados_ioctx_t io, const char *o, rados_completion_t completion, uint64_t *handle, rados_watchcb2_t watchcb, rados_watcherrcb_t watcherrcb, uint32_t timeout, void *arg)
Asynchronous register an interest in an object
A watch operation registers the client as being interested in notifications on an object. OSDs keep track of watches on persistent storage, so they are preserved across cluster changes by the normal recovery process. If the client loses its connection to the primary OSD for a watched object, the watch will be removed after the number of seconds that configured in timeout parameter. Watches are automatically reestablished when a new connection is made, or a placement group switches OSDs.
- 参数:
io-- the pool the object is in
o-- 要观察的对象
completion-- what to do when operation has been attempted
handle-- 存储分配给此观察的内部 ID
watchcb-- 当在此对象上收到通知时要执行的操作
watcherrcb-- 观察会话遇到错误时要执行的操作
timeout -- how many seconds the connection will keep after disconnection
arg-- 传递给回调的不透明值
- 返回:
0 on success, negative error code on failure
- int rados_watch_check(rados_ioctx_t io, uint64_t cookie)
检查观察的状态
返回自上次确认观察以来的毫秒数。或者,如果有错误,则返回该错误。
如果出现错误,观察不再有效,并且应该使用 rados_unwatch2() 销毁。如果用户仍然对对象感兴趣,则应使用 rados_watch2() 创建新的观察。
- 参数:
io-- the pool the object is in
cookie-- 观察句柄
- 返回:
成功时毫秒数,失败时负错误代码
- int rados_unwatch(rados_ioctx_t io, const char *o, uint64_t cookie) __attribute__((deprecated))
取消对对象的兴趣
完成后,将不再向我们发送此观察的通知。应该调用此函数来清理不需要的观察者。
- 参数:
io-- the pool the object is in
o-- 要观察的对象名称(忽略)
cookie-- 要取消注册的观察
- 返回:
0 on success, negative error code on failure
- int rados_unwatch2(rados_ioctx_t io, uint64_t cookie)
取消对对象的兴趣
完成后,将不再向我们发送此观察的通知。应该调用此函数来清理不需要的观察者。
- 参数:
io-- the pool the object is in
cookie-- 要取消注册的观察
- 返回:
0 on success, negative error code on failure
- int rados_aio_unwatch(rados_ioctx_t io, uint64_t cookie, rados_completion_t completion)
异步取消对对象的兴趣
完成后,将不再向我们发送此观察的通知。应该调用此函数来清理不需要的观察者。
- 参数:
io-- the pool the object is in
completion-- what to do when operation has been attempted
cookie-- 要取消注册的观察
- 返回:
0 on success, negative error code on failure
- int rados_notify(rados_ioctx_t io, const char *o, uint64_t ver, const char *buf, int buf_len) __attribute__((deprecated))
同步通知观察者有关对象
这会阻塞,直到该对象的所有观察者都已收到通知并作出反应,或者达到超时。
Note
BUG: 超时无法通过 C API 更改
Note
BUG: rados_watchcb_t 中的缓冲区列表无法访问
- 参数:
io-- the pool the object is in
o-- the name of the object
ver-- 已弃用 - 仅传递零
buf-- 要发送给观察者的数据
buf_len-- length of buf in bytes
- 返回:
0 on success, negative error code on failure
- int rados_aio_notify(rados_ioctx_t io, const char *o, rados_completion_t completion, const char *buf, int buf_len, uint64_t timeout_ms, char **reply_buffer, size_t *reply_buffer_len)
同步通知观察者有关对象
这会阻塞,直到所有观察者的对象都已接收并作出反应,或者达到超时。
回复缓冲区是可选的。如果指定,客户端将收到一个编码缓冲区,其中包含确认通知的客户端的 ID 以及它们的通知确认有效负载(如果有)。超时的客户端不包括在内。即使那些不包含通知确认有效负载的客户端也包括在列表中,但它们的有效负载长度为 0。格式:
le32 num_acks { le64 gid global ID for the client (for client.1234 that’s 1234) le64 cookie cookie for the client le32 buflen length of reply message buffer u8 * buflen payload } * num_acks le32 num_timeouts { le64 gid global id for the client le64 cookie cookie for the client } * num_timeouts
注意:如果通过同一个客户端注册了多个观察者,则可能存在多个相同的 gid 实例。
注意:使用 rados_buffer_free() 释放缓冲区,当用户完成使用它时。
注意:由于结果缓冲区包括超时的客户端,即使 rados_notify() 返回错误代码(如 -ETIMEDOUT),它也会设置。
- 参数:
io-- the pool the object is in
completion-- what to do when operation has been attempted
o-- the name of the object
buf-- 要发送给观察者的数据
buf_len-- length of buf in bytes
timeout_ms-- 通知超时(以毫秒为单位)
reply_buffer-- reply_buffer 指针的指针(使用 rados_buffer_free() 释放)
reply_buffer_len-- reply_buffer 长度的指针
- 返回:
0 on success, negative error code on failure
- int rados_notify2(rados_ioctx_t io, const char *o, const char *buf, int buf_len, uint64_t timeout_ms, char **reply_buffer, size_t *reply_buffer_len)
- int rados_decode_notify_response(char *reply_buffer, size_t reply_buffer_len, struct notify_ack_t **acks, size_t *nr_acks, struct notify_timeout_t **timeouts, size_t *nr_timeouts)
解码通知响应
将 rados_aio_notify() 调用解码为 acks 和超时数组。
- 参数:
reply_buffer-- 来自 rados_aio_notify() 调用的缓冲区
reply_buffer_len-- 回复缓冲区的长度
acks-- 指向 struct notify_ack_t 指针的指针
nr_acks -- pointer to ack count
timeouts-- 指向 struct notify_timeout_t 指针的指针
nr_timeouts-- 指向超时计数的指针
- 返回:
成功为 0
- void rados_free_notify_response(struct notify_ack_t *acks, size_t nr_acks, struct notify_timeout_t *timeouts)
释放由 rados_decode_notify_response() 调用分配的缓冲区
释放由 librados 调用(如 rados_mon_command())分配的内存
- 参数:
acks--notify_ack_t struct (from rados_decode_notify_response())
nr_acks -- ack count
timeouts--notify_timeout_t struct (from rados_decode_notify_response())
- int rados_notify_ack(rados_ioctx_t io, const char *o, uint64_t notify_id, uint64_t cookie, const char *buf, int buf_len)
Acknowledge receipt of a notify
- 参数:
io-- the pool the object is in
o-- the name of the object
notify_id -- the notify_id we got on the watchcb2_t callback
cookie -- the watcher handle
buf -- payload to return to notifier (optional)
buf_len -- payload length
- 返回:
成功为 0
- int rados_watch_flush(rados_t cluster)
Flush watch/notify callbacks
This call will block until all pending watch/notify callbacks have been executed and the queue is empty. It should usually be called after shutting down any watches before shutting down the ioctx or librados to ensure that any callbacks do not misuse the ioctx (for example by calling rados_notify_ack after the ioctx has been destroyed).
- 参数:
cluster -- the cluster handle
- int rados_aio_watch_flush(rados_t cluster, rados_completion_t completion)
Flush watch/notify callbacks
This call is non-blocking; the completion will be called once all pending watch/notify callbacks have been executed and the queue is empty. It should usually be called after shutting down any watches before shutting down the ioctx or librados to ensure that any callbacks do not misuse the ioctx (for example by calling rados_notify_ack after the ioctx has been destroyed).
- 参数:
cluster -- the cluster handle
completion-- what to do when operation has been attempted
Mon/OSD/PG Commands
These interfaces send commands relating to the monitor, OSD, or PGs.
- typedef void (*rados_log_callback_t)(void *arg, const char *line, const char *who, uint64_t sec, uint64_t nsec, uint64_t seq, const char *level, const char *msg)
- typedef void (*rados_log_callback2_t)(void *arg, const char *line, const char *channel, const char *who, const char *name, uint64_t sec, uint64_t nsec, uint64_t seq, const char *level, const char *msg)
- int rados_mon_command(rados_t cluster, const char **cmd, size_t cmdlen, const char *inbuf, size_t inbuflen, char **outbuf, size_t *outbuflen, char **outs, size_t *outslen)
Send monitor command.
The result buffers are allocated on the heap; the caller is expected to release that memory with rados_buffer_free(). The buffer and length pointers can all be NULL, in which case they are not filled in.
Note
Takes command string in carefully-formatted JSON; must match defined commands, types, etc.
- 参数:
cluster-- cluster handle
cmd -- an array of char *’s representing the command
cmdlen -- count of valid entries in cmd
inbuf -- any bulk input data (crush map, etc.)
inbuflen -- input buffer length
outbuf -- double pointer to output buffer
outbuflen -- pointer to output buffer length
outs -- double pointer to status string
outslen -- pointer to status string length
- 返回:
0 on success, negative error code on failure
- int rados_mgr_command(rados_t cluster, const char **cmd, size_t cmdlen, const char *inbuf, size_t inbuflen, char **outbuf, size_t *outbuflen, char **outs, size_t *outslen)
Send ceph-mgr command.
The result buffers are allocated on the heap; the caller is expected to release that memory with rados_buffer_free(). The buffer and length pointers can all be NULL, in which case they are not filled in.
Note
Takes command string in carefully-formatted JSON; must match defined commands, types, etc.
- 参数:
cluster-- cluster handle
cmd -- an array of char *’s representing the command
cmdlen -- count of valid entries in cmd
inbuf -- any bulk input data (crush map, etc.)
inbuflen -- input buffer length
outbuf -- double pointer to output buffer
outbuflen -- pointer to output buffer length
outs -- double pointer to status string
outslen -- pointer to status string length
- 返回:
0 on success, negative error code on failure
- int rados_mgr_command_target(rados_t cluster, const char *name, const char **cmd, size_t cmdlen, const char *inbuf, size_t inbuflen, char **outbuf, size_t *outbuflen, char **outs, size_t *outslen)
Send ceph-mgr tell command.
The result buffers are allocated on the heap; the caller is expected to release that memory with rados_buffer_free(). The buffer and length pointers can all be NULL, in which case they are not filled in.
Note
Takes command string in carefully-formatted JSON; must match defined commands, types, etc.
- 参数:
cluster-- cluster handle
name -- mgr name to target
cmd -- an array of char *’s representing the command
cmdlen -- count of valid entries in cmd
inbuf -- any bulk input data (crush map, etc.)
inbuflen -- input buffer length
outbuf -- double pointer to output buffer
outbuflen -- pointer to output buffer length
outs -- double pointer to status string
outslen -- pointer to status string length
- 返回:
0 on success, negative error code on failure
- int rados_mon_command_target(rados_t cluster, const char *name, const char **cmd, size_t cmdlen, const char *inbuf, size_t inbuflen, char **outbuf, size_t *outbuflen, char **outs, size_t *outslen)
Send monitor command to a specific monitor.
The result buffers are allocated on the heap; the caller is expected to release that memory with rados_buffer_free(). The buffer and length pointers can all be NULL, in which case they are not filled in.
Note
Takes command string in carefully-formatted JSON; must match defined commands, types, etc.
- 参数:
cluster-- cluster handle
name -- target monitor’s name
cmd -- an array of char *’s representing the command
cmdlen -- count of valid entries in cmd
inbuf -- any bulk input data (crush map, etc.)
inbuflen -- input buffer length
outbuf -- double pointer to output buffer
outbuflen -- pointer to output buffer length
outs -- double pointer to status string
outslen -- pointer to status string length
- 返回:
0 on success, negative error code on failure
- void rados_buffer_free(char *buf)
free a rados-allocated buffer
Release memory allocated by librados calls like rados_mon_command().
- 参数:
buf -- buffer pointer
- int rados_osd_command(rados_t cluster, int osdid, const char **cmd, size_t cmdlen, const char *inbuf, size_t inbuflen, char **outbuf, size_t *outbuflen, char **outs, size_t *outslen)
- int rados_pg_command(rados_t cluster, const char *pgstr, const char **cmd, size_t cmdlen, const char *inbuf, size_t inbuflen, char **outbuf, size_t *outbuflen, char **outs, size_t *outslen)
- int rados_monitor_log(rados_t cluster, const char *level, rados_log_callback_t cb, void *arg)
- int rados_monitor_log2(rados_t cluster, const char *level, rados_log_callback2_t cb, void *arg)
- int rados_service_register(rados_t cluster, const char *service, const char *daemon, const char *metadata_dict)
register daemon instance for a service
Register us as a daemon providing a particular service. We identify the service (e.g., ‘rgw’) and our instance name (e.g., ‘rgw.$hostname’). The metadata is a map of keys and values with arbitrary static metadata for this instance. The encoding is a series of NULL-terminated strings, alternating key names and values, terminating with an empty key name. For example, “foo\0bar\0this\0that\0\0” is the dict {foo=bar,this=that}.
For the lifetime of the librados instance, regular beacons will be sent to the cluster to maintain our registration in the service map.
- 参数:
cluster -- handle
service -- service name
daemon -- daemon instance name
metadata_dict -- static daemon metadata dict
- int rados_service_update_status(rados_t cluster, const char *status_dict)
update daemon status
Update our mutable status information in the service map.
The status dict is encoded the same way the daemon metadata is encoded for rados_service_register. For example, “foo\0bar\0this\0that\0\0” is {foo=bar,this=that}.
- 参数:
cluster -- rados cluster handle
status_dict -- status dict
Setup and Teardown
These are the first and last functions that should be called when using librados.
- int rados_create(rados_t *cluster, const char *const id)
Create a handle for communicating with a RADOS cluster.
Ceph environment variables are read when this is called, so if $CEPH_ARGS specifies everything you need to connect, no further configuration is necessary.
- 参数:
cluster -- where to store the handle
id -- the user to connect as (i.e. admin, not client.admin)
- 返回:
0 on success, negative error code on failure
- int rados_create2(rados_t *pcluster, const char *const clustername, const char *const name, uint64_t flags)
Extended version of rados_create.
Like rados_create, but 1) don’t assume ‘client.’+id; allow full specification of name 2) allow specification of cluster name 3) flags for future expansion
- int rados_create_with_context(rados_t *cluster, rados_config_t cct)
Initialize a cluster handle from an existing configuration.
Share configuration state with another rados_t instance.
- 参数:
cluster -- where to store the handle
cct -- the existing configuration to use
- 返回:
0 on success, negative error code on failure
- int rados_ping_monitor(rados_t cluster, const char *mon_id, char **outstr, size_t *outstrlen)
Ping the monitor with ID mon_id, storing the resulting reply in buf (if specified) with a maximum size of len.
The result buffer is allocated on the heap; the caller is expected to release that memory with rados_buffer_free(). The buffer and length pointers can be NULL, in which case they are not filled in.
- 参数:
cluster-- cluster handle
mon_id -- [in] ID of the monitor to ping
outstr -- [out] double pointer with the resulting reply
outstrlen -- [out] pointer with the size of the reply in outstr
- int rados_connect(rados_t cluster)
Connect to the cluster.
Note
BUG: Before calling this, calling a function that communicates with the cluster will crash.
- 参数:
cluster -- The cluster to connect to.
- Pre:
The cluster handle is configured with at least a monitor address. If cephx is enabled, a client name and secret must also be set.
- Post:
If this succeeds, any function in librados may be used
- 返回:
0 on success, negative error code on failure
- void rados_shutdown(rados_t cluster)
Disconnects from the cluster.
For clean up, this is only necessary after rados_connect() has succeeded.
警告
This does not guarantee any asynchronous writes have completed. To do that, you must call rados_aio_flush() on all open io contexts.
警告
We implicitly call rados_watch_flush() on shutdown. If there are watches being used, this should be done explicitly before destroying the relevant IoCtx. We do it here as a safety measure.
- 参数:
cluster-- the cluster to shutdown
- Post:
the cluster handle cannot be used again
配置
These functions read and update Ceph configuration for a cluster handle. Any configuration changes must be done before connecting to the cluster.
Options that librados users might want to set include:
mon_host
auth_supported
key, keyfile, or keyring when using cephx
log_file, log_to_stderr, err_to_stderr, and log_to_syslog
debug_rados, debug_objecter, debug_monc, debug_auth, or debug_ms
See docs.ceph.com for information about available configuration options
- int rados_conf_read_file(rados_t cluster, const char *path)
Configure the cluster handle using a Ceph config file
If path is NULL, the default locations are searched, and the first found is used. The locations are:
$CEPH_CONF (environment variable)
/etc/ceph/ceph.conf
~/.ceph/config
ceph.conf (in the current working directory)
- 参数:
cluster -- cluster handle to configure
path -- path to a Ceph configuration file
- Pre:
rados_connect() has not been called on the cluster handle
- 返回:
0 on success, negative error code on failure
- int rados_conf_parse_argv(rados_t cluster, int argc, const char **argv)
Configure the cluster handle with command line arguments
argv can contain any common Ceph command line option, including any configuration parameter prefixed by ‘--’ and replacing spaces with dashes or underscores. For example, the following options are equivalent:
--mon-host 10.0.0.1:6789
--mon_host 10.0.0.1:6789
-m 10.0.0.1:6789
- 参数:
cluster -- cluster handle to configure
argc -- number of arguments in argv
argv -- arguments to parse
- Pre:
rados_connect() has not been called on the cluster handle
- 返回:
0 on success, negative error code on failure
- int rados_conf_parse_argv_remainder(rados_t cluster, int argc, const char **argv, const char **remargv)
Configure the cluster handle with command line arguments, returning any remainders. Same as rados_conf_parse_argv(), except for the extra remargv argument, which holds the returned unrecognized arguments.
- 参数:
cluster -- cluster handle to configure
argc -- number of arguments in argv
argv -- arguments to parse
remargv -- char* array for returned unrecognized arguments
- Pre:
rados_connect() has not been called on the cluster handle
- 返回:
0 on success, negative error code on failure
- int rados_conf_parse_env(rados_t cluster, const char *var)
Configure the cluster handle based on an environment variable
The contents of the environment variable are parsed as if they were Ceph command line options. If var is NULL, the CEPH_ARGS environment variable is used.
Note
BUG: this is not threadsafe - it uses a static buffer
- 参数:
cluster -- cluster handle to configure
var -- name of the environment variable to read
- Pre:
rados_connect() has not been called on the cluster handle
- 返回:
0 on success, negative error code on failure
- int rados_conf_set(rados_t 集群, const char *选项, const char *值)
Set a configuration option
- 参数:
集群 -- cluster handle to configure
选项 -- option to set
值-- 选项的值
- Pre:
rados_connect() has not been called on the cluster handle
- 返回:
0 on success, negative error code on failure
- 返回:
-ENOENT when the option is not a Ceph configuration option
- int rados_conf_get(rados_t 集群, const char *选项, char *buf, size_t len)
Get the value of a configuration option
- 参数:
集群 -- configuration to read
选项 -- which option to read
buf -- where to write the configuration value
len -- the size of buf in bytes
- 返回:
0 on success, negative error code on failure
- 返回:
-ENAMETOOLONG if the buffer is too short to contain the requested value
Pools
RADOS pools are separate namespaces for objects. Pools may have different crush rules associated with them, so they could have differing replication levels or placement strategies. RADOS permissions are also tied to pools - users can have different read, write, and execute permissions on a per-pool basis.
- int rados_pool_list(rados_t 集群, char *buf, size_t len)
List pools
Gets a list of pool names as NULL-terminated strings. The pool names will be placed in the supplied buffer one after another. After the last pool name, there will be two 0 bytes in a row.
If len is too short to fit all the pool name entries we need, we will fill as much as we can.
Buf may be null to determine the buffer size needed to list all pools.
- 参数:
集群-- cluster handle
buf -- output buffer
len -- output buffer length
- 返回:
length of the buffer we would need to list all pools
- int rados_inconsistent_pg_list(rados_t 集群, int64_t pool, char *buf, size_t len)
List inconsistent placement groups of the given pool
Gets a list of inconsistent placement groups as NULL-terminated strings. The placement group names will be placed in the supplied buffer one after another. After the last name, there will be two 0 bytes in a row.
If len is too short to fit all the placement group entries we need, we will fill as much as we can.
- 参数:
集群-- cluster handle
pool -- pool ID
buf -- output buffer
len -- output buffer length
- 返回:
length of the buffer we would need to list all inconsistent placement groups
- rados_config_t rados_cct(rados_t 集群)
Get a configuration handle for a rados cluster handle
This handle is valid only as long as the cluster handle is valid.
- 参数:
集群-- cluster handle
- 返回:
config handle for this cluster
- uint64_t rados_get_instance_id(rados_t 集群)
Get a global id for current instance
This id is a unique representation of current connection to the cluster
- 参数:
集群-- cluster handle
- 返回:
instance global id
- int rados_get_min_compatible_osd(rados_t 集群, int8_t *require_osd_release)
Gets the minimum compatible OSD version
- 参数:
集群-- cluster handle
require_osd_release -- [out] minimum compatible OSD version based upon the current features
- 返回:
0 on success, negative error code on failure
- int rados_get_min_compatible_client(rados_t 集群, int8_t *min_compat_client, int8_t *require_min_compat_client)
Gets the minimum compatible client version
- 参数:
集群-- cluster handle
min_compat_client -- [out] minimum compatible client version based upon the current features
require_min_compat_client -- [out] required minimum client version based upon explicit setting
- 返回:
0 on success, negative error code on failure
- int rados_ioctx_create(rados_t 集群, const char *pool_name, rados_ioctx_t *ioctx)
Create an io context
The io context allows you to perform operations within a particular pool. For more details see rados_ioctx_t.
- 参数:
集群 -- which cluster the pool is in
pool_name -- name of the pool
ioctx -- where to store the io context
- 返回:
0 on success, negative error code on failure
- int rados_ioctx_create2(rados_t 集群, int64_t pool_id, rados_ioctx_t *ioctx)
- void rados_ioctx_destroy(rados_ioctx_t io)
The opposite of rados_ioctx_create
This just tells librados that you no longer need to use the io context. It may not be freed immediately if there are pending asynchronous requests on it, but you should not use an io context again after calling this function on it.
警告
This does not guarantee any asynchronous writes have completed. You must call rados_aio_flush() on the io context before destroying it to do that.
警告
If this ioctx is used by rados_watch, the caller needs to be sure that all registered watches are disconnected via rados_unwatch() and that rados_watch_flush() is called. This ensures that a racing watch callback does not make use of a destroyed ioctx.
- 参数:
io -- the io context to dispose of
- rados_config_t rados_ioctx_cct(rados_ioctx_t io)
Get configuration handle for a pool handle
- 参数:
io -- pool handle
- 返回:
rados_config_t for this cluster
- rados_t rados_ioctx_get_cluster(rados_ioctx_t io)
Get the cluster handle used by this rados_ioctx_t. Note that this is a weak reference, and should not be destroyed via rados_shutdown().
- 参数:
io -- the io context
- 返回:
the cluster handle for this io context
- int rados_ioctx_pool_stat(rados_ioctx_t io, struct rados_pool_stat_t *stats)
Get pool usage statistics
Fills in a rados_pool_stat_t after querying the cluster.
- 参数:
io -- determines which pool to query
stats-- where to store the results
- 返回:
0 on success, negative error code on failure
- int64_t rados_pool_lookup(rados_t 集群, const char *pool_name)
Get the id of a pool
- 参数:
集群 -- which cluster the pool is in
pool_name -- which pool to look up
- 返回:
id of the pool
- 返回:
-ENOENT if the pool is not found
- int rados_pool_reverse_lookup(rados_t 集群, int64_t id, char *buf, size_t maxlen)
Get the name of a pool
- 参数:
集群 -- which cluster the pool is in
id -- the id of the pool
buf -- where to store the pool name
maxlen -- size of buffer where name will be stored
- 返回:
length of string stored, or -ERANGE if buffer too small
- int rados_pool_create(rados_t 集群, const char *pool_name)
Create a pool with default settings
The default crush rule is rule 0.
- 参数:
集群 -- the cluster in which the pool will be created
pool_name -- the name of the new pool
- 返回:
0 on success, negative error code on failure
- int rados_pool_create_with_auid(rados_t 集群, const char *pool_name, uint64_t auid) __attribute__((deprecated))
Create a pool owned by a specific auid.
DEPRECATED: auid support has been removed, and this call will be removed in a future release.
- 参数:
集群 -- the cluster in which the pool will be created
pool_name -- the name of the new pool
auid -- the id of the owner of the new pool
- 返回:
0 on success, negative error code on failure
- int rados_pool_create_with_crush_rule(rados_t 集群, const char *pool_name, uint8_t crush_rule_num)
Create a pool with a specific CRUSH rule
- 参数:
集群 -- the cluster in which the pool will be created
pool_name -- the name of the new pool
crush_rule_num -- which rule to use for placement in the new pool
- 返回:
0 on success, negative error code on failure
- int rados_pool_create_with_all(rados_t 集群, const char *pool_name, uint64_t auid, uint8_t crush_rule_num) __attribute__((deprecated))
Create a pool with a specific CRUSH rule and auid
DEPRECATED: auid support has been removed and this call will be removed in a future release.
This is a combination of rados_pool_create_with_crush_rule() and rados_pool_create_with_auid().
- 参数:
集群 -- the cluster in which the pool will be created
pool_name -- the name of the new pool
crush_rule_num -- which rule to use for placement in the new pool
auid -- the id of the owner of the new pool
- 返回:
0 on success, negative error code on failure
- int rados_pool_get_base_tier(rados_t 集群, int64_t pool, int64_t *base_tier)
Returns the pool that is the base tier for this pool.
The return value is the ID of the pool that should be used to read from/write to. If tiering is not set up for the pool, returns pool.
- 参数:
集群 -- the cluster the pool is in
pool -- ID of the pool to query
base_tier -- [out] base tier, or pool if tiering is not configured
- 返回:
0 on success, negative error code on failure
- int rados_pool_delete(rados_t 集群, const char *pool_name)
Delete a pool and all data inside it
The pool is removed from the cluster immediately, but the actual data is deleted in the background.
- 参数:
集群 -- the cluster the pool is in
pool_name -- which pool to delete
- 返回:
0 on success, negative error code on failure
- int rados_ioctx_pool_set_auid(rados_ioctx_t io, uint64_t auid) __attribute__((deprecated))
Attempt to change an io context’s associated auid “owner”
DEPRECATED: auid support has been removed and this call has no effect.
Requires that you have write permission on both the current and new auid.
- 参数:
io -- reference to the pool to change.
auid -- the auid you wish the io to have.
- 返回:
0 on success, negative error code on failure
- int rados_ioctx_pool_get_auid(rados_ioctx_t io, uint64_t *auid) __attribute__((deprecated))
Get the auid of a pool
DEPRECATED: auid support has been removed and this call always reports CEPH_AUTH_UID_DEFAULT (-1).
- 参数:
io -- pool to query
auid -- where to store the auid
- 返回:
0 on success, negative error code on failure
- int rados_ioctx_pool_requires_alignment(rados_ioctx_t io) __attribute__((deprecated))
- int rados_ioctx_pool_requires_alignment2(rados_ioctx_t io, int *req)
Test whether the specified pool requires alignment or not.
- 参数:
io -- pool to query
req -- 1 if alignment is supported, 0 if not.
- 返回:
0 on success, negative error code on failure
- uint64_t rados_ioctx_pool_required_alignment(rados_ioctx_t io) __attribute__((deprecated))
- int rados_ioctx_pool_required_alignment2(rados_ioctx_t io, uint64_t *alignment)
Get the alignment flavor of a pool
- 参数:
io -- pool to query
alignment -- where to store the alignment flavor
- 返回:
0 on success, negative error code on failure
- int64_t rados_ioctx_get_id(rados_ioctx_t io)
Get the pool id of the io context
- 参数:
io -- the io context to query
- 返回:
the id of the pool the io context uses
- int rados_ioctx_get_pool_name(rados_ioctx_t io, char *buf, unsigned maxlen)
Get the pool name of the io context
- 参数:
io -- the io context to query
buf -- pointer to buffer where name will be stored
maxlen -- size of buffer where name will be stored
- 返回:
length of string stored, or -ERANGE if buffer too small
Object Locators
- void rados_ioctx_locator_set_key(rados_ioctx_t io, const char *key)
Set the key for mapping objects to pgs within an io context.
The key is used instead of the object name to determine which placement groups an object is put in. This affects all subsequent operations of the io context - until a different locator key is set, all objects in this io context will be placed in the same pg.
- 参数:
io -- the io context to change
key -- the key to use as the object locator, or NULL to discard any previously set key
- void rados_ioctx_set_namespace(rados_ioctx_t io, const char *nspace)
Set the namespace for objects within an io context
The namespace specification further refines a pool into different domains. The mapping of objects to pgs is also based on this value.
- 参数:
io -- the io context to change
nspace -- the name to use as the namespace, or NULL to use the default namespace
- int rados_ioctx_get_namespace(rados_ioctx_t io, char *buf, unsigned maxlen)
Get the namespace for objects within the io context
- 参数:
io -- the io context to query
buf -- pointer to buffer where name will be stored
maxlen -- size of buffer where name will be stored
- 返回:
length of string stored, or -ERANGE if buffer too small
列出对象
- int rados_nobjects_list_open(rados_ioctx_t io, rados_list_ctx_t *ctx)
Start listing objects in a pool
- 参数:
io -- the pool to list from
ctx -- the handle to store list context in
- 返回:
0 on success, negative error code on failure
- uint32_t rados_nobjects_list_get_pg_hash_position(rados_list_ctx_t ctx)
Return hash position of iterator, rounded to the current PG
- 参数:
ctx -- iterator marking where you are in the listing
- 返回:
current hash position, rounded to the current pg
- uint32_t rados_nobjects_list_seek(rados_list_ctx_t ctx, uint32_t pos)
Reposition object iterator to a different hash position
- 参数:
ctx -- iterator marking where you are in the listing
pos -- hash position to move to
- 返回:
actual (rounded) position we moved to
- uint32_t rados_nobjects_list_seek_cursor(rados_list_ctx_t ctx, rados_object_list_cursor cursor)
Reposition object iterator to a different position
- 参数:
ctx -- iterator marking where you are in the listing
cursor -- position to move to
- 返回:
rounded position we moved to
- int rados_nobjects_list_get_cursor(rados_list_ctx_t ctx, rados_object_list_cursor *cursor)
Get a cursor for the current position of the object iterator
The returned handle must be released with rados_object_list_cursor_free().
- 参数:
ctx -- iterator marking where you are in the listing
cursor -- where to store cursor
- 返回:
0 on success, negative error code on failure
- int rados_nobjects_list_next(rados_list_ctx_t ctx, const char **entry, const char **key, const char **nspace)
Get the next object name and locator in the pool
*entry and *key are valid until the next call to a rados_nobjects_list_* function.
- 参数:
ctx -- iterator marking where you are in the listing
entry -- where to store the name of the entry
key -- where to store the object locator (set to NULL to ignore)
nspace -- where to store the object namespace (set to NULL to ignore)
- 返回:
0 on success, negative error code on failure
- 返回:
-ENOENT when there are no more objects to list
- int rados_nobjects_list_next2(rados_list_ctx_t ctx, const char **entry, const char **key, const char **nspace, size_t *entry_size, size_t *key_size, size_t *nspace_size)
Get the next object name, locator and their sizes in the pool
The sizes allow listing objects with \0 (the NUL character) in, e.g., entry. It is unusual to see such object names, but a bug in a client has raised the need to handle them as well. *entry and *key are valid until the next call to a rados_nobjects_list_* function.
- 参数:
ctx -- iterator marking where you are in the listing
entry -- where to store the name of the entry
key -- where to store the object locator (set to NULL to ignore)
nspace -- where to store the object namespace (set to NULL to ignore)
entry_size -- where to store the size of name of the entry
key_size -- where to store the size of object locator (set to NULL to ignore)
nspace_size -- where to store the size of object namespace (set to NULL to ignore)
- 返回:
0 on success, negative error code on failure
- 返回:
-ENOENT when there are no more objects to list
- void rados_nobjects_list_close(rados_list_ctx_t ctx)
Close the object listing handle.
This should be called when the handle is no longer needed. The handle should not be used after it has been closed.
- 参数:
ctx -- the handle to close
- rados_object_list_cursor rados_object_list_begin(rados_ioctx_t io)
Get cursor handle pointing to the beginning of a pool.
This is an opaque handle pointing to the start of a pool. It must be released with rados_object_list_cursor_free().
- 参数:
io -- ioctx for the pool
- 返回:
handle for the pool, NULL on error (pool does not exist)
- rados_object_list_cursor rados_object_list_end(rados_ioctx_t io)
Get cursor handle pointing to the end of a pool.
This is an opaque handle pointing to the end of a pool. It must be released with rados_object_list_cursor_free().
- 参数:
io -- ioctx for the pool
- 返回:
handle for the pool, NULL on error (pool does not exist)
- int rados_object_list_is_end(rados_ioctx_t io, rados_object_list_cursor cur)
Check if a cursor has reached the end of a pool
- 参数:
io-- ioctx
cur -- cursor
- 返回:
1 if the cursor has reached the end of the pool, 0 otherwise
- void rados_object_list_cursor_free(rados_ioctx_t io, rados_object_list_cursor cur)
Release a cursor
Release a cursor. The handle may not be used after this point.
- 参数:
io-- ioctx
cur -- cursor
- int rados_object_list_cursor_cmp(rados_ioctx_t io, rados_object_list_cursor lhs, rados_object_list_cursor rhs)
Compare two cursor positions
Compare two cursors, and indicate whether the first cursor precedes, matches, or follows the second.
- 参数:
io-- ioctx
lhs -- first cursor
rhs -- second cursor
- 返回:
-1, 0, or 1 for lhs < rhs, lhs == rhs, or lhs > rhs
- int rados_object_list(rados_ioctx_t io, const rados_object_list_cursor start, const rados_object_list_cursor finish, const size_t result_size, const char *filter_buf, const size_t filter_buf_len, rados_object_list_item *results, rados_object_list_cursor *next)
- 返回:
the number of items set in the results array
- void rados_object_list_free(const size_t result_size, rados_object_list_item *results)
- void rados_object_list_slice(rados_ioctx_t io, const rados_object_list_cursor start, const rados_object_list_cursor finish, const size_t n, const size_t m, rados_object_list_cursor *split_start, rados_object_list_cursor *split_finish)
Obtain cursors delineating a subset of a range. Use this when you want to split up the work of iterating over the global namespace. Expected use case is when you are iterating in parallel, with m workers, and each worker taking an id n.
- 参数:
io-- ioctx
start -- start of the range to be sliced up (inclusive)
finish -- end of the range to be sliced up (exclusive)
n -- which of the m chunks you would like to get cursors for
m -- how many chunks to divide start-finish into
split_start -- cursor populated with start of the subrange (inclusive)
split_finish -- cursor populated with end of the subrange (exclusive)
快照
RADOS snapshots are based upon sequence numbers that form a snapshot context. They are pool-specific. The snapshot context consists of the current snapshot sequence number for a pool, and an array of sequence numbers at which snapshots were taken, in descending order. Whenever a snapshot is created or deleted, the snapshot sequence number for the pool is increased. To add a new snapshot, the new snapshot sequence number must be increased and added to the snapshot context.
There are two ways to manage these snapshot contexts:
within the RADOS cluster These are called pool snapshots, and store the snapshot context in the OSDMap. These represent a snapshot of all the objects in a pool.
within the RADOS clients These are called self-managed snapshots, and push the responsibility for keeping track of the snapshot context to the clients. For every write, the client must send the snapshot context. In librados, this is accomplished with rados_selfmanaged_snap_set_write_ctx(). These are more difficult to manage, but are restricted to specific objects instead of applying to an entire pool.
- int rados_ioctx_snap_create(rados_ioctx_t io, const char *snapname)
Create a pool-wide snapshot
- 参数:
io -- the pool to snapshot
snapname -- the name of the snapshot
- 返回:
0 on success, negative error code on failure
- int rados_ioctx_snap_remove(rados_ioctx_t io, const char *snapname)
Delete a pool snapshot
- 参数:
io -- the pool to delete the snapshot from
snapname -- which snapshot to delete
- 返回:
0 on success, negative error code on failure
- int rados_ioctx_snap_rollback(rados_ioctx_t io, const char *oid, const char *snapname)
Rollback an object to a pool snapshot
The contents of the object will be the same as when the snapshot was taken.
- 参数:
io -- the pool in which the object is stored
oid -- the name of the object to rollback
snapname -- which snapshot to rollback to
- 返回:
0 on success, negative error code on failure
- int rados_rollback(rados_ioctx_t io, const char *oid, const char *snapname) __attribute__((deprecated))
警告
Deprecated: Use rados_ioctx_snap_rollback() instead
- void rados_ioctx_snap_set_read(rados_ioctx_t io, rados_snap_t snap)
Set the snapshot from which reads are performed.
Subsequent reads will return data as it was at the time of that snapshot.
- 参数:
io -- the io context to change
snap -- the id of the snapshot to set, or LIBRADOS_SNAP_HEAD for no snapshot (i.e. normal operation)
- int rados_ioctx_selfmanaged_snap_create(rados_ioctx_t io, rados_snap_t *snapid)
Allocate an ID for a self-managed snapshot
Get a unique ID to put in the snapshot context to create a snapshot. A clone of an object is not created until a write with the new snapshot context is completed.
- 参数:
io -- the pool in which the snapshot will exist
snapid -- where to store the newly allocated snapshot ID
- 返回:
0 on success, negative error code on failure
- void rados_aio_ioctx_selfmanaged_snap_create(rados_ioctx_t io, rados_snap_t *snapid, rados_completion_t completion)
- int rados_ioctx_selfmanaged_snap_remove(rados_ioctx_t io, rados_snap_t snapid)
Remove a self-managed snapshot
This increases the snapshot sequence number, which will cause snapshots to be removed lazily.
- 参数:
io -- the pool in which the snapshot will exist
snapid -- the ID of the self-managed snapshot to remove
- 返回:
0 on success, negative error code on failure
- void rados_aio_ioctx_selfmanaged_snap_remove(rados_ioctx_t io, rados_snap_t snapid, rados_completion_t completion)
- int rados_ioctx_selfmanaged_snap_rollback(rados_ioctx_t io, const char *oid, rados_snap_t snapid)
Rollback an object to a self-managed snapshot
The contents of the object will be the same as when the snapshot was taken.
- 参数:
io -- the pool in which the object is stored
oid -- the name of the object to rollback
snapid -- which snapshot to rollback to
- 返回:
0 on success, negative error code on failure
- int rados_ioctx_selfmanaged_snap_set_write_ctx(rados_ioctx_t io, rados_snap_t seq, rados_snap_t *snaps, int num_snaps)
Set the snapshot context for use when writing to objects
This is stored in the io context, and applies to all future writes.
- 参数:
io -- the io context to change
seq -- the newest snapshot sequence number for the pool
snaps -- array of snapshots sorted by descending id
num_snaps -- how many snapshots are in the snaps array
- 返回:
0 on success, negative error code on failure
- 返回:
-EINVAL if snaps are not in descending order
- int rados_ioctx_snap_list(rados_ioctx_t io, rados_snap_t *snaps, int maxlen)
List all the ids of pool snapshots
If the output array does not have enough space to fit all the snapshots, -ERANGE is returned and the caller should retry with a larger array.
- 参数:
io -- the pool to read from
snaps-- where to store the results
maxlen -- the number of rados_snap_t that fit in the snaps array
- 返回:
number of snapshots on success, negative error code on failure
- 返回:
-ERANGE is returned if the snaps array is too short
- int rados_ioctx_snap_lookup(rados_ioctx_t io, const char *name, rados_snap_t *id)
Get the id of a pool snapshot
- 参数:
io -- the pool to read from
name -- the snapshot to find
id-- 存储结果的位置
- 返回:
0 on success, negative error code on failure
- int rados_ioctx_snap_get_name(rados_ioctx_t io, rados_snap_t id, char *name, int maxlen)
Get the name of a pool snapshot
- 参数:
io -- the pool to read from
id -- the snapshot to find
name-- 存储结果的位置
maxlen -- the size of the name array
- 返回:
0 on success, negative error code on failure
- 返回:
-ERANGE if the name array is too small
- int rados_ioctx_snap_get_stamp(rados_ioctx_t io, rados_snap_t id, time_t *t)
Find when a pool snapshot occurred
- 参数:
io -- the pool the snapshot was taken in
id -- the snapshot to lookup
t-- 存储结果的位置
- 返回:
0 on success, negative error code on failure
Synchronous I/O
Writes are replicated to a number of OSDs based on the configuration of the pool they are in. These write functions block until data is in memory on all replicas of the object they’re writing to - they are equivalent to doing the corresponding asynchronous write, and then calling rados_aio_wait_for_complete(). For greater data safety, use the asynchronous functions and rados_aio_wait_for_safe().
- uint64_t rados_get_last_version(rados_ioctx_t io)
Return the version of the last object read or written to.
This exposes the internal version number of the last object read or written via this io context
- 参数:
io -- the io context to check
- 返回:
last read or written object version
- int rados_write(rados_ioctx_t io, const char *oid, const char *buf, size_t len, uint64_t off)
Write len bytes from buf into the oid object, starting at offset off. The value of len must be <= UINT_MAX/2.
Note
This will never return a positive value not equal to len.
- 参数:
io-- 执行写入的上下文
oid-- 对象名称
buf-- 要写入的数据
len-- 数据长度,以字节为单位
off-- 对象中开始写入的字节偏移量
- 返回:
0 on success, negative error code on failure
- int rados_write_full(rados_ioctx_t io, const char *oid, const char *buf, size_t len)
Write len bytes from buf into the oid object. The value of len must be <= UINT_MAX/2.
The object is filled with the provided data. If the object exists, it is atomically truncated and then written.
- 参数:
io-- 执行写入的上下文
oid-- 对象名称
buf-- 要写入的数据
len-- 数据长度,以字节为单位
- 返回:
0 on success, negative error code on failure
- int rados_writesame(rados_ioctx_t io, const char *oid, const char *buf, size_t data_len, size_t write_len, uint64_t off)
Write the same data_len bytes from buf multiple times into the oid object. write_len bytes are written in total, which must be a multiple of data_len. The values of write_len and data_len must be <= UINT_MAX/2.
- 参数:
io-- 执行写入的上下文
oid-- 对象名称
buf-- 要写入的数据
data_len-- 数据长度,以字节为单位
write_len-- 要写入的总字节数
off-- 对象中开始写入的字节偏移量
- 返回:
0 on success, negative error code on failure
- int rados_append(rados_ioctx_t io, const char *oid, const char *buf, size_t len)
Append len bytes from buf into the oid object. The value of len must be <= UINT_MAX/2.
- 参数:
io-- the context to operate in
oid-- the name of the object
buf-- 要追加的数据
len-- buf 的长度(以字节为单位)
- 返回:
0 on success, negative error code on failure
- int rados_read(rados_ioctx_t io, const char *oid, char *buf, size_t len, uint64_t off)
Read data from an object
io 上下文确定要读取的快照,如果 rados_ioctx_snap_set_read() 设置了任何快照,则读取该快照。完成回调的返回值在成功时为读取的字节数,在失败时为负错误代码。
- 参数:
io-- 执行读取的上下文
oid-- 要读取的对象名称
buf-- where to store the results
len-- 要读取的字节数
off-- 对象中要开始读取的偏移量
- 返回:
number of bytes read on success, negative error code on failure
- int rados_checksum(rados_ioctx_t io, const char *oid, rados_checksum_type_t type, const char *init_value, size_t init_value_len, size_t len, uint64_t off, size_t chunk_size, char *pchecksum, size_t checksum_len)
Compute checksum from object data
The io context determines the snapshot to checksum, if any was set by rados_ioctx_snap_set_read(). The length of the init_value and resulting checksum are dependent upon the checksum type:
XXHASH64: le64 XXHASH32: le32 CRC32C: le32
The checksum result is encoded the following manner:
le32 num_checksum_chunks { leXX checksum for chunk (where XX = appropriate size for the checksum type) } * num_checksum_chunks
- 参数:
io -- the context in which to perform the checksum
oid -- the name of the object to checksum
type -- the checksum algorithm to utilize
init_value -- the init value for the algorithm
init_value_len -- the length of the init value
len -- the number of bytes to checksum
off -- the offset to start checksumming in the object
chunk_size -- optional length-aligned chunk size for checksums
pchecksum -- where to store the checksum result
checksum_len -- the number of bytes available for the result
- 返回:
negative error code on failure
- int rados_remove(rados_ioctx_t io, const char *oid)
Delete an object
Note
这不会删除对象的任何快照。
- 参数:
io -- the pool to delete the object from
oid -- the name of the object to delete
- 返回:
0 on success, negative error code on failure
- int rados_trunc(rados_ioctx_t io, const char *oid, uint64_t size)
Resize an object
If this enlarges the object, the new area is logically filled with zeroes. If this shrinks the object, the excess data is removed.
- 参数:
io -- the context in which to truncate
oid-- the name of the object
size -- the new size of the object in bytes
- 返回:
0 on success, negative error code on failure
- int rados_cmpext(rados_ioctx_t io, const char *o, const char *cmp_buf, size_t cmp_len, uint64_t off)
Compare an on-disk object range with a buffer
- 参数:
io-- 执行比较的上下文
o-- 对象名称
cmp_buf-- 包含要与对象内容进行比较的字节的缓冲区
cmp_len-- 要比较的长度,同时也是 cmp_buf 的大小(以字节为单位)
off-- 对象中开始比较的字节偏移量
- 返回:
成功时为 0,失败时为负错误代码,不匹配时为 (-MAX_ERRNO - mismatch_off)
Xattrs
扩展属性存储为 OSD 上表示对象的文件的扩展属性。因此,它们具有与底层文件系统相同的限制。在 ext4 上,这意味着存储在 xattrs 中的总数据量不能超过 4KB。
- int rados_getxattr(rados_ioctx_t io, const char *o, const char *name, char *buf, size_t len)
Get the value of an extended attribute on an object.
- 参数:
io-- 读取属性的上下文
o-- 对象名称
name-- 要读取的扩展属性
buf-- 存储结果的位置
len-- buf 的长度,以字节为单位
- 返回:
成功时为 xattr 值的长度,失败时为负错误代码
- int rados_setxattr(rados_ioctx_t io, const char *o, const char *name, const char *buf, size_t len)
Set an extended attribute on an object.
- 参数:
io-- 设置 xattr 的上下文
o-- 对象名称
name-- 要设置的扩展属性
buf-- 要存储在 xattr 中的内容
len-- buf 中的字节数
- 返回:
0 on success, negative error code on failure
- int rados_rmxattr(rados_ioctx_t io, const char *o, const char *name)
Delete an extended attribute from an object.
- 参数:
io-- 删除 xattr 的上下文
o-- the name of the object
name-- 要删除的 xattr
- 返回:
0 on success, negative error code on failure
- int rados_getxattrs(rados_ioctx_t io, const char *oid, rados_xattrs_iter_t *iter)
Start iterating over xattrs on an object.
- 参数:
io-- 列出 xattrs 的上下文
oid-- 对象名称
iter-- where to store the iterator
- Post:
iter is a valid iterator
- 返回:
0 on success, negative error code on failure
- int rados_getxattrs_next(rados_xattrs_iter_t iter, const char **name, const char **val, size_t *len)
获取对象上的下一个 xattr。
- 参数:
iter-- iterator to advance
name-- 存储下一个 xattr 的名称
val-- 存储下一个 xattr 的值
len-- 下一个 xattr 值的字节数
- Pre:
iter is a valid iterator
- Post:
名称是下一个 xattr 的 NULL 终止名称,val 包含 xattr 的值,其长度为 len。如果已达到列表的末尾,则 name 和 val 为 NULL,len 为 0。
- 返回:
0 on success, negative error code on failure
- void rados_getxattrs_end(rados_xattrs_iter_t iter)
关闭 xattr 迭代器。
iter should not be used after this is called.
- 参数:
iter-- the iterator to close
异步 Xattrs
扩展属性存储为 OSD 上表示对象的文件的扩展属性。因此,它们具有与底层文件系统相同的限制。在 ext4 上,这意味着存储在 xattrs 中的总数据量不能超过 4KB。
- int rados_aio_getxattr(rados_ioctx_t io, const char *o, rados_completion_t completion, const char *name, char *buf, size_t len)
异步获取对象上的扩展属性值。
- 参数:
io-- 读取属性的上下文
o-- 对象名称
completion-- getxattr 完成时要执行的操作
name-- 要读取的扩展属性
buf-- 存储结果的位置
len-- buf 的长度,以字节为单位
- 返回:
成功时为 xattr 值的长度,失败时为负错误代码
- int rados_aio_setxattr(rados_ioctx_t io, const char *o, rados_completion_t completion, const char *name, const char *buf, size_t len)
异步设置对象上的扩展属性。
- 参数:
io-- 设置 xattr 的上下文
o-- 对象名称
completion-- setxattr 完成时要执行的操作
name-- 要设置的扩展属性
buf-- 要存储在 xattr 中的内容
len-- buf 中的字节数
- 返回:
0 on success, negative error code on failure
- int rados_aio_rmxattr(rados_ioctx_t io, const char *o, rados_completion_t completion, const char *name)
异步从对象删除扩展属性。
- 参数:
io-- 删除 xattr 的上下文
o-- the name of the object
completion -- what to do when the rmxattr completes
name-- 要删除的 xattr
- 返回:
0 on success, negative error code on failure
- int rados_aio_getxattrs(rados_ioctx_t io, const char *oid, rados_completion_t completion, rados_xattrs_iter_t *iter)
异步开始迭代对象上的 xattrs。
- 参数:
io-- 列出 xattrs 的上下文
oid-- 对象名称
completion-- getxattrs 完成时要执行的操作
iter-- where to store the iterator
- Post:
iter is a valid iterator
- 返回:
0 on success, negative error code on failure
提示
- int rados_set_alloc_hint(rados_ioctx_t io, const char *o, uint64_t expected_object_size, uint64_t expected_write_size)
为对象设置分配提示
这是一个建议操作,它将始终成功(就像它被提交时设置了 LIBRADOS_OP_FLAG_FAILOK 标志)并且不保证在后台执行任何操作。
- 参数:
io-- the pool the object is in
o-- the name of the object
expected_object_size-- 对象的预期大小,以字节为单位
expected_write_size-- 写入对象的预期大小,以字节为单位
- 返回:
0 on success, negative error code on failure
- int rados_set_alloc_hint2(rados_ioctx_t io, const char *o, uint64_t expected_object_size, uint64_t expected_write_size, uint32_t flags)
为对象设置分配提示
这是一个建议操作,它将始终成功(就像它被提交时设置了 LIBRADOS_OP_FLAG_FAILOK 标志)并且不保证在后台执行任何操作。
- 参数:
io-- the pool the object is in
o-- the name of the object
expected_object_size-- 对象的预期大小,以字节为单位
expected_write_size-- 写入对象的预期大小,以字节为单位
flags-- 关于未来 IO 模式的提示
- 返回:
0 on success, negative error code on failure
对象操作
单个 rados 操作可以对一个对象执行多个操作。整个操作将成功或失败,并且不会显示任何部分结果。
操作可以是读取操作,它可以返回数据,也可以是写入操作,写入操作不能返回数据。写入操作的效果将一次性应用和可见,因此设置 xattr 然后检查其值的操作将看不到更新后的值。
- rados_write_op_t rados_create_write_op(void)
Create a new rados_write_op_t write operation. This will store all actions to be performed atomically. You must call rados_release_write_op when you are finished with it.
Note
写入操作的所有权会传递给执行该操作的函数,因此同一个
rados_write_op_t
实例在被执行后无法再次使用。
- 返回:
非空值,内存分配错误时为 NULL。
- void rados_release_write_op(rados_write_op_t write_op)
Free a rados_write_op_t, must be called when you’re done with it.
- 参数:
write_op -- operation to deallocate, created with rados_create_write_op
- void rados_write_op_set_flags(rados_write_op_t write_op, int flags)
Set flags for the last operation added to this write_op. At least one op must have been added to the write_op.
- 参数:
write_op-- operation to add this action to
flags-- 查看 librados.h 中以 LIBRADOS_OP_FLAG 开头的常量
- void rados_write_op_assert_exists(rados_write_op_t write_op)
Ensure that the object exists before writing
- 参数:
write_op-- operation to add this action to
- void rados_write_op_assert_version(rados_write_op_t write_op, uint64_t ver)
Ensure that the object exists and that its internal version number is equal to “ver” before writing. “ver” should be a version number previously obtained with rados_get_last_version().
If the object’s version is greater than the asserted version then rados_write_op_operate will return -ERANGE instead of executing the op.
If the object’s version is less than the asserted version then rados_write_op_operate will return -EOVERFLOW instead of executing the op.
- 参数:
write_op-- operation to add this action to
ver-- 对象版本号
- void rados_write_op_cmpext(rados_write_op_t write_op, const char *cmp_buf, size_t cmp_len, uint64_t off, int *prval)
确保给定的对象范围(extent)满足比较。
- 参数:
write_op-- operation to add this action to
cmp_buf-- 包含要与对象内容进行比较的字节的缓冲区
cmp_len-- 要比较的长度,同时也是 cmp_buf 的大小,以字节为单位
off-- 对象内开始比较的字节偏移量
prval-- 比较的返回结果:成功时为 0,失败时为负的错误代码,不匹配时为 (-MAX_ERRNO - mismatch_off)
- void rados_write_op_cmpxattr(rados_write_op_t write_op, const char *name, uint8_t comparison_operator, const char *value, size_t value_len)
Ensure that given xattr satisfies comparison. If the comparison is not satisfied, the return code of the operation will be -ECANCELED
- 参数:
write_op-- operation to add this action to
name-- 要查找的 xattr 名称
comparison_operator-- 目前未记录,查找 librados.h 中的 LIBRADOS_CMPXATTR_OP_EQ
value-- 用于比较实际 xattr 值的缓冲区
value_len-- 用于比较实际 xattr 值的缓冲区的长度
- void rados_write_op_omap_cmp(rados_write_op_t write_op, const char *key, uint8_t comparison_operator, const char *val, size_t val_len, int *prval)
Ensure that an omap value satisfies a comparison, with the supplied value on the right hand side (i.e. for OP_LT, the comparison is actual_value < value).
- 参数:
write_op-- operation to add this action to
key-- which omap value to compare
comparison_operator-- one of LIBRADOS_CMPXATTR_OP_EQ, LIBRADOS_CMPXATTR_OP_LT, or LIBRADOS_CMPXATTR_OP_GT
val-- value to compare with
val_len-- length of value in bytes
prval-- where to store the return value from this action
- void rados_write_op_omap_cmp2(rados_write_op_t write_op, const char *key, uint8_t comparison_operator, const char *val, size_t key_len, size_t val_len, int *prval)
Ensure that an omap value satisfies a comparison, with the supplied value on the right hand side (i.e. for OP_LT, the comparison is actual_value < value).
- 参数:
write_op-- operation to add this action to
key-- which omap value to compare
comparison_operator-- one of LIBRADOS_CMPXATTR_OP_EQ, LIBRADOS_CMPXATTR_OP_LT, or LIBRADOS_CMPXATTR_OP_GT
val-- value to compare with
key_len -- length of key in bytes
val_len-- length of value in bytes
prval-- where to store the return value from this action
- void rados_write_op_setxattr(rados_write_op_t write_op, const char *name, const char *value, size_t value_len)
Set an xattr
- 参数:
write_op-- operation to add this action to
name -- name of the xattr
value -- buffer to set xattr to
value_len -- length of buffer to set xattr to
- void rados_write_op_rmxattr(rados_write_op_t write_op, const char *name)
Remove an xattr
- 参数:
write_op-- operation to add this action to
name -- name of the xattr to remove
- void rados_write_op_create(rados_write_op_t write_op, int exclusive, const char *category)
Create the object
- 参数:
write_op-- operation to add this action to
exclusive -- set to either LIBRADOS_CREATE_EXCLUSIVE or LIBRADOS_CREATE_IDEMPOTENT; LIBRADOS_CREATE_EXCLUSIVE will error if the object already exists.
category -- category string (DEPRECATED, HAS NO EFFECT)
- void rados_write_op_write(rados_write_op_t write_op, const char *buffer, size_t len, uint64_t offset)
Write to offset
- 参数:
write_op-- operation to add this action to
offset -- offset to write to
buffer -- bytes to write
len -- length of buffer
- void rados_write_op_write_full(rados_write_op_t write_op, const char *buffer, size_t len)
Write whole object, atomically replacing it.
- 参数:
write_op-- operation to add this action to
buffer -- bytes to write
len -- length of buffer
- void rados_write_op_writesame(rados_write_op_t write_op, const char *buffer, size_t data_len, size_t write_len, uint64_t offset)
Write the same buffer multiple times
- 参数:
write_op-- operation to add this action to
buffer -- bytes to write
data_len -- length of buffer
write_len -- total number of bytes to write, as a multiple of
data_len
offset -- offset to write to
- void rados_write_op_append(rados_write_op_t write_op, const char *buffer, size_t len)
Append to end of object.
- 参数:
write_op-- operation to add this action to
buffer -- bytes to write
len -- length of buffer
- void rados_write_op_remove(rados_write_op_t write_op)
Remove object
- 参数:
write_op-- operation to add this action to
- void rados_write_op_truncate(rados_write_op_t write_op, uint64_t offset)
Truncate an object
- 参数:
write_op-- operation to add this action to
offset -- Offset to truncate to
- void rados_write_op_zero(rados_write_op_t write_op, uint64_t offset, uint64_t len)
Zero part of an object
- 参数:
write_op-- operation to add this action to
offset -- Offset to zero
len -- length to zero
- void rados_write_op_exec(rados_write_op_t write_op, const char *cls, const char *method, const char *in_buf, size_t in_len, int *prval)
Execute an OSD class method on an object See rados_exec() for general description.
- 参数:
write_op-- operation to add this action to
cls-- the name of the class
method-- the name of the method
in_buf-- where to find input
in_len-- length of in_buf in bytes
prval-- where to store the return value from the method
- void rados_write_op_omap_set(rados_write_op_t write_op, char const *const *keys, char const *const *vals, const size_t *lens, size_t num)
在对象上设置键/值对
- 参数:
write_op-- operation to add this action to
keys-- 代表要设置的键的 null 终止 char 数组数组
vals-- 要设置的值的指针数组
lens-- 每个值的长度数组
num-- 要设置的键/值对的数量
- void rados_write_op_omap_set2(rados_write_op_t write_op, char const *const *keys, char const *const *vals, const size_t *key_lens, const size_t *val_lens, size_t num)
在对象上设置键/值对
- 参数:
write_op-- operation to add this action to
keys-- 代表要设置的键的 null 终止 char 数组数组
vals-- 要设置的值的指针数组
key_lens -- array of lengths corresponding to each key
val_lens-- 每个值的长度数组
num-- 要设置的键/值对的数量
- void rados_write_op_omap_rm_keys(rados_write_op_t write_op, char const *const *keys, size_t keys_len)
从对象中删除键/值对
- 参数:
write_op-- operation to add this action to
keys-- 代表要删除的键的 null 终止 char 数组数组
keys_len-- 要删除的键/值对的数量
- void rados_write_op_omap_rm_keys2(rados_write_op_t write_op, char const *const *keys, const size_t *key_lens, size_t keys_len)
从对象中删除键/值对
- 参数:
write_op-- operation to add this action to
keys-- char 数组数组,代表要删除的键
key_lens-- size_t 值数组,代表每个键的长度
keys_len-- 要删除的键/值对的数量
- void rados_write_op_omap_rm_range2(rados_write_op_t write_op, const char *key_begin, size_t key_begin_len, const char *key_end, size_t key_end_len)
从键范围 [key_begin, key_end) 的对象中删除键/值对
- 参数:
write_op-- operation to add this action to
key_begin-- 要删除的键范围的下限
key_begin_len-- key_begin 的长度
key_end-- 要删除的键范围的上限
key_end_len-- key_end 的长度
- void rados_write_op_omap_clear(rados_write_op_t write_op)
从对象中删除所有键/值对
- 参数:
write_op-- operation to add this action to
- void rados_write_op_set_alloc_hint(rados_write_op_t write_op, uint64_t expected_object_size, uint64_t expected_write_size)
为对象设置分配提示
- 参数:
write_op-- operation to add this action to
expected_object_size-- 对象的预期大小,以字节为单位
expected_write_size-- 写入对象的预期大小,以字节为单位
- void rados_write_op_set_alloc_hint2(rados_write_op_t write_op, uint64_t expected_object_size, uint64_t expected_write_size, uint32_t flags)
为对象设置分配提示
- 参数:
write_op-- operation to add this action to
expected_object_size-- 对象的预期大小,以字节为单位
expected_write_size-- 写入对象的预期大小,以字节为单位
flags-- 关于未来 IO 模式的提示
- int rados_write_op_operate(rados_write_op_t write_op, rados_ioctx_t io, const char *oid, time_t *mtime, int flags)
同步执行写入操作
- 参数:
write_op-- operation to perform
io-- the ioctx that the object is in
oid-- the object id
mtime-- 设置 mtime 的时间,NULL 为当前时间
flags-- flags to apply to the entire operation (LIBRADOS_OPERATION_*)
- int rados_write_op_operate2(rados_write_op_t write_op, rados_ioctx_t io, const char *oid, struct timespec *mtime, int flags)
同步执行写入操作
- 参数:
write_op-- operation to perform
io-- the ioctx that the object is in
oid-- the object id
mtime-- 设置 mtime 的时间,NULL 为当前时间
flags-- flags to apply to the entire operation (LIBRADOS_OPERATION_*)
- int rados_aio_write_op_operate(rados_write_op_t write_op, rados_ioctx_t io, rados_completion_t completion, const char *oid, time_t *mtime, int flags)
异步执行写入操作
- 参数:
write_op-- operation to perform
io-- the ioctx that the object is in
completion-- what to do when operation has been attempted
oid-- the object id
mtime-- 设置 mtime 的时间,NULL 为当前时间
flags-- flags to apply to the entire operation (LIBRADOS_OPERATION_*)
- int rados_aio_write_op_operate2(rados_write_op_t write_op, rados_ioctx_t io, rados_completion_t completion, const char *oid, struct timespec *mtime, int flags)
异步执行写入操作
- 参数:
write_op-- operation to perform
io-- the ioctx that the object is in
completion-- what to do when operation has been attempted
oid-- the object id
mtime-- 设置 mtime 的时间,NULL 为当前时间
flags-- flags to apply to the entire operation (LIBRADOS_OPERATION_*)
- rados_read_op_t rados_create_read_op(void)
创建一个新的 rados_read_op_t 读取操作。这将存储所有要原子执行的操作。当您完成使用它时,您必须调用 rados_release_read_op。
Note
读取操作的所有权会传递给执行该操作的函数,因此同一个
rados_read_op_t
实例在被执行后无法再次使用。
- 返回:
非空值,内存分配错误时为 NULL。
- void rados_release_read_op(rados_read_op_t read_op)
释放 rados_read_op_t,完成使用时必须调用。
- 参数:
read_op-- 要释放的操作,使用 rados_create_read_op 创建
- void rados_read_op_set_flags(rados_read_op_t read_op, int flags)
为此 read_op 添加的最后一个操作设置标志。至少有一个 op 必须已添加到 read_op。
- 参数:
read_op-- operation to add this action to
flags-- 查看 librados.h 中以 LIBRADOS_OP_FLAG 开头的常量
- void rados_read_op_assert_exists(rados_read_op_t read_op)
在读取之前确保对象存在
- 参数:
read_op-- operation to add this action to
- void rados_read_op_assert_version(rados_read_op_t read_op, uint64_t ver)
在读取之前确保对象存在并且其内部版本号等于“ver”。“ver”应该是之前使用 rados_get_last_version() 获取的版本号。
如果对象的版本大于断言的版本,则 rados_read_op_operate 将返回 -ERANGE 而不是执行操作。
如果对象的版本小于断言的版本,则 rados_read_op_operate 将返回 -EOVERFLOW 而不是执行操作。
- 参数:
read_op-- operation to add this action to
ver-- 对象版本号
- void rados_read_op_cmpext(rados_read_op_t read_op, const char *cmp_buf, size_t cmp_len, uint64_t off, int *prval)
确保给定的对象范围(extent)满足比较。
- 参数:
read_op-- operation to add this action to
cmp_buf-- 包含要与对象内容进行比较的字节的缓冲区
cmp_len-- 要比较的长度,同时也是 cmp_buf 的大小,以字节为单位
off-- 对象内开始比较的字节偏移量
prval-- 比较的返回结果:成功时为 0,失败时为负的错误代码,不匹配时为 (-MAX_ERRNO - mismatch_off)
- void rados_read_op_cmpxattr(rados_read_op_t read_op, const char *name, uint8_t comparison_operator, const char *value, size_t value_len)
确保给定的 xattr 满足比较。如果比较不满足,操作的返回代码将为 -ECANCELED
- 参数:
read_op-- operation to add this action to
name-- 要查找的 xattr 名称
comparison_operator-- 目前未记录,查找 librados.h 中的 LIBRADOS_CMPXATTR_OP_EQ
value-- 用于比较实际 xattr 值的缓冲区
value_len-- 用于比较实际 xattr 值的缓冲区的长度
- void rados_read_op_getxattrs(rados_read_op_t read_op, rados_xattrs_iter_t *iter, int *prval)
Start iterating over xattrs on an object.
- 参数:
read_op-- operation to add this action to
iter-- where to store the iterator
prval-- where to store the return value of this action
- void rados_read_op_omap_cmp(rados_read_op_t read_op, const char *key, uint8_t comparison_operator, const char *val, size_t val_len, int *prval)
Ensure that an omap value satisfies a comparison, with the supplied value on the right hand side (i.e. for OP_LT, the comparison is actual_value < value).
- 参数:
read_op-- operation to add this action to
key-- which omap value to compare
comparison_operator-- one of LIBRADOS_CMPXATTR_OP_EQ, LIBRADOS_CMPXATTR_OP_LT, or LIBRADOS_CMPXATTR_OP_GT
val-- value to compare with
val_len-- length of value in bytes
prval-- where to store the return value from this action
- void rados_read_op_omap_cmp2(rados_read_op_t read_op, const char *key, uint8_t comparison_operator, const char *val, size_t key_len, size_t val_len, int *prval)
Ensure that an omap value satisfies a comparison, with the supplied value on the right hand side (i.e. for OP_LT, the comparison is actual_value < value).
- 参数:
read_op-- operation to add this action to
key-- which omap value to compare
comparison_operator-- one of LIBRADOS_CMPXATTR_OP_EQ, LIBRADOS_CMPXATTR_OP_LT, or LIBRADOS_CMPXATTR_OP_GT
val-- value to compare with
key_len -- length of key in bytes
val_len-- length of value in bytes
prval-- where to store the return value from this action
- void rados_read_op_stat(rados_read_op_t read_op, uint64_t *psize, time_t *pmtime, int *prval)
Get object size and mtime
- 参数:
read_op-- operation to add this action to
psize-- where to store object size
pmtime-- where to store modification time
prval-- where to store the return value of this action
- void rados_read_op_stat2(rados_read_op_t read_op, uint64_t *psize, struct timespec *pmtime, int *prval)
- void rados_read_op_read(rados_read_op_t read_op, uint64_t offset, size_t len, char *buffer, size_t *bytes_read, int *prval)
Read bytes from offset into buffer.
bytes_read will be filled with the number of bytes read if successful. A short read can only occur if the read reaches the end of the object.
- 参数:
read_op-- operation to add this action to
offset -- offset to read from
len -- length of buffer
buffer -- where to put the data
bytes_read -- where to store the number of bytes read by this action
prval-- where to store the return value of this action
- void rados_read_op_checksum(rados_read_op_t read_op, rados_checksum_type_t type, const char *init_value, size_t init_value_len, uint64_t offset, size_t len, size_t chunk_size, char *pchecksum, size_t checksum_len, int *prval)
Compute checksum from object data
- 参数:
read_op-- operation to add this action to
type -- the checksum algorithm to utilize
init_value -- the init value for the algorithm
init_value_len -- the length of the init value
offset -- the offset to start checksumming in the object
len -- the number of bytes to checksum
chunk_size -- optional length-aligned chunk size for checksums
pchecksum -- where to store the checksum result for this action
checksum_len -- the number of bytes available for the result
prval -- where to store the return value for this action
- void rados_read_op_exec(rados_read_op_t read_op, const char *cls, const char *method, const char *in_buf, size_t in_len, char **out_buf, size_t *out_len, int *prval)
Execute an OSD class method on an object See rados_exec() for general description.
The output buffer is allocated on the heap; the caller is expected to release that memory with rados_buffer_free(). The buffer and length pointers can all be NULL, in which case they are not filled in.
- 参数:
read_op-- operation to add this action to
cls-- the name of the class
method-- the name of the method
in_buf-- where to find input
in_len-- length of in_buf in bytes
out_buf-- where to put librados-allocated output buffer
out_len-- length of out_buf in bytes
prval-- where to store the return value from the method
- void rados_read_op_exec_user_buf(rados_read_op_t read_op, const char *cls, const char *method, const char *in_buf, size_t in_len, char *out_buf, size_t out_len, size_t *used_len, int *prval)
Execute an OSD class method on an object See rados_exec() for general description.
If the output buffer is too small, prval will be set to -ERANGE and used_len will be 0.
- 参数:
read_op-- operation to add this action to
cls-- the name of the class
method-- the name of the method
in_buf-- where to find input
in_len-- length of in_buf in bytes
out_buf-- user-provided buffer to read into
out_len-- length of out_buf in bytes
used_len-- where to store the number of bytes read into out_buf
prval-- where to store the return value from the method
- void rados_read_op_omap_get_vals(rados_read_op_t read_op, const char *start_after, const char *filter_prefix, uint64_t max_return, rados_omap_iter_t *iter, int *prval) __attribute__((deprecated))
Start iterating over key/value pairs on an object.
They will be returned sorted by key.
- 参数:
read_op-- operation to add this action to
start_after-- list keys starting after start_after
filter_prefix-- list only keys beginning with filter_prefix
max_return-- list no more than max_return key/value pairs
iter-- where to store the iterator
prval-- where to store the return value from this action
- void rados_read_op_omap_get_vals2(rados_read_op_t read_op, const char *start_after, const char *filter_prefix, uint64_t max_return, rados_omap_iter_t *iter, unsigned char *pmore, int *prval)
Start iterating over key/value pairs on an object.
They will be returned sorted by key.
- 参数:
read_op-- operation to add this action to
start_after-- list keys starting after start_after
filter_prefix-- list only keys beginning with filter_prefix
max_return-- list no more than max_return key/value pairs
iter-- where to store the iterator
pmore-- flag indicating whether there are more keys to fetch
prval-- where to store the return value from this action
- void rados_read_op_omap_get_keys(rados_read_op_t read_op, const char *start_after, uint64_t max_return, rados_omap_iter_t *iter, int *prval) __attribute__((deprecated))
Start iterating over keys on an object.
They will be returned sorted by key, and the iterator will fill in NULL for all values if specified.
- 参数:
read_op-- operation to add this action to
start_after-- list keys starting after start_after
max_return-- list no more than max_return keys
iter-- where to store the iterator
prval-- where to store the return value from this action
- void rados_read_op_omap_get_keys2(rados_read_op_t read_op, const char *start_after, uint64_t max_return, rados_omap_iter_t *iter, unsigned char *pmore, int *prval)
Start iterating over keys on an object.
They will be returned sorted by key, and the iterator will fill in NULL for all values if specified.
- 参数:
read_op-- operation to add this action to
start_after-- list keys starting after start_after
max_return-- list no more than max_return keys
iter-- where to store the iterator
pmore-- flag indicating whether there are more keys to fetch
prval-- where to store the return value from this action
- void rados_read_op_omap_get_vals_by_keys(rados_read_op_t read_op, char const *const *keys, size_t keys_len, rados_omap_iter_t *iter, int *prval)
Start iterating over specific key/value pairs
They will be returned sorted by key.
- 参数:
read_op-- operation to add this action to
keys-- array of pointers to null-terminated keys to get
keys_len-- the number of strings in keys
iter-- where to store the iterator
prval-- where to store the return value from this action
- void rados_read_op_omap_get_vals_by_keys2(rados_read_op_t read_op, char const *const *keys, size_t num_keys, const size_t *key_lens, rados_omap_iter_t *iter, int *prval)
Start iterating over specific key/value pairs
They will be returned sorted by key.
- 参数:
read_op-- operation to add this action to
keys-- array of pointers to keys to get
num_keys-- the number of strings in keys
key_lens-- array of size_t’s describing each key len (in bytes)
iter-- where to store the iterator
prval-- where to store the return value from this action
- int rados_read_op_operate(rados_read_op_t read_op, rados_ioctx_t io, const char *oid, int flags)
Perform a read operation synchronously
- 参数:
read_op-- operation to perform
io-- the ioctx that the object is in
oid-- the object id
flags-- flags to apply to the entire operation (LIBRADOS_OPERATION_*)
- int rados_aio_read_op_operate(rados_read_op_t read_op, rados_ioctx_t io, rados_completion_t completion, const char *oid, int flags)
Perform a read operation asynchronously
- 参数:
read_op-- operation to perform
io-- the ioctx that the object is in
completion-- what to do when operation has been attempted
oid-- the object id
flags-- flags to apply to the entire operation (LIBRADOS_OPERATION_*)
Defines
- CEPH_OSD_TMAP_HDR
- CEPH_OSD_TMAP_SET
- CEPH_OSD_TMAP_CREATE
- CEPH_OSD_TMAP_RM
- LIBRADOS_VER_MAJOR
- LIBRADOS_VER_MINOR
- LIBRADOS_VER_EXTRA
- LIBRADOS_VERSION(maj, min, extra)
- LIBRADOS_VERSION_CODE
- LIBRADOS_SUPPORTS_WATCH
- LIBRADOS_SUPPORTS_SERVICES
- LIBRADOS_SUPPORTS_GETADDRS
- LIBRADOS_SUPPORTS_APP_METADATA
- LIBRADOS_LOCK_FLAG_RENEW
- LIBRADOS_LOCK_FLAG_MAY_RENEW
- LIBRADOS_LOCK_FLAG_MUST_RENEW
- LIBRADOS_CREATE_EXCLUSIVE
- LIBRADOS_CREATE_IDEMPOTENT
- CEPH_RADOS_API
- LIBRADOS_SNAP_HEAD
- LIBRADOS_SNAP_DIR
- VOIDPTR_RADOS_T
Typedefs
- typedef void *rados_t
A handle for interacting with a RADOS cluster. It encapsulates all RADOS client configuration, including username, key for authentication, logging, and debugging. Talking to different clusters — or to the same cluster with different users — requires different cluster handles.
- typedef void *rados_config_t
A handle for the ceph configuration context for the rados_t cluster instance. This can be used to share configuration context/state (e.g., logging configuration) between librados instances.
警告
The config context does not have independent reference counting. As such, a rados_config_t handle retrieved from a given rados_t is only valid as long as that rados_t is valid.
- typedef void *rados_ioctx_t
An io context encapsulates a few settings for all I/O operations done on it:
pool - set when the io context is created (see rados_ioctx_create())
snapshot context for writes (see rados_ioctx_selfmanaged_snap_set_write_ctx())
snapshot id to read from (see rados_ioctx_snap_set_read())
object locator for all single-object operations (see rados_ioctx_locator_set_key())
namespace for all single-object operations (see rados_ioctx_set_namespace()). Set to LIBRADOS_ALL_NSPACES before rados_nobjects_list_open() will list all objects in all namespaces.
警告
Changing any of these settings is not thread-safe - librados users must synchronize any of these changes on their own, or use separate io contexts for each thread
- typedef void *rados_list_ctx_t
An iterator for listing the objects in a pool. Used with rados_nobjects_list_open(), rados_nobjects_list_next(), rados_nobjects_list_next2(), and rados_nobjects_list_close().
- typedef void *rados_object_list_cursor
The cursor used with rados_enumerate_objects and accompanying methods.
- typedef uint64_t rados_snap_t
The id of a snapshot.
- typedef void *rados_xattrs_iter_t
An iterator for listing extended attributes on an object. Used with rados_getxattrs(), rados_getxattrs_next(), and rados_getxattrs_end().
- typedef void *rados_omap_iter_t
An iterator for listing omap key/value pairs on an object. Used with rados_read_op_omap_get_keys(), rados_read_op_omap_get_vals(), rados_read_op_omap_get_vals_by_keys(), rados_omap_get_next(), and rados_omap_get_end().
- typedef void *rados_write_op_t
An object write operation stores a number of operations which can be executed atomically. For usage, see:
Creation and deletion: rados_create_write_op(), rados_release_write_op()
Extended attribute manipulation: rados_write_op_cmpxattr(), rados_write_op_setxattr(), rados_write_op_rmxattr()
Object map key/value pairs: rados_write_op_omap_set(), rados_write_op_omap_rm_keys(), rados_write_op_omap_clear(), rados_write_op_omap_cmp()
Object properties: rados_write_op_assert_exists(), rados_write_op_assert_version()
Creating objects: rados_write_op_create()
IO on objects: rados_write_op_append(), rados_write_op_write(), rados_write_op_write_full(), rados_write_op_writesame(), rados_write_op_remove(), rados_write_op_truncate(), rados_write_op_zero(), rados_write_op_cmpext()
Hints: rados_write_op_set_alloc_hint()
Performing the operation: rados_write_op_operate(), rados_aio_write_op_operate()
- typedef void *rados_read_op_t
An object read operation stores a number of operations which can be executed atomically. For usage, see:
Creation and deletion: rados_create_read_op(), rados_release_read_op()
Extended attribute manipulation: rados_read_op_cmpxattr(), rados_read_op_getxattr(), rados_read_op_getxattrs()
Object map key/value pairs: rados_read_op_omap_get_vals(), rados_read_op_omap_get_keys(), rados_read_op_omap_get_vals_by_keys(), rados_read_op_omap_cmp()
Object properties: rados_read_op_stat(), rados_read_op_assert_exists(), rados_read_op_assert_version()
IO on objects: rados_read_op_read(), rados_read_op_checksum(), rados_read_op_cmpext()
Custom operations: rados_read_op_exec(), rados_read_op_exec_user_buf()
Request properties: rados_read_op_set_flags()
Performing the operation: rados_read_op_operate(), rados_aio_read_op_operate()
- typedef void *rados_completion_t
Represents the state of an asynchronous operation - it contains the return value once the operation completes, and can be used to block until the operation is complete or safe.
Enums
- enum [anonymous]
Values:
- enumerator LIBRADOS_OP_FLAG_EXCL
- enumerator LIBRADOS_OP_FLAG_FAILOK
- enumerator LIBRADOS_OP_FLAG_FADVISE_RANDOM
- enumerator LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL
- enumerator LIBRADOS_OP_FLAG_FADVISE_WILLNEED
- enumerator LIBRADOS_OP_FLAG_FADVISE_DONTNEED
- enumerator LIBRADOS_OP_FLAG_FADVISE_NOCACHE
- enumerator LIBRADOS_OP_FLAG_FADVISE_FUA
Functions
- void rados_version(int *major, int *minor, int *extra)
Get the version of librados.
The version number is major.minor.extra. Note that this is unrelated to the Ceph version number.
TODO: define version semantics, i.e.:
incrementing major is for backwards-incompatible changes
incrementing minor is for backwards-compatible changes
incrementing extra is for bug fixes
- 参数:
major-- where to store the major version number
minor-- where to store the minor version number
extra-- where to store the extra version number
- int rados_cluster_stat(rados_t cluster, struct rados_cluster_stat_t *result)
Read usage info about the cluster
This tells you total space, space used, space available, and number of objects. These are not updated immediately when data is written, they are eventually consistent.
- 参数:
cluster-- cluster to query
result-- where to store the results
- 返回:
0 on success, negative error code on failure
- int rados_cluster_fsid(rados_t cluster, char *buf, size_t len)
Get the fsid of the cluster as a hexadecimal string.
The fsid is a unique id of an entire Ceph cluster.
- 参数:
cluster-- where to get the fsid
buf-- where to write the fsid
len-- the size of buf in bytes (should be 37)
- 返回:
0 on success, negative error code on failure
- 返回:
-ERANGE if the buffer is too short to contain the fsid
- int rados_wait_for_latest_osdmap(rados_t cluster)
Get/wait for the most recent osdmap
- 参数:
cluster-- the cluster to get the latest osdmap for
- 返回:
0 on success, negative error code on failure
- int rados_omap_get_next(rados_omap_iter_t iter, char **key, char **val, size_t *len)
Get the next omap key/value pair on the object
- 参数:
iter-- iterator to advance
key-- where to store the key of the next omap entry
val-- where to store the value of the next omap entry
len-- where to store the number of bytes in val
- Pre:
iter is a valid iterator
- Post:
key and val are the next key/value pair. key is null-terminated, and val has length len. If the end of the list has been reached, key and val are NULL, and len is 0. key and val will not be accessible after rados_omap_get_end() is called on iter, so if they are needed after that they should be copied.
- 返回:
0 on success, negative error code on failure
- int rados_omap_get_next2(rados_omap_iter_t iter, char **key, char **val, size_t *key_len, size_t *val_len)
Get the next omap key/value pair on the object. Note that it’s perfectly safe to mix calls to rados_omap_get_next and rados_omap_get_next2.
- 参数:
iter-- iterator to advance
key-- where to store the key of the next omap entry
val-- where to store the value of the next omap entry
key_len-- where to store the number of bytes in key
val_len-- where to store the number of bytes in val
- Pre:
iter is a valid iterator
- Post:
key and val are the next key/value pair. key has length key_len and val has length val_len. If the end of the list has been reached, key and val are NULL, and key_len and val_len are 0. key and val will not be accessible after rados_omap_get_end() is called on iter, so if they are needed after that they should be copied.
- 返回:
0 on success, negative error code on failure
- unsigned int rados_omap_iter_size(rados_omap_iter_t iter)
Return number of elements in the iterator
- 参数:
iter-- the iterator of which to return the size
- void rados_omap_get_end(rados_omap_iter_t iter)
Close the omap iterator.
iter should not be used after this is called.
- 参数:
iter-- the iterator to close
- int rados_stat(rados_ioctx_t io, const char *o, uint64_t *psize, time_t *pmtime)
Get object size and most recent update time from the OSD.
- 参数:
io-- ioctx
o-- object name
psize-- where to store object size
pmtime-- where to store modification time
- 返回:
0 on success, negative error code on failure
- int rados_stat2(rados_ioctx_t io, const char *o, uint64_t *psize, struct timespec *pmtime)
- int rados_exec(rados_ioctx_t io, const char *oid, const char *cls, const char *method, const char *in_buf, size_t in_len, char *buf, size_t out_len)
Execute an OSD class method on an object
The OSD has a plugin mechanism for performing complicated operations on an object atomically. These plugins are called classes. This function allows librados users to call the custom methods. The input and output formats are defined by the class. Classes in ceph.git can be found in src/cls subdirectories
- 参数:
io-- the context in which to call the method
oid-- the object to call the method on
cls-- the name of the class
method-- the name of the method
in_buf-- where to find input
in_len-- length of in_buf in bytes
buf-- where to store output
out_len-- length of buf in bytes
- 返回:
the length of the output, or -ERANGE if buf does not have enough space to store it (For methods that return data). For methods that don't return data, the return value is method-specific.
- int rados_cache_pin(rados_ioctx_t io, const char *o)
Pin an object in the cache tier
When an object is pinned in the cache tier, it stays in the cache tier, and won’t be flushed out.
- 参数:
io-- the pool the object is in
o-- the object id
- 返回:
0 on success, negative error code on failure
- int rados_cache_unpin(rados_ioctx_t io, const char *o)
Unpin an object in the cache tier
After an object is unpinned in the cache tier, it can be flushed out
- 参数:
io-- the pool the object is in
o-- the object id
- 返回:
0 on success, negative error code on failure
- int rados_lock_exclusive(rados_ioctx_t io, const char *oid, const char *name, const char *cookie, const char *desc, struct timeval *duration, uint8_t flags)
Take an exclusive lock on an object.
- 参数:
io-- the context to operate in
oid-- the name of the object
name-- the name of the lock
cookie-- user-defined identifier for this instance of the lock
desc-- user-defined lock description
duration-- the duration of the lock. Set to NULL for infinite duration.
flags-- lock flags
- 返回:
0 on success, negative error code on failure
- 返回:
-EBUSY if the lock is already held by another (client, cookie) pair
- 返回:
-EEXIST if the lock is already held by the same (client, cookie) pair
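- int rados_lock_shared(rados_ioctx_t io, const char *o, const char *name, const char *cookie, const char *tag, const char *desc, struct timeval *duration, uint8_t flags)
Take a shared lock on an object.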
- 参数:
io-- the context to operate in
o-- the name of the object
name-- the name of the lock
cookie-- user-defined identifier for this instance of the lock
tag-- The tag of the lock
desc-- user-defined lock description
duration-- the duration of the lock. Set to NULL for infinite duration.
flags-- lock flags
- 返回:
0 on success, negative error code on failure
- 返回:
-EBUSY if the lock is already held by another (client, cookie) pair
- 返回:
-EEXIST if the lock is already held by the same (client, cookie) pair
- int rados_unlock(rados_ioctx_t io, const char *o, const char *name, const char *cookie)
Release a shared or exclusive lock on an object.
- 参数:
io-- the context to operate in
o-- the name of the object
name-- the name of the lock
cookie-- user-defined identifier for the instance of the lock
- 返回:
0 on success, negative error code on failure
- 返回:
-ENOENT if the lock is not held by the specified (client, cookie) pair
- int rados_aio_unlock(rados_ioctx_t io, const char *o, const char *name, const char *cookie, rados_completion_t completion)
Asynchronously release a shared or exclusive lock on an object.
- 参数:
io-- the context to operate in
o-- the name of the object
name-- the name of the lock
cookie-- user-defined identifier for the instance of the lock
completion-- what to do when operation has been attempted
- 返回:
0 on success, negative error code on failure
- ssize_t rados_list_lockers(rados_ioctx_t io, const char *o, const char *name, int *exclusive, char *tag, size_t *tag_len, char *clients, size_t *clients_len, char *cookies, size_t *cookies_len, char *addrs, size_t *addrs_len)
List clients that have locked the named object lock and information about the lock.
The number of bytes required in each buffer is put in the corresponding size out parameter. If any of the provided buffers are too short, -ERANGE is returned after these sizes are filled in.
- 参数:
io-- the context to operate in
o-- the name of the object
name-- the name of the lock
exclusive-- where to store whether the lock is exclusive (1) or shared (0)
tag-- where to store the tag associated with the object lock
tag_len-- number of bytes in tag buffer
clients-- buffer in which locker clients are stored, separated by ‘\0’
clients_len-- number of bytes in the clients buffer
cookies-- buffer in which locker cookies are stored, separated by ‘\0’
cookies_len-- number of bytes in the cookies buffer
addrs-- buffer in which locker addresses are stored, separated by ‘\0’
addrs_len-- number of bytes in the addrs buffer
- 返回:
number of lockers on success, negative error code on failure
- 返回:
-ERANGE if any of the buffers are too short
- int rados_break_lock(rados_ioctx_t io, const char *o, const char *name, const char *client, const char *cookie)
Releases a shared or exclusive lock on an object, which was taken by the specified client.
- 参数:
io-- the context to operate in
o-- the name of the object
name-- the name of the lock
client-- the client currently holding the lock
cookie-- user-defined identifier for the instance of the lock
- 返回:
0 on success, negative error code on failure
- 返回:
-ENOENT if the lock is not held by the specified (client, cookie) pair
- 返回:
-EINVAL if the client cannot be parsed
- int rados_blocklist_add(rados_t cluster, char *client_address, uint32_t expire_seconds)
Blocklists the specified client from the OSDs
- 参数:
cluster-- cluster handle
client_address-- client address
expire_seconds-- number of seconds to blocklist (0 for default)
- 返回:
0 on success, negative error code on failure
- int rados_blacklist_add(rados_t cluster, char *client_address, uint32_t expire_seconds) __attribute__((deprecated))
- int rados_getaddrs(rados_t cluster, char **addrs)
Gets addresses of the RADOS session, suitable for blocklisting.
- 参数:
cluster-- cluster handle
addrs-- the output string.
- 返回:
0 on success, negative error code on failure
- void rados_set_osdmap_full_try(rados_ioctx_t io) __attribute__((deprecated))
- void rados_unset_osdmap_full_try(rados_ioctx_t io) __attribute__((deprecated))
- void rados_set_pool_full_try(rados_ioctx_t io)
- void rados_unset_pool_full_try(rados_ioctx_t io)
- int rados_application_enable(rados_ioctx_t io, const char *app_name, int force)
Enable an application on a pool
- 参数:
io-- pool ioctx
app_name-- application name
force-- 0 if only single application per pool
- 返回:
0 on success, negative error code on failure
- int rados_application_list(rados_ioctx_t io, char *values, size_t *values_len)
List all enabled applications
If the provided buffer is too short, the required length is filled in and -ERANGE is returned. Otherwise, the buffers are filled with the application names, with a ‘\0’ after each.
- 参数:
io-- pool ioctx
values-- buffer in which to store application names
values_len-- number of bytes in values buffer
- 返回:
0 on success, negative error code on failure
- 返回:
-ERANGE if the buffer is too short
- int rados_application_metadata_get(rados_ioctx_t io, const char *app_name, const char *key, char *value, size_t *value_len)
Get application metadata value from pool
- 参数:
io-- pool ioctx
app_name-- application name
key-- metadata key
value-- result buffer
value_len-- maximum len of value
- 返回:
0 on success, negative error code on failure
- int rados_application_metadata_set(rados_ioctx_t io, const char *app_name, const char *key, const char *value)
Set application metadata on a pool
- 参数:
io-- pool ioctx
app_name-- application name
key-- metadata key
value-- metadata value
- 返回:
0 on success, negative error code on failure
- int rados_application_metadata_remove(rados_ioctx_t io, const char *app_name, const char *key)
Remove application metadata from a pool
- 参数:
io-- pool ioctx
app_name-- application name
key-- metadata key
- 返回:
0 on success, negative error code on failure
- int rados_application_metadata_list(rados_ioctx_t io, const char *app_name, char *keys, size_t *key_len, char *values, size_t *vals_len)
List all metadata key/value pairs associated with an application.
This iterates over all metadata; key_len and vals_len are filled in with the number of bytes put into the keys and values buffers.
If the provided buffers are too short, the required lengths are filled in and -ERANGE is returned. Otherwise, the buffers are filled with the keys and values of the metadata, with a ‘\0’ after each.
- 参数:
io-- pool ioctx
app_name-- application name
keys-- buffer in which to store key names
key_len-- number of bytes in keys buffer
values-- buffer in which to store values
vals_len-- number of bytes in values buffer
- 返回:
0 on success, negative error code on failure
- 返回:
-ERANGE if either buffer is too short
- int rados_objects_list_open(rados_ioctx_t io, rados_list_ctx_t *ctx) __attribute__((deprecated))
- uint32_t rados_objects_list_get_pg_hash_position(rados_list_ctx_t ctx) __attribute__((deprecated))
- uint32_t rados_objects_list_seek(rados_list_ctx_t ctx, uint32_t pos) __attribute__((deprecated))
- int rados_objects_list_next(rados_list_ctx_t ctx, const char **entry, const char **key) __attribute__((deprecated))
- void rados_objects_list_close(rados_list_ctx_t ctx) __attribute__((deprecated))
- struct rados_object_list_item
#include <librados.h> The item populated by rados_object_list in the results array.
- struct rados_pool_stat_t
#include <librados.h> Usage information for a pool.
Public Members
- uint64_t num_bytes
space used in bytes
- uint64_t num_kb
space used in KB
- uint64_t num_objects
number of objects in the pool
- uint64_t num_object_clones
number of clones of objects
- uint64_t num_object_copies
num_objects * num_replicas
- uint64_t num_objects_missing_on_primary
number of objects missing on primary
- uint64_t num_objects_unfound
number of objects found on no OSDs
- uint64_t num_objects_degraded
number of objects replicated fewer times than they should be (but found on at least one OSD)
- uint64_t num_rd
number of objects read
- uint64_t num_rd_kb
objects read in KB
- uint64_t num_wr
number of objects written
- uint64_t num_wr_kb
objects written in KB
- uint64_t num_user_bytes
bytes originally provided by user
- uint64_t compressed_bytes_orig
bytes passed compression
- uint64_t compressed_bytes
bytes resulted after compression
- uint64_t compressed_bytes_alloc
bytes allocated at storage
- struct rados_cluster_stat_t
#include <librados.h> Cluster-wide usage information
由 Ceph 基金会带给您
Ceph 文档是一个社区资源,由非营利的 Ceph 基金会(Ceph Foundation)资助和托管。如果您想支持这项工作以及我们的其他工作,请考虑立即加入。