我首先要指出ctypes官方文档:[Python 3.5]:ctypes - Python 的外部函数库.
我定义了一个更复杂的结构树(2 个嵌套级别)。
data_types.py:
import ctypes
PRAGMA_PACK = 0
class Struct2(ctypes.Structure):
    """Innermost structure of the sample tree; it owns the ``wanted`` member."""

    # _pack_ is only defined for a non-zero PRAGMA_PACK; with 0 the default
    # alignment applies. It has to come before _fields_ for ctypes to use it.
    if PRAGMA_PACK:
        _pack_ = PRAGMA_PACK
    _fields_ = [
        ("c_0", ctypes.c_char),  # 1B
        ("s_0", ctypes.c_short),  # 2B
        ("wanted", ctypes.c_int),  # 4B
    ]
class Struct1(ctypes.Structure):
    """Middle structure of the sample tree; embeds one ``Struct2`` by value."""

    # _pack_ is only defined for a non-zero PRAGMA_PACK; with 0 the default
    # alignment applies. It has to come before _fields_ for ctypes to use it.
    if PRAGMA_PACK:
        _pack_ = PRAGMA_PACK
    _fields_ = [
        ("d_0", ctypes.c_double),  # 8B
        ("c_0", ctypes.c_char),  # 1B
        ("struct2_0", Struct2),  # nested structure (1st nesting level)
    ]
class Struct0(ctypes.Structure):
    """Outermost structure of the sample tree; embeds one ``Struct1`` by value."""

    # _pack_ is only defined for a non-zero PRAGMA_PACK; with 0 the default
    # alignment applies. It has to come before _fields_ for ctypes to use it.
    if PRAGMA_PACK:
        _pack_ = PRAGMA_PACK
    _fields_ = [
        ("i_0", ctypes.c_int),  # 4B
        ("s_0", ctypes.c_short),  # 2B
        ("struct1_0", Struct1),  # nested structure (which itself nests Struct2)
    ]
Notes:
- 我标出了我们感兴趣的成员 wanted(它属于 Struct2,位于最深的嵌套层)
- 处理结构体(structs)时有一点很重要:对齐(alignment)。更多细节请查看 [MSDN]: #pragma pack。
In order to illustrate the 2nd bullet (above), I prepared a small example (which has nothing to do with the question).
test_addressof.py:
import sys
import ctypes
import data_types
OFFSET_TEXT = "Offset of '{:s}' member in '{:s}' instance: {:3d} (0x{:08X})"
def offset_addressof(child_structure_instance, parent_structure_instance):
    """Return the distance in bytes between two ctypes instances.

    Both arguments must be ctypes instances accepted by ``ctypes.addressof``.
    The result is ``address(child) - address(parent)``, so it is negative
    when the child lives at a lower address than the parent.
    """
    child_address = ctypes.addressof(child_structure_instance)
    parent_address = ctypes.addressof(parent_structure_instance)
    return child_address - parent_address
def print_offset_addressof_data(child_structure_instance, parent_structure_instance):
    """Print where a child structure sits inside its parent.

    The offset comes from ``offset_addressof`` and is rendered through
    ``OFFSET_TEXT`` together with the class names of both instances, once in
    decimal and once in hexadecimal.
    """
    delta = offset_addressof(child_structure_instance, parent_structure_instance)
    child_name = child_structure_instance.__class__.__name__
    parent_name = parent_structure_instance.__class__.__name__
    print(OFFSET_TEXT.format(child_name, parent_name, delta, delta))
def main():
    """Print offsets, alignments and sizes for the ``data_types`` sample tree."""
    root = data_types.Struct0()
    middle = root.struct1_0
    leaf = middle.struct2_0

    pack_note = "" if data_types.PRAGMA_PACK else "(default)"
    print("PRAGMA_PACK: {:d} {:s}\n".format(data_types.PRAGMA_PACK, pack_note))

    # (child, parent) pairs, reported in this order.
    for child, parent in ((middle, root), (leaf, middle), (leaf, root)):
        print_offset_addressof_data(child, parent)

    # One (name, alignment, size) triple per structure, outermost first.
    stats = []
    for instance in (root, middle, leaf):
        stats.extend((
            instance.__class__.__name__,
            ctypes.alignment(instance),
            ctypes.sizeof(instance),
        ))
    print("\nAlignments and sizes:\n\t'{:s}': {:3d} - {:3d}\n\t'{:s}': {:3d} - {:3d}\n\t'{:s}': {:3d} - {:3d}".format(*stats))
    # Disabled on purpose; the notes after this listing refer to it.
    #print("Struct0().i_0 type: {:s}".format(s0.i_0.__class__.__name__))


if __name__ == "__main__":
    print("Python {:s} on {:s}\n".format(sys.version, sys.platform))
    main()
Notes:
- 在 ctypes.Structure 中,原生 C 成员类型会被转换为 Python 类型;如果把这样的成员作为参数传给 ctypes.addressof,它会抛出 TypeError(参见 main 中被注释掉的 print)
- 我尽量使用在不同操作系统(OS)上大小相同的 C 类型(例如,我避免使用 ctypes.c_long:它在 Lnx 上是 8 个字节,而在 Win 上是 4 个字节(当然指的是 64 位版本))
- 两次示例运行之间需要修改源代码。我可以动态生成类,但这会给代码增加不必要的复杂性(并且偏离了我想要表达的观点)
Output:
(py35x64_test) e:\Work\Dev\StackOverflow\q050304516>python test_addressof.py
Python 3.5.4 (v3.5.4:3f56838, Aug 8 2017, 02:17:05) [MSC v.1900 64 bit (AMD64)] on win32
PRAGMA_PACK: 0 (default)
Offset of 'Struct1' member in 'Struct0' instance: 8 (0x00000008)
Offset of 'Struct2' member in 'Struct1' instance: 12 (0x0000000C)
Offset of 'Struct2' member in 'Struct0' instance: 20 (0x00000014)
Alignments and sizes:
'Struct0': 8 - 32
'Struct1': 8 - 24
'Struct2': 4 - 8
(py35x64_test) e:\Work\Dev\StackOverflow\q050304516>rem change PRAGMA_PACK = 1 in data_types.py
(py35x64_test) e:\Work\Dev\StackOverflow\q050304516>python test_addressof.py
Python 3.5.4 (v3.5.4:3f56838, Aug 8 2017, 02:17:05) [MSC v.1900 64 bit (AMD64)] on win32
PRAGMA_PACK: 1
Offset of 'Struct1' member in 'Struct0' instance: 6 (0x00000006)
Offset of 'Struct2' member in 'Struct1' instance: 9 (0x00000009)
Offset of 'Struct2' member in 'Struct0' instance: 15 (0x0000000F)
Alignments and sizes:
'Struct0': 1 - 22
'Struct1': 1 - 16
'Struct2': 1 - 7
struct_util.py:
import sys
import ctypes
import data_types
WANTED_MEMBER_NAME = "wanted"
FIELDS_MEMBER_NAME = "_fields_"
def _get_padded_size(sizes, align_size):
    """Estimate the padded size occupied by members of the given sizes.

    Sizes are accumulated into a pending chunk. When the pending chunk has
    already reached ``align_size`` it is added to the total as-is; when adding
    the next size would overflow ``align_size`` a full ``align_size`` is added
    instead. Either way the next size starts a new chunk. A non-empty trailing
    chunk contributes ``max(last size, align_size)``.

    This is a heuristic: it only sees sizes, not each member's own alignment.

    :param sizes: iterable of member sizes, in declaration order.
    :param align_size: alignment of the enclosing structure.
    :return: the estimated padded size (0 for an empty ``sizes``).
    """
    total = 0
    pending = 0
    last_size = 0
    for last_size in sizes:
        if pending >= align_size:
            total += pending
        elif pending + last_size > align_size:
            total += align_size
        else:
            # Still fits in the current chunk: keep accumulating.
            pending += last_size
            continue
        pending = last_size
    if pending:
        total += max(last_size, align_size)
    return total
def _get_array_type_sizes(array_type):
    """Flatten a ctypes array type into the sizes of its scalar elements.

    ``char c[10]`` yields ten entries of ``sizeof(char)``. Nested arrays are
    flattened recursively, so ``short m[2][3]`` yields six entries.

    :param array_type: a ``ctypes.Array`` subclass (e.g. ``ctypes.c_int * 4``).
    :return: list of ``int`` sizes in bytes, one per innermost element.
    """
    element_type = array_type._type_
    if issubclass(element_type, ctypes.Array):
        # The recursive call already accounts for the inner length, so repeat
        # it once per element of *this* array.
        return _get_array_type_sizes(element_type) * array_type._length_
    # Return sizes (ints), not the types themselves: callers add them up.
    return [ctypes.sizeof(element_type)] * array_type._length_
def get_nested_offset_recursive(struct_instance, wanted_member_name):
    """Return the byte offset of the first member with the given name.

    Members are visited in declaration order. A directly declared member
    matches first; otherwise members that are structures themselves are
    searched recursively. Array members are not descended into.

    The offset of every field is taken from the field descriptor that ctypes
    attaches to the structure class (``StructType.field.offset``). That value
    already reflects alignment and ``_pack_``, so no padding has to be
    estimated from member sizes.

    :param struct_instance: a ``ctypes.Structure`` instance; anything else
        (including ``None``) yields -1.
    :param wanted_member_name: name of the member to look for.
    :return: offset in bytes relative to the start of ``struct_instance``,
        or -1 when the member is not found.
    """
    if not isinstance(struct_instance, ctypes.Structure):
        return -1
    struct_type = type(struct_instance)
    for member_name, member_type in getattr(struct_type, "_fields_", ()):
        member_offset = getattr(struct_type, member_name).offset
        if member_name == wanted_member_name:
            return member_offset
        if issubclass(member_type, ctypes.Structure):
            nested_struct_instance = getattr(struct_instance, member_name)
            inner_offset = get_nested_offset_recursive(nested_struct_instance, wanted_member_name)
            if inner_offset != -1:
                # The inner offset is relative to the nested structure.
                return member_offset + inner_offset
    return -1
def _get_struct_instance_from_name(struct_name):
    """Instantiate the ``data_types`` class called ``struct_name``.

    :return: a new instance built with no arguments, or ``None`` when
        ``data_types`` has no (truthy) attribute of that name.
    """
    struct_class = getattr(data_types, struct_name, None)
    return struct_class() if struct_class else None
def get_nested_offset(struct_name, wanted_member_name):
    """Look up ``wanted_member_name`` inside the structure named ``struct_name``.

    Convenience wrapper: builds an instance through
    ``_get_struct_instance_from_name`` and passes it to
    ``get_nested_offset_recursive``, whose result is returned unchanged.
    """
    return get_nested_offset_recursive(
        _get_struct_instance_from_name(struct_name),
        wanted_member_name,
    )
def main():
    """Print the offset of ``WANTED_MEMBER_NAME`` in each sample structure."""
    pack_note = "" if data_types.PRAGMA_PACK else "(default)"
    print("PRAGMA_PACK: {:d} {:s}\n".format(data_types.PRAGMA_PACK, pack_note))

    # Innermost structure first.
    for struct_name in ("Struct2", "Struct1", "Struct0"):
        struct_size = ctypes.sizeof(_get_struct_instance_from_name(struct_name))
        member_offset = get_nested_offset(struct_name, WANTED_MEMBER_NAME)
        print("'{:s}' offset in '{:s}' (size: {:3d}): {:3d}".format(
            WANTED_MEMBER_NAME, struct_name, struct_size, member_offset))


if __name__ == "__main__":
    print("Python {:s} on {:s}\n".format(sys.version, sys.platform))
    main()
Notes:
- 该代码(waaaay)比我最初预期的更复杂。我认为有一个更简单的方法,但我就是看不到。希望我没有错过如此明显的事情,整个事情可以用 2 - 3 行代码完成
- 它应该适用于任何结构,尽管有(很多)情况我没有测试(特别是结构数组,其中有些情况不起作用)
- It will stop at the 1st member occurrence found
- Functions (1 by 1):
-
get_nested_offset_recursive - core function: recursively searches for the member in the structures and calculates its offset. There are 2 cases:
- 成员位于 child 结构体(或 child 的 child,……)中:到 child 结构体的偏移量通过两个结构体地址相减得到(使用 ctypes.addressof)
- 成员位于当前结构体中(复杂情况):计算偏移量时要考虑它之前各成员的大小以及结构体的对齐
-
_get_padded_size - 尝试把(我们关心的成员之前的)各成员大小装入大小为 align_size 的块中,并返回这些块的大小之和
-
_get_array_type_sizes - 从对齐的角度看,数组不是原子的(atomic):成员 char c[10]; 可以替换为 char c0, c1, ..., c9;。这个函数做的就是这件事(递归地)
-
_get_struct_instance_from_name - 辅助(便利)函数:返回以参数形式给出的结构体名称(位于 data_types 模块中)所对应的实例
-
get_nested_offset - 包装函数
Output(同上原理):
(py35x64_test) e:\Work\Dev\StackOverflow\q050304516>python struct_util.py
Python 3.5.4 (v3.5.4:3f56838, Aug 8 2017, 02:17:05) [MSC v.1900 64 bit (AMD64)] on win32
PRAGMA_PACK: 0 (default)
'wanted' offset in 'Struct2' (size: 8): 4
'wanted' offset in 'Struct1' (size: 24): 16
'wanted' offset in 'Struct0' (size: 32): 24
(py35x64_test) e:\Work\Dev\StackOverflow\q050304516>rem change PRAGMA_PACK = 1 in data_types.py
(py35x64_test) e:\Work\Dev\StackOverflow\q050304516>python struct_util.py
Python 3.5.4 (v3.5.4:3f56838, Aug 8 2017, 02:17:05) [MSC v.1900 64 bit (AMD64)] on win32
PRAGMA_PACK: 1
'wanted' offset in 'Struct2' (size: 7): 3
'wanted' offset in 'Struct1' (size: 16): 12
'wanted' offset in 'Struct0' (size: 22): 18
@EDIT0:
As I specified in the 1st and (especially) the 2nd notes, I wasn't happy with the solution, mainly because even if it works the current scenario, it doesn't for the general one (nesting arrays and structures). Then I came across [SO]: Ctypes: Get a pointer to a struct field
(@MarkTolonen's answer), and took a different approach.
data_types.py(在之前的内容后面添加以下代码):
class Struct0_1(ctypes.Structure):
    """Outer structure mixing scalars, scalar arrays and arrays of ``Struct1``.

    It embeds six ``Struct1`` values in total (2 + 1 + 3).
    """

    # _pack_ is only defined for a non-zero PRAGMA_PACK; with 0 the default
    # alignment applies. It has to come before _fields_ for ctypes to use it.
    if PRAGMA_PACK:
        _pack_ = PRAGMA_PACK
    _fields_ = [
        ("i_0", ctypes.c_int),  # 4B
        ("s_0", ctypes.c_short),  # 2B
        ("struct1_0_2", Struct1 * 2),  # array of 2 nested structures
        ("i_1", ctypes.c_int * 2),  # 2 * 4B
        ("struct1_1", Struct1),  # single nested structure
        ("i_2", ctypes.c_int),  # 4B
        ("struct1_2_3", Struct1 * 3),  # array of 3 nested structures
    ]
struct_util_v2.py:
import sys
import ctypes
import data_types
WANTED_MEMBER_NAME = "wanted"
def _get_nested_offset_recursive_struct(struct_ctype, member_name):
    """Find the first ``member_name`` inside a structure type.

    Fields are scanned in declaration order. A direct match returns the
    field's own offset. Structure and array fields are searched recursively,
    and a hit is shifted by the offset of the enclosing field.

    :param struct_ctype: a ``ctypes.Structure`` subclass (a class, not an
        instance; offsets are read from its field descriptors).
    :param member_name: name of the member to look for.
    :return: offset in bytes from the start of ``struct_ctype``, or -1.
    """
    for field_name, field_ctype in struct_ctype._fields_:
        field_offset = getattr(struct_ctype, field_name).offset
        if field_name == member_name:
            return field_offset
        if issubclass(field_ctype, ctypes.Structure):
            nested_offset = _get_nested_offset_recursive_struct(field_ctype, member_name)
        elif issubclass(field_ctype, ctypes.Array):
            nested_offset = _get_nested_offset_recursive_array(field_ctype, member_name)
        else:
            # Plain scalar with a different name: nothing to descend into.
            continue
        if nested_offset != -1:
            return nested_offset + field_offset
    return -1
def _get_nested_offset_recursive_array(array_ctype, member_name):
    """Find the first ``member_name`` inside an array type.

    All elements share one type, so the first occurrence, if any, lies in
    element 0. Its offset relative to the array start therefore equals its
    offset inside a single element, and only the element type is inspected.

    :param array_ctype: a ``ctypes.Array`` subclass (e.g. ``Struct1 * 2``).
    :param member_name: name of the member to look for.
    :return: offset in bytes from the start of the array, or -1 when the
        member is not found. An empty array (``_length_ == 0``) holds no
        elements and always yields -1.
    """
    if array_ctype._length_ == 0:
        return -1
    element_ctype = array_ctype._type_
    if issubclass(element_ctype, ctypes.Structure):
        return _get_nested_offset_recursive_struct(element_ctype, member_name)
    if issubclass(element_ctype, ctypes.Array):
        return _get_nested_offset_recursive_array(element_ctype, member_name)
    return -1
def get_nested_offset_recursive(ctype, member_name, nth=1):
    """Return the offset of the first ``member_name`` inside ``ctype``.

    :param ctype: a ctypes type (class). Structures and arrays are searched;
        any other type yields -1.
    :param member_name: name of the member to look for.
    :param nth: accepted but currently unused.
    :return: offset in bytes from the start of ``ctype``, or -1.
    """
    if issubclass(ctype, ctypes.Structure):
        search = _get_nested_offset_recursive_struct
    elif issubclass(ctype, ctypes.Array):
        search = _get_nested_offset_recursive_array
    else:
        return -1
    return search(ctype, member_name)
def main():
    """Print the offset of ``WANTED_MEMBER_NAME`` in each sample structure."""
    pack_note = "" if data_types.PRAGMA_PACK else "(default)"
    print("PRAGMA_PACK: {:d} {:s}\n".format(data_types.PRAGMA_PACK, pack_note))

    for struct_name in ("Struct2", "Struct1", "Struct0", "Struct0_1"):
        # Classes are enough here: offsets are read from the type itself.
        struct_ctype = getattr(data_types, struct_name)
        struct_size = ctypes.sizeof(struct_ctype)
        member_offset = get_nested_offset_recursive(struct_ctype, WANTED_MEMBER_NAME)
        print("'{:s}' offset in '{:s}' (size: {:3d}): {:3d}".format(
            WANTED_MEMBER_NAME, struct_name, struct_size, member_offset))


if __name__ == "__main__":
    print("Python {:s} on {:s}\n".format(sys.version, sys.platform))
    main()
Notes:
- 不再使用实例,因为偏移元数据存储在类本身中(地址不再需要)
- 对于新添加的结构,以前的代码不起作用
- 新代码的真正威力在于处理 get_nested_offset_recursive 的 nth 参数(它现在什么也不做,可以删除):该参数用于指明应当报告成员名称第几次出现的偏移量(只对结构体数组有意义),但这有点复杂,因此需要更多代码
- 争论的主题可能是结构成员是指向结构的指针(有些人可能会认为将它们视为数组),但我认为由于此类(内部)结构驻留在另一个内存区域中,因此只需跳过它们(事实上,使用这种方法与决定无关)
Output:
(py35x64_test) e:\Work\Dev\StackOverflow\q050304516>python struct_util_v2.py
Python 3.5.4 (v3.5.4:3f56838, Aug 8 2017, 02:17:05) [MSC v.1900 64 bit (AMD64)] on win32
PRAGMA_PACK: 0 (default)
'wanted' offset in 'Struct2' (size: 8): 4
'wanted' offset in 'Struct1' (size: 24): 16
'wanted' offset in 'Struct0' (size: 32): 24
'wanted' offset in 'Struct0_1' (size: 168): 24
(py35x64_test) e:\Work\Dev\StackOverflow\q050304516>rem change PRAGMA_PACK = 1 in data_types.py
(py35x64_test) e:\Work\Dev\StackOverflow\q050304516>python struct_util_v2.py
Python 3.5.4 (v3.5.4:3f56838, Aug 8 2017, 02:17:05) [MSC v.1900 64 bit (AMD64)] on win32
PRAGMA_PACK: 1
'wanted' offset in 'Struct2' (size: 7): 3
'wanted' offset in 'Struct1' (size: 16): 12
'wanted' offset in 'Struct0' (size: 22): 18
'wanted' offset in 'Struct0_1' (size: 114): 18
@EDIT1:
添加了对 nth 参数的支持(并将其重命名为 index)。
struct_util_v3.py:
import sys
import ctypes
import data_types
WANTED_MEMBER_NAME = "wanted"
OFFSET_INVALID = -1


def _get_nested_offset_recursive_struct(struct_ctype, member_name, index):
    """Find the ``index``-th (0 based) ``member_name`` inside a structure type.

    Fields are scanned in declaration order. Occurrences found before the
    requested one are counted, whether they are direct members or live in
    nested structures/arrays. The requested occurrence is the one reached
    after exactly ``index`` others have been skipped.

    :param struct_ctype: a ``ctypes.Structure`` subclass (a class, not an
        instance; offsets are read from its field descriptors).
    :param member_name: name of the member to look for.
    :param index: number of occurrences to skip before reporting.
    :return: ``(offset, 0)`` when found, with ``offset`` in bytes from the
        start of ``struct_ctype``. Otherwise ``(OFFSET_INVALID, count)``,
        where ``count`` is the number of occurrences seen in this structure.
    """
    current_index = 0
    for field_name, field_ctype in struct_ctype._fields_:
        field_offset = getattr(struct_ctype, field_name).offset
        if field_name == member_name:
            # Compare against what was already skipped, not against 0:
            # earlier fields may have contributed nested occurrences.
            if current_index == index:
                return field_offset, 0
            current_index += 1
            continue
        if issubclass(field_ctype, ctypes.Structure):
            inner_offset, occurences = _get_nested_offset_recursive_struct(
                field_ctype, member_name, index - current_index)
        elif issubclass(field_ctype, ctypes.Array):
            inner_offset, occurences = _get_nested_offset_recursive_array(
                field_ctype, member_name, index - current_index)
        else:
            # Plain scalar with a different name: nothing to descend into.
            continue
        if inner_offset != OFFSET_INVALID:
            return inner_offset + field_offset, 0
        current_index += occurences
    return OFFSET_INVALID, current_index
def _get_nested_offset_recursive_array(array_ctype, member_name, index):
    """Find the ``index``-th (0 based) ``member_name`` inside an array type.

    Elements are visited in order. Each one is searched for the occurrence
    that remains after subtracting those counted in earlier elements. A hit
    is shifted by ``sizeof(element) * position``.

    :param array_ctype: a ``ctypes.Array`` subclass.
    :param member_name: name of the member to look for.
    :param index: number of occurrences to skip before reporting.
    :return: ``(offset, 0)`` when found. Otherwise ``(OFFSET_INVALID, count)``
        with the number of occurrences seen across the elements; ``count`` is
        0 when the element type cannot contain the member at all.
    """
    element_ctype = array_ctype._type_
    element_size = ctypes.sizeof(element_ctype)
    if issubclass(element_ctype, ctypes.Structure):
        search = _get_nested_offset_recursive_struct
    elif issubclass(element_ctype, ctypes.Array):
        search = _get_nested_offset_recursive_array
    else:
        # Scalar elements never contain named members.
        return OFFSET_INVALID, 0

    skipped = 0
    for position in range(array_ctype._length_):
        found_offset, seen = search(element_ctype, member_name, index - skipped)
        if found_offset != OFFSET_INVALID:
            return element_size * position + found_offset, 0
        if seen == 0:
            # No occurrence in one element means none in the others either.
            return OFFSET_INVALID, 0
        skipped += seen
    return OFFSET_INVALID, skipped
def get_nested_offset_recursive(ctype, member_name, index=0):
    """Return the offset of the ``index``-th (0 based) ``member_name``.

    :param ctype: a ctypes type (class). Structures and arrays are searched;
        any other type yields ``OFFSET_INVALID``.
    :param member_name: name of the member to look for.
    :param index: how many occurrences to skip before reporting; a negative
        value yields ``OFFSET_INVALID`` without inspecting ``ctype``.
    :return: offset in bytes from the start of ``ctype``, or
        ``OFFSET_INVALID``.
    """
    if index >= 0:
        if issubclass(ctype, ctypes.Structure):
            return _get_nested_offset_recursive_struct(ctype, member_name, index)[0]
        if issubclass(ctype, ctypes.Array):
            return _get_nested_offset_recursive_array(ctype, member_name, index)[0]
    return OFFSET_INVALID
def main():
    """Print the offset of every ``WANTED_MEMBER_NAME`` occurrence per structure."""
    pack_note = "" if data_types.PRAGMA_PACK else "(default)"
    print("PRAGMA_PACK: {:d} {:s}\n".format(data_types.PRAGMA_PACK, pack_note))

    for struct_name in ("Struct2", "Struct1", "Struct0", "Struct0_1"):
        struct_ctype = getattr(data_types, struct_name)
        nth = 1
        # Keep asking for the next occurrence until the search reports none.
        while True:
            ofs = get_nested_offset_recursive(struct_ctype, WANTED_MEMBER_NAME, index=nth - 1)
            if ofs == OFFSET_INVALID:
                break
            print("'{:s}' offset (#{:03d}) in '{:s}' (size: {:3d}): {:3d}".format(
                WANTED_MEMBER_NAME, nth, struct_name, ctypes.sizeof(struct_ctype), ofs))
            nth += 1


if __name__ == "__main__":
    print("Python {:s} on {:s}\n".format(sys.version, sys.platform))
    main()
Notes:
-
get_nested_offset_recursive's index argument is the (0 based) index in the member occurrences list - or how many occurrences to skip before reporting the offset (default: 0 - meaning that it will report 1st occurrence's offset)
- 没有彻底测试,但我想我涵盖了所有情况
- 对于每个结构,程序列出所有成员出现的偏移量(直到找不到)
- 现在,代码就是我一开始想到的形状
Output:
(py35x64_test) e:\Work\Dev\StackOverflow\q050304516>python struct_util_v3.py
Python 3.5.4 (v3.5.4:3f56838, Aug 8 2017, 02:17:05) [MSC v.1900 64 bit (AMD64)] on win32
PRAGMA_PACK: 0 (default)
'wanted' offset (#001) in 'Struct2' (size: 8): 4
'wanted' offset (#001) in 'Struct1' (size: 24): 16
'wanted' offset (#001) in 'Struct0' (size: 32): 24
'wanted' offset (#001) in 'Struct0_1' (size: 192): 24
'wanted' offset (#002) in 'Struct0_1' (size: 192): 48
'wanted' offset (#003) in 'Struct0_1' (size: 192): 72
'wanted' offset (#004) in 'Struct0_1' (size: 192): 104
'wanted' offset (#005) in 'Struct0_1' (size: 192): 136
'wanted' offset (#006) in 'Struct0_1' (size: 192): 160
(py35x64_test) e:\Work\Dev\StackOverflow\q050304516>rem change PRAGMA_PACK = 1 in data_types.py
(py35x64_test) e:\Work\Dev\StackOverflow\q050304516>python struct_util_v3.py
Python 3.5.4 (v3.5.4:3f56838, Aug 8 2017, 02:17:05) [MSC v.1900 64 bit (AMD64)] on win32
PRAGMA_PACK: 1
'wanted' offset (#001) in 'Struct2' (size: 7): 3
'wanted' offset (#001) in 'Struct1' (size: 16): 12
'wanted' offset (#001) in 'Struct0' (size: 22): 18
'wanted' offset (#001) in 'Struct0_1' (size: 130): 18
'wanted' offset (#002) in 'Struct0_1' (size: 130): 34
'wanted' offset (#003) in 'Struct0_1' (size: 130): 50
'wanted' offset (#004) in 'Struct0_1' (size: 130): 74
'wanted' offset (#005) in 'Struct0_1' (size: 130): 94
'wanted' offset (#006) in 'Struct0_1' (size: 130): 110