昨天,报告了一例相册程序崩溃的问题。用 gdb 查看崩溃时的调用栈:
(gdb) bt
#0 __strlen_asimd () at ../sysdeps/aarch64/multiarch/strlen_asimd.S:96
#1 0x000000555bf15dc0 in is_parent (parent=parent@entry=0x558025b174 "/nas/pool0/u71108721", child=child@entry=0x7f7b2b5240 <error: Cannot access memory at address 0x7f7b2b5240>) at photo.c:178
#2 0x000000555bf167ac in get_path_db (albumd=albumd@entry=0x555bf401c8 <albumd>, user=user@entry=0x0, path=path@entry=0x7f7b2b5240 <error: Cannot access memory at address 0x7f7b2b5240>, child=child@entry=0x7fd6061160) at photo.c:227
#3 0x000000555bf17498 in get_path_photo (albumd=albumd@entry=0x555bf401c8 <albumd>, path=path@entry=0x7f7b2b5240 <error: Cannot access memory at address 0x7f7b2b5240>, db_out=db_out@entry=0x7fd6061210) at photo.c:728
#4 0x000000555bf1418c in do_move_trash (albumd=albumd@entry=0x555bf401c8 <albumd>, user=user@entry=0x5580255230, in=in@entry=0x7f94fbf010) at ctrl_photo.c:583
#5 0x000000555bf156b8 in cli_del_photo (albumd=albumd@entry=0x555bf401c8 <albumd>, fd=fd@entry=25, user=user@entry=0x5580255230, in=in@entry=0x7f94fbf010, out=out@entry=0x7f94f9e010) at ctrl_photo.c:798
#6 0x000000555bf1071c in proc_del_photo (out=0x7f94f9e010, hdr=0x7f94fbf010, user=0x5580255230, fd=25, albumd=0x555bf401c8 <albumd>) at control.c:661
#7 serve_request (albumd=albumd@entry=0x555bf401c8 <albumd>, fd=fd@entry=25, uid=27192) at control.c:887
#8 0x000000555bf10c34 in on_unix_fd (events=1, albumd=0x555bf401c8 <albumd>) at control.c:971
#9 albumd_run (albumd=albumd@entry=0x555bf401c8 <albumd>, sig_chld=sig_chld@entry=0x555bf401c4 <sig_chld>, oldmask=oldmask@entry=0x555bf40260 <oldmask>) at control.c:1074
#10 0x000000555bf039d8 in main (argc=<optimized out>, argv=<optimized out>) at main.c:253
显然,path 是一个非法的地址(error: Cannot access memory at address)。这段程序处理的是客户端通过 IPC 发过来的消息,所以应该是消息解析出现了问题。
切到4帧,查看消息头和当前解析的位置:
(gdb) f 4
#4 0x000000555bf1418c in do_move_trash (albumd=albumd@entry=0x555bf401c8 <albumd>, user=user@entry=0x5580255230, in=in@entry=0x7f94fbf010)
at ctrl_photo.c:583
(gdb) p in
$28 = (struct album_header *) 0x7f94fbf010
(gdb) p info
$29 = (struct get_photo_info *) 0x7f7b2b5220
in是头,info是当前解析的对象。
解析代码:
576             for_each_info(info, in, 0) {
577                     struct photo *photo;
578                     struct db *db;
579                     char *path;
580                     int path_len = 0;
581                     char *arg;
582
(gdb)
583                     photo = get_path_photo(albumd, info->path, &db);
584                     if (IS_ERR(photo))
585                             continue;
for_each_info 是一个宏,用来遍历消息里面所有的对象。每个对象是变长的:
/*
 * Fixed-size header placed in front of a message; the payload begins
 * immediately after it (PAYLOAD() addresses it as hdr + 1).
 */
struct album_header {
        /* NOTE(review): presumably protocol magic values; no check is visible here. */
        unsigned int magic1;
        unsigned int magic2;
        /* cmd and error overlay the same 16-bit slot. */
        union {
                uint16_t cmd;
                uint16_t error;
        };
        uint16_t flags;
        /* LAST_INFO() uses this as the byte offset of the end of the payload. */
        unsigned int len;
};
/* Round len up to the next multiple of 8; a multiple of 8 is returned unchanged. */
#define ALBUM_ALIGN(len)        (((len) + 7) & ~7)
/*
 * Address of the byte `offset` bytes past the end of *hdr, as a void *.
 * Arguments and the whole expansion are parenthesized so that expression
 * arguments (e.g. `cond ? a : b`) and postfix use of the result expand
 * as intended.
 */
#define PAYLOAD(hdr, offset)    ((void *)((char *)((hdr) + 1) + (offset)))
/*
 * One variable-length record inside a message payload: a fixed part
 * followed by `path` bytes. Records are walked with FIRST_INFO() /
 * NEXT_INFO().
 */
struct get_photo_info {
        /*
         * Number of bytes GET_PHOTO_INFO_SZ() adds to the fixed part to get
         * the record size. Signed: a negative value makes NEXT_INFO() step
         * backwards.
         */
        int len;
        int error;              /* for albumd internal use */
        int client_id;
        uint64_t album_uuid;
        uint64_t member_id;
        char path[];            /* flexible array member: variable-length tail */
};
/*
 * Bytes occupied by one record: the fixed struct plus `len` trailing bytes,
 * rounded up by ALBUM_ALIGN(). `len` is not validated here; a negative int
 * is converted to a huge unsigned value by the addition with sizeof.
 */
#define GET_PHOTO_INFO_SZ(len)  ALBUM_ALIGN(sizeof(struct get_photo_info) + (len))
/* Record located at payload byte `off` of message `in`. */
#define FIRST_INFO(in, off)     ((struct get_photo_info *)PAYLOAD(in, off))
/* End-of-payload marker (payload byte in->len); not a record, do not dereference. */
#define LAST_INFO(in)           ((struct get_photo_info *)PAYLOAD(in, (in)->len))
/*
 * Record that follows `info`, computed from info->len. Evaluates `info`
 * twice, so do not pass an expression with side effects.
 */
#define NEXT_INFO(info)         ((struct get_photo_info *)((char *)(info) + GET_PHOTO_INFO_SZ((info)->len)))
#define for_each_info(info, in, off)            \
        for (info = FIRST_INFO(in, off); info < LAST_INFO(in); info = NEXT_INFO(info))
发过来的消息是一个头+N个负载,每个负载是变长的,有一个len字段表示长度。
当时想不明白:明明校验了 info < LAST_INFO(in),为什么还是没拦住?后来发现 info 的地址(0x7f7b2b5220)比 in(0x7f94fbf010)还小,瞬间就明白了:肯定是某个对象的 len 变成了负数,导致 info 往前(低地址方向)跑了。一个比 in 还小的指针自然也小于 LAST_INFO(in),所以这个校验拦不住。
所以for_each_info,要加个len的合法性判断
/* Signed count of payload bytes from `info` up to LAST_INFO(in); negative if info is past the end. */
#define INFO_BYTES_LEFT(info, in) \
        ((char *)LAST_INFO(in) - (char *)(info))
/*
 * True when the record at `info` can be read safely. The checks run in
 * this order on purpose:
 *   1. the fixed part of the record lies inside the payload, so reading
 *      info->len is in bounds (this also rejects info at or past the end);
 *   2. len is not negative (a negative len would move NEXT_INFO() backwards);
 *   3. the `len` trailing bytes also lie inside the payload.
 * NOTE(review): this does not check that path is NUL-terminated within
 * `len` bytes; confirm that consumers of info->path handle that.
 */
#define INFO_IS_VALID(info, in) \
        (INFO_BYTES_LEFT(info, in) >= (int)sizeof(struct get_photo_info) && \
         (info)->len >= 0 && \
         (info)->len <= INFO_BYTES_LEFT(info, in) - (int)sizeof(struct get_photo_info))
/*
 * Visit each record in the payload of message `in`, starting at payload
 * byte `off`. `info` is the loop cursor (struct get_photo_info *).
 * Iteration stops at the end of the payload or at the first record that
 * fails INFO_IS_VALID(); that record's body is never entered.
 */
#define for_each_info(info, in, off) \
        for ((info) = FIRST_INFO(in, off); INFO_IS_VALID(info, in); (info) = NEXT_INFO(info))