ILD

ipq5018移植后reboot最后阶段卡住不能重启问题
作者:Yuan Jianpeng 邮箱:yuanjp89@163.com
发布时间:2022-5-29 站点:Inside Linux Development

执行reboot后,卡在reboot: Restarting system


/ # reboot

The system is going down NOW!

Sent SIGTERM to all processes

Sent SIGKILL to all processes

Requesting system reboot

[  754.486488] reboot: Restarting system


reboot流程分析

内核重启的入口是:kernel/reboot.c

244 void kernel_restart(char *cmd)

245 {

246         kernel_restart_prepare(cmd);

247         migrate_to_reboot_cpu();

248         syscore_shutdown();

249         if (!cmd)

250                 pr_emerg("Restarting system\n");

251         else

252                 pr_emerg("Restarting system with command '%s'\n", cmd);

253         kmsg_dump(KMSG_DUMP_RESTART);

254         machine_restart(cmd);

255 }

256 EXPORT_SYMBOL_GPL(kernel_restart);


它会打印Restarting system,然后调用架构定义的machine_restart()。


5.4内核

arch/arm/kernel/reboot.c

136 void machine_restart(char *cmd)

137 {

138         local_irq_disable();

139         smp_send_stop();

140

141         if (arm_pm_restart)

142                 arm_pm_restart(reboot_mode, cmd);

143         else

144                 do_kernel_restart(cmd);

145

146         /* Give a grace period for failure to restart of 1s */

147         mdelay(1000);

148

149         /* Whoops - the platform was unable to reboot. Tell the user! */

150         printk("Reboot failed -- System halted\n");

151         while (1);

152 }


smp_send_stop()停止其它cpu,只保留一个cpu继续运行。

如果设置了arm_pm_restart,则使用arm_pm_restart()实现重启。否则调用do_kernel_restart()实现重启。


arm_pm_restart是在psci框架设置的

drivers/firmware/psci/psci.c

412 static void __init psci_0_2_set_functions(void)

413 {

414         pr_info("Using standard PSCI v0.2 function IDs\n");

415         psci_ops.get_version = psci_get_version;

416

417         psci_function_id[PSCI_FN_CPU_SUSPEND] =

418                                         PSCI_FN_NATIVE(0_2, CPU_SUSPEND);

419         psci_ops.cpu_suspend = psci_cpu_suspend;

420

421         psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF;

422         psci_ops.cpu_off = psci_cpu_off;

423

424         psci_function_id[PSCI_FN_CPU_ON] = PSCI_FN_NATIVE(0_2, CPU_ON);

425         psci_ops.cpu_on = psci_cpu_on;

426

427         psci_function_id[PSCI_FN_MIGRATE] = PSCI_FN_NATIVE(0_2, MIGRATE);

428         psci_ops.migrate = psci_migrate;

429

430         psci_ops.affinity_info = psci_affinity_info;

431

432         psci_ops.migrate_info_type = psci_migrate_info_type;

433

434         arm_pm_restart = psci_sys_reset;

435         printk("================ arm_pm_restart %pS\n", arm_pm_restart);

436

437         pm_power_off = psci_sys_poweroff;

438 }


新的5.15内核已经去掉了arm_pm_restart,统一使用do_kernel_restart()框架:


135 void machine_restart(char *cmd)

136 {

137         local_irq_disable();

138         smp_send_stop();

139

140         do_kernel_restart(cmd);

141

142         /* Give a grace period for failure to restart of 1s */

143         mdelay(1000);

144

145         /* Whoops - the platform was unable to reboot. Tell the user! */

146         printk("Reboot failed -- System halted\n");

147         while (1);

148 }


kernel/reboot.c

216 void do_kernel_restart(char *cmd)

217 {

218         atomic_notifier_call_chain(&restart_handler_list, reboot_mode, cmd);

219 }


do_kernel_restart()就是执行使用register_restart_handler(struct notifier_block *nb)注册的重启handler。


添加调试信息:

183 int register_restart_handler(struct notifier_block *nb)

184 {

185         printk("==========register restart handler : %pS\n", nb->notifier_call);

186         return atomic_notifier_chain_register(&restart_handler_list, nb);

187 }


[    0.000000] ==========register restart handler : psci_sys_reset+0x0/0x74

[    0.992572] ==========register restart handler : watchdog_restart_notifier+0x0/0x28


重启是卡在psci_sys_reset了:


psci子系统

Power State Coordination Interface (PSCI) 


这里分析5.15内核:


dts如下:

  83         psci: psci {

  84                 compatible = "arm,psci-1.0";

  85                 method = "smc";

  86         };


初始化在setup_arch的时候调用psci_dt_init

arch/arm/kernel/setup.c setup_arch(char **cmdline_p)

    =>psci_dt_init


代码路径:drivers/firmware/psci/psci.c


606 static const struct of_device_id psci_of_match[] __initconst = {

607         { .compatible = "arm,psci",     .data = psci_0_1_init},

608         { .compatible = "arm,psci-0.2", .data = psci_0_2_init},

609         { .compatible = "arm,psci-1.0", .data = psci_1_0_init},

610         {},

611 };

612

613 int __init psci_dt_init(void)

614 {

615         struct device_node *np;

616         const struct of_device_id *matched_np;

617         psci_initcall_t init_fn;

618         int ret;

619

620         np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np);

621

622         if (!np || !of_device_is_available(np))

623                 return -ENODEV;

624

625         init_fn = (psci_initcall_t)matched_np->data;

626         ret = init_fn(np);

627

628         of_node_put(np);

629         return ret;

630 }


277 static int get_set_conduit_method(struct device_node *np)

278 {

279         const char *method;

280

281         pr_info("probing for conduit method from DT.\n");

282

283         if (of_property_read_string(np, "method", &method)) {

284                 pr_warn("missing \"method\" property\n");

285                 return -ENXIO;

286         }

287

288         if (!strcmp("hvc", method)) {

289                 set_conduit(SMCCC_CONDUIT_HVC);

290         } else if (!strcmp("smc", method)) {

291                 set_conduit(SMCCC_CONDUIT_SMC);

292         } else {

293                 pr_warn("invalid \"method\" property: %s\n", method);

294                 return -EINVAL;

295         }

296         return 0;

297 }


dts中method为smc,所以调用set_conduit(SMCCC_CONDUIT_SMC)。

261 static void set_conduit(enum arm_smccc_conduit conduit)

262 {

263         switch (conduit) {

264         case SMCCC_CONDUIT_HVC:

265                 invoke_psci_fn = __invoke_psci_fn_hvc;

266                 break;

267         case SMCCC_CONDUIT_SMC:

268                 invoke_psci_fn = __invoke_psci_fn_smc;

269                 break;

270         default:

271                 WARN(1, "Unexpected PSCI conduit %d\n", conduit);

272         }

273

274         psci_conduit = conduit;

275 }


120 static unsigned long __invoke_psci_fn_smc(unsigned long function_id,

121                         unsigned long arg0, unsigned long arg1,

122                         unsigned long arg2)

123 {

124         struct arm_smccc_res res;

125

126         arm_smccc_smc(function_id, arg0, arg1, arg2, 0, 0, 0, 0, &res);

127         return res.a0;

128 }



include/linux/arm-smccc.h

361 #define arm_smccc_smc(...) __arm_smccc_smc(__VA_ARGS__, NULL)


arch/arm/kernel/smccc-call.S

 52 ENTRY(__arm_smccc_smc)

 53         SMCCC SMCCC_SMC

 54 ENDPROC(__arm_smccc_smc)


尝试移植qca-smc也无效。猜测是pinctrl和watchdog没移植好,trustzone内部应该也是通过watchdog之类的机制复位的。



2022/5/29 工作:

U-BOOT reset对比

uboot的reset过程:

arch/arm/lib/reset.c:


 26 __weak void reset_misc(void)

 27 {

 28 }


 30 int do_reset(cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[])

 31 {

 32         puts ("resetting ...\n");

 33

 34         udelay (50000);                         /* wait 50 ms */

 35

 36         disable_interrupts();

 37

 38         reset_misc();

 39         reset_cpu(0);

 40

 41         /*NOTREACHED*/

 42         return 0;

 43 }


进入到 

board/qca/arm/ipq5018/ipq5018.c


 754 void reset_cpu(unsigned long a)

 755 {

 756         reset_crashdump();

 757         if (is_scm_armv8())

 758         {

 759                 printf("=======psci sys reset3\n");

 760                 psci_sys_reset();

 761         }

 762         else {

 763                 printf("=======qti reset\n");

 764                 qti_scm_pshold();

 765         }

 766         while(1);

 767 }


is_scm_armv8() 为真。尝试注释掉reset_crashdump()后不能复位。


 726 void reset_crashdump(void)

 727 {

 728         unsigned int ret = 0;

 729         qca_scm_sdi();

 730         ret = qca_scm_dload(CLEAR_MAGIC);

 731         if (ret)

 732                 printf ("Error in reseting the Magic cookie\n");

 733         return;

 734 }


尝试注释掉qca_scm_sdi(); 不能复位,尝试注释掉qca_scm_dload(CLEAR_MAGIC); 可以复位。

因此问题出现在 qca_scm_sdi。


arch/arm/cpu/armv7/qca/common/scm.c

767

768 static int qca_scm_sdi_v8(void)

769 {

770         struct qca_scm_desc desc = {0};

771         int ret;

772

773         desc.args[0] = 1ul;    /* Disable wdog debug */

774         desc.args[1] = 0ul;    /* SDI Enable */

775         desc.arginfo = QCA_SCM_ARGS(2, SCM_VAL, SCM_VAL);

776         ret = scm_call_64(SCM_SVC_BOOT,

777                              SCM_CMD_TZ_CONFIG_HW_FOR_RAM_DUMP_ID, &desc);

778

779         if (ret)

780                 return ret;

781

782         return le32_to_cpu(desc.ret[0]);

783 }


785 int qca_scm_sdi(void)

786 {

787         int ret;

788         unsigned int clear_info[] = {

789                 1 /* Disable wdog debug */, 0 /* SDI enable*/, };

790

791         if (is_scm_armv8())

792                 return qca_scm_sdi_v8();

793

794         ret = scm_call(SCM_SVC_BOOT, SCM_CMD_TZ_CONFIG_HW_FOR_RAM_DUMP_ID, &clear_info,

795                                 sizeof(clear_info), NULL, 0);

796

797         return ret;

798 }


查找4.4内核相关代码,发现:

./drivers/firmware/qca_scm_restart_reason.c:100:        qcom_scm_sdi(QCOM_SCM_SVC_BOOT, SCM_CMD_TZ_CONFIG_HW_FOR_RAM_DUMP_ID);


而新版内核没有qca_scm_restart_reason.c了,因此定位出原因了:在老的qseecom上,重启前需要调用SCM_CMD_TZ_CONFIG_HW_FOR_RAM_DUMP_ID,而新版内核的重启流程中缺少这一调用。


参考

https://developer.arm.com/Architectures/Power%20State%20Coordination%20Interface

https://www.cnblogs.com/loyenwang/p/11370557.html


https://community.nxp.com/t5/i-MX-Processors/mx8mm-u-boot-psci-doesn-t-reset-the-board-without-WDOG-B/m-p/1151484


https://discuss.96boards.org/t/reset-reboot-behavior/5293


https://github.com/ARM-software/arm-trusted-firmware

有讲到PSCI和SCM


Copyright © linuxdev.cc 2017-2024. Some Rights Reserved.