qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio

migrate: move max-bandwidth and downtime-limit to migrate_set_parameter

Mark the old commands 'migrate_set_speed' and 'migrate_set_downtime' as
deprecated.
Move max-bandwidth and downtime-limit into migrate-set-parameters for
setting maximum migration speed and expected downtime limit parameters
respectively.
Change the downtime units to milliseconds (for the new parameter only; the
deprecated 'migrate_set_downtime' command still takes seconds) and set the
upper bound to 2000 seconds (2000000 milliseconds).
Update the query output in both the HMP and QMP control interfaces.

Signed-off-by: Ashijeet Acharya <ashijeetacharya@gmail.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>

authored by

Ashijeet Acharya and committed by
Juan Quintela
2ff30257 9308ae54

+107 -40
+10 -3
docs/qmp-commands.txt
··· 2910 2910 throttled for auto-converge (json-int) 2911 2911 - "cpu-throttle-increment": set throttle increasing percentage for 2912 2912 auto-converge (json-int) 2913 - 2913 + - "max-bandwidth": set maximum speed for migrations (in bytes/sec) (json-int) 2914 + - "downtime-limit": set maximum tolerated downtime (in milliseconds) for 2915 + migrations (json-int) 2914 2916 Arguments: 2915 2917 2916 2918 Example: ··· 2931 2933 throttled (json-int) 2932 2934 - "cpu-throttle-increment" : throttle increasing percentage for 2933 2935 auto-converge (json-int) 2934 - 2936 + - "max-bandwidth" : maximum migration speed in bytes per second 2937 + (json-int) 2938 + - "downtime-limit" : maximum tolerated downtime of migration in 2939 + milliseconds (json-int) 2935 2940 Arguments: 2936 2941 2937 2942 Example: ··· 2943 2948 "cpu-throttle-increment": 10, 2944 2949 "compress-threads": 8, 2945 2950 "compress-level": 1, 2946 - "cpu-throttle-initial": 20 2951 + "cpu-throttle-initial": 20, 2952 + "max-bandwidth": 33554432, 2953 + "downtime-limit": 300 2947 2954 } 2948 2955 } 2949 2956
+27
hmp.c
··· 310 310 monitor_printf(mon, " %s: '%s'", 311 311 MigrationParameter_lookup[MIGRATION_PARAMETER_TLS_HOSTNAME], 312 312 params->has_tls_hostname ? params->tls_hostname : ""); 313 + assert(params->has_max_bandwidth); 314 + monitor_printf(mon, " %s: %" PRId64 " bytes/second", 315 + MigrationParameter_lookup[MIGRATION_PARAMETER_MAX_BANDWIDTH], 316 + params->max_bandwidth); 317 + assert(params->has_downtime_limit); 318 + monitor_printf(mon, " %s: %" PRId64 " milliseconds", 319 + MigrationParameter_lookup[MIGRATION_PARAMETER_DOWNTIME_LIMIT], 320 + params->downtime_limit); 313 321 monitor_printf(mon, "\n"); 314 322 } 315 323 ··· 1265 1273 hmp_handle_error(mon, &err); 1266 1274 } 1267 1275 1276 + /* Kept for backwards compatibility */ 1268 1277 void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict) 1269 1278 { 1270 1279 double value = qdict_get_double(qdict, "value"); ··· 1283 1292 } 1284 1293 } 1285 1294 1295 + /* Kept for backwards compatibility */ 1286 1296 void hmp_migrate_set_speed(Monitor *mon, const QDict *qdict) 1287 1297 { 1288 1298 int64_t value = qdict_get_int(qdict, "value"); ··· 1323 1333 { 1324 1334 const char *param = qdict_get_str(qdict, "parameter"); 1325 1335 const char *valuestr = qdict_get_str(qdict, "value"); 1336 + int64_t valuebw = 0; 1326 1337 long valueint = 0; 1338 + char *endp; 1327 1339 Error *err = NULL; 1328 1340 bool use_int_value = false; 1329 1341 int i; ··· 1360 1372 p.has_tls_hostname = true; 1361 1373 p.tls_hostname = (char *) valuestr; 1362 1374 break; 1375 + case MIGRATION_PARAMETER_MAX_BANDWIDTH: 1376 + p.has_max_bandwidth = true; 1377 + valuebw = qemu_strtosz(valuestr, &endp); 1378 + if (valuebw < 0 || (size_t)valuebw != valuebw 1379 + || *endp != '\0') { 1380 + error_setg(&err, "Invalid size %s", valuestr); 1381 + goto cleanup; 1382 + } 1383 + p.max_bandwidth = valuebw; 1384 + break; 1385 + case MIGRATION_PARAMETER_DOWNTIME_LIMIT: 1386 + p.has_downtime_limit = true; 1387 + use_int_value = true; 1388 + break; 1363 1389 } 
1364 1390 1365 1391 if (use_int_value) { ··· 1375 1401 p.decompress_threads = valueint; 1376 1402 p.cpu_throttle_initial = valueint; 1377 1403 p.cpu_throttle_increment = valueint; 1404 + p.downtime_limit = valueint; 1378 1405 } 1379 1406 1380 1407 qmp_migrate_set_parameters(&p, &err);
-1
include/migration/migration.h
··· 129 129 130 130 struct MigrationState 131 131 { 132 - int64_t bandwidth_limit; 133 132 size_t bytes_xfer; 134 133 size_t xfer_limit; 135 134 QemuThread thread;
+50 -33
migration/migration.c
··· 44 44 #define BUFFER_DELAY 100 45 45 #define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) 46 46 47 + /* Time in milliseconds we are allowed to stop the source, 48 + * for sending the last part */ 49 + #define DEFAULT_MIGRATE_SET_DOWNTIME 300 50 + 47 51 /* Default compression thread count */ 48 52 #define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 49 53 /* Default decompression thread count, usually decompression is at ··· 80 84 static bool once; 81 85 static MigrationState current_migration = { 82 86 .state = MIGRATION_STATUS_NONE, 83 - .bandwidth_limit = MAX_THROTTLE, 84 87 .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE, 85 88 .mbps = -1, 86 89 .parameters = { ··· 89 92 .decompress_threads = DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT, 90 93 .cpu_throttle_initial = DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL, 91 94 .cpu_throttle_increment = DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT, 95 + .max_bandwidth = MAX_THROTTLE, 96 + .downtime_limit = DEFAULT_MIGRATE_SET_DOWNTIME, 92 97 }, 93 98 }; 94 99 ··· 517 522 migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf); 518 523 } 519 524 520 - /* amount of nanoseconds we are willing to wait for migration to be down. 521 - * the choice of nanoseconds is because it is the maximum resolution that 522 - * get_clock() can achieve. It is an internal measure. 
All user-visible 523 - * units must be in seconds */ 524 - static uint64_t max_downtime = 300000000; 525 - 526 - uint64_t migrate_max_downtime(void) 527 - { 528 - return max_downtime; 529 - } 530 - 531 525 MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) 532 526 { 533 527 MigrationCapabilityStatusList *head = NULL; ··· 573 567 params->tls_creds = g_strdup(s->parameters.tls_creds); 574 568 params->has_tls_hostname = !!s->parameters.tls_hostname; 575 569 params->tls_hostname = g_strdup(s->parameters.tls_hostname); 570 + params->has_max_bandwidth = true; 571 + params->max_bandwidth = s->parameters.max_bandwidth; 572 + params->has_downtime_limit = true; 573 + params->downtime_limit = s->parameters.downtime_limit; 576 574 577 575 return params; 578 576 } ··· 806 804 "an integer in the range of 1 to 99"); 807 805 return; 808 806 } 807 + if (params->has_max_bandwidth && 808 + (params->max_bandwidth < 0 || params->max_bandwidth > SIZE_MAX)) { 809 + error_setg(errp, "Parameter 'max_bandwidth' expects an integer in the" 810 + " range of 0 to %zu bytes/second", SIZE_MAX); 811 + return; 812 + } 813 + if (params->has_downtime_limit && 814 + (params->downtime_limit < 0 || params->downtime_limit > 2000000)) { 815 + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 816 + "downtime_limit", 817 + "an integer in the range of 0 to 2000000 milliseconds"); 818 + return; 819 + } 809 820 810 821 if (params->has_compress_level) { 811 822 s->parameters.compress_level = params->compress_level; ··· 830 841 g_free(s->parameters.tls_hostname); 831 842 s->parameters.tls_hostname = g_strdup(params->tls_hostname); 832 843 } 844 + if (params->has_max_bandwidth) { 845 + s->parameters.max_bandwidth = params->max_bandwidth; 846 + if (s->to_dst_file) { 847 + qemu_file_set_rate_limit(s->to_dst_file, 848 + s->parameters.max_bandwidth / XFER_LIMIT_RATIO); 849 + } 850 + } 851 + if (params->has_downtime_limit) { 852 + s->parameters.downtime_limit = params->downtime_limit; 853 + } 833 
854 } 834 855 835 856 ··· 1163 1184 1164 1185 void qmp_migrate_set_speed(int64_t value, Error **errp) 1165 1186 { 1166 - MigrationState *s; 1167 - 1168 - if (value < 0) { 1169 - value = 0; 1170 - } 1171 - if (value > SIZE_MAX) { 1172 - value = SIZE_MAX; 1173 - } 1187 + MigrationParameters p = { 1188 + .has_max_bandwidth = true, 1189 + .max_bandwidth = value, 1190 + }; 1174 1191 1175 - s = migrate_get_current(); 1176 - s->bandwidth_limit = value; 1177 - if (s->to_dst_file) { 1178 - qemu_file_set_rate_limit(s->to_dst_file, 1179 - s->bandwidth_limit / XFER_LIMIT_RATIO); 1180 - } 1192 + qmp_migrate_set_parameters(&p, errp); 1181 1193 } 1182 1194 1183 1195 void qmp_migrate_set_downtime(double value, Error **errp) 1184 1196 { 1185 - value *= 1e9; 1186 - value = MAX(0, MIN(UINT64_MAX, value)); 1187 - max_downtime = (uint64_t)value; 1197 + value *= 1000; /* Convert to milliseconds */ 1198 + value = MAX(0, MIN(INT64_MAX, value)); 1199 + 1200 + MigrationParameters p = { 1201 + .has_downtime_limit = true, 1202 + .downtime_limit = value, 1203 + }; 1204 + 1205 + qmp_migrate_set_parameters(&p, errp); 1188 1206 } 1189 1207 1190 1208 bool migrate_postcopy_ram(void) ··· 1791 1809 initial_bytes; 1792 1810 uint64_t time_spent = current_time - initial_time; 1793 1811 double bandwidth = (double)transferred_bytes / time_spent; 1794 - max_size = bandwidth * migrate_max_downtime() / 1000000; 1812 + max_size = bandwidth * s->parameters.downtime_limit; 1795 1813 1796 1814 s->mbps = (((double) transferred_bytes * 8.0) / 1797 1815 ((double) time_spent / 1000.0)) / 1000.0 / 1000.0; ··· 1850 1868 1851 1869 void migrate_fd_connect(MigrationState *s) 1852 1870 { 1853 - /* This is a best 1st approximation. 
ns to ms */ 1854 - s->expected_downtime = max_downtime/1000000; 1871 + s->expected_downtime = s->parameters.downtime_limit; 1855 1872 s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s); 1856 1873 1857 1874 qemu_file_set_blocking(s->to_dst_file, true); 1858 1875 qemu_file_set_rate_limit(s->to_dst_file, 1859 - s->bandwidth_limit / XFER_LIMIT_RATIO); 1876 + s->parameters.max_bandwidth / XFER_LIMIT_RATIO); 1860 1877 1861 1878 /* Notify before starting migration thread */ 1862 1879 notifier_list_notify(&migration_state_notifiers, s);
+20 -3
qapi-schema.json
··· 658 658 # hostname must be provided so that the server's x509 659 659 # certificate identity can be validated. (Since 2.7) 660 660 # 661 + # @max-bandwidth: to set maximum speed for migration. maximum speed in 662 + # bytes per second. (Since 2.8) 663 + # 664 + # @downtime-limit: set maximum tolerated downtime for migration. maximum 665 + # downtime in milliseconds (Since 2.8) 666 + # 661 667 # Since: 2.4 662 668 ## 663 669 { 'enum': 'MigrationParameter', 664 670 'data': ['compress-level', 'compress-threads', 'decompress-threads', 665 671 'cpu-throttle-initial', 'cpu-throttle-increment', 666 - 'tls-creds', 'tls-hostname'] } 672 + 'tls-creds', 'tls-hostname', 'max-bandwidth', 673 + 'downtime-limit'] } 667 674 668 675 # 669 676 # @migrate-set-parameters ··· 712 719 # hostname must be provided so that the server's x509 713 720 # certificate identity can be validated. (Since 2.7) 714 721 # 722 + # @max-bandwidth: to set maximum speed for migration. maximum speed in 723 + # bytes per second. (Since 2.8) 724 + # 725 + # @downtime-limit: set maximum tolerated downtime for migration. maximum 726 + # downtime in milliseconds (Since 2.8) 727 + # 715 728 # Since: 2.4 716 729 ## 717 730 { 'struct': 'MigrationParameters', ··· 721 734 '*cpu-throttle-initial': 'int', 722 735 '*cpu-throttle-increment': 'int', 723 736 '*tls-creds': 'str', 724 - '*tls-hostname': 'str'} } 737 + '*tls-hostname': 'str', 738 + '*max-bandwidth': 'int', 739 + '*downtime-limit': 'int'} } 725 740 ## 726 741 # @query-migrate-parameters 727 742 # ··· 1803 1818 # 1804 1819 # Returns: nothing on success 1805 1820 # 1821 + # Notes: This command is deprecated in favor of 'migrate-set-parameters' 1822 + # 1806 1823 # Since: 0.14.0 1807 1824 ## 1808 1825 { 'command': 'migrate_set_downtime', 'data': {'value': 'number'} } ··· 1816 1833 # 1817 1834 # Returns: nothing on success 1818 1835 # 1819 - # Notes: A value lesser than zero will be automatically round up to zero. 
1836 + # Notes: This command is deprecated in favor of 'migrate-set-parameters' 1820 1837 # 1821 1838 # Since: 0.14.0 1822 1839 ##