#include <inttypes.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <cpuinfo.h>
#include <cpuinfo/internal-api.h>
#include <cpuinfo/log.h>
#include <freebsd/api.h>
#include <x86/api.h>

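/* Helpers for reconstructing APIC IDs from the thread/core bit-field
 * layout reported by CPUID (used in the per-thread loop below). */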
static inline uint32_t max(uint32_t a, uint32_t b) {
	return a > b ? a : b;
}

static inline uint32_t bit_mask(uint32_t bits) {
	return (UINT32_C(1) << bits) - UINT32_C(1);
}

void cpuinfo_x86_freebsd_init(void) {
	struct cpuinfo_processor* processors = NULL;
	struct cpuinfo_core* cores = NULL;
	struct cpuinfo_cluster* clusters = NULL;
	struct cpuinfo_package* packages = NULL;
	struct cpuinfo_cache* l1i = NULL;
	struct cpuinfo_cache* l1d = NULL;
	struct cpuinfo_cache* l2 = NULL;
	struct cpuinfo_cache* l3 = NULL;
	struct cpuinfo_cache* l4 = NULL;

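	/* Query thread/core/package counts from the FreeBSD kernel;
	 * a zero package count signals that detection failed. */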
	struct cpuinfo_freebsd_topology freebsd_topology = cpuinfo_freebsd_detect_topology();
	if (freebsd_topology.packages == 0) {
		cpuinfo_log_error("failed to detect topology");
		goto cleanup;
	}
	processors = calloc(freebsd_topology.threads, sizeof(struct cpuinfo_processor));
	if (processors == NULL) {
		cpuinfo_log_error(
			"failed to allocate %zu bytes for descriptions of %" PRIu32 " logical processors",
			freebsd_topology.threads * sizeof(struct cpuinfo_processor),
			freebsd_topology.threads);
		goto cleanup;
	}
	cores = calloc(freebsd_topology.cores, sizeof(struct cpuinfo_core));
	if (cores == NULL) {
		cpuinfo_log_error(
			"failed to allocate %zu bytes for descriptions of %" PRIu32 " cores",
			freebsd_topology.cores * sizeof(struct cpuinfo_core),
			freebsd_topology.cores);
		goto cleanup;
	}
	/* On x86 a cluster of cores is the biggest group of cores that shares
	 * a cache. */
	clusters = calloc(freebsd_topology.packages, sizeof(struct cpuinfo_cluster));
	if (clusters == NULL) {
		cpuinfo_log_error(
			"failed to allocate %zu bytes for descriptions of %" PRIu32 " core clusters",
			freebsd_topology.packages * sizeof(struct cpuinfo_cluster),
			freebsd_topology.packages);
		goto cleanup;
	}
	packages = calloc(freebsd_topology.packages, sizeof(struct cpuinfo_package));
	if (packages == NULL) {
		cpuinfo_log_error(
			"failed to allocate %zu bytes for descriptions of %" PRIu32 " physical packages",
			freebsd_topology.packages * sizeof(struct cpuinfo_package),
			freebsd_topology.packages);
		goto cleanup;
	}

	struct cpuinfo_x86_processor x86_processor;
	memset(&x86_processor, 0, sizeof(x86_processor));
	cpuinfo_x86_init_processor(&x86_processor);
	char brand_string[48];
	cpuinfo_x86_normalize_brand_string(x86_processor.brand_string, brand_string);

	const uint32_t threads_per_core = freebsd_topology.threads_per_core;
	const uint32_t threads_per_package = freebsd_topology.threads / freebsd_topology.packages;
	const uint32_t cores_per_package = freebsd_topology.cores / freebsd_topology.packages;
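	/* Assume a homogeneous topology: threads and cores are distributed
	 * evenly across packages, and each package forms a single cluster. */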
	for (uint32_t i = 0; i < freebsd_topology.packages; i++) {
		clusters[i] = (struct cpuinfo_cluster){
			.processor_start = i * threads_per_package,
			.processor_count = threads_per_package,
			.core_start = i * cores_per_package,
			.core_count = cores_per_package,
			.cluster_id = 0,
			.package = packages + i,
			.vendor = x86_processor.vendor,
			.uarch = x86_processor.uarch,
			.cpuid = x86_processor.cpuid,
		};
		packages[i].processor_start = i * threads_per_package;
		packages[i].processor_count = threads_per_package;
		packages[i].core_start = i * cores_per_package;
		packages[i].core_count = cores_per_package;
		packages[i].cluster_start = i;
		packages[i].cluster_count = 1;
		cpuinfo_x86_format_package_name(x86_processor.vendor, brand_string, packages[i].name);
	}
	for (uint32_t i = 0; i < freebsd_topology.cores; i++) {
		cores[i] = (struct cpuinfo_core){
			.processor_start = i * threads_per_core,
			.processor_count = threads_per_core,
			.core_id = i % cores_per_package,
			.cluster = clusters + i / cores_per_package,
			.package = packages + i / cores_per_package,
			.vendor = x86_processor.vendor,
			.uarch = x86_processor.uarch,
			.cpuid = x86_processor.cpuid,
		};
	}
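	/* Decompose each linear processor index into SMT, core, and package
	 * components before reconstructing its APIC ID. */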
	for (uint32_t i = 0; i < freebsd_topology.threads; i++) {
		const uint32_t smt_id = i % threads_per_core;
		const uint32_t core_id = i / threads_per_core;
		const uint32_t package_id = i / threads_per_package;

		/* Reconstruct APIC IDs from topology components */
		const uint32_t thread_bits_mask = bit_mask(x86_processor.topology.thread_bits_length);
		const uint32_t core_bits_mask = bit_mask(x86_processor.topology.core_bits_length);
		const uint32_t package_bits_offset =
			max(x86_processor.topology.thread_bits_offset + x86_processor.topology.thread_bits_length,
			    x86_processor.topology.core_bits_offset + x86_processor.topology.core_bits_length);
		const uint32_t apic_id = ((smt_id & thread_bits_mask) << x86_processor.topology.thread_bits_offset) |
			((core_id & core_bits_mask) << x86_processor.topology.core_bits_offset) |
			(package_id << package_bits_offset);
		cpuinfo_log_debug("reconstructed APIC ID 0x%08" PRIx32 " for thread %" PRIu32, apic_id, i);

		processors[i].smt_id = smt_id;
		processors[i].core = cores + i / threads_per_core;
		processors[i].cluster = clusters + i / threads_per_package;
		processors[i].package = packages + i / threads_per_package;
		processors[i].apic_id = apic_id;
	}

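	/*
	 * The FreeBSD kernel does not report which logical processors share
	 * each cache level, so sharing is inferred from the topology above.
	 */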
	uint32_t threads_per_l1 = 0, l1_count = 0;
	if (x86_processor.cache.l1i.size != 0 || x86_processor.cache.l1d.size != 0) {
		/* Assume that threads on the same core share L1 */
		threads_per_l1 = freebsd_topology.threads / freebsd_topology.cores;
		cpuinfo_log_warning(
			"freebsd kernel did not report number of "
			"threads sharing L1 cache; assume %" PRIu32,
			threads_per_l1);
		l1_count = freebsd_topology.threads / threads_per_l1;
		cpuinfo_log_debug("detected %" PRIu32 " L1 caches", l1_count);
	}

	uint32_t threads_per_l2 = 0, l2_count = 0;
	if (x86_processor.cache.l2.size != 0) {
		if (x86_processor.cache.l3.size != 0) {
			/* This is not a last-level cache; assume that threads
			 * on the same core share L2 */
			threads_per_l2 = freebsd_topology.threads / freebsd_topology.cores;
		} else {
			/* This is a last-level cache; assume that threads on
			 * the same package share L2 */
			threads_per_l2 = freebsd_topology.threads / freebsd_topology.packages;
		}
		cpuinfo_log_warning(
			"freebsd kernel did not report number of "
			"threads sharing L2 cache; assume %" PRIu32,
			threads_per_l2);
		l2_count = freebsd_topology.threads / threads_per_l2;
		cpuinfo_log_debug("detected %" PRIu32 " L2 caches", l2_count);
	}

	uint32_t threads_per_l3 = 0, l3_count = 0;
	if (x86_processor.cache.l3.size != 0) {
		/*
		 * Assume that threads on the same package share L3.
		 * However, it is not necessarily the last-level cache (there
		 * may be an L4 cache as well).
		 */
		threads_per_l3 = freebsd_topology.threads / freebsd_topology.packages;
		cpuinfo_log_warning(
			"freebsd kernel did not report number of "
			"threads sharing L3 cache; assume %" PRIu32,
			threads_per_l3);
		l3_count = freebsd_topology.threads / threads_per_l3;
		cpuinfo_log_debug("detected %" PRIu32 " L3 caches", l3_count);
	}

	uint32_t threads_per_l4 = 0, l4_count = 0;
	if (x86_processor.cache.l4.size != 0) {
		/*
		 * Assume that all threads share this L4.
		 * As of now, L4 cache exists only on notebook x86 CPUs, which
		 * are single-package, but multi-socket systems could have
		 * shared L4 (like on IBM POWER8).
		 */
		threads_per_l4 = freebsd_topology.threads;
		cpuinfo_log_warning(
			"freebsd kernel did not report number of "
			"threads sharing L4 cache; assume %" PRIu32,
			threads_per_l4);
		l4_count = freebsd_topology.threads / threads_per_l4;
		cpuinfo_log_debug("detected %" PRIu32 " L4 caches", l4_count);
	}

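	/* Allocate per-level cache descriptors and point every logical
	 * processor at the cache instance it is assumed to share. */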
	if (x86_processor.cache.l1i.size != 0) {
		l1i = calloc(l1_count, sizeof(struct cpuinfo_cache));
		if (l1i == NULL) {
			cpuinfo_log_error(
				"failed to allocate %zu bytes for descriptions of "
				"%" PRIu32 " L1I caches",
				l1_count * sizeof(struct cpuinfo_cache),
				l1_count);
			goto cleanup;
		}
		for (uint32_t c = 0; c < l1_count; c++) {
			l1i[c] = (struct cpuinfo_cache){
				.size = x86_processor.cache.l1i.size,
				.associativity = x86_processor.cache.l1i.associativity,
				.sets = x86_processor.cache.l1i.sets,
				.partitions = x86_processor.cache.l1i.partitions,
				.line_size = x86_processor.cache.l1i.line_size,
				.flags = x86_processor.cache.l1i.flags,
				.processor_start = c * threads_per_l1,
				.processor_count = threads_per_l1,
			};
		}
		for (uint32_t t = 0; t < freebsd_topology.threads; t++) {
			processors[t].cache.l1i = &l1i[t / threads_per_l1];
		}
	}

	if (x86_processor.cache.l1d.size != 0) {
		l1d = calloc(l1_count, sizeof(struct cpuinfo_cache));
		if (l1d == NULL) {
			cpuinfo_log_error(
				"failed to allocate %zu bytes for descriptions of "
				"%" PRIu32 " L1D caches",
				l1_count * sizeof(struct cpuinfo_cache),
				l1_count);
			goto cleanup;
		}
		for (uint32_t c = 0; c < l1_count; c++) {
			l1d[c] = (struct cpuinfo_cache){
				.size = x86_processor.cache.l1d.size,
				.associativity = x86_processor.cache.l1d.associativity,
				.sets = x86_processor.cache.l1d.sets,
				.partitions = x86_processor.cache.l1d.partitions,
				.line_size = x86_processor.cache.l1d.line_size,
				.flags = x86_processor.cache.l1d.flags,
				.processor_start = c * threads_per_l1,
				.processor_count = threads_per_l1,
			};
		}
		for (uint32_t t = 0; t < freebsd_topology.threads; t++) {
			processors[t].cache.l1d = &l1d[t / threads_per_l1];
		}
	}

	if (l2_count != 0) {
		l2 = calloc(l2_count, sizeof(struct cpuinfo_cache));
		if (l2 == NULL) {
			cpuinfo_log_error(
				"failed to allocate %zu bytes for descriptions of "
				"%" PRIu32 " L2 caches",
				l2_count * sizeof(struct cpuinfo_cache),
				l2_count);
			goto cleanup;
		}
		for (uint32_t c = 0; c < l2_count; c++) {
			l2[c] = (struct cpuinfo_cache){
				.size = x86_processor.cache.l2.size,
				.associativity = x86_processor.cache.l2.associativity,
				.sets = x86_processor.cache.l2.sets,
				.partitions = x86_processor.cache.l2.partitions,
				.line_size = x86_processor.cache.l2.line_size,
				.flags = x86_processor.cache.l2.flags,
				.processor_start = c * threads_per_l2,
				.processor_count = threads_per_l2,
			};
		}
		for (uint32_t t = 0; t < freebsd_topology.threads; t++) {
			processors[t].cache.l2 = &l2[t / threads_per_l2];
		}
	}

	if (l3_count != 0) {
		l3 = calloc(l3_count, sizeof(struct cpuinfo_cache));
		if (l3 == NULL) {
			cpuinfo_log_error(
				"failed to allocate %zu bytes for descriptions of "
				"%" PRIu32 " L3 caches",
				l3_count * sizeof(struct cpuinfo_cache),
				l3_count);
			goto cleanup;
		}
		for (uint32_t c = 0; c < l3_count; c++) {
			l3[c] = (struct cpuinfo_cache){
				.size = x86_processor.cache.l3.size,
				.associativity = x86_processor.cache.l3.associativity,
				.sets = x86_processor.cache.l3.sets,
				.partitions = x86_processor.cache.l3.partitions,
				.line_size = x86_processor.cache.l3.line_size,
				.flags = x86_processor.cache.l3.flags,
				.processor_start = c * threads_per_l3,
				.processor_count = threads_per_l3,
			};
		}
		for (uint32_t t = 0; t < freebsd_topology.threads; t++) {
			processors[t].cache.l3 = &l3[t / threads_per_l3];
		}
	}

	if (l4_count != 0) {
		l4 = calloc(l4_count, sizeof(struct cpuinfo_cache));
		if (l4 == NULL) {
			cpuinfo_log_error(
				"failed to allocate %zu bytes for descriptions of "
				"%" PRIu32 " L4 caches",
				l4_count * sizeof(struct cpuinfo_cache),
				l4_count);
			goto cleanup;
		}
		for (uint32_t c = 0; c < l4_count; c++) {
			l4[c] = (struct cpuinfo_cache){
				.size = x86_processor.cache.l4.size,
				.associativity = x86_processor.cache.l4.associativity,
				.sets = x86_processor.cache.l4.sets,
				.partitions = x86_processor.cache.l4.partitions,
				.line_size = x86_processor.cache.l4.line_size,
				.flags = x86_processor.cache.l4.flags,
				.processor_start = c * threads_per_l4,
				.processor_count = threads_per_l4,
			};
		}
		for (uint32_t t = 0; t < freebsd_topology.threads; t++) {
			processors[t].cache.l4 = &l4[t / threads_per_l4];
		}
	}

	/* Commit changes */
	cpuinfo_processors = processors;
	cpuinfo_cores = cores;
	cpuinfo_clusters = clusters;
	cpuinfo_packages = packages;
	cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
	cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
	cpuinfo_cache[cpuinfo_cache_level_2] = l2;
	cpuinfo_cache[cpuinfo_cache_level_3] = l3;
	cpuinfo_cache[cpuinfo_cache_level_4] = l4;

	cpuinfo_processors_count = freebsd_topology.threads;
	cpuinfo_cores_count = freebsd_topology.cores;
	cpuinfo_clusters_count = freebsd_topology.packages;
	cpuinfo_packages_count = freebsd_topology.packages;
	cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count;
	cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count;
	cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
	cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
	cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count;
	cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);

	cpuinfo_global_uarch = (struct cpuinfo_uarch_info){
		.uarch = x86_processor.uarch,
		.cpuid = x86_processor.cpuid,
		.processor_count = freebsd_topology.threads,
		.core_count = freebsd_topology.cores,
	};

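	/* Full memory barrier: ensure the globals written above are visible
	 * to other threads before cpuinfo_is_initialized reads as true. */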
	__sync_synchronize();

	cpuinfo_is_initialized = true;

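	/* Ownership of the allocations now rests with the global pointers;
	 * reset the locals so the cleanup path below does not free them. */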
	processors = NULL;
	cores = NULL;
	clusters = NULL;
	packages = NULL;
	l1i = l1d = l2 = l3 = l4 = NULL;

cleanup:
	free(processors);
	free(cores);
	free(clusters);
	free(packages);
	free(l1i);
	free(l1d);
	free(l2);
	free(l3);
	free(l4);
}