Skip to content

Commit e2b5207

Browse files
author
Nick Spain
authored
mach: support archive entries in fat binaries (fixes #320)
Multi-arch containers can be made up of archives or Mach-O binaries. This adds support for archives. It is a breaking change because previously the `MachO` struct was returned and now we're returning a new enum: `SingleArch`. This required some refactoring of the `lib.rs` file to share the required functions and data structures for parsing the hint at the top of files. `take_hint_bytes`, `Hint` and `HintData` don't require any special features, but I think they were inside the `if_everything!` because that's the only case they were used in. I did expand the API by making `take_hint_bytes` public; this was mainly because I thought it was a better solution than trying to maintain the various combinations of features required to stop the compiler warning about the function being unused. It's also a function that may be useful for goblin users. Also add tests for parsing fat binaries. This tests parsing fat binaries made up of Mach-O binaries and archives. I've checked in the binaries to make testing easier as they're quite small (both are built from the hello_world.c file in the same directory). Above the tests themselves are instructions for how to compile the binaries they use.
1 parent a20ce47 commit e2b5207

File tree

7 files changed

+180
-64
lines changed

7 files changed

+180
-64
lines changed

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ endian_fd = ["alloc"]
4545
elf32 = []
4646
elf64 = []
4747
# for now we will require mach and pe to be alloc + endian_fd
48-
mach32 = ["alloc", "endian_fd"]
49-
mach64 = ["alloc", "endian_fd"]
48+
mach32 = ["alloc", "endian_fd", "archive"]
49+
mach64 = ["alloc", "endian_fd", "archive"]
5050
pe32 = ["alloc", "endian_fd"]
5151
pe64 = ["alloc", "endian_fd"]
5252
archive = ["alloc"]

assets/hello_world.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// This is a file used to compile some of the binaries in this
2+
// directory for testing purposes.
3+
#include <stdio.h>
4+
5+
extern void say(char *name) {
6+
printf("Hello, %s!", name);
7+
}

assets/hello_world_fat_archives

960 Bytes
Binary file not shown.

assets/hello_world_fat_binaries

96.6 KB
Binary file not shown.

examples/dyldinfo.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use goblin::mach;
2+
use goblin::mach::SingleArch;
23
use std::borrow::Cow;
34
use std::env;
45
use std::fs;
@@ -125,12 +126,18 @@ fn print_multi_arch(
125126
if let Some((cputype, _)) = mach::constants::cputype::get_arch_from_flag(&arch) {
126127
for bin in multi_arch.into_iter() {
127128
match bin {
128-
Ok(bin) => {
129+
Ok(SingleArch::MachO(bin)) => {
129130
if bin.header.cputype == cputype {
130131
print(&bin, bind, lazy_bind);
131132
process::exit(0);
132133
}
133134
}
135+
Ok(SingleArch::Archive(_)) => {
136+
// dyld_info doesn't seem to handle archives
137+
// in fat binaries, so neither do we.
138+
println!("Does not contain specified arches");
139+
process::exit(1);
140+
}
134141
Err(err) => {
135142
println!("err: {:?}", err);
136143
process::exit(1);

src/lib.rs

Lines changed: 29 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,33 @@ pub mod container {
201201
}
202202
}
203203

204+
/// Takes a reference to the first 16 bytes of the total bytes slice and convert it to an array for `peek_bytes` to use.
205+
/// Returns None if bytes's length is less than 16.
206+
#[allow(unused)]
207+
fn take_hint_bytes(bytes: &[u8]) -> Option<&[u8; 16]> {
208+
bytes
209+
.get(0..16)
210+
.and_then(|hint_bytes_slice| hint_bytes_slice.try_into().ok())
211+
}
212+
213+
#[derive(Debug, Default)]
214+
/// Information obtained from a peek `Hint`
215+
pub struct HintData {
216+
pub is_lsb: bool,
217+
pub is_64: Option<bool>,
218+
}
219+
220+
#[derive(Debug)]
221+
/// A hint at the underlying binary format for 16 bytes of arbitrary data
222+
pub enum Hint {
223+
Elf(HintData),
224+
Mach(HintData),
225+
MachFat(usize),
226+
PE,
227+
Archive,
228+
Unknown(u64),
229+
}
230+
204231
macro_rules! if_everything {
205232
($($i:item)*) => ($(
206233
#[cfg(all(feature = "endian_fd", feature = "elf64", feature = "elf32", feature = "pe64", feature = "pe32", feature = "mach64", feature = "mach32", feature = "archive"))]
@@ -210,28 +237,9 @@ macro_rules! if_everything {
210237

211238
if_everything! {
212239

213-
#[derive(Debug, Default)]
214-
/// Information obtained from a peek `Hint`
215-
pub struct HintData {
216-
pub is_lsb: bool,
217-
pub is_64: Option<bool>,
218-
}
219-
220-
#[derive(Debug)]
221-
/// A hint at the underlying binary format for 16 bytes of arbitrary data
222-
pub enum Hint {
223-
Elf(HintData),
224-
Mach(HintData),
225-
MachFat(usize),
226-
PE,
227-
Archive,
228-
Unknown(u64),
229-
}
230-
231240
/// Peeks at `bytes`, and returns a `Hint`
232241
pub fn peek_bytes(bytes: &[u8; 16]) -> error::Result<Hint> {
233-
use scroll::{Pread, LE, BE};
234-
use crate::mach::{fat, header};
242+
use scroll::{Pread, LE};
235243
if &bytes[0..elf::header::SELFMAG] == elf::header::ELFMAG {
236244
let class = bytes[elf::header::EI_CLASS];
237245
let is_lsb = bytes[elf::header::EI_DATA] == elf::header::ELFDATA2LSB;
@@ -248,23 +256,7 @@ if_everything! {
248256
} else if (&bytes[0..2]).pread_with::<u16>(0, LE)? == pe::header::DOS_MAGIC {
249257
Ok(Hint::PE)
250258
} else {
251-
let (magic, maybe_ctx) = mach::parse_magic_and_ctx(bytes, 0)?;
252-
match magic {
253-
fat::FAT_MAGIC => {
254-
// should probably verify this is always Big Endian...
255-
let narchitectures = bytes.pread_with::<u32>(4, BE)? as usize;
256-
Ok(Hint::MachFat(narchitectures))
257-
},
258-
header::MH_CIGAM_64 | header::MH_CIGAM | header::MH_MAGIC_64 | header::MH_MAGIC => {
259-
if let Some(ctx) = maybe_ctx {
260-
Ok(Hint::Mach(HintData { is_lsb: ctx.le.is_little(), is_64: Some(ctx.container.is_big()) }))
261-
} else {
262-
Err(error::Error::Malformed(format!("Correct mach magic {:#x} does not have a matching parsing context!", magic)))
263-
}
264-
},
265-
// its something else
266-
_ => Ok(Hint::Unknown(bytes.pread::<u64>(0)?))
267-
}
259+
mach::peek_bytes(bytes)
268260
}
269261
}
270262

@@ -279,16 +271,6 @@ if_everything! {
279271
peek_bytes(&bytes)
280272
}
281273

282-
/// Takes a reference to the first 16 bytes of the total bytes slice and convert it to an array for `peek_bytes` to use.
283-
/// Returns None if bytes's length is less than 16.
284-
fn take_hint_bytes(bytes: &[u8]) -> Option<&[u8; 16]> {
285-
use core::convert::TryInto;
286-
bytes.get(0..16)
287-
.and_then(|hint_bytes_slice| {
288-
hint_bytes_slice.try_into().ok()
289-
})
290-
}
291-
292274
#[derive(Debug)]
293275
#[allow(clippy::large_enum_variant)]
294276
/// A parseable object that goblin understands

src/mach/mod.rs

Lines changed: 134 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ use log::debug;
77
use scroll::ctx::SizeWith;
88
use scroll::{Pread, BE};
99

10-
use crate::container;
11-
use crate::error;
10+
use crate::{archive, container};
11+
use crate::{error, take_hint_bytes};
1212

1313
pub mod bind_opcodes;
1414
pub mod constants;
@@ -296,6 +296,15 @@ pub struct FatArchIterator<'a> {
296296
start: usize,
297297
}
298298

299+
/// A single architecture from a multi architecture binary container
300+
/// ([MultiArch]).
301+
#[derive(Debug)]
302+
#[allow(clippy::large_enum_variant)]
303+
pub enum SingleArch<'a> {
304+
MachO(MachO<'a>),
305+
Archive(archive::Archive<'a>),
306+
}
307+
299308
impl<'a> Iterator for FatArchIterator<'a> {
300309
type Item = error::Result<fat::FatArch>;
301310
fn next(&mut self) -> Option<Self::Item> {
@@ -313,16 +322,65 @@ impl<'a> Iterator for FatArchIterator<'a> {
313322
}
314323
}
315324

316-
/// Iterator over every `MachO` binary contained in this `MultiArch` container
317-
pub struct MachOIterator<'a> {
325+
/// Iterator over every entry contained in this `MultiArch` container
326+
pub struct SingleArchIterator<'a> {
318327
index: usize,
319328
data: &'a [u8],
320329
narches: usize,
321330
start: usize,
322331
}
323332

324-
impl<'a> Iterator for MachOIterator<'a> {
325-
type Item = error::Result<MachO<'a>>;
333+
pub fn peek_bytes(bytes: &[u8; 16]) -> error::Result<crate::Hint> {
334+
if &bytes[0..archive::SIZEOF_MAGIC] == archive::MAGIC {
335+
Ok(crate::Hint::Archive)
336+
} else {
337+
let (magic, maybe_ctx) = parse_magic_and_ctx(bytes, 0)?;
338+
match magic {
339+
header::MH_CIGAM_64 | header::MH_CIGAM | header::MH_MAGIC_64 | header::MH_MAGIC => {
340+
if let Some(ctx) = maybe_ctx {
341+
Ok(crate::Hint::Mach(crate::HintData {
342+
is_lsb: ctx.le.is_little(),
343+
is_64: Some(ctx.container.is_big()),
344+
}))
345+
} else {
346+
Err(error::Error::Malformed(format!(
347+
"Correct mach magic {:#x} does not have a matching parsing context!",
348+
magic
349+
)))
350+
}
351+
}
352+
fat::FAT_MAGIC => {
353+
// should probably verify this is always Big Endian...
354+
let narchitectures = bytes.pread_with::<u32>(4, BE)? as usize;
355+
Ok(crate::Hint::MachFat(narchitectures))
356+
}
357+
_ => Ok(crate::Hint::Unknown(bytes.pread::<u64>(0)?)),
358+
}
359+
}
360+
}
361+
362+
fn extract_multi_entry(bytes: &[u8]) -> error::Result<SingleArch> {
363+
if let Some(hint_bytes) = take_hint_bytes(bytes) {
364+
match peek_bytes(hint_bytes)? {
365+
crate::Hint::Mach(_) => {
366+
let binary = MachO::parse(bytes, 0)?;
367+
Ok(SingleArch::MachO(binary))
368+
}
369+
crate::Hint::Archive => {
370+
let archive = archive::Archive::parse(bytes)?;
371+
Ok(SingleArch::Archive(archive))
372+
}
373+
_ => Err(error::Error::Malformed(format!(
374+
"multi-arch entry must be a Mach-O binary or an archive"
375+
))),
376+
}
377+
} else {
378+
Err(error::Error::Malformed(format!("Object is too small")))
379+
}
380+
}
381+
382+
impl<'a> Iterator for SingleArchIterator<'a> {
383+
type Item = error::Result<SingleArch<'a>>;
326384
fn next(&mut self) -> Option<Self::Item> {
327385
if self.index >= self.narches {
328386
None
@@ -333,8 +391,7 @@ impl<'a> Iterator for MachOIterator<'a> {
333391
match self.data.pread_with::<fat::FatArch>(offset, scroll::BE) {
334392
Ok(arch) => {
335393
let bytes = arch.slice(self.data);
336-
let binary = MachO::parse(bytes, 0);
337-
Some(binary)
394+
Some(extract_multi_entry(bytes))
338395
}
339396
Err(e) => Some(Err(e.into())),
340397
}
@@ -343,10 +400,10 @@ impl<'a> Iterator for MachOIterator<'a> {
343400
}
344401

345402
impl<'a, 'b> IntoIterator for &'b MultiArch<'a> {
346-
type Item = error::Result<MachO<'a>>;
347-
type IntoIter = MachOIterator<'a>;
403+
type Item = error::Result<SingleArch<'a>>;
404+
type IntoIter = SingleArchIterator<'a>;
348405
fn into_iter(self) -> Self::IntoIter {
349-
MachOIterator {
406+
SingleArchIterator {
350407
index: 0,
351408
data: self.data,
352409
narches: self.narches,
@@ -387,7 +444,7 @@ impl<'a> MultiArch<'a> {
387444
Ok(arches)
388445
}
389446
/// Try to get the Mach-o binary at `index`
390-
pub fn get(&self, index: usize) -> error::Result<MachO<'a>> {
447+
pub fn get(&self, index: usize) -> error::Result<SingleArch<'a>> {
391448
if index >= self.narches {
392449
return Err(error::Error::Malformed(format!(
393450
"Requested the {}-th binary, but there are only {} architectures in this container",
@@ -397,13 +454,13 @@ impl<'a> MultiArch<'a> {
397454
let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start;
398455
let arch = self.data.pread_with::<fat::FatArch>(offset, scroll::BE)?;
399456
let bytes = arch.slice(self.data);
400-
Ok(MachO::parse(bytes, 0)?)
457+
extract_multi_entry(bytes)
401458
}
402459

403460
pub fn find<F: Fn(error::Result<fat::FatArch>) -> bool>(
404461
&'a self,
405462
f: F,
406-
) -> Option<error::Result<MachO<'a>>> {
463+
) -> Option<error::Result<SingleArch<'a>>> {
407464
for (i, arch) in self.iter_arches().enumerate() {
408465
if f(arch) {
409466
return Some(self.get(i));
@@ -464,3 +521,66 @@ impl<'a> Mach<'a> {
464521
}
465522
}
466523
}
524+
525+
#[cfg(test)]
526+
mod test {
527+
use super::{Mach, SingleArch};
528+
529+
#[test]
530+
fn parse_multi_arch_of_macho_binaries() {
531+
// Create via:
532+
// clang -arch arm64 -shared -o /tmp/hello_world_arm hello_world.c
533+
// clang -arch x86_64 -shared -o /tmp/hello_world_x86_64 hello_world.c
534+
// lipo -create -output hello_world_fat_binaries /tmp/hello_world_arm /tmp/hello_world_x86_64
535+
// strip hello_world_fat_binaries
536+
let bytes = include_bytes!(concat!(
537+
env!("CARGO_MANIFEST_DIR"),
538+
"/assets/hello_world_fat_binaries"
539+
));
540+
let mach = Mach::parse(bytes).expect("failed to parse input file");
541+
match mach {
542+
Mach::Fat(fat) => {
543+
assert!(fat.into_iter().count() > 0);
544+
for entry in fat.into_iter() {
545+
let entry = entry.expect("failed to read entry");
546+
match entry {
547+
SingleArch::MachO(macho) => {
548+
assert!(macho.symbols().count() > 0);
549+
}
550+
_ => panic!("expected MultiArchEntry::MachO, got {:?}", entry),
551+
}
552+
}
553+
}
554+
Mach::Binary(_) => panic!("expected Mach::Fat, got Mach::Binary"),
555+
}
556+
}
557+
558+
#[test]
559+
fn parse_multi_arch_of_archives() {
560+
// Created with:
561+
// clang -c -o /tmp/hello_world.o hello_world.c
562+
// ar -r /tmp/hello_world.a /tmp/hello_world.o
563+
// lipo -create -output hello_world_fat_archives /tmp/hello_world.a
564+
// strip hello_world_fat_archives
565+
let bytes = include_bytes!(concat!(
566+
env!("CARGO_MANIFEST_DIR"),
567+
"/assets/hello_world_fat_archives"
568+
));
569+
let mach = Mach::parse(bytes).expect("failed to parse input file");
570+
match mach {
571+
Mach::Fat(fat) => {
572+
assert!(fat.into_iter().count() > 0);
573+
for entry in fat.into_iter() {
574+
let entry = entry.expect("failed to read entry");
575+
match entry {
576+
SingleArch::Archive(archive) => {
577+
assert!(!archive.members().is_empty())
578+
}
579+
_ => panic!("expected MultiArchEntry::Archive, got {:?}", entry),
580+
}
581+
}
582+
}
583+
Mach::Binary(_) => panic!("expected Mach::Fat, got Mach::Binary"),
584+
}
585+
}
586+
}

0 commit comments

Comments
 (0)