1
1
package com .datadog .debugger .util ;
2
2
3
+ import java .nio .charset .StandardCharsets ;
3
4
import org .objectweb .asm .ClassReader ;
4
5
import org .objectweb .asm .tree .ClassNode ;
5
6
6
7
/** Helper class for extracting information from a class file */
7
8
public class ClassFileHelper {
9
+ private static final int CONSTANT_POOL_COUNT_OFFSET = 8 ;
10
+ private static final int CONSTANT_POOL_BASE_OFFSET = 10 ;
11
+
8
12
public static String extractSourceFile (byte [] classFileBuffer ) {
9
- // TODO maybe by scanning the byte array directly we can avoid doing an expensive parsing
13
+ return extractSourceFileOffsetVersion (classFileBuffer );
14
+ }
15
+
16
+ // Version using ASM library
17
+ private static String extractSourceFileASM (byte [] classFileBuffer ) {
10
18
ClassReader classReader = new ClassReader (classFileBuffer );
11
19
ClassNode classNode = new ClassNode ();
12
20
classReader .accept (classNode , ClassReader .SKIP_FRAMES );
@@ -33,4 +41,126 @@ public static String stripPackagePath(String classPath) {
33
41
}
34
42
return classPath ;
35
43
}
44
+
45
+ // Based on JVM spec: https://docs.oracle.com/javase/specs/jvms/se23/html/jvms-4.html
46
+ // Extracts the SourceFile attribute from a Java class file byte array with minimal parsing.
47
+ // This method is based on the JVM spec and does not use any external libraries.
48
+ // We are scanning the constant pool to keep file offsets for later fetching of the SourceFile
49
+ // attribute value. As the constant pool is a variable length structure, we need to scan them
50
+ // and based on the tag, we can calculate the length of the entry to skip to the next one.
51
+ private static String extractSourceFileOffsetVersion (byte [] classFileBytes ) {
52
+ // Quick validation of minimum class file size and magic number
53
+ if (classFileBytes == null
54
+ || classFileBytes .length < 10
55
+ || classFileBytes [0 ] != (byte ) 0xCA
56
+ || classFileBytes [1 ] != (byte ) 0xFE
57
+ || classFileBytes [2 ] != (byte ) 0xBA
58
+ || classFileBytes [3 ] != (byte ) 0xBE ) {
59
+ return null ;
60
+ }
61
+ int constantPoolCount = readUnsignedShort (classFileBytes , CONSTANT_POOL_COUNT_OFFSET );
62
+ int [] constantPoolOffsets = new int [constantPoolCount ];
63
+ int currentOffset = CONSTANT_POOL_BASE_OFFSET ;
64
+ // based on the JVM spec, constant pool starts from index 1 until constantPoolCount - 1
65
+ for (int i = 0 ; i < constantPoolCount - 1 ; i ++) {
66
+ constantPoolOffsets [i ] = currentOffset ;
67
+ int tag = classFileBytes [constantPoolOffsets [i ]];
68
+ switch (tag ) {
69
+ case 1 : // CONSTANT_Utf8
70
+ int length = readUnsignedShort (classFileBytes , constantPoolOffsets [i ] + 1 );
71
+ currentOffset += 3 + length ;
72
+ break ;
73
+ case 7 : // CONSTANT_Class
74
+ case 8 : // CONSTANT_String
75
+ case 16 : // CONSTANT_MethodType
76
+ case 19 : // CONSTANT_Module
77
+ case 20 : // CONSTANT_Package
78
+ currentOffset += 3 ;
79
+ break ;
80
+ case 15 : // CONSTANT_MethodHandle
81
+ currentOffset += 4 ;
82
+ break ;
83
+ case 3 : // CONSTANT_Integer
84
+ case 4 : // CONSTANT_Float
85
+ case 9 : // CONSTANT_Fieldref
86
+ case 10 : // CONSTANT_Methodref
87
+ case 11 : // CONSTANT_InterfaceMethodref
88
+ case 12 : // CONSTANT_NameAndType
89
+ case 17 : // CONSTANT_Dynamic
90
+ case 18 : // CONSTANT_InvokeDynamic
91
+ currentOffset += 5 ;
92
+ break ;
93
+ case 5 : // CONSTANT_Long
94
+ case 6 : // CONSTANT_Double
95
+ currentOffset += 9 ;
96
+ i ++; // Double slot
97
+ break ;
98
+ default :
99
+ throw new IllegalArgumentException ("Unknown constant pool tag: " + tag );
100
+ }
101
+ }
102
+ currentOffset += 2 ; // Skip access flags
103
+ currentOffset += 2 ; // Skip this class
104
+ currentOffset += 2 ; // Skip super class
105
+ int interfacesCount = readUnsignedShort (classFileBytes , currentOffset );
106
+ currentOffset += 2 + interfacesCount * 2 ; // Skip interfaces
107
+ // skip fields
108
+ currentOffset = skipFieldsOrMethods (classFileBytes , currentOffset );
109
+ // skip Methods
110
+ currentOffset = skipFieldsOrMethods (classFileBytes , currentOffset );
111
+ int attributesCount = readUnsignedShort (classFileBytes , currentOffset );
112
+ currentOffset += 2 ; // Skip attributes count
113
+ for (int i = 0 ; i < attributesCount ; i ++) {
114
+ int attributeNameIndex = readUnsignedShort (classFileBytes , currentOffset );
115
+ currentOffset += 2 ; // Skip attribute name index
116
+ int attributeLength = (int ) readUnsignedInt (classFileBytes , currentOffset );
117
+ currentOffset += 4 ; // Skip attribute length
118
+ if (attributeNameIndex == 0 ) {
119
+ continue ;
120
+ }
121
+ // read attribute name
122
+ int utf8Offset = constantPoolOffsets [attributeNameIndex - 1 ];
123
+ int utf8Len = readUnsignedShort (classFileBytes , utf8Offset + 1 );
124
+ String utf8 = new String (classFileBytes , utf8Offset + 3 , utf8Len , StandardCharsets .UTF_8 );
125
+ if ("SourceFile" .equals (utf8 )) {
126
+ // read SourceFile attribute
127
+ int sourceFileIndex = readUnsignedShort (classFileBytes , currentOffset );
128
+ int sourceFileOffset = constantPoolOffsets [sourceFileIndex - 1 ];
129
+ int sourceFileLen = readUnsignedShort (classFileBytes , sourceFileOffset + 1 );
130
+ return new String (
131
+ classFileBytes , sourceFileOffset + 3 , sourceFileLen , StandardCharsets .UTF_8 );
132
+ }
133
+ currentOffset += attributeLength ; // Skip attribute data
134
+ }
135
+ return null ;
136
+ }
137
+
138
+ private static int skipFieldsOrMethods (byte [] classFileBytes , int currentOffset ) {
139
+ int fieldsCount = readUnsignedShort (classFileBytes , currentOffset );
140
+ currentOffset += 2 ; // Skip count
141
+ for (int i = 0 ; i < fieldsCount ; i ++) {
142
+ currentOffset += 6 ; // Skip access flags, name index, descriptor index
143
+ int attributesCount = readUnsignedShort (classFileBytes , currentOffset );
144
+ currentOffset += 2 ; // Skip attributes count
145
+ for (int j = 0 ; j < attributesCount ; j ++) {
146
+ currentOffset += 2 ; // Skip attribute name index
147
+ int attributeLength = (int ) readUnsignedInt (classFileBytes , currentOffset );
148
+ currentOffset += 4 + attributeLength ; // Skip attribute length and data
149
+ }
150
+ }
151
+ return currentOffset ;
152
+ }
153
+
154
+ // read unsigned short from byte array
155
+ private static int readUnsignedShort (byte [] bytes , int offset ) {
156
+ return ((bytes [offset ] & 0xFF ) << 8 ) | (bytes [offset + 1 ] & 0xFF );
157
+ }
158
+
159
+ // read unsigned int from byte array
160
+ private static long readUnsignedInt (byte [] bytes , int offset ) {
161
+ return ((long ) (bytes [offset ] & 0xFF ) << 24 )
162
+ + ((bytes [offset + 1 ] & 0xFF ) << 16 )
163
+ + ((bytes [offset + 2 ] & 0xFF ) << 8 )
164
+ + (bytes [offset + 3 ] & 0xFF );
165
+ }
36
166
}
0 commit comments