Skip to content

Add support for scanning jar from loaded class #8370

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 26, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -147,13 +147,14 @@ private static void startDynamicInstrumentation(
configurationPoller = sco.configurationPoller(config);
if (configurationPoller != null) {
if (config.isSymbolDatabaseEnabled()) {
SymbolAggregator symbolAggregator =
new SymbolAggregator(
classNameFilter,
debuggerSink.getSymbolSink(),
config.getSymbolDatabaseFlushThreshold());
symbolAggregator.start();
symDBEnablement =
new SymDBEnablement(
instrumentation,
config,
new SymbolAggregator(
debuggerSink.getSymbolSink(), config.getSymbolDatabaseFlushThreshold()),
classNameFilter);
new SymDBEnablement(instrumentation, config, symbolAggregator, classNameFilter);
if (config.isSymbolDatabaseForceUpload()) {
symDBEnablement.startSymbolExtraction();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package com.datadog.debugger.symbol;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link SymDBReport} implementation that accumulates scan diagnostics in memory and emits them as
 * a single INFO log line when {@link #report()} is invoked.
 *
 * <p>All state lives in plain, unsynchronized collections.
 */
public class BasicSymDBReport implements SymDBReport {
  private static final Logger LOGGER = LoggerFactory.getLogger(BasicSymDBReport.class);

  /** Layout of the summary line; the arguments are supplied in this order by {@link #report()}. */
  private static final String REPORT_FORMAT =
      "SymDB Report: Scanned jar count=%d, Total class count=%d, class count by jar: %s, Scanned jars: %s, Location errors: %s Missing jars: %s IOExceptions: %s";

  private final List<String> scannedJars = new ArrayList<>();
  private final Map<String, Integer> classCountByJar = new HashMap<>();
  private final List<String> locationErrors = new ArrayList<>();
  private final Set<String> missingJars = new HashSet<>();
  private final Map<String, String> ioExceptions = new HashMap<>();

  /**
   * Records a jar path as missing; recording the same path again has no further effect.
   *
   * @param jarPath path of the jar reported as missing
   */
  public void addMissingJar(String jarPath) {
    missingJars.add(jarPath);
  }

  /**
   * Records an I/O failure for a jar, keeping only the exception's {@code toString()} form. A
   * later failure recorded for the same path replaces the earlier one.
   *
   * @param jarPath path of the jar the failure relates to
   * @param e the exception to record
   */
  public void addIOException(String jarPath, IOException e) {
    String description = e.toString();
    ioExceptions.put(jarPath, description);
  }

  /**
   * Appends a location string to the list of location errors.
   *
   * @param locationStr the location string to record
   */
  public void addLocationError(String locationStr) {
    locationErrors.add(locationStr);
  }

  /**
   * Adds one to the class counter kept for the given jar, starting from 1 on first use.
   *
   * @param jarPath path of the jar whose counter is incremented
   */
  public void incClassCount(String jarPath) {
    classCountByJar.merge(jarPath, 1, Integer::sum);
  }

  /**
   * Appends a jar path to the list of scanned jars.
   *
   * @param jarPath path of the scanned jar
   */
  public void addScannedJar(String jarPath) {
    scannedJars.add(jarPath);
  }

  /** Logs everything recorded so far as one INFO line, including the sum of all class counts. */
  public void report() {
    int classTotal = 0;
    for (int perJarCount : classCountByJar.values()) {
      classTotal += perJarCount;
    }
    String summary =
        String.format(
            REPORT_FORMAT,
            scannedJars.size(),
            classTotal,
            classCountByJar,
            scannedJars,
            locationErrors,
            missingJars,
            ioExceptions);
    LOGGER.info(summary);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,7 @@ public static Path extractJarPath(ProtectionDomain protectionDomain, SymDBReport
} else if (locationStr.startsWith(FILE_PREFIX)) {
return getPathFromPrefixedFileName(locationStr, FILE_PREFIX, locationStr.length());
}
if (symDBReport != null) {
symDBReport.addLocationError(locationStr);
}
symDBReport.addLocationError(locationStr);
return null;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
package com.datadog.debugger.symbol;

import static com.datadog.debugger.symbol.JarScanner.trimPrefixes;

import com.datadog.debugger.util.MoshiHelper;
import com.squareup.moshi.JsonAdapter;
import datadog.remoteconfig.PollingRateHinter;
Expand All @@ -10,34 +8,24 @@
import datadog.trace.api.Config;
import datadog.trace.bootstrap.debugger.DebuggerContext.ClassNameFilter;
import datadog.trace.util.AgentTaskScheduler;
import datadog.trace.util.Strings;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.lang.instrument.Instrumentation;
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
import java.util.regex.Pattern;
import okio.Okio;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class SymDBEnablement implements ProductListener {
private static final Logger LOGGER = LoggerFactory.getLogger(SymDBEnablement.class);
private static final Pattern COMMA_PATTERN = Pattern.compile(",");
private static final JsonAdapter<SymDbRemoteConfigRecord> SYM_DB_JSON_ADAPTER =
MoshiHelper.createMoshiConfig().adapter(SymDbRemoteConfigRecord.class);
private static final String SYM_DB_RC_KEY = "symDb";
Expand Down Expand Up @@ -120,7 +108,7 @@ public void startSymbolExtraction() {
symbolExtractionTransformer =
new SymbolExtractionTransformer(symbolAggregator, classNameFilter);
instrumentation.addTransformer(symbolExtractionTransformer);
SymDBReport symDBReport = new SymDBReport();
SymDBReport symDBReport = new BasicSymDBReport();
extractSymbolForLoadedClasses(symDBReport);
symDBReport.report();
lastUploadTimestamp = System.currentTimeMillis();
Expand All @@ -145,7 +133,6 @@ private void extractSymbolForLoadedClasses(SymDBReport symDBReport) {
LOGGER.debug("Failed to get all loaded classes", ex);
return;
}
Set<String> alreadyScannedJars = new HashSet<>();
byte[] buffer = new byte[READ_BUFFER_SIZE];
ByteArrayOutputStream baos = new ByteArrayOutputStream(CLASSFILE_BUFFER_SIZE);
for (Class<?> clazz : classesToExtract) {
Expand All @@ -162,86 +149,7 @@ private void extractSymbolForLoadedClasses(SymDBReport symDBReport) {
symDBReport.addMissingJar(jarPath.toString());
continue;
}
File jarPathFile = jarPath.toFile();
if (jarPathFile.isDirectory()) {
scanDirectory(jarPath, alreadyScannedJars, baos, buffer, symDBReport);
alreadyScannedJars.add(jarPath.toString());
continue;
}
if (alreadyScannedJars.contains(jarPath.toString())) {
continue;
}
try {
try (JarFile jarFile = new JarFile(jarPathFile)) {
jarFile.stream()
.filter(jarEntry -> jarEntry.getName().endsWith(".class"))
.filter(
jarEntry ->
!classNameFilter.isExcluded(
Strings.getClassName(trimPrefixes(jarEntry.getName()))))
.forEach(jarEntry -> parseJarEntry(jarEntry, jarFile, jarPath, baos, buffer));
}
alreadyScannedJars.add(jarPath.toString());
} catch (IOException e) {
symDBReport.addIOException(jarPath.toString(), e);
throw new RuntimeException(e);
}
}
}

private void scanDirectory(
Path jarPath,
Set<String> alreadyScannedJars,
ByteArrayOutputStream baos,
byte[] buffer,
SymDBReport symDBReport) {
try {
Files.walk(jarPath)
// explicitly no follow links walking the directory to avoid cycles
.filter(path -> Files.isRegularFile(path, LinkOption.NOFOLLOW_LINKS))
.filter(path -> path.toString().endsWith(".class"))
.filter(
path ->
!classNameFilter.isExcluded(
Strings.getClassName(trimPrefixes(jarPath.relativize(path).toString()))))
.forEach(path -> parseFileEntry(path, jarPath, baos, buffer));
alreadyScannedJars.add(jarPath.toString());
} catch (IOException e) {
symDBReport.addIOException(jarPath.toString(), e);
throw new RuntimeException(e);
}
}

private void parseFileEntry(Path path, Path jarPath, ByteArrayOutputStream baos, byte[] buffer) {
LOGGER.debug("parsing file class: {}", path.toString());
try {
try (InputStream inputStream = Files.newInputStream(path)) {
int readBytes;
baos.reset();
while ((readBytes = inputStream.read(buffer)) != -1) {
baos.write(buffer, 0, readBytes);
}
symbolAggregator.parseClass(
path.getFileName().toString(), baos.toByteArray(), jarPath.toString());
}
} catch (IOException ex) {
LOGGER.debug("Exception during parsing file class: {}", path, ex);
}
}

private void parseJarEntry(
JarEntry jarEntry, JarFile jarFile, Path jarPath, ByteArrayOutputStream baos, byte[] buffer) {
LOGGER.debug("parsing jarEntry class: {}", jarEntry.getName());
try {
InputStream inputStream = jarFile.getInputStream(jarEntry);
int readBytes;
baos.reset();
while ((readBytes = inputStream.read(buffer)) != -1) {
baos.write(buffer, 0, readBytes);
}
symbolAggregator.parseClass(jarEntry.getName(), baos.toByteArray(), jarPath.toString());
} catch (IOException ex) {
LOGGER.debug("Exception during parsing jarEntry class: {}", jarEntry.getName(), ex);
symbolAggregator.scanJar(symDBReport, jarPath, baos, buffer);
}
}
}
Original file line number Diff line number Diff line change
@@ -1,42 +1,39 @@
package com.datadog.debugger.symbol;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class SymDBReport {
private static final Logger LOGGER = LoggerFactory.getLogger(SymDBReport.class);

private final Set<String> missingJars = new HashSet<>();
private final Map<String, String> ioExceptions = new HashMap<>();
private final List<String> locationErrors = new ArrayList<>();

public void addMissingJar(String jarPath) {
missingJars.add(jarPath);
}

public void addIOException(String jarPath, IOException e) {
ioExceptions.put(jarPath, e.toString());
}

public void addLocationError(String locationStr) {
locationErrors.add(locationStr);
}

public void report() {
String content =
"== SymDB Report == Location errors:"
+ locationErrors
+ " Missing jars: "
+ missingJars
+ " IOExceptions: "
+ ioExceptions;
LOGGER.info(content);
}

public interface SymDBReport {

void addMissingJar(String jarPath);

void addIOException(String jarPath, IOException e);

void addLocationError(String locationStr);

void incClassCount(String jarPath);

void addScannedJar(String jarPath);

void report();

SymDBReport NO_OP =
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So by the usage of the NO_OP report, I assume we only want a symdb report on the initial scan, not on the continuous ones? I think it might be cool if we had a periodic report as well. Not required, just nice to have.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I focus on the initial process. It would be interesting to have it periodically, but not sure if relevant right now as the usage of logs in the wild is pretty heterogeneous.
Will consider it if we still have issues with the continuous scan process.

new SymDBReport() {
@Override
public void addMissingJar(String jarPath) {}

@Override
public void addIOException(String jarPath, IOException e) {}

@Override
public void addLocationError(String locationStr) {}

@Override
public void incClassCount(String jarPath) {}

@Override
public void addScannedJar(String jarPath) {}

@Override
public void report() {}
};
}
Loading
Loading