in doc-architect/doc-architect-core/src/main/java/com/docarchitect/core/scanner/impl/python/SqlAlchemyScanner.java [556:633]
/**
 * Extracts the fields declared directly in the body of a Python class by scanning the raw source text.
 *
 * <p>The class header is located with a regex built from {@code pythonClass.name()}; the header must
 * carry a parenthesised base list (e.g. {@code class User(Base):}). Only statements written at the
 * class body's own indentation level are inspected, so assignments inside methods, nested blocks and
 * continuation lines of multi-line calls are not reported as fields.
 *
 * <p>Two declaration styles are recognised:
 * <ul>
 *   <li>annotated: {@code name: type} or {@code name: type = value};</li>
 *   <li>legacy: {@code name = value} (the field type is left {@code null}).</li>
 * </ul>
 * Legacy declarations are only collected when no annotated declaration was found.
 *
 * @param pythonClass the parsed class whose fields are wanted
 * @param fileContent full text of the Python file, or {@code null} if it is not available
 * @return {@code pythonClass.fields()} when {@code fileContent} is {@code null}; an empty list when
 *         the class header cannot be found in {@code fileContent}; otherwise the fields parsed from
 *         the class body (possibly empty)
 */
private List<PythonAst.Field> getFieldsDefinedInClass(PythonAst.PythonClass pythonClass, String fileContent) {
    if (fileContent == null) {
        return pythonClass.fields();
    }

    // Locate the class header: "class <Name>(<bases>):"
    String classPattern = "class\\s+" + Pattern.quote(pythonClass.name()) + "\\s*\\([^)]*\\):";
    Matcher classMatcher = Pattern.compile(classPattern).matcher(fileContent);
    if (!classMatcher.find()) {
        // Without a header we cannot tell which lines belong to the class, so report no fields.
        log.debug("Could not find class definition for {}", pythonClass.name());
        return List.of();
    }

    String classBodyStr = extractClassLevelLines(fileContent.substring(classMatcher.end()));

    // In-line whitespace is restricted to spaces/tabs on purpose: "\\s" also matches newlines, which
    // let a bare "else:" / "try:" swallow the following line and show up as an annotated field.
    // Pattern 1: field_name: type = value OR field_name: type (modern syntax)
    Pattern modernPattern = Pattern.compile(
            "^[ \\t]*([a-z_][a-z0-9_]*)[ \\t]*:[ \\t]*([^=\\n]+?)(?:[ \\t]*=[ \\t]*(.+?))?[ \\t]*$",
            Pattern.MULTILINE);
    // Pattern 2: field_name = value (legacy Column() syntax)
    Pattern legacyPattern = Pattern.compile(
            "^[ \\t]*([a-z_][a-z0-9_]*)[ \\t]*=[ \\t]*(.+?)[ \\t]*$",
            Pattern.MULTILINE);

    List<PythonAst.Field> result = new ArrayList<>();

    // Try modern syntax first (with type annotations)
    Matcher modernMatcher = modernPattern.matcher(classBodyStr);
    while (modernMatcher.find()) {
        String fieldName = modernMatcher.group(1);
        String fieldType = modernMatcher.group(2).trim();
        String fieldValue = modernMatcher.group(3) != null ? modernMatcher.group(3).trim() : null;
        result.add(new PythonAst.Field(fieldName, fieldType, fieldValue, List.of()));
    }

    // If no modern fields found, try legacy syntax (without type annotations)
    if (result.isEmpty()) {
        Matcher legacyMatcher = legacyPattern.matcher(classBodyStr);
        while (legacyMatcher.find()) {
            String fieldName = legacyMatcher.group(1);
            String fieldValue = legacyMatcher.group(2).trim();
            // Legacy syntax has no annotation; the type lives inside the assigned expression.
            result.add(new PythonAst.Field(fieldName, null, fieldValue, List.of()));
        }
    }

    log.debug("Extracted {} fields from class body for {}", result.size(), pythonClass.name());
    return result;
}

/**
 * Collects the lines of a class body that sit at the body's own indentation level.
 *
 * <p>Blank and comment lines before the first body line are skipped. The indentation of the first
 * body line defines the class level; later lines with a different indentation (method bodies,
 * continuation lines) are dropped. Scanning stops at the first non-blank line that starts in
 * column zero once the body has begun.
 *
 * @param afterHeader file text starting immediately after the class header's trailing colon
 * @return the class-level lines, each terminated by {@code '\n'}; empty if no body line was found
 */
private static String extractClassLevelLines(String afterHeader) {
    StringBuilder body = new StringBuilder();
    String baseIndent = null;

    for (String line : afterHeader.split("\n")) {
        String trimmed = line.trim();

        // Skip empty lines and comments that precede the first body line
        if (baseIndent == null && (trimmed.isEmpty() || trimmed.startsWith("#"))) {
            continue;
        }

        boolean indented = !line.isEmpty() && (line.charAt(0) == ' ' || line.charAt(0) == '\t');
        if (indented) {
            int indentEnd = 0;
            while (indentEnd < line.length()
                    && (line.charAt(indentEnd) == ' ' || line.charAt(indentEnd) == '\t')) {
                indentEnd++;
            }
            String indent = line.substring(0, indentEnd);
            if (baseIndent == null) {
                baseIndent = indent;
            }
            // Keep only statements written directly in the class body
            if (indent.equals(baseIndent)) {
                body.append(line).append('\n');
            }
        } else if (baseIndent != null && !trimmed.isEmpty()) {
            // Hit a non-indented, non-empty line - end of class
            break;
        }
    }
    return body.toString();
}