akseljoonas HF Staff Claude Sonnet 4.5 committed on
Commit
e5a148a
·
1 Parent(s): bc8323d

Filter UV package installation output from HF Jobs logs

Browse files

Adds filtering to reduce UV package installation noise in job logs
while preserving the installation summary for context.

Changes:
- Add _filter_uv_install_output() function with regex pattern matching
- Apply filter in _run_job() before returning logs to agent
- Replace installation details with "[installs truncated]" message
- Keep "Installed X packages in Y ms/s" summary line visible
- Support singular/plural package counts ("package"/"packages"), ms/s units, and decimal time values
- Add comprehensive tests covering various edge cases

Example output:
[installs truncated]
Installed 68 packages in 251ms
Running main script...
Hello from the job!

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

agent/tools/jobs_tool.py CHANGED
@@ -7,6 +7,7 @@ Refactored to use official huggingface-hub library instead of custom HTTP client
7
  import asyncio
8
  import base64
9
  import os
 
10
  from typing import Any, Dict, Literal, Optional
11
 
12
  from huggingface_hub import HfApi
@@ -76,6 +77,44 @@ OperationType = Literal[
76
  UV_DEFAULT_IMAGE = "ghcr.io/astral-sh/uv:python3.12-bookworm"
77
 
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  def _add_environment_variables(params: Dict[str, Any] | None) -> Dict[str, Any]:
80
  token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN") or ""
81
 
@@ -389,8 +428,11 @@ class HfJobsTool:
389
  namespace=self.namespace,
390
  )
391
 
 
 
 
392
  # Format all logs for the agent
393
- log_text = "\n".join(all_logs) if all_logs else "(no logs)"
394
 
395
  response = f"""{job_type} job completed!
396
 
 
7
  import asyncio
8
  import base64
9
  import os
10
+ import re
11
  from typing import Any, Dict, Literal, Optional
12
 
13
  from huggingface_hub import HfApi
 
77
  UV_DEFAULT_IMAGE = "ghcr.io/astral-sh/uv:python3.12-bookworm"
78
 
79
 
80
+ def _filter_uv_install_output(logs: list[str]) -> list[str]:
81
+ """
82
+ Filter out UV package installation output from logs.
83
+
84
+ Replaces installation details with "[installs truncated]" and keeps
85
+ the "Installed X packages in Y ms/s" summary line.
86
+
87
+ Args:
88
+ logs: List of log lines
89
+
90
+ Returns:
91
+ Filtered list of log lines
92
+ """
93
+ if not logs:
94
+ return logs
95
+
96
+ # Regex pattern to match: "Installed X packages in Y ms" or "Installed X package in Y s"
97
+ install_pattern = re.compile(
98
+ r"^Installed\s+\d+\s+packages?\s+in\s+\d+(?:\.\d+)?\s*(?:ms|s)$"
99
+ )
100
+
101
+ # Find the index of the "Installed X packages" line
102
+ install_line_idx = None
103
+ for idx, line in enumerate(logs):
104
+ if install_pattern.match(line.strip()):
105
+ install_line_idx = idx
106
+ break
107
+
108
+ # If pattern found, replace installation details with truncation message
109
+ if install_line_idx is not None and install_line_idx > 0:
110
+ # Keep logs from the "Installed X packages" line onward
111
+ # Add truncation message before the "Installed" line
112
+ return ["[installs truncated]"] + logs[install_line_idx:]
113
+
114
+ # If pattern not found, return original logs
115
+ return logs
116
+
117
+
118
  def _add_environment_variables(params: Dict[str, Any] | None) -> Dict[str, Any]:
119
  token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN") or ""
120
 
 
428
  namespace=self.namespace,
429
  )
430
 
431
+ # Filter out UV package installation output
432
+ filtered_logs = _filter_uv_install_output(all_logs)
433
+
434
  # Format all logs for the agent
435
+ log_text = "\n".join(filtered_logs) if filtered_logs else "(no logs)"
436
 
437
  response = f"""{job_type} job completed!
438
 
tests/unit/tools/test_jobs_tool.py CHANGED
@@ -452,3 +452,86 @@ async def test_list_jobs_with_status_filter():
452
  assert "job-3" in result["formatted"]
453
  assert "job-1" not in result["formatted"]
454
  assert result["resultsShared"] == 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
452
  assert "job-3" in result["formatted"]
453
  assert "job-1" not in result["formatted"]
454
  assert result["resultsShared"] == 1
455
+
456
+
457
def test_filter_uv_install_output():
    """Verify that UV install noise is collapsed and other logs pass through."""
    from agent.tools.jobs_tool import _filter_uv_install_output

    marker = "[installs truncated]"

    # Each entry is (lines preceding the summary, summary line, lines after it).
    # In every one of these the preamble must collapse to the marker while the
    # summary and the trailing lines survive unchanged.
    truncation_cases = [
        # UV installation output followed by script output
        (
            ["Resolved 68 packages in 1.01s"],
            "Installed 68 packages in 251ms",
            ["Hello from the script!", "Script execution completed"],
        ),
        # Duration expressed in seconds rather than milliseconds
        (
            ["Downloading packages..."],
            "Installed 10 packages in 2s",
            ["Running main.py"],
        ),
        # Singular "package"
        (
            ["Resolving dependencies"],
            "Installed 1 package in 50ms",
            ["Import successful"],
        ),
        # Decimal time value
        (
            ["Starting installation"],
            "Installed 25 packages in 125.5ms",
            ["All dependencies ready"],
        ),
    ]
    for preamble, summary, trailing in truncation_cases:
        result = _filter_uv_install_output(preamble + [summary] + trailing)
        assert result == [marker, summary] + trailing

    # Logs with no UV summary line are returned untouched.
    untouched = ["Script started", "Processing data...", "Done!"]
    assert _filter_uv_install_output(untouched) == untouched

    # Empty input stays empty.
    assert _filter_uv_install_output([]) == []

    # When the summary is the very first line there is nothing to truncate,
    # so no marker is inserted.
    summary_first = ["Installed 5 packages in 100ms", "Running script..."]
    assert _filter_uv_install_output(summary_first) == summary_first