apolinario committed on
Commit
7615b9a
·
1 Parent(s): a4a8094

attempt to save in file

Browse files
Files changed (1) hide show
  1. ui/src/app/api/hf-jobs/route.ts +52 -62
ui/src/app/api/hf-jobs/route.ts CHANGED
@@ -1,6 +1,6 @@
1
  import { NextRequest, NextResponse } from 'next/server';
2
  import { spawn } from 'child_process';
3
- import { writeFile } from 'fs/promises';
4
  import path from 'path';
5
  import { tmpdir } from 'os';
6
 
@@ -1033,85 +1033,75 @@ async function checkHFJobStatus(token: string, jobId: string, jobNamespace?: str
1033
  }
1034
 
1035
  async function checkHFJobsCapacity(token: string): Promise<any> {
1036
- return new Promise((resolve, reject) => {
1037
  console.log('Checking HF Jobs capacity for namespace: lora-training-frenzi');
1038
- const args = [
1039
- 'jobs', 'ps',
1040
- '--namespace', 'lora-training-frenzi',
1041
- '--token', token
1042
- ];
1043
 
1044
- const childProcess = spawn('hf', args, {
 
 
 
 
 
 
 
1045
  env: {
1046
  ...process.env,
1047
  HF_TOKEN: token,
1048
- TERM: 'dumb', // Disable fancy terminal features
1049
- NO_COLOR: '1', // Disable color output
1050
- },
1051
- stdio: ['ignore', 'pipe', 'pipe'] // Explicitly set stdio to avoid TTY issues
1052
  });
1053
 
1054
- let output = '';
1055
- let error = '';
1056
-
1057
- childProcess.stdout.on('data', (data) => {
1058
- const text = data.toString();
1059
- output += text;
1060
- });
1061
 
1062
- childProcess.stderr.on('data', (data) => {
1063
- const text = data.toString();
1064
- error += text;
1065
- });
1066
 
1067
- childProcess.on('close', (code) => {
1068
- console.log(`hf jobs ps process exited with code: ${code}`);
1069
- console.log('=== RAW OUTPUT START ===');
1070
- console.log(output);
1071
- console.log('=== RAW OUTPUT END ===');
1072
- console.log('=== RAW ERROR START ===');
1073
- console.log(error);
1074
- console.log('=== RAW ERROR END ===');
1075
 
1076
- if (code === 0) {
1077
- try {
1078
- // Count RUNNING jobs in the output
1079
- // Split by newline and filter out empty lines
1080
- const lines = output.split(/\r?\n/).filter(line => line.trim().length > 0);
1081
- let runningCount = 0;
1082
 
1083
- console.log(`Total non-empty lines in output: ${lines.length}`);
1084
 
1085
- for (let i = 0; i < lines.length; i++) {
1086
- const line = lines[i];
1087
- console.log(`Line ${i}: "${line}"`);
1088
 
1089
- // Check if line contains RUNNING (case-sensitive as shown in your output)
1090
- if (line.includes('RUNNING')) {
1091
- runningCount++;
1092
- console.log(` ✓ Line ${i} contains RUNNING (count: ${runningCount})`);
1093
- }
1094
  }
 
1095
 
1096
- const atCapacity = runningCount >= 32;
1097
 
1098
- console.log(`\n=== FINAL COUNT ===`);
1099
- console.log(`Found ${runningCount} RUNNING jobs. At capacity: ${atCapacity}`);
1100
- console.log(`==================\n`);
1101
 
1102
- resolve({
1103
- runningJobs: runningCount,
1104
- atCapacity,
1105
- capacityLimit: 32,
1106
- });
1107
- } catch (parseError: any) {
1108
- console.error('Failed to parse jobs ps output:', parseError);
1109
- reject(new Error('Failed to parse capacity status'));
1110
  }
1111
- } else {
1112
- console.error('hf jobs ps failed with code:', code);
1113
- console.error('Error output:', error);
1114
- reject(new Error(error || output || 'Failed to check capacity'));
 
 
 
 
 
1115
  }
1116
  });
1117
 
 
1
  import { NextRequest, NextResponse } from 'next/server';
2
  import { spawn } from 'child_process';
3
+ import { writeFile, readFile, unlink } from 'fs/promises';
4
  import path from 'path';
5
  import { tmpdir } from 'os';
6
 
 
1033
  }
1034
 
1035
  async function checkHFJobsCapacity(token: string): Promise<any> {
1036
+ return new Promise(async (resolve, reject) => {
1037
  console.log('Checking HF Jobs capacity for namespace: lora-training-frenzi');
 
 
 
 
 
1038
 
1039
+ // Create a temporary file to store the output
1040
+ const tempFile = path.join(tmpdir(), `hf_jobs_ps_${Date.now()}.txt`);
1041
+ console.log(`Writing output to temp file: ${tempFile}`);
1042
+
1043
+ // Use shell redirection to write to file
1044
+ const command = `hf jobs ps --namespace lora-training-frenzi --token "${token}" > "${tempFile}" 2>&1`;
1045
+
1046
+ const childProcess = spawn('sh', ['-c', command], {
1047
  env: {
1048
  ...process.env,
1049
  HF_TOKEN: token,
1050
+ TERM: 'dumb',
1051
+ NO_COLOR: '1',
1052
+ }
 
1053
  });
1054
 
1055
+ childProcess.on('close', async (code) => {
1056
+ console.log(`hf jobs ps process exited with code: ${code}`);
 
 
 
 
 
1057
 
1058
+ try {
1059
+ // Read the output from the temporary file
1060
+ const output = await readFile(tempFile, 'utf-8');
 
1061
 
1062
+ console.log('=== RAW OUTPUT START ===');
1063
+ console.log(output);
1064
+ console.log('=== RAW OUTPUT END ===');
 
 
 
 
 
1065
 
1066
+ // Count RUNNING jobs in the output
1067
+ // Split by newline and filter out empty lines
1068
+ const lines = output.split(/\r?\n/).filter(line => line.trim().length > 0);
1069
+ let runningCount = 0;
 
 
1070
 
1071
+ console.log(`Total non-empty lines in output: ${lines.length}`);
1072
 
1073
+ for (let i = 0; i < lines.length; i++) {
1074
+ const line = lines[i];
1075
+ console.log(`Line ${i}: "${line}"`);
1076
 
1077
+ // Check if line contains RUNNING (case-sensitive as shown in your output)
1078
+ if (line.includes('RUNNING')) {
1079
+ runningCount++;
1080
+ console.log(` ✓ Line ${i} contains RUNNING (count: ${runningCount})`);
 
1081
  }
1082
+ }
1083
 
1084
+ const atCapacity = runningCount >= 32;
1085
 
1086
+ console.log(`\n=== FINAL COUNT ===`);
1087
+ console.log(`Found ${runningCount} RUNNING jobs. At capacity: ${atCapacity}`);
1088
+ console.log(`==================\n`);
1089
 
1090
+ // Clean up temp file
1091
+ try {
1092
+ await unlink(tempFile);
1093
+ } catch (unlinkError) {
1094
+ console.warn('Failed to delete temp file:', unlinkError);
 
 
 
1095
  }
1096
+
1097
+ resolve({
1098
+ runningJobs: runningCount,
1099
+ atCapacity,
1100
+ capacityLimit: 32,
1101
+ });
1102
+ } catch (parseError: any) {
1103
+ console.error('Failed to read or parse jobs ps output:', parseError);
1104
+ reject(new Error('Failed to parse capacity status'));
1105
  }
1106
  });
1107