Spaces:
Running
Running
| from flask import Flask, request, jsonify, render_template_string | |
| from sentence_transformers import SentenceTransformer, util | |
| import logging | |
| import sys | |
| import signal | |
# Configure root logging first so INFO-level records are emitted.
logging.basicConfig(level=logging.INFO)

# Create the Flask application and route its logging through a named logger.
app = Flask(__name__)
app.logger = logging.getLogger("CodeSearchAPI")
# Predefined code snippets that form the search corpus.
#
# The position of each entry matters: search hits are mapped back to a snippet
# by list index, so entries must not be reordered or removed without
# recomputing the embeddings.
#
# Snippets that contain regex or other backslash sequences are written as raw
# strings so Python does not interpret (or warn about) escapes such as ``\d``,
# and so ``\r`` stays a two-character sequence inside the snippet text.
CODE_SNIPPETS = [
    "echo 'Hello, World!';",
    "function add($a, $b) { return $a + $b; }",
    "$randomNumber = rand();",
    "function isEven($num) { return $num % 2 == 0; }",
    "strlen('example');",
    "date('Y-m-d');",
    "file_exists('example.txt');",
    "file_get_contents('example.txt');",
    "file_put_contents('example.txt', 'Hello, World!');",
    "date('H:i:s');",
    "strtoupper('example');",
    "strtolower('EXAMPLE');",
    "strrev('example');",
    "count([1, 2, 3]);",
    "max([1, 2, 3]);",
    "min([1, 2, 3]);",
    "sort([3, 1, 2]);",
    "array_merge([1, 2], [3, 4]);",
    "array_splice($array, $offset, $length);",
    "empty([]);",
    "substr_count('example', 'e');",
    "strpos('example', 'amp') !== false;",
    "strval(123);",
    "intval('123');",
    "is_numeric('123');",
    "array_search('value', $array);",
    "$array = [];",
    "array_reverse([1, 2, 3]);",
    "array_unique([1, 2, 2, 3]);",
    "in_array('value', $array);",
    "$array = ['key' => 'value'];",
    "$array['new_key'] = 'new_value';",
    "unset($array['key']);",
    "array_keys($array);",
    "array_values($array);",
    "array_merge($array1, $array2);",
    "empty($array);",
    "$array['key'];",
    "array_key_exists('key', $array);",
    "$array = [];",
    "count(file('example.txt'));",
    "file_put_contents('example.txt', implode(PHP_EOL, $array));",
    "file('example.txt', FILE_IGNORE_NEW_LINES);",
    "str_word_count(file_get_contents('example.txt'));",
    "function isLeapYear($year) { return ($year % 4 == 0 && ($year % 100 != 0 || $year % 400 == 0)); }",
    "date('Y-m-d H:i:s');",
    "(strtotime('2023-12-31') - strtotime('2023-01-01')) / (60 * 60 * 24);",
    "getcwd();",
    "scandir('.');",
    "mkdir('new_directory');",
    "rmdir('directory');",
    "is_file('example.txt');",
    "is_dir('directory');",
    "filesize('example.txt');",
    "rename('old.txt', 'new.txt');",
    "copy('source.txt', 'destination.txt');",
    "rename('source.txt', 'destination.txt');",
    "unlink('example.txt');",
    "getenv('PATH');",
    "putenv('PATH=/new/path');",
    "exec('start https://example.com');",
    "file_get_contents('https://example.com');",
    "json_decode('{\"key\":\"value\"}', true);",
    "file_put_contents('example.json', json_encode($data));",
    "json_decode(file_get_contents('example.json'), true);",
    "implode(',', $array);",
    "explode(',', 'a,b,c');",
    "implode(PHP_EOL, $array);",
    "explode(' ', 'a b c');",
    "explode(',', 'a,b,c');",
    "str_split('example');",
    "str_replace('old', 'new', 'old text');",
    "trim(' example ');",
    "preg_replace('/[^a-zA-Z0-9]/', '', 'example!');",
    "empty('');",
    "strrev('example') == 'example';",
    "fputcsv($file, $array);",
    "array_map('str_getcsv', file('example.csv'));",
    "count(file('example.csv'));",
    "shuffle($array);",
    "$array[array_rand($array)];",
    "array_rand($array, $num);",
    "rand(1, 6);",
    "rand(0, 1);",
    "substr(str_shuffle('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'), 0, 8);",
    "printf('#%06X', mt_rand(0, 0xFFFFFF));",
    "uniqid();",
    "class Example {}",
    "$example = new Example();",
    "class Example { function method() {} }",
    "class Example { public $property; }",
    "class Child extends Parent {}",
    "class Child extends Parent { function method() {} }",
    "Example::method();",
    "Example::staticMethod();",
    "is_object($example);",
    "get_object_vars($example);",
    "$example->property = 'value';",
    "unset($example->property);",
    "try{foo();}catch(e){}",
    "throw new Error('CustomError')",
    """try{foo();}catch(e){const info=e.message;}""",
    "console.error(err)",
    "const timer={start(){this.s=Date.now()},stop(){return Date.now()-this.s}}",
    "const runtime=(s)=>Date.now()-s",
    # Restored the lost `*` (percentage = i/n*100); raw string keeps `\r` literal.
    r"""const progress=(i,n)=>process.stdout.write(Math.floor(i/n*100)+'%\r')""",
    "const delay=(ms)=>new Promise(r=>setTimeout(r,ms))",
    # Restored the lost `*` in the two doubling lambdas below.
    "const f=(x)=>x*2",
    "const m=arr.map(x=>x*2)",
    "const f2=arr.filter(x=>x>0)",
    "const r=arr.reduce((a,x)=>a+x,0)",
    "const a=[1,2,3].map(x=>x)",
    "const o={a:1,b:2};const d={k:v for([k,v] of Object.entries(o))}",
    "const s=new Set([1,2,3]);const p=new Set(x for(x of s))",
    "const inter=new Set([...a].filter(x=>b.has(x)))",
    "const uni=new Set([...a,...b])",
    "const diff=new Set([...a].filter(x=>!b.has(x)))",
    "const noNone=list.filter(x=>x!=null)",
    """try{fs.openSync(path)}catch{}""",
    "typeof x==='string'",
    "const b=!!str",
    "if(cond)doSomething()",
    "while(cond){}",
    "for(const x of arr){}",
    "for(const k in obj){}",
    "for(const c of str){}",
    "for(...){if(cond)break}",
    "for(...){if(cond)continue}",
    "function fn(){}",
    "function fn(a=1){}",
    "function fn(){return [1,2]}",
    "function fn(...a){}",
    "function fn(kwargs){const{a,b}=kwargs}",
    """function timed(fn){return(...a)=>{const s=Date.now();const r=fn(...a);console.log(Date.now()-s);return r}}""",
    """const deco=fn=>(...a)=>fn(...a)""",
    """const memo=fn=>{const c={};return x=>c[x]||=(fn(x))}""",
    # Restored the generator marker (`function*`) that `yield` requires.
    "function* gen(){yield 1;yield 2}",
    "const g=gen();",
    "const it={i:0,next(){return this.i<2?{value:this.i++,done:false}:{done:true}}}",
    "for(const x of it){}",
    "for(const [i,x] of arr.entries()){}",
    "const z=arr1.map((v,i)=>[v,arr2[i]])",
    "const dict=Object.fromEntries(arr1.map((v,i)=>[v,arr2[i]]))",
    "JSON.stringify(arr1)===JSON.stringify(arr2)",
    "JSON.stringify(obj1)===JSON.stringify(obj2)",
    "JSON.stringify(new Set(a))===JSON.stringify(new Set(b))",
    "const uniq=[...new Set(arr)]",
    "set.clear()",
    "set.size===0",
    "set.add(x)",
    "set.delete(x)",
    "set.has(x)",
    "set.size",
    "const hasInt=([...a].some(x=>b.has(x)))",
    "arr1.every(x=>arr2.includes(x))",
    "str.includes(sub)",
    "str[0]",
    "str[str.length-1]",
    """const isText=path=>['.txt','.md'].includes(require('path').extname(path))""",
    """const isImage=path=>['.png','.jpg','.jpeg','.gif'].includes(require('path').extname(path))""",
    "Math.round(n)",
    "Math.ceil(n)",
    "Math.floor(n)",
    "n.toFixed(2)",
    """const randStr=(l)=>[...Array(l)].map(()=>Math.random().toString(36).charAt(2)).join('')""",
    "const exists=require('fs').existsSync(path)",
    """const walk=(d)=>require('fs').readdirSync(d).flatMap(f=>{const p=require('path').join(d,f);return require('fs').statSync(p).isDirectory()?walk(p):p})""",
    """const ext=require('path').extname(fp)""",
    """const name=require('path').basename(fp)""",
    """const full=require('path').resolve(fp)""",
    "process.version",
    "process.platform",
    "require('os').cpus().length",
    "require('os').totalmem()",
    """const d=require('os').diskUsageSync?require('os').diskUsageSync('/'):null""",
    "require('os').networkInterfaces()",
    """require('dns').resolve('www.google.com',e=>console.log(!e))""",
    """require('https').get(url,res=>res.pipe(require('fs').createWriteStream(dest)))""",
    """const upload=async f=>Promise.resolve('ok')""",
    """require('https').request({method:'POST',host,u:path},()=>{}).end(data)""",
    """require('https').get(url+'?'+new URLSearchParams(params),res=>{})""",
    """const req=()=>fetch(url,{headers})""",
    """const jsdom=require('jsdom');const d=new jsdom.JSDOM(html)""",
    """const title=jsdom.JSDOM(html).window.document.querySelector('title').textContent""",
    """const links=[...d.window.document.querySelectorAll('a')].map(a=>a.href)""",
    """Promise.all(links.map(u=>fetch(u).then(r=>r.blob()).then(b=>require('fs').writeFileSync(require('path').basename(u),Buffer.from(b)))))""",
    r"""const freq=html.split(/\W+/).reduce((c,w)=>{c[w]=(c[w]||0)+1;return c},{})""",
    """const login=()=>fetch(url,{method:'POST',body:creds})""",
    """const text=html.replace(/<[^>]+>/g,'')""",
    r"""const emails=html.match(/[\w.-]+@[\w.-]+/g)""",
    r"""const phones=html.match(/\+?\d[\d -]{7,}\d/g)""",
    r"""const nums=html.match(/\d+/g)""",
    """const newHtml=html.replace(/foo/g,'bar')""",
    r"""const ok=/^\d{3}$/.test(str)""",
    """const noTags=html.replace(/<[^>]*>/g,'')""",
    """const enc=html.replace(/./g,c=>'&#'+c.charCodeAt(0)+';')""",
    r"""const dec=enc.replace(/&#(\d+);/g,(m,n)=>String.fromCharCode(n))""",
    """const {app,BrowserWindow}=require('electron');app.on('ready',()=>new BrowserWindow().loadURL('about:blank'))""",
    "$button = new GtkButton('Click Me'); $window->add($button);",
    "$button->connect('clicked', function() { echo 'Button clicked!'; });",
    "$dialog = new GtkMessageDialog($window, GtkDialogFlags::MODAL, GtkMessageType::INFO, GtkButtonsType::OK, 'Hello!'); $dialog->run();",
    "$entry = new GtkEntry(); $input = $entry->get_text();",
    "$window->set_title('New Title');",
    "$window->set_default_size(800, 600);",
    "$window->set_position(Gtk::WIN_POS_CENTER);",
    "$menubar = new GtkMenuBar(); $menu = new GtkMenu(); $menuitem = new GtkMenuItem('File'); $menuitem->set_submenu($menu); $menubar->append($menuitem); $window->add($menubar);",
    "$combobox = new GtkComboBoxText(); $combobox->append_text('Option 1'); $combobox->append_text('Option 2'); $window->add($combobox);",
    "$radiobutton1 = new GtkRadioButton('Option 1'); $radiobutton2 = new GtkRadioButton($radiobutton1, 'Option 2'); $window->add($radiobutton1); $window->add($radiobutton2);",
    "$checkbutton = new GtkCheckButton('Check Me'); $window->add($checkbutton);",
    "$image = new GtkImage('image.png'); $window->add($image);",
    "exec('play audio.mp3');",
    "exec('play video.mp4');",
    "$current_time = exec('get_current_time_command');",
    "exec('screenshot_command');",
    "exec('record_screen_command');",
    "$mouse_position = exec('get_mouse_position_command');",
    "exec('simulate_keyboard_input_command');",
    "exec('simulate_mouse_click_command');",
    "time();",
    "date('Y-m-d H:i:s', $timestamp);",
    "strtotime('2023-10-01 12:00:00');",
    "date('l');",
    "date('t');",
    "date('Y-01-01');",
    "date('Y-12-31');",
    "date('Y-m-01', strtotime('2023-10-01'));",
    "date('Y-m-t', strtotime('2023-10-01'));",
    "date('N') < 6;",
    "date('N') >= 6;",
    "date('H');",
    "date('i');",
    "date('s');",
    "sleep(1);",
    "floor(microtime(true) * 1000);",
    "date('Y-m-d H:i:s', $time);",
    "strtotime($time_string);",
    "$thread = new Thread(); $thread->start();",
    "$thread->sleep(1);",
    "$threads = []; for ($i = 0; $i < 5; $i++) { $threads[$i] = new Thread(); $threads[$i]->start(); }",
    "$thread->getName();",
    "$thread->setDaemon(true);",
    "$lock = new Mutex(); $lock->lock(); $lock->unlock();",
    "$pid = pcntl_fork();",
    "getmypid();",
    "posix_kill($pid, 0);",
    "$pids = []; for ($i = 0; $i < 5; $i++) { $pids[$i] = pcntl_fork(); if ($pids[$i] == 0) { exit; } }",
    "$queue = new Threaded(); $queue->push('value');",
    "$pipe = fopen('php://stdin', 'r'); fwrite($pipe, 'value'); fclose($pipe);",
    "set_time_limit(0);",
    "exec('ls');",
    "exec('ls', $output);",
    "exec('ls', $output, $status);",
    "$status === 0;",
    "__FILE__;",
    "$argv;",
    "$parser = new ArgParser(); $parser->addArgument('arg1'); $parser->parse($argv);",
    "$parser->printHelp();",
    "print_r(get_loaded_extensions());",
    "exec('pip install package_name');",
    "exec('pip uninstall package_name');",
    "exec('pip show package_name | grep Version');",
    "exec('python -m venv venv');",
    "exec('pip list');",
    "exec('pip install --upgrade package_name');",
    "$db = new SQLite3('database.db');",
    "$result = $db->query('SELECT * FROM table');",
    "$db->exec(\"INSERT INTO table (column) VALUES ('value')\");",
    "$db->exec(\"DELETE FROM table WHERE id = 1\");",
    "$db->exec(\"UPDATE table SET column = 'new_value' WHERE id = 1\");",
    "$result = $db->query('SELECT * FROM table'); while ($row = $result->fetchArray()) { print_r($row); }",
    "$stmt = $db->prepare('SELECT * FROM table WHERE id = :id'); $stmt->bindValue(':id', 1); $result = $stmt->execute();",
    "$db->close();",
    "$db->exec('CREATE TABLE table (id INTEGER PRIMARY KEY, column TEXT)');",
    "$db->exec('DROP TABLE table');",
    "$result = $db->query(\"SELECT name FROM sqlite_master WHERE type='table' AND name='table'\");",
    "$result = $db->query(\"SELECT name FROM sqlite_master WHERE type='table'\");",
    "$model = new Model(); $model->save();",
    "$model = Model::find(1);",
    "$model = Model::find(1); $model->delete();",
    "$model = Model::find(1); $model->column = 'new_value'; $model->save();",
    "class Model extends ORM { protected static $table = 'table'; }",
    "class ChildModel extends ParentModel {}",
    "protected static $primaryKey = 'id';",
    "protected static $unique = ['column'];",
    "protected static $defaults = ['column' => 'default_value'];",
    "$file = fopen('data.csv', 'w'); fputcsv($file, $data); fclose($file);",
    "$excel = new ExcelWriter('data.xlsx'); $excel->write($data); $excel->close();",
    "$json = json_encode($data); file_put_contents('data.json', $json);",
    "$excel = new ExcelReader('data.xlsx'); $data = $excel->read(); $excel->close();",
    "$excel = new ExcelWriter('merged.xlsx'); foreach ($files as $file) { $data = (new ExcelReader($file))->read(); $excel->write($data); } $excel->close();",
    "$excel = new ExcelWriter('data.xlsx'); $excel->addSheet('New Sheet'); $excel->close();",
    "$excel = new ExcelWriter('data.xlsx'); $excel->copyStyle('Sheet1', 'Sheet2'); $excel->close();",
    "$excel = new ExcelWriter('data.xlsx'); $excel->setCellColor('A1', 'FF0000'); $excel->close();",
    "$excel = new ExcelWriter('data.xlsx'); $excel->setFontStyle('A1', 'bold'); $excel->close();",
    "$excel = new ExcelReader('data.xlsx'); $value = $excel->getCellValue('A1'); $excel->close();",
    "$excel = new ExcelWriter('data.xlsx'); $excel->setCellValue('A1', 'Hello'); $excel->close();",
    "list($width, $height) = getimagesize('image.png');",
    "$image = new Imagick('image.png'); $image->resizeImage(100, 100, Imagick::FILTER_LANCZOS, 1); $image->writeImage('resized_image.png');",
]
# Global service-readiness flag; it starts as False and is switched to True
# once the model has loaded and the snippet embeddings have been computed.
service_ready = False
# Graceful-shutdown handling.
def handle_shutdown(signum, frame):
    """Log the termination request and stop the process with exit status 0.

    Args:
        signum: Number of the signal that was delivered (unused).
        frame: Stack frame that was interrupted by the signal (unused).

    Raises:
        SystemExit: Always, with exit code 0.
    """
    app.logger.info("收到终止信号,开始关闭...")
    raise SystemExit(0)
# Install the shutdown handler for termination (SIGTERM) and interrupt (SIGINT).
for _shutdown_signal in (signal.SIGTERM, signal.SIGINT):
    signal.signal(_shutdown_signal, handle_shutdown)
# Load the embedding model and precompute the snippet embeddings at import time.
try:
    app.logger.info("开始加载模型...")
    model = SentenceTransformer(
        "flax-sentence-embeddings/st-codesearch-distilroberta-base",
        cache_folder="/model-cache",
    )
    # Encode every snippet once up front; encoding is pinned to the CPU.
    code_emb = model.encode(
        CODE_SNIPPETS,
        convert_to_tensor=True,
        device="cpu",
    )
    service_ready = True
    app.logger.info("服务初始化完成")
except Exception as init_error:
    # Record the failure, then re-raise so the import itself fails.
    app.logger.error("初始化失败: %s", init_error)
    raise
# Hugging Face health-check endpoint; it must answer on the root path.
@app.route("/")
def hf_health_check():
    """Report service readiness on the root path.

    The view is registered on ``/`` so that requests to the root path actually
    reach it; without the registration Flask has no rule for this function.

    Returns:
        For clients that accept HTML: a small rendered page showing the
        current status plus a hint on how to try the search endpoint.
        Otherwise a JSON body ``{"status": ...}`` with HTTP 200 when the
        service is ready and HTTP 503 while it is still initializing.
    """
    status = "ready" if service_ready else "initializing"

    # Browsers (anything accepting HTML) get a minimal human-readable page.
    if request.accept_mimetypes.accept_html:
        html = """
<h2>CodeSearch API</h2>
<p>服务状态:{{ status }}</p>
<p>你可以在地址栏输入 /search?query=你的查询 来测试接口</p>
"""
        return render_template_string(html, status=status)

    # Everyone else gets a JSON health report; the status code mirrors readiness.
    if service_ready:
        return jsonify({"status": "ready"}), 200
    return jsonify({"status": "initializing"}), 503
# Search API endpoint; supports both GET and POST requests.
@app.route("/search", methods=["GET", "POST"])
def handle_search():
    """Return the predefined code snippet that best matches the query.

    The query is taken from the ``query`` URL parameter for GET requests and
    from the ``query`` field of a JSON object body for other methods.

    Returns:
        A JSON response:
        * ``{"code": ..., "score": ...}`` for a successful search, where
          ``score`` is the similarity rounded to four decimals;
        * ``{"error": ...}`` with HTTP 503 while the service is not ready;
        * ``{"error": ...}`` with HTTP 400 when the query is missing, empty,
          not a string, or the body is not a JSON object;
        * ``{"error": ...}`` with HTTP 500 when the search itself fails.
    """
    if not service_ready:
        app.logger.info("服务未就绪")
        return jsonify({"error": "服务正在初始化"}), 503

    # Pull the raw query out of the request according to the HTTP method.
    if request.method == 'GET':
        raw_query = request.args.get('query', '')
    else:
        # silent=True makes a wrong content type or malformed JSON yield None
        # instead of raising an HTTP error that the broad handler below would
        # misreport as a 500.
        data = request.get_json(silent=True)
        raw_query = data.get('query', '') if isinstance(data, dict) else ''

    # A non-string query (number, list, null, ...) is treated as missing so it
    # is rejected as a client error rather than crashing on .strip().
    query = raw_query.strip() if isinstance(raw_query, str) else ''
    if not query:
        app.logger.info("收到空的查询请求")
        return jsonify({"error": "查询不能为空"}), 400

    try:
        # Log the incoming query.
        app.logger.info("收到查询请求: %s", query)
        # Encode the query and run the semantic search against the snippets.
        query_emb = model.encode(query, convert_to_tensor=True, device="cpu")
        hits = util.semantic_search(query_emb, code_emb, top_k=1)[0]
        best = hits[0]
        result = {
            "code": CODE_SNIPPETS[best['corpus_id']],
            "score": round(float(best['score']), 4)
        }
        # Log the result that is about to be returned.
        app.logger.info("返回结果: %s", result)
        return jsonify(result)
    except Exception as e:
        # logger.exception keeps the traceback, which a plain error() call drops.
        app.logger.exception("请求处理失败: %s", e)
        return jsonify({"error": "服务器内部错误"}), 500
if __name__ == "__main__":
    # For local testing only; on Hugging Face Spaces the app is usually
    # started through gunicorn instead.
    app.run(port=7860, host="0.0.0.0")