Spaces:
Running
Running
| from flask import Flask, request, jsonify, render_template_string | |
| from sentence_transformers import SentenceTransformer, util | |
| import logging | |
| import sys | |
| import signal | |
| # 初始化 Flask 应用 | |
| app = Flask(__name__) | |
| # 配置日志,级别设为 INFO | |
| logging.basicConfig(level=logging.INFO) | |
| app.logger = logging.getLogger("CodeSearchAPI") | |
| # 预定义代码片段 | |
| CODE_SNIPPETS = [ | |
| "puts 'Hello, World!'", | |
| "def sum(a, b); a + b; end", | |
| "rand", | |
| "def even?(num); num.even?; end", | |
| "str.length", | |
| "Date.today", | |
| "File.exist?('file.txt')", | |
| "File.read('file.txt')", | |
| "File.write('file.txt', 'content')", | |
| "Time.now", | |
| "str.upcase", | |
| "str.downcase", | |
| "str.reverse", | |
| "list.size", | |
| "list.max", | |
| "list.min", | |
| "list.sort", | |
| "list1 + list2", | |
| "list.delete(element)", | |
| "list.empty?", | |
| "str.count(char)", | |
| "str.include?(substring)", | |
| "num.to_s", | |
| "str.to_i", | |
| "str.match?(/^\d+$/)", | |
| "list.index(element)", | |
| "list.clear", | |
| "list.reverse", | |
| "list.uniq", | |
| "list.include?(value)", | |
| "{}", | |
| "hash[key] = value", | |
| "hash.delete(key)", | |
| "hash.keys", | |
| "hash.values", | |
| "hash1.merge(hash2)", | |
| "hash.empty?", | |
| "hash[key]", | |
| "hash.key?(key)", | |
| "hash.clear", | |
| "File.readlines('file.txt').size", | |
| "File.write('file.txt', list.join('\\n'))", | |
| "File.read('file.txt').split('\\n')", | |
| "File.read('file.txt').split.size", | |
| "def leap_year?(year); (year % 400 == 0) || (year % 100 != 0 && year % 4 == 0); end", | |
| "Time.now.strftime('%Y-%m-%d %H:%M:%S')", | |
| "(Date.today - Date.new(2023, 1, 1)).to_i", | |
| "Dir.pwd", | |
| "Dir.entries('.')", | |
| "Dir.mkdir('new_dir')", | |
| "Dir.rmdir('new_dir')", | |
| "File.file?('path')", | |
| "File.directory?('path')", | |
| "File.size('file.txt')", | |
| "File.rename('old.txt', 'new.txt')", | |
| "FileUtils.cp('source.txt', 'destination.txt')", | |
| "FileUtils.mv('source.txt', 'destination.txt')", | |
| "File.delete('file.txt')", | |
| "ENV['VAR_NAME']", | |
| "ENV['VAR_NAME'] = 'value'", | |
| "system('open https://example.com')", | |
| "require 'net/http'; Net::HTTP.get(URI('https://example.com'))", | |
| "require 'json'; JSON.parse(json_string)", | |
| "require 'json'; File.write('file.json', JSON.dump(data))", | |
| "require 'json'; JSON.parse(File.read('file.json'))", | |
| "list.join", | |
| "str.split(',')", | |
| "list.join(',')", | |
| "list.join('\\n')", | |
| "str.split", | |
| "str.split(delimiter)", | |
| "str.chars", | |
| "str.gsub(old, new)", | |
| "str.gsub(' ', '')", | |
| "str.gsub(/[^a-zA-Z0-9]/, '')", | |
| "str.empty?", | |
| "str == str.reverse", | |
| "require 'csv'; CSV.open('file.csv', 'w') { |csv| csv << ['data'] }", | |
| "require 'csv'; CSV.read('file.csv')", | |
| "require 'csv'; CSV.read('file.csv').size", | |
| "list.shuffle", | |
| "list.sample", | |
| "list.sample(n)", | |
| "rand(6) + 1", | |
| "rand(2) == 0 ? 'Heads' : 'Tails'", | |
| "SecureRandom.alphanumeric(8)", | |
| "format('#%06x', rand(0xffffff))", | |
| "SecureRandom.uuid", | |
| "class MyClass; end", | |
| "MyClass.new", | |
| "class MyClass; def my_method; end; end", | |
| "class MyClass; attr_accessor :my_attr; end", | |
| "class ChildClass < ParentClass; end", | |
| "class ChildClass < ParentClass; def my_method; super; end; end", | |
| "class MyClass; def self.class_method; end; end", | |
| "class MyClass; def self.static_method; end; end", | |
| "obj.is_a?(Class)", | |
| "obj.instance_variable_get(:@attr)", | |
| "obj.instance_variable_set(:@attr, value)", | |
| "obj.instance_variable_defined?(:@attr)", | |
| "begin; risky_operation; rescue => e; puts e; end", | |
| """class CustomError < StandardError | |
| end | |
| raise CustomError, 'error occurred'""", | |
| "begin; raise 'oops'; rescue => e; e.message; end", | |
| """require 'logger' | |
| logger = Logger.new('error.log') | |
| logger.error('error occurred')""", | |
| "start_time = Time.now", | |
| "Time.now - start_time", | |
| "20.times { |i| print \"\\r[#{'='*(i+1)}#{' '*(19-i)}]\"; sleep(0.1) }", | |
| "sleep(1)", | |
| "square = ->(x) { xx }", | |
| "squares = [1,2,3].map { |n| nn }", | |
| "evens = [1,2,3,4].select { |n| n.even? }", | |
| "sum = [1,2,3].reduce(0) { |acc,n| acc+n }", | |
| "doubles = [1,2,3,4,5].map { |n| n2 }", | |
| "hash = [1,2,3].map { |n| [n, n2] }.to_h", | |
| "require 'set'; s = Set.new([1,2,3].map { |n| n*2 })", | |
| "intersection = a & b", | |
| "union = a | b", | |
| "diff = a - b", | |
| "filtered = list.compact", | |
| "begin; File.open('file.txt'); rescue; false; end", | |
| "x.is_a?(String)", | |
| "bool = ['true','1'].include?(str.downcase)", | |
| "puts 'yes' if x > 0", | |
| "i=0; while i<5; i+=1; end", | |
| "for item in [1,2,3]; puts item; end", | |
| """h = {a:1, b:2} | |
| for k, v in h | |
| puts "#{k}:#{v}" | |
| end""", | |
| """for c in 'hello'.chars | |
| puts c | |
| end""", | |
| """for i in 1..5 | |
| break if i==3 | |
| puts i | |
| end""", | |
| """for i in 1..5 | |
| next if i==3 | |
| puts i | |
| end""", | |
| "def foo; end", | |
| "def foo(a=1); a; end", | |
| "def foo; [1,2]; end", | |
| "def foo(*args); args; end", | |
| "def foo(a:, b:); a+b; end", | |
| """def foo | |
| end | |
| start = Time.now | |
| foo | |
| puts Time.now - start""", | |
| """def decorate(f) | |
| ->(args) { puts 'before'; result = f.call(args); puts 'after'; result } | |
| end""", | |
| """def fib(n, memo={}) | |
| return memo[n] if memo[n] | |
| memo[n] = n<2 ? n : fib(n-1, memo) + fib(n-2, memo) | |
| end""", | |
| "gen = Enumerator.new { |y| i=0; loop { y << i; i+=1 } }", | |
| "def foo; yield 1; end", | |
| "gen.next", | |
| "itr = [1,2,3].each", | |
| """itr = [1,2,3].each | |
| loop do | |
| puts itr.next | |
| end""", | |
| "[1,2].each_with_index { |v, i| puts i, v }", | |
| "zipped = [1,2].zip(['a','b'])", | |
| "h = [1,2].zip(['a','b']).to_h", | |
| "[1,2] == [1,2]", | |
| "{a:1, b:2} == {b:2, a:1}", | |
| "require 'set'; Set.new([1,2]) == Set.new([2,1])", | |
| "unique = [1,2,1].uniq", | |
| "s.clear", | |
| "s.empty?", | |
| "s.add(1)", | |
| "s.delete(1)", | |
| "s.include?(1)", | |
| "s.size", | |
| "!(a & b).empty?", | |
| "[1,2].all? { |e| [1,2,3].include?(e) }", | |
| "'hi'.include?('h')", | |
| "str[0]", | |
| "str[-1]", | |
| "File.extname(path) == '.txt'", | |
| "['.png','.jpg','.jpeg','.gif'].include?(File.extname(path))", | |
| "x.round", | |
| "x.ceil", | |
| "x.floor", | |
| "sprintf('%.2f', x)", | |
| "require 'securerandom'; SecureRandom.alphanumeric(8)", | |
| "File.exist?('path')", | |
| "Dir['**/'].each { |f| puts f }", | |
| "File.extname('path.txt')", | |
| "File.basename(path)", | |
| "File.expand_path(path)", | |
| "RUBY_VERSION", | |
| "RUBY_PLATFORM", | |
| "require 'etc'; Etc.nprocessors", | |
| "mem = grep MemTotal /proc/meminfo", | |
| "df = df -h /", | |
| """require 'socket' | |
| ip = Socket.ip_address_list.detect(&:ipv4_private).ip_address""", | |
| "system('ping -c1 8.8.8.8 > /dev/null 2>&1')", | |
| """require 'open-uri' | |
| File.open('file', 'wb') { |f| f.write open(url).read }""", | |
| """def upload(file) | |
| puts 'Uploading' | |
| end""", | |
| """require 'net/http' | |
| uri = URI(url) | |
| Net::HTTP.post_form(uri, key: 'value')""", | |
| """uri = URI(url) | |
| uri.query = URI.encode_www_form(params) | |
| Net::HTTP.get(uri)""", | |
| """require 'net/http' | |
| uri = URI(url) | |
| req = Net::HTTP::Get.new(uri) | |
| req['User-Agent'] = 'Custom' | |
| res = Net::HTTP.start(uri.hostname, uri.port) { |http| http.request(req) }""", | |
| "require 'nokogiri'; doc = Nokogiri::HTML(html)", | |
| "doc.at('title').text", | |
| "links = doc.css('a').map { |a| a['href'] }", | |
| """doc.css('img').each do |img| | |
| open(img['src']).each do |chunk| | |
| File.open(File.basename(img['src']), 'ab') { |f| f.write chunk } | |
| end | |
| end""", | |
| """freq = Hash.new(0) | |
| text.split.each { |w| freq[w] += 1 }""", | |
| """require 'net/http' | |
| res = Net::HTTP.post_form(URI(login_url), username: 'u', password: 'p')""", | |
| "Nokogiri::HTML(html).text", | |
| "emails = text.scan(/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/)", | |
| "phones = text.scan(/\b\d{3}-\d{3}-\d{4}\b/)", | |
| "nums = text.scan(/\d+/)", | |
| "new = text.gsub(/foo/, 'bar')", | |
| "!!(text =~ /pattern/)", | |
| "clean = text.gsub(/<[^>]>/, '')", | |
| "CGI.escapeHTML(text)", | |
| "CGI.unescapeHTML(text)", | |
| """require 'tk' | |
| root = TkRoot.new { title 'App' } | |
| Tk.mainloop""", | |
| "require 'tk'", | |
| """root = TkRoot.new | |
| button = TkButton.new(root) {text 'Click Me'; command { Tk.messageBox(message: 'Button Clicked!') }} | |
| button.pack""", | |
| """Tk.messageBox(message: 'Hello, World!')""", | |
| """entry = TkEntry.new(root).pack | |
| entry.get""", | |
| """root.title = 'My Window'""", | |
| """root.geometry('400x300')""", | |
| """root.geometry('+%d+%d' % [(root.winfo_screenwidth() - root.winfo_reqwidth()) / 2, (root.winfo_screenheight() - root.winfo_reqheight()) / 2])""", | |
| """menu = TkMenu.new(root) | |
| root['menu'] = menu | |
| menu.add('command', 'label' => 'File')""", | |
| """combobox = Tk::Tile::Combobox.new(root).pack""", | |
| """radio = TkRadioButton.new(root) {text 'Option 1'}.pack""", | |
| """check = TkCheckButton.new(root) {text 'Check Me'}.pack""", | |
| """image = TkPhotoImage.new(file: 'image.png') | |
| label = TkLabel.new(root) {image image}.pack""", | |
| """`afplay audio.mp3`""", | |
| """`ffplay video.mp4`""", | |
| """`ffmpeg -i video.mp4 -f null - 2>&1 | grep 'time=' | awk '{print $2}'`""", | |
| """`screencapture screen.png`""", | |
| """`ffmpeg -f avfoundation -i "1" -t 10 screen.mp4`""", | |
| """`cliclick p:.`""", | |
| """`cliclick kd:cmd kp:space ku:cmd`""", | |
| """`cliclick c:.`""", | |
| """Time.now.to_i""", | |
| """Time.at(timestamp).strftime('%Y-%m-%d')""", | |
| """Time.parse(date).to_i""", | |
| """Time.now.strftime('%A')""", | |
| """Time.days_in_month(Time.now.month, Time.now.year)""", | |
| """Time.new(Time.now.year, 1, 1)""", | |
| """Time.new(Time.now.year, 12, 31)""", | |
| """Time.new(year, month, 1)""", | |
| """Time.new(year, month, -1)""", | |
| """Time.now.wday.between?(1, 5)""", | |
| """Time.now.wday.between?(6, 7)""", | |
| """Time.now.hour""", | |
| """Time.now.min""", | |
| """Time.now.sec""", | |
| """sleep(1)""", | |
| """(Time.now.to_f * 1000).to_i""", | |
| """Time.now.strftime('%Y-%m-%d %H:%M:%S')""", | |
| """Time.parse(time_str)""", | |
| """Thread.new { puts 'Hello from thread' }""", | |
| """sleep(1)""", | |
| """threads = [] | |
| 3.times { threads << Thread.new { puts 'Hello from thread' } } | |
| threads.each(&:join)""", | |
| """Thread.current.name""", | |
| """thread = Thread.new { puts 'Hello from thread' } | |
| thread.abort_on_exception = true""", | |
| """mutex = Mutex.new | |
| mutex.synchronize { puts 'Hello from synchronized thread' }""", | |
| """pid = Process.spawn('sleep 5')""", | |
| """Process.pid""", | |
| """Process.kill(0, pid) rescue false""", | |
| """pids = [] | |
| 3.times { pids << Process.spawn('sleep 5') } | |
| pids.each { |pid| Process.wait(pid) }""", | |
| """queue = Queue.new | |
| queue.push('Hello') | |
| queue.pop""", | |
| """reader, writer = IO.pipe | |
| writer.puts 'Hello' | |
| reader.gets""", | |
| """Process.setrlimit(:CPU, 50)""", | |
| """`ls`""", | |
| """`ls`.chomp""", | |
| """$?.exitstatus""", | |
| """$?.success?""", | |
| """File.expand_path(__FILE__)""", | |
| """ARGV""", | |
| """require 'optparse' | |
| OptionParser.new { |opts| opts.on('-h', '--help', 'Show help') { puts opts } }.parse!""", | |
| """OptionParser.new { |opts| opts.on('-h', '--help', 'Show help') { puts opts } }.parse!""", | |
| """Gem.loaded_specs.keys""", | |
| """`gem install package_name`""", | |
| """`gem uninstall package_name`""", | |
| """Gem.loaded_specs['package_name'].version.to_s""", | |
| """`bundle exec ruby script.rb`""", | |
| """Gem::Specification.map(&:name)""", | |
| """`gem update package_name`""", | |
| """require 'sqlite3' | |
| db = SQLite3::Database.new('test.db')""", | |
| """db.execute('SELECT * FROM table')""", | |
| """db.execute('INSERT INTO table (column) VALUES (?)', 'value')""", | |
| """db.execute('DELETE FROM table WHERE id = ?', 1)""", | |
| """db.execute('UPDATE table SET column = ? WHERE id = ?', 'new_value', 1)""", | |
| """db.execute('SELECT * FROM table').each { |row| puts row }""", | |
| """db.execute('SELECT * FROM table WHERE column = ?', 'value')""", | |
| """db.close""", | |
| """db.execute('CREATE TABLE table (id INTEGER PRIMARY KEY, column TEXT)')""", | |
| """db.execute('DROP TABLE table')""", | |
| """db.table_info('table').any?""", | |
| """db.execute('SELECT name FROM sqlite_master WHERE type = "table"')""", | |
| """class Model < ActiveRecord::Base | |
| end | |
| Model.create(column: 'value')""", | |
| """Model.find_by(column: 'value')""", | |
| """Model.find_by(column: 'value').destroy""", | |
| """Model.find_by(column: 'value').update(column: 'new_value')""", | |
| """class Model < ActiveRecord::Base | |
| end""", | |
| """class ChildModel < ParentModel | |
| end""", | |
| """class Model < ActiveRecord::Base | |
| self.primary_key = 'id' | |
| end""", | |
| """class Model < ActiveRecord::Base | |
| validates_uniqueness_of :column | |
| end""", | |
| """class Model < ActiveRecord::Base | |
| attribute :column, default: 'value' | |
| end""", | |
| """require 'csv' | |
| CSV.open('data.csv', 'w') { |csv| csv << ['column1', 'column2'] }""", | |
| """require 'spreadsheet' | |
| book = Spreadsheet::Workbook.new | |
| sheet = book.create_worksheet | |
| sheet[0, 0] = 'Hello' | |
| book.write('data.xls')""", | |
| """require 'json' | |
| File.write('data.json', {key: 'value'}.to_json)""", | |
| """require 'spreadsheet' | |
| book = Spreadsheet.open('data.xls') | |
| sheet = book.worksheet(0) | |
| sheet.each { |row| puts row }""", | |
| """require 'spreadsheet' | |
| book1 = Spreadsheet.open('file1.xls') | |
| book2 = Spreadsheet.open('file2.xls') | |
| book1.worksheets.each { |sheet| book2.add_worksheet(sheet) } | |
| book2.write('merged.xls')""", | |
| """require 'spreadsheet' | |
| book = Spreadsheet::Workbook.new | |
| book.create_worksheet(name: 'New Sheet') | |
| book.write('data.xls')""", | |
| """require 'spreadsheet' | |
| book = Spreadsheet.open('data.xls') | |
| sheet = book.worksheet(0) | |
| new_sheet = book.create_worksheet | |
| new_sheet.format_with(sheet) | |
| book.write('data.xls')""", | |
| """require 'spreadsheet' | |
| book = Spreadsheet.open('data.xls') | |
| sheet = book.worksheet(0) | |
| sheet.row(0).set_format(0, Spreadsheet::Format.new(color: :red)) | |
| book.write('data.xls')""", | |
| """require 'spreadsheet' | |
| book = Spreadsheet.open('data.xls') | |
| sheet = book.worksheet(0) | |
| sheet.row(0).set_format(0, Spreadsheet::Format.new(weight: :bold)) | |
| book.write('data.xls')""", | |
| """require 'spreadsheet' | |
| book = Spreadsheet.open('data.xls') | |
| sheet = book.worksheet(0) | |
| sheet[0, 0]""", | |
| """require 'spreadsheet' | |
| book = Spreadsheet::Workbook.new | |
| sheet = book.create_worksheet | |
| sheet[0, 0] = 'Hello' | |
| book.write('data.xls')""", | |
| """require 'rmagick' | |
| image = Magick::Image.read('image.png').first | |
| [image.columns, image.rows]""", | |
| """require 'rmagick' | |
| image = Magick::Image.read('image.png').first | |
| image.resize!(100, 100)""" | |
| ] | |
| # 全局服务状态 | |
| service_ready = False | |
| # 优雅关闭处理 | |
| def handle_shutdown(signum, frame): | |
| app.logger.info("收到终止信号,开始关闭...") | |
| sys.exit(0) | |
| signal.signal(signal.SIGTERM, handle_shutdown) | |
| signal.signal(signal.SIGINT, handle_shutdown) | |
| # 初始化模型和预计算编码 | |
| try: | |
| app.logger.info("开始加载模型...") | |
| model = SentenceTransformer( | |
| "flax-sentence-embeddings/st-codesearch-distilroberta-base", | |
| cache_folder="/model-cache" | |
| ) | |
| # 预计算代码片段的编码(强制使用 CPU) | |
| code_emb = model.encode(CODE_SNIPPETS, convert_to_tensor=True, device="cpu") | |
| service_ready = True | |
| app.logger.info("服务初始化完成") | |
| except Exception as e: | |
| app.logger.error("初始化失败: %s", str(e)) | |
| raise | |
| # Hugging Face 健康检查端点,必须响应根路径 | |
| def hf_health_check(): | |
| # 如果请求接受 HTML,则返回一个简单的 HTML 页面(包含测试链接) | |
| if request.accept_mimetypes.accept_html: | |
| html = """ | |
| <h2>CodeSearch API</h2> | |
| <p>服务状态:{{ status }}</p> | |
| <p>你可以在地址栏输入 /search?query=你的查询 来测试接口</p> | |
| """ | |
| status = "ready" if service_ready else "initializing" | |
| return render_template_string(html, status=status) | |
| # 否则返回 JSON 格式的健康检查 | |
| if service_ready: | |
| return jsonify({"status": "ready"}), 200 | |
| else: | |
| return jsonify({"status": "initializing"}), 503 | |
| # 搜索 API 端点,同时支持 GET 和 POST 请求 | |
| def handle_search(): | |
| if not service_ready: | |
| app.logger.info("服务未就绪") | |
| return jsonify({"error": "服务正在初始化"}), 503 | |
| try: | |
| # 根据请求方法提取查询内容 | |
| if request.method == 'GET': | |
| query = request.args.get('query', '').strip() | |
| else: | |
| data = request.get_json() or {} | |
| query = data.get('query', '').strip() | |
| if not query: | |
| app.logger.info("收到空的查询请求") | |
| return jsonify({"error": "查询不能为空"}), 400 | |
| # 记录接收到的查询 | |
| app.logger.info("收到查询请求: %s", query) | |
| # 对查询进行编码,并进行语义搜索 | |
| query_emb = model.encode(query, convert_to_tensor=True, device="cpu") | |
| hits = util.semantic_search(query_emb, code_emb, top_k=1)[0] | |
| best = hits[0] | |
| result = { | |
| "code": CODE_SNIPPETS[best['corpus_id']], | |
| "score": round(float(best['score']), 4) | |
| } | |
| # 记录返回结果 | |
| app.logger.info("返回结果: %s", result) | |
| return jsonify(result) | |
| except Exception as e: | |
| app.logger.error("请求处理失败: %s", str(e)) | |
| return jsonify({"error": "服务器内部错误"}), 500 | |
| if __name__ == "__main__": | |
| # 本地测试用,Hugging Face Spaces 通常通过 gunicorn 启动 | |
| app.run(host='0.0.0.0', port=7860) | |