| { | |
| "bomFormat": "CycloneDX", | |
| "specVersion": "1.6", | |
| "serialNumber": "urn:uuid:674452ba-d230-4d3b-90b5-74ccd0c20c63", | |
| "version": 1, | |
| "metadata": { | |
| "timestamp": "2025-06-05T09:40:49.133901+00:00", | |
| "component": { | |
| "type": "machine-learning-model", | |
| "bom-ref": "infly/OpenCoder-8B-Instruct-d195bd27-df6f-5de8-8ab3-9f475c6cc49c", | |
| "name": "infly/OpenCoder-8B-Instruct", | |
| "externalReferences": [ | |
| { | |
| "url": "https://huggingface.co/infly/OpenCoder-8B-Instruct", | |
| "type": "documentation" | |
| } | |
| ], | |
| "modelCard": { | |
| "modelParameters": { | |
| "task": "text-generation", | |
| "architectureFamily": "llama", | |
| "modelArchitecture": "LlamaForCausalLM", | |
| "datasets": [ | |
| { | |
| "ref": "OpenCoder-LLM/opencoder-sft-stage1-372d7af3-6153-5091-b5cd-39c612c74e03" | |
| }, | |
| { | |
| "ref": "OpenCoder-LLM/opencoder-sft-stage2-ea53b7ac-b98d-5eef-a195-834ce3ec5a49" | |
| } | |
| ] | |
| }, | |
| "properties": [ | |
| { | |
| "name": "library_name", | |
| "value": "transformers" | |
| }, | |
| { | |
| "name": "base_model", | |
| "value": "infly/OpenCoder-8B-Base" | |
| } | |
| ] | |
| }, | |
| "authors": [ | |
| { | |
| "name": "infly" | |
| } | |
| ], | |
| "licenses": [ | |
| { | |
| "license": { | |
| "name": "inf", | |
| "url": "https://huggingface.co/infly/OpenCoder-8B-Instruct/blob/main/LICENSE" | |
| } | |
| } | |
| ], | |
| "tags": [ | |
| "transformers", | |
| "safetensors", | |
| "llama", | |
| "text-generation", | |
| "conversational", | |
| "en", | |
| "zh", | |
| "dataset:OpenCoder-LLM/opencoder-sft-stage1", | |
| "dataset:OpenCoder-LLM/opencoder-sft-stage2", | |
| "arxiv:2411.04905", | |
| "base_model:infly/OpenCoder-8B-Base", | |
| "base_model:finetune:infly/OpenCoder-8B-Base", | |
| "license:other", | |
| "autotrain_compatible", | |
| "text-generation-inference", | |
| "endpoints_compatible", | |
| "region:us" | |
| ] | |
| } | |
| }, | |
| "components": [ | |
| { | |
| "type": "data", | |
| "bom-ref": "OpenCoder-LLM/opencoder-sft-stage1-372d7af3-6153-5091-b5cd-39c612c74e03", | |
| "name": "OpenCoder-LLM/opencoder-sft-stage1", | |
| "data": [ | |
| { | |
| "type": "dataset", | |
| "bom-ref": "OpenCoder-LLM/opencoder-sft-stage1-372d7af3-6153-5091-b5cd-39c612c74e03", | |
| "name": "OpenCoder-LLM/opencoder-sft-stage1", | |
| "contents": { | |
| "url": "https://huggingface.co/datasets/OpenCoder-LLM/opencoder-sft-stage1", | |
| "properties": [ | |
| { | |
| "name": "configs", | |
| "value": "Name of the dataset subset: filtered_infinity_instruct {\"split\": \"train\", \"path\": \"data/filtered_infinity_instruct-*\"}" | |
| }, | |
| { | |
| "name": "configs", | |
| "value": "Name of the dataset subset: largescale_diverse_instruct {\"split\": \"train\", \"path\": \"data/largescale_diverse_instruct-*\"}" | |
| }, | |
| { | |
| "name": "configs", | |
| "value": "Name of the dataset subset: realuser_instruct {\"split\": \"train\", \"path\": \"data/realuser_instruct-*\"}" | |
| }, | |
| { | |
| "name": "license", | |
| "value": "mit" | |
| } | |
| ] | |
| }, | |
| "governance": { | |
| "owners": [ | |
| { | |
| "organization": { | |
| "name": "OpenCoder-LLM", | |
| "url": "https://huggingface.co/OpenCoder-LLM" | |
| } | |
| } | |
| ] | |
| }, | |
| "description": "\n\n\t\n\t\t\n\t\tOpenCoder Dataset\n\t\n\nThe OpenCoder dataset is composed of the following datasets:\n\nopc-sft-stage1: the sft data used for opencoder sft-stage1 <-- you are here\nopc-sft-stage2: the sft data used for opencoder sft-stage2\nopc-annealing-corpus: the synthetic data & algorithmic corpus used for opencoder annealing\nopc-fineweb-code-corpus: the code-related page recalled from fineweb\nopc-fineweb-math-corpus: the math-related page recalled from fineweb\nrefineCode-code-corpus-meta: the meta-data\u2026 See the full description on the dataset page: https://huggingface.co/datasets/OpenCoder-LLM/opc-sft-stage1." | |
| } | |
| ] | |
| }, | |
| { | |
| "type": "data", | |
| "bom-ref": "OpenCoder-LLM/opencoder-sft-stage2-ea53b7ac-b98d-5eef-a195-834ce3ec5a49", | |
| "name": "OpenCoder-LLM/opencoder-sft-stage2", | |
| "data": [ | |
| { | |
| "type": "dataset", | |
| "bom-ref": "OpenCoder-LLM/opencoder-sft-stage2-ea53b7ac-b98d-5eef-a195-834ce3ec5a49", | |
| "name": "OpenCoder-LLM/opencoder-sft-stage2", | |
| "contents": { | |
| "url": "https://huggingface.co/datasets/OpenCoder-LLM/opencoder-sft-stage2", | |
| "properties": [ | |
| { | |
| "name": "configs", | |
| "value": "Name of the dataset subset: educational_instruct {\"split\": \"train\", \"path\": \"educational_instruct/train-*\"}" | |
| }, | |
| { | |
| "name": "configs", | |
| "value": "Name of the dataset subset: evol_instruct {\"split\": \"train\", \"path\": \"evol_instruct/train-*\"}" | |
| }, | |
| { | |
| "name": "configs", | |
| "value": "Name of the dataset subset: mceval_instruct {\"split\": \"train\", \"path\": \"mceval_instruct/train-*\"}" | |
| }, | |
| { | |
| "name": "configs", | |
| "value": "Name of the dataset subset: package_instruct {\"split\": \"train\", \"path\": \"package_instruct/train-*\"}" | |
| }, | |
| { | |
| "name": "license", | |
| "value": "mit" | |
| } | |
| ] | |
| }, | |
| "governance": { | |
| "owners": [ | |
| { | |
| "organization": { | |
| "name": "OpenCoder-LLM", | |
| "url": "https://huggingface.co/OpenCoder-LLM" | |
| } | |
| } | |
| ] | |
| }, | |
| "description": "\n\n\t\n\t\t\n\t\tOpenCoder Dataset\n\t\n\nThe OpenCoder dataset is composed of the following datasets:\n\nopc-sft-stage1: the sft data used for opencoder sft-stage1\nopc-sft-stage2: the sft data used for opencoder sft-stage2 <-- you are here\nopc-annealing-corpus: the synthetic data & algorithmic corpus used for opencoder annealing\nopc-fineweb-code-corpus: the code-related page recalled from fineweb\nopc-fineweb-math-corpus: the math-related page recalled from fineweb\nrefineCode-code-corpus-meta: the meta-data\u2026 See the full description on the dataset page: https://huggingface.co/datasets/OpenCoder-LLM/opc-sft-stage2." | |
| } | |
| ] | |
| } | |
| ] | |
| } |