{ "bomFormat": "CycloneDX", "specVersion": "1.6", "serialNumber": "urn:uuid:674452ba-d230-4d3b-90b5-74ccd0c20c63", "version": 1, "metadata": { "timestamp": "2025-06-05T09:40:49.133901+00:00", "component": { "type": "machine-learning-model", "bom-ref": "infly/OpenCoder-8B-Instruct-d195bd27-df6f-5de8-8ab3-9f475c6cc49c", "name": "infly/OpenCoder-8B-Instruct", "externalReferences": [ { "url": "https://huggingface.co/infly/OpenCoder-8B-Instruct", "type": "documentation" } ], "modelCard": { "modelParameters": { "task": "text-generation", "architectureFamily": "llama", "modelArchitecture": "LlamaForCausalLM", "datasets": [ { "ref": "OpenCoder-LLM/opencoder-sft-stage1-372d7af3-6153-5091-b5cd-39c612c74e03" }, { "ref": "OpenCoder-LLM/opencoder-sft-stage2-ea53b7ac-b98d-5eef-a195-834ce3ec5a49" } ] }, "properties": [ { "name": "library_name", "value": "transformers" }, { "name": "base_model", "value": "infly/OpenCoder-8B-Base" } ] }, "authors": [ { "name": "infly" } ], "licenses": [ { "license": { "name": "inf", "url": "https://huggingface.co/infly/OpenCoder-8B-Instruct/blob/main/LICENSE" } } ], "tags": [ "transformers", "safetensors", "llama", "text-generation", "conversational", "en", "zh", "dataset:OpenCoder-LLM/opencoder-sft-stage1", "dataset:OpenCoder-LLM/opencoder-sft-stage2", "arxiv:2411.04905", "base_model:infly/OpenCoder-8B-Base", "base_model:finetune:infly/OpenCoder-8B-Base", "license:other", "autotrain_compatible", "text-generation-inference", "endpoints_compatible", "region:us" ] } }, "components": [ { "type": "data", "bom-ref": "OpenCoder-LLM/opencoder-sft-stage1-372d7af3-6153-5091-b5cd-39c612c74e03", "name": "OpenCoder-LLM/opencoder-sft-stage1", "data": [ { "type": "dataset", "bom-ref": "OpenCoder-LLM/opencoder-sft-stage1-372d7af3-6153-5091-b5cd-39c612c74e03", "name": "OpenCoder-LLM/opencoder-sft-stage1", "contents": { "url": "https://huggingface.co/datasets/OpenCoder-LLM/opencoder-sft-stage1", "properties": [ { "name": "configs", "value": "Name of the dataset subset: filtered_infinity_instruct {\"split\": \"train\", \"path\": \"data/filtered_infinity_instruct-*\"}" }, { "name": "configs", "value": "Name of the dataset subset: largescale_diverse_instruct {\"split\": \"train\", \"path\": \"data/largescale_diverse_instruct-*\"}" }, { "name": "configs", "value": "Name of the dataset subset: realuser_instruct {\"split\": \"train\", \"path\": \"data/realuser_instruct-*\"}" }, { "name": "license", "value": "mit" } ] }, "governance": { "owners": [ { "organization": { "name": "OpenCoder-LLM", "url": "https://huggingface.co/OpenCoder-LLM" } } ] }, "description": "\n\n\t\n\t\t\n\t\tOpenCoder Dataset\n\t\n\nThe OpenCoder dataset is composed of the following datasets:\n\nopc-sft-stage1: the sft data used for opencoder sft-stage1 <-- you are here\nopc-sft-stage2: the sft data used for opencoder sft-stage2\nopc-annealing-corpus: the synthetic data & algorithmic corpus used for opencoder annealing\nopc-fineweb-code-corpus: the code-related page recalled from fineweb\nopc-fineweb-math-corpus: the math-related page recalled from fineweb\nrefineCode-code-corpus-meta: the meta-data\u2026 See the full description on the dataset page: https://huggingface.co/datasets/OpenCoder-LLM/opc-sft-stage1."
} ] }, { "type": "data", "bom-ref": "OpenCoder-LLM/opencoder-sft-stage2-ea53b7ac-b98d-5eef-a195-834ce3ec5a49", "name": "OpenCoder-LLM/opencoder-sft-stage2", "data": [ { "type": "dataset", "bom-ref": "OpenCoder-LLM/opencoder-sft-stage2-ea53b7ac-b98d-5eef-a195-834ce3ec5a49", "name": "OpenCoder-LLM/opencoder-sft-stage2", "contents": { "url": "https://huggingface.co/datasets/OpenCoder-LLM/opencoder-sft-stage2", "properties": [ { "name": "configs", "value": "Name of the dataset subset: educational_instruct {\"split\": \"train\", \"path\": \"educational_instruct/train-*\"}" }, { "name": "configs", "value": "Name of the dataset subset: evol_instruct {\"split\": \"train\", \"path\": \"evol_instruct/train-*\"}" }, { "name": "configs", "value": "Name of the dataset subset: mceval_instruct {\"split\": \"train\", \"path\": \"mceval_instruct/train-*\"}" }, { "name": "configs", "value": "Name of the dataset subset: package_instruct {\"split\": \"train\", \"path\": \"package_instruct/train-*\"}" }, { "name": "license", "value": "mit" } ] }, "governance": { "owners": [ { "organization": { "name": "OpenCoder-LLM", "url": "https://huggingface.co/OpenCoder-LLM" } } ] }, "description": "\n\n\t\n\t\t\n\t\tOpenCoder Dataset\n\t\n\nThe OpenCoder dataset is composed of the following datasets:\n\nopc-sft-stage1: the sft data used for opencoder sft-stage1\nopc-sft-stage2: the sft data used for opencoder sft-stage2 <-- you are here\nopc-annealing-corpus: the synthetic data & algorithmic corpus used for opencoder annealing\nopc-fineweb-code-corpus: the code-related page recalled from fineweb\nopc-fineweb-math-corpus: the math-related page recalled from fineweb\nrefineCode-code-corpus-meta: the meta-data\u2026 See the full description on the dataset page: https://huggingface.co/datasets/OpenCoder-LLM/opc-sft-stage2." } ] } ] }