fix: push files via Git LFS instead of raw blobs

Binary files (.glb, .gltf, .png, .jpg, .jpeg, .webp) were pushed as raw
Git blobs via Octokit, bypassing Git LFS. This caused LFS-tracked repos
to see all files as modified after git pull, requiring spurious commits.

The bot now uploads binaries to the LFS server via the Batch API and
stores LFS pointer files in the Git tree. This also fixes getRemoteFolder()
so that it reports the real file size recorded in each LFS pointer rather
than the size of the pointer file itself.
This commit is contained in:
Tom Boullay
2026-04-15 09:38:05 +02:00
parent 616eb26206
commit 8bbc0dc0eb
2 changed files with 222 additions and 14 deletions
+3
View File
@@ -6,6 +6,9 @@ export const MODEL_EXTENSIONS = new Set(['.glb', '.gltf'])
export const TEXTURE_EXTENSIONS = new Set(['.png', '.jpg', '.jpeg', '.webp'])
export const ALL_ALLOWED_EXTENSIONS = new Set([...MODEL_EXTENSIONS, ...TEXTURE_EXTENSIONS])
/** Extensions tracked by Git LFS (must match .gitattributes) */
export const LFS_EXTENSIONS = new Set(['.glb', '.gltf', '.png', '.jpg', '.jpeg', '.webp'])
export const REQUIRED_TEXTURES = ['roughness', 'normal', 'metalness', 'color', 'displace'] as const
export const VALID_DESTINATIONS = new Set<string>([
+218 -13
View File
@@ -1,4 +1,6 @@
import { createHash } from 'crypto'
import { Octokit } from '@octokit/rest'
import { LFS_EXTENSIONS } from './constants'
import type { RemoteFile } from './types'
// ---------------------------------------------------------------------------
@@ -30,7 +32,143 @@ function parseRepoUrl(): { owner: string; repo: string } {
}
// ---------------------------------------------------------------------------
// Read remote folder contents (with size per file)
// Git LFS helpers
// ---------------------------------------------------------------------------
/**
 * Tell whether a path belongs to Git LFS, judging only by its extension.
 *
 * The extension is everything from the last `.` onward, lower-cased, and is
 * looked up in LFS_EXTENSIONS.
 */
function isLfsFile(filePath: string): boolean {
  const lastDot = filePath.lastIndexOf('.')
  // When there is no dot, lastDot is -1 and slice() yields the final
  // character, which can never equal a dotted extension.
  const extension = filePath.slice(lastDot).toLowerCase()
  return LFS_EXTENSIONS.has(extension)
}
/**
 * Render the text of a Git LFS pointer file — the small blob that is stored
 * in Git in place of the binary content.
 *
 * @param sha256 - hex digest placed after `oid sha256:`
 * @param size - byte count placed after `size`
 * @returns three newline-terminated lines: version, oid, size
 */
function buildLfsPointer(sha256: string, size: number): string {
  const pointerLines = [
    'version https://git-lfs.github.com/spec/v1',
    `oid sha256:${sha256}`,
    `size ${size}`,
  ]
  // Every line, including the last, ends with a single "\n".
  return pointerLines.map((line) => line + '\n').join('')
}
/**
 * Extract the object id and real byte size from Git LFS pointer text.
 *
 * @param content - decoded text of a blob that may or may not be a pointer
 * @returns the `oid` (64 lowercase hex chars) and `size`, or null when the
 *   text does not start with the v1 version line or lacks either field
 */
function parseLfsPointer(content: string): { oid: string; size: number } | null {
  const hasVersionHeader = content.startsWith('version https://git-lfs.github.com/spec/v1')
  if (!hasVersionHeader) {
    return null
  }

  // Each field must occupy a whole line, hence the multiline anchors.
  const oidLine = /^oid sha256:([a-f0-9]{64})$/m.exec(content)
  const sizeLine = /^size (\d+)$/m.exec(content)
  if (oidLine === null || sizeLine === null) {
    return null
  }

  const realSize = parseInt(sizeLine[1], 10)
  return { oid: oidLine[1], size: realSize }
}
/** One binary object handed to uploadToLfs() for transfer to the LFS server. */
interface LfsObject {
// Object id sent in the batch request; the caller computes it as the hex
// SHA-256 digest of the decoded content.
oid: string
// Byte length sent alongside the oid; the caller uses the decoded buffer length.
size: number
// File content as base64; decoded to raw bytes right before the PUT upload.
contentBase64: string
}
/**
 * Upload binary objects to the Git LFS server via the Batch API.
 *
 * Flow:
 * 1. POST to the LFS batch endpoint with operation "upload"
 * 2. For each object that has an "upload" action, PUT the binary content
 * 3. If the object also has a "verify" action, POST its oid/size there
 * 4. If the server omits the "upload" action, the object is treated as
 *    already stored and is skipped
 *
 * Objects that share an oid are requested and uploaded only once.
 *
 * @param owner - owner segment of the GitHub repository URL
 * @param repo - repository name
 * @param objects - objects to upload; an empty list is a no-op
 * @throws Error if GITHUB_TOKEN is unset, if any request returns a non-OK
 *   status, if the batch response has no "objects" array, or if the server
 *   reports a per-object error
 */
async function uploadToLfs(
  owner: string,
  repo: string,
  objects: LfsObject[],
): Promise<void> {
  if (objects.length === 0) return

  // Fail fast with a clear message instead of sending "token undefined".
  const token = process.env.GITHUB_TOKEN
  if (!token) {
    throw new Error('GITHUB_TOKEN is not set; cannot authenticate with the LFS server')
  }

  // Index by oid: collapses identical content into one request entry and lets
  // us find the local bytes for each object named in the server's response.
  const objectMap = new Map<string, LfsObject>(
    objects.map((o): [string, LfsObject] => [o.oid, o]),
  )
  const lfsUrl = `https://github.com/${owner}/${repo}.git/info/lfs/objects/batch`

  // 1. Batch request — ask for upload URLs
  const batchRes = await fetch(lfsUrl, {
    method: 'POST',
    headers: {
      'Accept': 'application/vnd.git-lfs+json',
      'Content-Type': 'application/vnd.git-lfs+json',
      'Authorization': `token ${token}`,
    },
    body: JSON.stringify({
      operation: 'upload',
      transfers: ['basic'],
      objects: Array.from(objectMap.values(), (o) => ({ oid: o.oid, size: o.size })),
    }),
  })

  if (!batchRes.ok) {
    const text = await batchRes.text()
    throw new Error(`LFS batch request failed (${batchRes.status}): ${text}`)
  }

  const batchData = (await batchRes.json()) as {
    objects?: Array<{
      oid: string
      size: number
      actions?: {
        upload?: { href: string; header?: Record<string, string> }
        verify?: { href: string; header?: Record<string, string> }
      }
      error?: { code: number; message: string }
    }>
  } | null

  // A 2xx reply without an object list is malformed; say so explicitly rather
  // than failing with an opaque "not iterable" TypeError below.
  const remoteObjects = batchData?.objects
  if (!Array.isArray(remoteObjects)) {
    throw new Error('LFS batch response did not contain an "objects" array')
  }

  // 2. Upload each object that has an upload action
  for (const obj of remoteObjects) {
    if (obj.error) {
      throw new Error(`LFS error for ${obj.oid}: ${obj.error.message} (${obj.error.code})`)
    }

    // No upload action = server already has this object, skip
    if (!obj.actions?.upload) continue

    // The server named an oid we never sent; nothing to upload for it.
    const local = objectMap.get(obj.oid)
    if (!local) continue

    const uploadAction = obj.actions.upload
    // Server-supplied headers come last so they win over our default.
    const headers: Record<string, string> = {
      'Content-Type': 'application/octet-stream',
      ...uploadAction.header,
    }

    const body = Buffer.from(local.contentBase64, 'base64')
    const uploadRes = await fetch(uploadAction.href, {
      method: 'PUT',
      headers,
      body,
    })

    if (!uploadRes.ok) {
      const text = await uploadRes.text()
      throw new Error(`LFS upload failed for ${obj.oid} (${uploadRes.status}): ${text}`)
    }

    // 3. Verify if required
    if (obj.actions.verify) {
      const verifyAction = obj.actions.verify
      const verifyHeaders: Record<string, string> = {
        'Accept': 'application/vnd.git-lfs+json',
        'Content-Type': 'application/vnd.git-lfs+json',
        ...verifyAction.header,
      }

      const verifyRes = await fetch(verifyAction.href, {
        method: 'POST',
        headers: verifyHeaders,
        body: JSON.stringify({ oid: obj.oid, size: obj.size }),
      })

      if (!verifyRes.ok) {
        const text = await verifyRes.text()
        throw new Error(`LFS verify failed for ${obj.oid} (${verifyRes.status}): ${text}`)
      }
    }
  }
}
// ---------------------------------------------------------------------------
// Read remote folder contents (with real file sizes for LFS files)
// ---------------------------------------------------------------------------
export async function getRemoteFolder(
@@ -48,14 +186,44 @@ export async function getRemoteFolder(
ref: branch,
})
if (Array.isArray(data)) {
return {
exists: true,
files: data.map((f) => ({ name: f.name, size: f.size })),
}
if (!Array.isArray(data)) {
return { exists: false, files: [] }
}
return { exists: false, files: [] }
// For LFS-tracked files, the "size" from getContent is the pointer size (~130 bytes),
// not the real file size. We need to fetch each LFS pointer to get the real size.
const files: RemoteFile[] = await Promise.all(
data.map(async (f): Promise<RemoteFile> => {
if (!isLfsFile(f.name) || f.size > 1024) {
// Not LFS or too large to be a pointer — use size as-is
return { name: f.name, size: f.size }
}
// Fetch the blob content to check if it's an LFS pointer
try {
const { data: fileData } = await octokit.repos.getContent({
owner,
repo,
path: `${folderPath}/${f.name}`,
ref: branch,
})
if (!Array.isArray(fileData) && 'content' in fileData && fileData.content) {
const content = Buffer.from(fileData.content, 'base64').toString('utf-8')
const pointer = parseLfsPointer(content)
if (pointer) {
return { name: f.name, size: pointer.size }
}
}
} catch {
// Fall through to use the original size
}
return { name: f.name, size: f.size }
}),
)
return { exists: true, files }
} catch (err: unknown) {
if (isHttpError(err) && err.status === 404) {
return { exists: false, files: [] }
@@ -65,7 +233,7 @@ export async function getRemoteFolder(
}
// ---------------------------------------------------------------------------
// Push all files in a single commit (with optional deletions)
// Push all files in a single commit (with optional deletions + LFS support)
// ---------------------------------------------------------------------------
export async function pushAllToGitHub(
@@ -77,6 +245,29 @@ export async function pushAllToGitHub(
const { owner, repo } = parseRepoUrl()
const branch = process.env.GIT_BRANCH ?? 'main'
// --- Separate LFS files from regular files ---
const lfsFiles: { path: string; contentBase64: string; oid: string; size: number }[] = []
const regularFiles: { path: string; contentBase64: string }[] = []
for (const f of files) {
if (isLfsFile(f.path)) {
const buf = Buffer.from(f.contentBase64, 'base64')
const oid = createHash('sha256').update(buf).digest('hex')
lfsFiles.push({ ...f, oid, size: buf.length })
} else {
regularFiles.push(f)
}
}
// --- Upload LFS objects to the LFS server ---
if (lfsFiles.length > 0) {
await uploadToLfs(
owner,
repo,
lfsFiles.map((f) => ({ oid: f.oid, size: f.size, contentBase64: f.contentBase64 })),
)
}
// 1. Get latest commit on branch
const { data: ref } = await octokit.git.getRef({
owner,
@@ -92,16 +283,30 @@ export async function pushAllToGitHub(
commit_sha: latestCommitSha,
})
// 3. Create all blobs in parallel
// 3. Create blobs — LFS files get pointer blobs, regular files get raw blobs
const allFiles = [...regularFiles, ...lfsFiles]
const blobResults = await Promise.all(
files.map((f) =>
octokit.git.createBlob({
allFiles.map((f) => {
const lfs = lfsFiles.find((lf) => lf.path === f.path)
if (lfs) {
// Create a blob with the LFS pointer text (NOT the binary content)
const pointer = buildLfsPointer(lfs.oid, lfs.size)
return octokit.git.createBlob({
owner,
repo,
content: Buffer.from(pointer, 'utf-8').toString('base64'),
encoding: 'base64',
})
}
// Regular file — push content as-is
return octokit.git.createBlob({
owner,
repo,
content: f.contentBase64,
encoding: 'base64',
})
}),
),
)
// 4. Build tree entries: new/changed files + deletions
@@ -120,7 +325,7 @@ export async function pushAllToGitHub(
repo,
base_tree: commit.tree.sha,
tree: [
...files.map((f, i) => ({
...allFiles.map((f, i) => ({
path: f.path,
mode: '100644' as const,
type: 'blob' as const,