File size: 3,039 Bytes
f6f0c40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
955ce73
 
 
f6f0c40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d30f5cb
f6f0c40
 
 
 
 
d30f5cb
f6f0c40
 
 
 
 
 
 
 
 
 
 
d30f5cb
f6f0c40
d30f5cb
f6f0c40
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101

/*
unfortunately the Gradio client doesn't support streaming:
it will crash here with a nasty error

  node_modules/@gradio/client/dist/index.js:705
  return data.map((d, i) => {
              ^
TypeError: Cannot read properties of null (reading 'is_file')
    at node_modules/@gradio/client/dist/index.js:713:43
    at Array.map (<anonymous>)
    at transform_output (node_modules/@gradio/client/dist/index.js:705:15)


This prevents us from using IDEFICS through the Gradio API,
so the only solution is to hack our way in using puppeteer.
*/


import path from "node:path"

import { v4 as uuidv4 } from "uuid"
import tmpDir from "temp-dir"
import puppeteer from "puppeteer"

import { writeBase64ToFile } from "../../utils/filesystem/writeBase64ToFile.mts"
import { sleep } from "../../utils/misc/sleep.mts"
import { deleteFileIfExists } from "../../utils/filesystem/deleteFileIfExists.mts"

// Base URLs of the analysis Space(s), read from VC_ANALYSIS_SPACE_API_URL.
// An unset or empty variable yields an empty list rather than a blank entry.
const instances: string[] = [process.env.VC_ANALYSIS_SPACE_API_URL || ""].filter(
  (url) => url.length > 0
)

// There is no easy-to-use public API for IDEFICS
// (something where we can just push text + file and get a response without handling history, upload etc.)
// So let's hack our way in.
/**
 * Ask the configured analysis Space a question about an image by driving its
 * web UI with a headless browser.
 *
 * The image is written to a temporary `.jpg` file, uploaded through the page's
 * file input together with the prompt, and the text of the first bot message
 * paragraph is read back after a fixed delay.
 *
 * @param image - base64 image payload, passed as-is to `writeBase64ToFile`
 * @param prompt - text typed into the page's textarea
 * @returns the first bot message paragraph, or `""` if none was found
 * @throws Error if no instance URL is configured or an expected page control is missing
 */
export async function analyzeImage(image: string, prompt: string): Promise<string> {
  // round-robin: take the first instance and put it back at the end of the list
  const instance = instances.shift()
  if (!instance) {
    throw new Error("analyzeImage: no analysis instance configured (VC_ANALYSIS_SPACE_API_URL is empty)")
  }
  instances.push(instance)

  // wait.. is that really a jpg we have?
  // well, let's hope so.
  const tmpImageFilePath = path.join(tmpDir, `${uuidv4()}.jpg`)

  // outer try/finally: the temporary file is removed no matter which step fails,
  // including the write itself, the browser launch, or the browser shutdown
  try {
    await writeBase64ToFile(image, tmpImageFilePath)

    const browser = await puppeteer.launch({
      headless: true,
      protocolTimeout: 30000,
    })

    // inner try/finally: the browser is always closed once it has been launched
    try {
      const page = await browser.newPage()

      await page.goto(instance, {
        waitUntil: 'networkidle2',
      })

      const promptField = await page.$('textarea')
      if (!promptField) {
        throw new Error("analyzeImage: could not find the prompt textarea on the page")
      }
      await promptField.type(prompt)

      const fileField = await page.$('input[type=file]')
      if (!fileField) {
        throw new Error("analyzeImage: could not find the file input on the page")
      }

      console.log(`uploading file..`)
      await fileField.uploadFile(tmpImageFilePath)

      const submitButton = await page.$('button.lg')
      if (!submitButton) {
        throw new Error("analyzeImage: could not find the submit button on the page")
      }
      await submitButton.click()

      console.log("waiting for bot response..")
      try {
        // page.$() only queries once and returns immediately;
        // waitForSelector actually blocks until the bot message exists
        await page.waitForSelector('.message.bot')
      } catch (err) {
        // stay best-effort: if the message never shows up we still fall through
        // and return "" below instead of failing the whole call
        console.log("bot response did not show up in time:", err)
      }

      // note: we are going to receive the response in streaming

      // TODO we should use a different approach here, like perhaps something to detect when the element
      // has stopped receiving updates
      await sleep(12000)

      const message = await page.$$eval(".message.bot p", el => el.map(x => x.innerText)[0])
      console.log("response:", message)

      return message || ""
    } finally {
      await browser.close()
    }
  } finally {
    await deleteFileIfExists(tmpImageFilePath)
  }
}