File size: 2,284 Bytes
8ce416b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f6f0c40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8ce416b
 
 
f6f0c40
8ce416b
 
 
 
 
 
 
 
 
 
 
 
 
 
f6f0c40
8ce416b
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65

import { client } from "@gradio/client"

// Pool of analysis Space endpoints, read once at module load from
// VC_ANALYSIS_SPACE_API_URL. An unset or empty variable yields an empty pool.
// analyzeImage() rotates through this array on every call.
const instances: string[] = [process.env.VC_ANALYSIS_SPACE_API_URL]
  .map(url => `${url || ""}`)
  .filter(url => url.length > 0)

/**
 * Sends an image and a text prompt to one of the configured analysis Spaces
 * (through the Gradio client) and returns the first element of the response data.
 *
 * The endpoint is taken from the front of `instances` and moved to the back,
 * so successive calls rotate through the configured endpoints.
 *
 * @param src - value forwarded as the 'Image input' argument of the Space
 * @param prompt - value forwarded as the 'Text input' argument of the Space
 * @returns the first entry of the response's `data` array
 *          (presumably the generated text - verify against the Space's output schema)
 * @throws Error when no endpoint is configured (VC_ANALYSIS_SPACE_API_URL is empty or unset)
 */
export async function analyzeImage(src: string, prompt: string): Promise<string> {

  // Take the next endpoint; fail fast instead of pushing `undefined` back into
  // the pool and letting the Gradio client crash on a missing URL.
  const instance = instances.shift()
  if (instance === undefined) {
    throw new Error(
      "analyzeImage: no analysis Space configured (VC_ANALYSIS_SPACE_API_URL is empty or unset)"
    )
  }
  instances.push(instance)

  // Only forward a token when one is actually configured: interpolating an
  // unset variable into a template string would send the literal "undefined".
  const hfToken = process.env.VC_HF_API_TOKEN || undefined

  const api = await client(instance, {
    // the cast is kept from the original call; the client's declared token type
    // is presumably narrower than `string` - TODO confirm
    hf_token: hfToken as any
  })

  console.log("/analyzeImage: calling api.predict(6, ...)")

  /*
  the chat history has this format:
  [
    [
      '![](/file=/tmp/gradio/2ee0577f810cba5c50d0a7f047a9e6557f4e269f/image.png)What do you see in the following image?',
      'I'
    ]
  ]
  */
  // always sent empty: each call starts a fresh conversation
  const chatHistory: [string, string][] = [
    // ['', '']
  ]

  // unfortunately the Gradio client doesn't support streaming, and will crash here with a nasty error
  /*
  node_modules/@gradio/client/dist/index.js:705
  return data.map((d, i) => {
              ^
  TypeError: Cannot read properties of null (reading 'is_file')
    at node_modules/@gradio/client/dist/index.js:713:43
    at Array.map (<anonymous>)
    at transform_output (node_modules/@gradio/client/dist/index.js:705:15)
  */

  const result = await api.predict(6, [
    "HuggingFaceM4/idefics-80b-instruct", // string (Option from: ['HuggingFaceM4/idefics-80b-instruct']) in 'Model' Dropdown component
    prompt, // string  in 'Text input' Textbox component
    chatHistory, // any (any valid json) in 'IDEFICS' Chatbot component
    src, // blob in 'Image input' Image component

    // the following values come from the source code at:
    // https://huggingface.co/spaces/HuggingFaceM4/idefics_playground/blob/main/app_dialogue.py#L416-L472

    "Greedy", // string  in 'Decoding strategy' Radio component
    0.4, // number (numeric value between 0.0 and 5.0) in 'Sampling temperature' Slider component
    512, // number (numeric value between 8 and 1024) in 'Maximum number of new tokens to generate' Slider component
    1, // number (numeric value between 0.0 and 5.0) in 'Repetition penalty' Slider component
    0.8, // number (numeric value between 0.01 and 0.99) in 'Top P' Slider component
  ])

  // the response shape is not typed by the client, hence the loose cast
  const rawResponse = result as any

  console.log("got a response!:", rawResponse)

  // NOTE(review): this is `undefined` when the response has no `data` array,
  // despite the declared `string` return type - callers should guard for it.
  return rawResponse?.data?.[0] as string
}