# gemini.py

import codecs
import http.client
import json
import mimetypes
import os
import subprocess
 6
 7def get_text_files():
 8    text_files = []
 9    # List all files tracked by Git
10    git_files_proc = subprocess.run(['git', 'ls-files'], stdout=subprocess.PIPE, text=True)
11    for file in git_files_proc.stdout.strip().split('\n'):
12        # Check MIME type for each file
13        mime_check_proc = subprocess.run(['file', '--mime', file], stdout=subprocess.PIPE, text=True)
14        if 'text' in mime_check_proc.stdout:
15            text_files.append(file)
16
17    print(f"File count: {len(text_files)}")
18
19    return text_files
20
def get_file_contents(file):
    """Return the full contents of *file* as text, decoded as UTF-8.

    Args:
        file: Path of the file to read.

    Returns:
        str: The decoded contents. Bytes that are not valid UTF-8 are
        replaced with U+FFFD rather than raising ``UnicodeDecodeError``.
    """
    # An explicit encoding keeps the result independent of the platform
    # locale; errors='replace' stops one oddly-encoded file from aborting
    # the whole run.
    with open(file, 'r', encoding='utf-8', errors='replace') as f:
        return f.read()
25
26
def main():
    """Send every tracked text file to the Gemini API and print the reply.

    Builds one prompt consisting of a fixed instruction followed by a fenced
    code block per file returned by ``get_text_files()``, then:

    1. POSTs it to the ``countTokens`` endpoint and prints the token count.
    2. POSTs it to the ``streamGenerateContent`` endpoint and prints the
       response body as it arrives.

    The API key is read from the ``GEMINI_API_KEY`` environment variable.

    Raises:
        SystemExit: If ``GEMINI_API_KEY`` is not set or is empty.
    """
    GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY')
    if not GEMINI_API_KEY:
        # Without this guard the literal string "None" would be sent as the key.
        raise SystemExit("GEMINI_API_KEY environment variable is not set.")

    # Your prompt
    prompt = "Document the data types and dataflow in this codebase in preparation to port a streaming implementation to rust:\n\n"

    # Create a code block for each text file
    code_blocks = []
    for file in get_text_files():
        file_contents = get_file_contents(file)
        # Put the fences on their own lines; otherwise the first line of the
        # file is glued to the opening fence and read as its info string.
        if not file_contents.endswith('\n'):
            file_contents += '\n'
        code_blocks.append(f"\n`{file}`\n\n```\n{file_contents}```\n")

    # Construct the JSON payload as bytes so Content-Length is a byte count
    # and http.client sends exactly what was measured.
    payload = json.dumps({
        "contents": [{
            "parts": [{
                "text": prompt + "".join(code_blocks)
            }]
        }]
    }).encode('utf-8')

    headers = {
        'Content-Type': 'application/json',
        'Content-Length': str(len(payload))
    }

    print(f"Content Length in kilobytes: {len(payload) / 1024:.2f} KB")

    host = "generativelanguage.googleapis.com"
    model_path = "/v1beta/models/gemini-1.5-pro-latest"

    # Send a request to count the tokens
    conn = http.client.HTTPSConnection(host)
    try:
        conn.request("POST", f"{model_path}:countTokens?key={GEMINI_API_KEY}", body=payload, headers=headers)
        response = conn.getresponse()
        response_body = response.read().decode('utf-8')
        if response.status == 200:
            token_count = json.loads(response_body).get('totalTokens')
            print(f"Token count: {token_count}")
        else:
            print(f"Failed to get token count. Status code: {response.status}, Response body: {response_body}")
    finally:
        # Release the socket even when the request or parsing fails.
        conn.close()

    # Request the generated content; this endpoint takes a POST with a body.
    conn = http.client.HTTPSConnection(host)
    try:
        conn.request("POST", f"{model_path}:streamGenerateContent?key={GEMINI_API_KEY}", body=payload, headers=headers)

        # Get the response in a streaming manner
        response = conn.getresponse()
        if response.status == 200:
            print("Successfully sent the data to the API.")
            # An incremental decoder carries a multi-byte UTF-8 sequence that
            # straddles a 4096-byte boundary over to the next chunk.
            decoder = codecs.getincrementaldecoder('utf-8')(errors='replace')
            while chunk := response.read(4096):
                # end='' so chunk boundaries do not show up as line breaks.
                print(decoder.decode(chunk), end='', flush=True)
            # Flush whatever the decoder still holds and end the output line.
            print(decoder.decode(b'', final=True))
        else:
            print(f"Failed to send the data to the API. Status code: {response.status}, Response body: {response.read().decode('utf-8')}")
    finally:
        # Close the connection
        conn.close()
89
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()