# File size: 3,069 bytes
# Revision: b31f748
from html import escape
import iscc_sct as ict
def generate_html(fingerprint_data):
    """Render granular fingerprint data as a standalone HTML page.

    Each chunk's text is shown once, in offset order. Text that belongs to a
    single chunk is highlighted with the chunk colour, text shared with the
    following chunk is highlighted with the overlap colour, and every chunk is
    followed by a small badge showing its ``feature`` value.

    Args:
        fingerprint_data: Mapping with the keys ``"iscc"``, ``"characters"``
            and ``"features"``. ``"features"`` is a list of mappings, each
            with ``"offset"``, ``"size"``, ``"text"`` and ``"feature"``.
            Offsets and sizes are used as character positions into the
            original text (presumably ``size == len(text)``; verify against
            the producer). The input is not modified.

    Returns:
        The complete HTML document as a string.
    """

    def escape_and_preserve_breaks(text):
        # HTML-escape the text and turn newlines into explicit <br> tags.
        return escape(text).replace("\n", "<br>")

    # Work on a sorted copy so the caller's feature list keeps its order.
    chunks = sorted(fingerprint_data["features"], key=lambda chunk: chunk["offset"])

    chunk_color = "bg-yellow-100"
    overlap_color = "bg-red-100"

    # Escape interpolated values; they end up verbatim in the markup.
    iscc = escape(str(fingerprint_data["iscc"]))
    characters = escape(str(fingerprint_data["characters"]))

    # The template lines are kept flush-left: the last <div> uses
    # whitespace-pre-wrap, so stray indentation would be rendered.
    parts = [
        f"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Text Fingerprint Visualization</title>
<script src="https://cdn.tailwindcss.com"></script>
</head>
<body class="bg-gray-100 p-8">
<div class="max-w-4xl mx-auto bg-white p-6 rounded-lg shadow-lg">
<h1 class="text-2xl font-bold mb-4">Text Fingerprint Visualization</h1>
<div class="text-sm mb-4">
<span class="font-semibold">ISCC:</span> {iscc}
</div>
<div class="text-sm mb-4">
<span class="font-semibold">Characters:</span> {characters}
</div>
<div class="relative text-base leading-relaxed whitespace-pre-wrap">
"""
    ]

    current_pos = 0  # first character position not yet rendered
    last_index = len(chunks) - 1
    for i, chunk in enumerate(chunks):
        offset = chunk["offset"]
        text = chunk["text"]
        start = max(offset, current_pos)
        end = offset + chunk["size"]

        if start >= end:
            # Chunk is fully covered by text already rendered; skipping it
            # also keeps current_pos from moving backwards.
            continue

        rel_start = start - offset
        has_overlap = i < last_index and end > chunks[i + 1]["offset"]

        # The outer span carries the overlap colour; text owned by this chunk
        # alone is wrapped in an inner span with the chunk colour.
        parts.append(f'<span class="{overlap_color}">')
        if has_overlap:
            # Clamp to `start` so that text already emitted by an earlier
            # chunk is not repeated when three or more chunks overlap.
            rel_overlap = max(chunks[i + 1]["offset"], start) - offset
            own_text = escape_and_preserve_breaks(text[rel_start:rel_overlap])
            parts.append(f'<span class="{chunk_color}">{own_text}</span>')
            parts.append(escape_and_preserve_breaks(text[rel_overlap:]))
        else:
            # No following overlap: everything left belongs to this chunk.
            own_text = escape_and_preserve_breaks(text[rel_start:])
            parts.append(f'<span class="{chunk_color}">{own_text}</span>')

        # Fingerprint badge
        feature = escape(str(chunk["feature"]))
        parts.append(
            '<span class="inline-block bg-gray-800 text-white text-xs px-2 py-1 rounded ml-1">'
            f"{feature}</span>"
        )
        parts.append("</span>")

        current_pos = end

    parts.append(
        """
</div>
</div>
</body>
</html>
"""
    )
    return "".join(parts)
def main():
    """Fingerprint ``../README.md`` and write the visualization to ``readme.html``.

    Both paths are resolved relative to the current working directory. The
    README is read as bytes and decoded as UTF-8, passed to ``ict.create``
    with ``granular=True``, and the dumped result is printed before being
    rendered to HTML.
    """
    with open("../README.md", "rb") as readme:
        raw = readme.read()
    text = raw.decode("utf-8")

    result = ict.create(text, granular=True)
    print(result.model_dump())

    # Render the dumped result and save the page as UTF-8 text.
    page = generate_html(result.model_dump())
    with open("readme.html", "wt", encoding="utf-8") as out_file:
        out_file.write(page)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|