SmolVLM2-XSPFGenerator

Build error

App Files Files Community

Miquel Farre committed on Feb 14

Commit

e05c441

1 Parent(s): 9bada46

update

Browse files

Files changed (1) hide show

app.py +17 -23

app.py CHANGED Viewed

@@ -1,15 +1,16 @@
 import os
 import json
 import gradio as gr
-import tempfile
 import torch
 import spaces
 from pathlib import Path
-from transformers import AutoProcessor, AutoModelForVision2Seq
 import subprocess
 import logging
 import xml.etree.ElementTree as ET
 from xml.dom import minidom
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -152,49 +153,43 @@ class VideoHighlightDetector:
         outputs = self.model.generate(**inputs, max_new_tokens=64, do_sample=False)
         response = self.processor.decode(outputs[0], skip_special_tokens=True).lower().split("assistant: ")[1]
         return "yes" in response
 def create_xspf_playlist(video_path: str, segments: list, descriptions: list) -> str:
     """Create XSPF playlist from segments with descriptions."""
-    # Register the VLC namespace
-    ET.register_namespace('vlc', 'http://www.videolan.org/vlc/playlist/ns/0/')
-    ET.register_namespace('', 'http://xspf.org/ns/0/')
-    root = ET.Element("{http://xspf.org/ns/0/}playlist", {"version": "1"})
     # Get video filename for the title
     video_filename = os.path.basename(video_path)
-    title = ET.SubElement(root, "{http://xspf.org/ns/0/}title")
     title.text = f"{video_filename} - Highlights"
-    tracklist = ET.SubElement(root, "{http://xspf.org/ns/0/}trackList")
     for idx, ((start_time, end_time), description) in enumerate(zip(segments, descriptions)):
-        track = ET.SubElement(tracklist, "{http://xspf.org/ns/0/}track")
-        location = ET.SubElement(track, "{http://xspf.org/ns/0/}location")
         location.text = f"file:///{video_filename}"
-        title = ET.SubElement(track, "{http://xspf.org/ns/0/}title")
         title.text = f"Highlight {idx + 1}"
-        annotation = ET.SubElement(track, "{http://xspf.org/ns/0/}annotation")
         annotation.text = description
-        start_meta = ET.SubElement(track, "{http://xspf.org/ns/0/}meta", {"rel": "start"})
         start_meta.text = format_duration(start_time)
-        end_meta = ET.SubElement(track, "{http://xspf.org/ns/0/}meta", {"rel": "end"})
         end_meta.text = format_duration(end_time)
     # Add VLC extension
-    extension = ET.SubElement(root, "{http://xspf.org/ns/0/}extension",
-                            {"application": "http://www.videolan.org/vlc/playlist/0"})
     for i in range(len(segments)):
-        ET.SubElement(extension, "{http://www.videolan.org/vlc/playlist/ns/0/}item",
-                     {"tid": str(i)})
     # Convert to string with pretty printing
-    xml_str = minidom.parseString(ET.tostring(root, encoding='unicode')).toprettyxml(indent="    ")
     return xml_str
 def create_ui(examples_path: str, model_path: str):
@@ -243,7 +238,7 @@ def create_ui(examples_path: str, model_path: str):
             try:
                 duration = get_video_duration_seconds(video)
-                if duration > 18000:  # 300 minutes
                     return [
                         None,
                         "Video must be shorter than 30 minutes",
@@ -286,7 +281,6 @@ def create_ui(examples_path: str, model_path: str):
                         if detector.process_segment(temp_segment.name, highlights):
                             # Get segment description
-                            print("KEEPING SEGMENT")
                             description = detector.analyze_segment(temp_segment.name)
                             kept_segments.append((start_time, end_time))
                             segment_descriptions.append(description)

 import os
 import json
 import gradio as gr
 import torch
 import spaces
+import tempfile
 from pathlib import Path
 import subprocess
 import logging
 import xml.etree.ElementTree as ET
 from xml.dom import minidom
+from transformers import AutoProcessor, AutoModelForVision2Seq
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
         outputs = self.model.generate(**inputs, max_new_tokens=64, do_sample=False)
         response = self.processor.decode(outputs[0], skip_special_tokens=True).lower().split("assistant: ")[1]
         return "yes" in response
 def create_xspf_playlist(video_path: str, segments: list, descriptions: list) -> str:
     """Create an XSPF playlist from highlight segments and their descriptions.

     The playlist has a title derived from the video's file name, one
     ``track`` per (segment, description) pair, and a VLC ``extension``
     element holding one ``vlc:item`` per segment.

     Args:
         video_path: Path of the source video; only its base name is used,
             both in the playlist title and in each track's ``location``.
         segments: ``(start_time, end_time)`` pairs, one per highlight.
         descriptions: Annotation text per highlight, paired positionally
             with ``segments``; tracks are only emitted for complete pairs.

     Returns:
         The pretty-printed playlist XML as a string.
     """
     video_filename = os.path.basename(video_path)
     # ElementTree writes attribute names verbatim, so both namespaces are
     # declared by hand. The ``vlc`` prefix has to be bound here: without it
     # the ``vlc:item`` elements below make the minidom re-parse fail with
     # an "unbound prefix" error as soon as there is at least one segment.
     root = ET.Element(
         "playlist",
         {
             "version": "1",
             "xmlns": "http://xspf.org/ns/0/",
             "xmlns:vlc": "http://www.videolan.org/vlc/playlist/ns/0/",
         },
     )
     playlist_title = ET.SubElement(root, "title")
     playlist_title.text = f"{video_filename} - Highlights"
     tracklist = ET.SubElement(root, "trackList")
     for idx, ((start_time, end_time), description) in enumerate(zip(segments, descriptions)):
         track = ET.SubElement(tracklist, "track")
         location = ET.SubElement(track, "location")
         location.text = f"file:///{video_filename}"
         track_title = ET.SubElement(track, "title")
         track_title.text = f"Highlight {idx + 1}"
         annotation = ET.SubElement(track, "annotation")
         annotation.text = description
         # Segment boundaries are stored as <meta rel="start|end"> entries,
         # rendered by the module's format_duration helper.
         start_meta = ET.SubElement(track, "meta", rel="start")
         start_meta.text = format_duration(start_time)
         end_meta = ET.SubElement(track, "meta", rel="end")
         end_meta.text = format_duration(end_time)
     # Add VLC extension: one item per segment, numbered from 0.
     extension = ET.SubElement(root, "extension", application="http://www.videolan.org/vlc/playlist/0")
     for track_id, _segment in enumerate(segments):
         ET.SubElement(extension, "vlc:item", tid=str(track_id))
     # Round-trip through minidom purely to get indented output.
     xml_str = minidom.parseString(ET.tostring(root)).toprettyxml(indent="    ")
     return xml_str
 def create_ui(examples_path: str, model_path: str):
             try:
                 duration = get_video_duration_seconds(video)
+                if duration > 1800:  # 30 minutes
                     return [
                         None,
                         "Video must be shorter than 30 minutes",
                         if detector.process_segment(temp_segment.name, highlights):
                             # Get segment description
                             description = detector.analyze_segment(temp_segment.name)
                             kept_segments.append((start_time, end_time))
                             segment_descriptions.append(description)