feat(CourseInformer): 添加课程通知系统

- 实现了读取教务系统导出的xls课程安排以及通过Napcat实例发送对应通知的功能 - 添加了通知配置文件和测试脚本 - 创建了项目结构和必要的配置文件
2025-08-20 10:55:11 +08:00
commit 074a2ee56c
10 changed files with 786 additions and 0 deletions
--- a/reader.py
+++ b/reader.py
@@ -0,0 +1,247 @@
+"""
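+Module for reading course schedule information from the academic system's
+exported schedule file (an HTML document saved with an .xls extension and
+read as GBK-encoded text)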
+"""
+
+from dataclasses import dataclass
+from typing import List
+import os
+import re
+from bs4 import BeautifulSoup
+
+
+@dataclass
+class TimeRange:
+    """Pair of integer bounds describing a numeric range.
+
+    Used for both the teaching weeks and the class-period slots of a course.
+    A spec such as "1-8" is stored as from_value=1, to_value=8; a single
+    number such as "5" is stored with both bounds equal to that number.
+    """
+    from_value: int  # First number of the range
+    to_value: int  # Second number of the range (same as from_value for a single value)
+
+
+@dataclass
+class CourseSchedule:
+    """One course entry taken from a single cell of the weekly timetable."""
+    course_name: str  # Course title as it appears in the timetable cell
+    instructor: str  # Teacher name; the reader uses "Unknown" as its placeholder
+    week: TimeRange  # Teaching weeks, parsed from text such as "1-8" or "11-18"
+    timeslot: TimeRange  # Class periods, parsed from text such as "[1-2]", "[6-7]" or "[8-10]"
+    location: str  # e.g., "[龙]二号楼2301"
+    day: str  # Weekday label, e.g., "星期一", "星期二"; "Unknown" if it cannot be determined
+    period: str  # Part of the day: "上午", "下午", "晚上", or "Unknown"
+
+
+@dataclass
+class StudentInfo:
+    """Identity details of the student the schedule belongs to.
+
+    The reader fills text fields it cannot locate with the placeholder "Unknown".
+    """
+    student_id: str  # Taken from the value of the <input id="xh"> element
+    student_name: str  # Text following the "姓名：" label in the first table
+    class_name: str  # Text following the "所在班级：" label in the first table
+    total_credits: float  # NOTE(review): read_course_schedule never parses this; it is always 0.0
+
+
+def read_course_schedule(file_path: str = "resources/教学安排表.xls") -> tuple[StudentInfo, List[CourseSchedule]]:
+    """
+    Read student and course schedule information from an exported schedule file.
+
+    The file is opened as GBK-encoded text and parsed as HTML, regardless of
+    its extension. Fields that cannot be located keep placeholder values
+    ("Unknown", "Unknown Course", TimeRange(1, 1)). total_credits is not
+    parsed and is always 0.0.
+
+    Args:
+        file_path: Path to the exported schedule file
+
+    Returns:
+        Tuple of (StudentInfo, List of CourseSchedule objects)
+
+    Raises:
+        FileNotFoundError: If file_path does not exist
+    """
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"Course schedule file not found: {file_path}")
+
+    # Read the HTML file with GBK encoding
+    with open(file_path, 'r', encoding='GBK') as file:
+        content = file.read()
+
+    soup = BeautifulSoup(content, 'html.parser')
+    _student_info = _extract_student_info(soup)
+    days = _extract_day_names(soup)
+
+    _courses = []
+    for div in soup.find_all('div', class_='div1'):
+        xkinfo = div.find('span', class_='xkinfo')
+        if xkinfo is None:
+            continue
+        # Day and period come from the cell id, so they are shared by every
+        # course stacked inside this cell
+        day, period = _day_and_period_from_div_id(div.get('id', '') or '', days)
+        # Each cell may contain multiple courses, one per padded inner div
+        course_blocks = xkinfo.find_all('div', style=lambda x: x and 'padding-bottom:5px' in x)
+        for block in course_blocks:
+            _courses.append(_parse_course_block(block, day, period))
+
+    return _student_info, _courses
+
+
+# Week spec immediately followed by a bracketed timeslot, e.g. "1-8[3-4]"
+_WEEKS_AND_SLOT = re.compile(r'([0-9\-]+)\s*(\[[0-9\-]+\])')
+# Run of CJK ideographs at the very end of a string (candidate instructor name)
+_TRAILING_CJK = re.compile(r'([\u4e00-\u9fff]+)$')
+
+
+def _extract_student_info(soup):
+    """Collect the student id, name and class from the parsed document."""
+    student_id = "Unknown"
+    student_name = "Unknown"
+    class_name = "Unknown"
+
+    # Student id is stored in an input element with id "xh"
+    xh_input = soup.find('input', {'id': 'xh'})
+    if xh_input and xh_input.get('value'):
+        student_id = xh_input['value']
+
+    # Name and class are labelled cells in the first table of the document
+    first_table = soup.find('table')
+    if first_table is not None:
+        for row in first_table.find_all('tr'):
+            for cell in row.find_all('td'):
+                text = cell.get_text().strip()
+                # Strip only the leading label, never text inside the value
+                if text.startswith("姓名："):
+                    student_name = text.removeprefix("姓名：").strip()
+                elif text.startswith("所在班级："):
+                    class_name = text.removeprefix("所在班级：").strip()
+
+    return StudentInfo(
+        student_id=student_id,
+        student_name=student_name,
+        class_name=class_name,
+        total_credits=0.0,  # Not present in the parsed fields; kept at its default
+    )
+
+
+def _extract_day_names(soup):
+    """Return the weekday labels from the header row, or Monday-Friday defaults."""
+    default_days = ["星期一", "星期二", "星期三", "星期四", "星期五"]
+    header_row = soup.find('tr', class_='H')
+    if header_row is None:
+        return default_days
+
+    parsed_days = []
+    for cell in header_row.find_all('td', class_='td0'):
+        day_text = cell.get_text().strip()
+        if "星期" in day_text:  # Only keep actual day names
+            parsed_days.append(day_text)
+
+    # An unusable header must not wipe out the defaults, otherwise every
+    # course would end up with day "Unknown"
+    return parsed_days or default_days
+
+
+def _day_and_period_from_div_id(div_id, days):
+    """Map a cell id such as "k11" to a (day, period) pair of labels."""
+    day = "Unknown"
+    period = "Unknown"
+    if not (div_id.startswith('k') and len(div_id) >= 3):
+        return day, period
+
+    # Character after 'k' is the 1-based day number (1 = first entry of days)
+    try:
+        day_index = int(div_id[1]) - 1
+    except ValueError:
+        day_index = -1
+    if 0 <= day_index < len(days):
+        day = days[day_index]
+
+    # Next character is the row number: 1,2 = 上午, 3,4 = 下午, 5 = 晚上
+    period_by_index = {1: "上午", 2: "上午", 3: "下午", 4: "下午", 5: "晚上"}
+    try:
+        period = period_by_index.get(int(div_id[2]), "Unknown")
+    except ValueError:
+        pass
+
+    return day, period
+
+
+def _non_empty_lines(text):
+    """Split text into stripped lines, dropping the blank ones."""
+    return [line.strip() for line in text.splitlines() if line.strip()]
+
+
+def _split_name_and_instructor(segments):
+    """Split the text pieces preceding the week spec into (course_name, instructor).
+
+    segments is a non-empty list of stripped text pieces in document order.
+    The trailing-CJK heuristic is applied to the last piece only, so it can
+    never swallow a course name that sits in an earlier piece.
+    """
+    head = ''.join(segments[:-1])
+    last = segments[-1]
+
+    match = _TRAILING_CJK.search(last)
+    if match and match.start() > 0:
+        # e.g. "企业资源计划（ERP）黄伟": name and teacher share one piece
+        return head + last[:match.start()].rstrip(), match.group(1)
+    if head:
+        # Earlier pieces hold the name, the last piece is the instructor
+        return head, last
+    # A single piece with nothing to separate: keep it all as the course name
+    # rather than producing an empty name
+    return last, "Unknown"
+
+
+def _parse_course_block(block, day, period):
+    """Build one CourseSchedule from a single course div of a timetable cell.
+
+    Expected text layout: course_name + instructor + weeks[timeslot] + location,
+    e.g. "企业资源计划（ERP）黄伟 1-8[3-4][龙]一号楼1307".
+    """
+    course_name = "Unknown Course"
+    instructor = "Unknown"
+    location = "Unknown"
+    week_range = TimeRange(1, 1)
+    timeslot_range = TimeRange(1, 1)
+
+    # Keep a newline between text nodes so markup boundaries stay visible
+    # NOTE(review): assumes name and instructor may sit in separate text nodes — confirm against a real export
+    raw_text = block.get_text(separator='\n')
+    time_match = _WEEKS_AND_SLOT.search(raw_text)
+
+    if time_match:
+        week_range = time_parser(time_match.group(1), week_range)
+        timeslot_range = time_parser(time_match.group(2).strip('[]'), timeslot_range)
+
+        # Location is everything after the time info
+        trailing = ''.join(_non_empty_lines(raw_text[time_match.end():]))
+        if trailing:
+            location = trailing
+
+        # Course name and instructor are everything before the time info
+        leading = _non_empty_lines(raw_text[:time_match.start()])
+        if leading:
+            course_name, instructor = _split_name_and_instructor(leading)
+    else:
+        # Fallback: take the first whitespace-delimited token as the course name
+        parts = raw_text.split()
+        if parts:
+            course_name = parts[0]
+
+    return CourseSchedule(
+        course_name=course_name,
+        instructor=instructor,
+        week=week_range,
+        timeslot=timeslot_range,
+        location=location,
+        day=day,
+        period=period
+    )
+
+
+def time_parser(timeslot_clean, timeslot_range):
+    """Parse a "N" or "A-B" spec into a TimeRange.
+
+    Despite the parameter names, this is used for week specs as well as
+    timeslot specs.
+
+    Args:
+        timeslot_clean: Text such as "5" or "1-8" (any brackets already removed)
+        timeslot_range: Fallback returned unchanged when the text cannot be parsed
+
+    Returns:
+        TimeRange(A, B) for "A-B", TimeRange(N, N) for "N"; otherwise the
+        fallback (non-numeric parts, or more than one '-')
+    """
+    # A lone number is treated as a range whose two bounds are identical
+    if '-' in timeslot_clean:
+        bounds = timeslot_clean.split('-')
+    else:
+        bounds = [timeslot_clean, timeslot_clean]
+
+    if len(bounds) != 2:
+        return timeslot_range
+
+    try:
+        return TimeRange(int(bounds[0]), int(bounds[1]))
+    except ValueError:
+        return timeslot_range
+
+
+if __name__ == "__main__":
+    # Manual smoke test: parse the default schedule file and dump the result to stdout
+    try:
+        student_info, courses = read_course_schedule()
+        print(f"Student: {student_info.student_name} ({student_info.student_id})")
+        print(f"Class: {student_info.class_name}")
+        print("\nCourses:")
+        for course in courses:  # Print every parsed course
+            print(f"- {course.course_name} by {course.instructor}")
+            # Weeks are shown as "from-to", class periods as "[from-to]"
+            print(f"  Time: {course.week.from_value}-{course.week.to_value} [{course.timeslot.from_value}-{course.timeslot.to_value}]")
+            print(f"  Location: {course.location}")
+            print(f"  Day: {course.day}, Period: {course.period}")
+            print()
+    except Exception as e:
+        # Script boundary: report any failure as a one-line message instead of a traceback
+        print(f"Error reading course schedule: {e}")