feat(CourseInformer): 添加课程通知系统
- 实现了读取教务系统导出的xls课程安排以及通过Napcat实例发送对应通知的功能
- 添加了通知配置文件和测试脚本
- 创建了项目结构和必要的配置文件
This commit is contained in:
247
reader.py
Normal file
247
reader.py
Normal file
@@ -0,0 +1,247 @@
|
||||
"""
|
||||
Module for reading course schedule information from HTML file
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import List
|
||||
import os
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
@dataclass
class TimeRange:
    """A pair of integer bounds.

    The same type is used for week spans and for class-period spans of a
    course entry.

    Attributes:
        from_value: Starting bound of the range.
        to_value: Ending bound of the range. A range built from a single
            number carries that number in both fields.
    """

    from_value: int
    to_value: int
|
||||
|
||||
|
||||
@dataclass
class CourseSchedule:
    """One course occurrence taken from a single cell of the timetable.

    Attributes:
        course_name: Name of the course.
        instructor: Name of the teacher.
        week: Week span, parsed from text such as "1-8" or "11-18".
        timeslot: Class-period span, parsed from text such as "[1-2]",
            "[6-7]" or "[8-10]".
        location: Where the class is held, e.g. "[龙]二号楼2301".
        day: Weekday label, e.g. "星期一", "星期二".
        period: Part of the day, e.g. "上午", "下午", "晚上".
    """

    course_name: str
    instructor: str
    week: TimeRange
    timeslot: TimeRange
    location: str
    day: str
    period: str
|
||||
|
||||
|
||||
@dataclass
class StudentInfo:
    """Identity details of the student the timetable belongs to.

    Attributes:
        student_id: Student number.
        student_name: Student's name.
        class_name: Name of the class the student is enrolled in.
        total_credits: Total credits as a float.
    """

    student_id: str
    student_name: str
    class_name: str
    total_credits: float
|
||||
|
||||
|
||||
# Weekday labels used when the timetable header yields no usable day names.
_DEFAULT_DAYS = ["星期一", "星期二", "星期三", "星期四", "星期五"]

# Week span immediately followed by a bracketed period span, e.g. "1-8[3-4]".
_TIME_PATTERN = re.compile(r'([0-9\-]+)(\[[0-9\-]+\])')

# Trailing run of CJK ideographs; taken to be the instructor's name.
_INSTRUCTOR_PATTERN = re.compile(r'([\u4e00-\u9fff]+)$')

# Period digit of a timetable cell id -> part of the day.
_PERIOD_BY_INDEX = {1: "上午", 2: "上午", 3: "下午", 4: "下午", 5: "晚上"}

# Cell labels in the student table, with ASCII and full-width colons.
_NAME_LABELS = ("姓名:", "姓名：")
_CLASS_LABELS = ("所在班级:", "所在班级：")


def read_course_schedule(file_path: str = "resources/教学安排表.xls") -> tuple[StudentInfo, List[CourseSchedule]]:
    """Read student and course schedule information from an exported timetable.

    The file is decoded as GBK and parsed as HTML, whatever its extension.

    Args:
        file_path: Path to the exported timetable file.

    Returns:
        Tuple of (StudentInfo, list of CourseSchedule objects). Fields that
        cannot be extracted keep placeholder values ("Unknown",
        "Unknown Course", ``TimeRange(1, 1)``).

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Course schedule file not found: {file_path}")

    # Read the HTML file with GBK encoding
    with open(file_path, 'r', encoding='GBK') as file:
        content = file.read()

    soup = BeautifulSoup(content, 'html.parser')

    _student_info = _extract_student_info(soup)
    days = _extract_day_names(soup)

    _courses = []
    for div in soup.find_all('div', class_='div1'):
        xkinfo = div.find('span', class_='xkinfo')
        if xkinfo is None:
            continue

        # Day and period depend only on the cell id, so resolve them once
        # per cell rather than once per course inside it.
        day, period = _day_and_period(div.get('id', ''), days)

        # Each cell may contain multiple courses
        course_blocks = xkinfo.find_all('div', style=lambda x: x and 'padding-bottom:5px' in x)
        for block in course_blocks:
            _courses.append(_parse_course_text(block.get_text(), day, period))

    return _student_info, _courses


def _label_value(text, labels):
    """Return the trimmed text after the first matching leading label, else None."""
    for label in labels:
        if text.startswith(label):
            return text[len(label):].strip()
    return None


def _extract_student_info(soup: BeautifulSoup) -> StudentInfo:
    """Collect the student id, name and class from the parsed document."""
    student_id = "Unknown"
    student_name = "Unknown"
    class_name = "Unknown"
    # NOTE: credits are not parsed anywhere in this module; the field stays 0.0.
    total_credits = 0.0

    # Student id comes from the input element with id "xh"
    xh_input = soup.find('input', {'id': 'xh'})
    if xh_input is not None and xh_input.get('value'):
        student_id = xh_input['value']

    # Name and class are labelled cells in the first table of the document
    tables = soup.find_all('table')
    if tables:
        for row in tables[0].find_all('tr'):
            for cell in row.find_all('td'):
                text = cell.get_text().strip()
                name = _label_value(text, _NAME_LABELS)
                if name is not None:
                    # An empty value keeps the placeholder.
                    student_name = name or student_name
                    continue
                group = _label_value(text, _CLASS_LABELS)
                if group is not None:
                    class_name = group or class_name

    return StudentInfo(
        student_id=student_id,
        student_name=student_name,
        class_name=class_name,
        total_credits=total_credits,
    )


def _extract_day_names(soup: BeautifulSoup) -> List[str]:
    """Return weekday labels from the header row, or the defaults if none are found."""
    header_row = soup.find('tr', class_='H')
    if header_row is None:
        return list(_DEFAULT_DAYS)

    days = []
    for cell in header_row.find_all('td', class_='td0'):
        day_text = cell.get_text().strip()
        if "星期" in day_text:  # Only keep actual day names
            days.append(day_text)

    # A header row without any day cell must not wipe out the defaults,
    # otherwise every course would end up with day "Unknown".
    return days or list(_DEFAULT_DAYS)


def _day_and_period(div_id: str, days: List[str]) -> tuple[str, str]:
    """Map a timetable cell id such as "k11" to (weekday label, part of day).

    The character after 'k' is the 1-based day index into ``days``; the next
    character is the period index (1,2 = 上午, 3,4 = 下午, 5 = 晚上). Anything
    that cannot be resolved is reported as "Unknown".
    """
    day = "Unknown"
    period = "Unknown"
    if not (div_id.startswith('k') and len(div_id) >= 3):
        return day, period

    try:
        day_index = int(div_id[1]) - 1
    except ValueError:
        pass  # Non-numeric day marker: keep "Unknown"
    else:
        if 0 <= day_index < len(days):
            day = days[day_index]

    try:
        period = _PERIOD_BY_INDEX.get(int(div_id[2]), period)
    except ValueError:
        pass  # Non-numeric period marker: keep "Unknown"

    return day, period


def _parse_course_text(raw_text: str, day: str, period: str) -> CourseSchedule:
    """Build a CourseSchedule from the concatenated text of one course block.

    Expected layout: course_name + instructor + weeks[timeslot] + location,
    e.g. "企业资源计划(ERP)黄伟 1-8[3-4][龙]一号楼1307".
    """
    course_name = "Unknown Course"
    instructor = "Unknown"
    location = "Unknown"
    week_range = TimeRange(1, 1)
    timeslot_range = TimeRange(1, 1)

    time_match = _TIME_PATTERN.search(raw_text)
    if time_match is None:
        # Fallback: assume the first whitespace-separated part is the course name
        parts = raw_text.split()
        if parts:
            course_name = parts[0]
    else:
        week_range = time_parser(time_match.group(1), week_range)
        timeslot_range = time_parser(time_match.group(2).strip('[]'), timeslot_range)

        # Location is everything after the time info; blank text keeps "Unknown"
        location = raw_text[time_match.end():].strip() or location

        before_time = raw_text[:time_match.start()].strip()
        instructor_match = _INSTRUCTOR_PATTERN.search(before_time)
        if instructor_match and instructor_match.start() > 0:
            instructor = instructor_match.group(1)
            # Course name is everything before the instructor
            course_name = before_time[:instructor_match.start()].strip()
        else:
            # Either no trailing CJK run, or the run covers the whole text,
            # so name and instructor cannot be told apart. Keep the text as
            # the course name rather than leaving it empty.
            course_name = before_time or course_name

    return CourseSchedule(
        course_name=course_name,
        instructor=instructor,
        week=week_range,
        timeslot=timeslot_range,
        location=location,
        day=day,
        period=period,
    )
|
||||
|
||||
|
||||
def time_parser(timeslot_clean, timeslot_range):
    """Turn a "from-to" or single-number string into a TimeRange.

    Used for both week spans and period spans.

    Args:
        timeslot_clean: Text such as "1-8" or "3", without brackets.
        timeslot_range: Value handed back unchanged when the text cannot be
            parsed.

    Returns:
        A new TimeRange on success (a single number fills both bounds),
        otherwise ``timeslot_range``.
    """
    pieces = timeslot_clean.split('-')

    if len(pieces) == 1:
        # No dash at all: one number serves as both bounds.
        low_text = high_text = timeslot_clean
    elif len(pieces) == 2:
        low_text, high_text = pieces
    else:
        # More than one dash is not a simple range; keep the fallback.
        return timeslot_range

    try:
        low, high = int(low_text), int(high_text)
    except ValueError:
        return timeslot_range

    return TimeRange(low, high)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: parse the default schedule file and dump the result.
    try:
        student_info, courses = read_course_schedule()

        print(f"Student: {student_info.student_name} ({student_info.student_id})")
        print(f"Class: {student_info.class_name}")
        print("\nCourses:")

        # Every parsed course is listed; the count is not capped.
        for course in courses:
            weeks = course.week
            slots = course.timeslot
            summary = (
                f"- {course.course_name} by {course.instructor}",
                f" Time: {weeks.from_value}-{weeks.to_value} [{slots.from_value}-{slots.to_value}]",
                f" Location: {course.location}",
                f" Day: {course.day}, Period: {course.period}",
            )
            for line in summary:
                print(line)
            print()
    except Exception as exc:
        # Top-level boundary of the script: report the failure and exit normally.
        print(f"Error reading course schedule: {exc}")
|
||||
Reference in New Issue
Block a user