Only one IRQ comes in upon initializing IDT - gcc

I am trying to work on an x86 operating system and coding it in C. My OS uses GRUB-Multiboot and thus far I have been able to get a GDT working.
I am currently working on my IDT and have run into a problem. When my OS boots up, it receives one IRQ and no further ones. I do have the keyboard line enabled and I should be receiving IRQs from that whenever I click a key but this is not the case.
What comes up upon startup:
Here is the code for idt.c:
#include <stdint.h>
#include "idt.h"
#include "lib/stdio.h"
#include "include/io.h"
// Define the Interrupt Descriptor Table (IDT)
// The table this file actually uses: idt_set_gate() fills it and idt_ptr points at it.
struct idt_entry_t idt[IDT_ENTRIES];
// NOTE(review): nothing in the visible code reads or writes idt_entries; it looks
// like a leftover duplicate of idt — confirm no other file uses it before removing.
struct idt_entry_t idt_entries[IDT_ENTRIES];
// Operand handed to lidt by idt_load(): limit is the table size in bytes minus
// one, base is the address of idt. idt_init() assigns the same values again.
struct idt_ptr_t idt_ptr = {
.limit = sizeof(idt) - 1,
.base = (uint32_t)&idt
};
// Saved CPU state as seen by an interrupt handler, lowest address first.
// NOTE(review): this layout presupposes an assembly entry stub that pushes ds,
// executes pusha and pushes int_no/err_code before calling into C. No such stub
// is visible in this file — confirm one exists before relying on these fields.
typedef struct registers {
uint32_t ds; // Data segment selector
uint32_t edi, esi, ebp, esp, ebx, edx, ecx, eax; // Pushed by pusha
uint32_t int_no, err_code; // Interrupt number and error code (if applicable)
uint32_t eip, cs, eflags, useresp, ss; // Pushed by the processor automatically
} registers_t;
// Define an IRQ handler function
// Common handler for every remapped hardware interrupt: logs a message and
// acknowledges the interrupt at both PICs. regs is not used.
// NOTE(review): idt_init() installs this function's address directly as the gate
// target. A normally compiled C function ends with ret, not iret, so returning
// from here probably does not resume the interrupted code correctly — confirm,
// and consider entering through an assembly stub that finishes with iret.
void irq_handler(registers_t *regs)
{
nanos_printf("Received an IRQ!\n");
// Send an EOI (end-of-interrupt) signal to the PICs
// NOTE(review): the slave is acknowledged unconditionally, even for IRQs that
// presumably originated on the master only — verify this is intended.
outb(0xA0, 0x20); // Send reset signal to slave
outb(0x20, 0x20); // Send reset signal to master
return;
}
// Catch-all handler installed on every vector by default: print a stop
// message and never return.
void isr_handler()
{
    nanos_printf("\n\nSTOP\n\nException occurred... halting...");
    // Spin forever; there is nothing sensible to return to.
    while (1)
    {
    }
}
void idt_load()
{
asm volatile("lidt %0" : : "m"(idt_ptr));
}
// Build the IDT, remap both 8259A PICs to vectors 0x20-0x2F, load the IDT and
// enable interrupts.
//
// Every vector first gets isr_handler as a default; vectors 32-47 (IRQ 0-15)
// are then pointed at irq_handler. All gates use selector 0x08 and flags 0x8E.
//
// The PICs are programmed exactly once, with the complete ICW1..ICW4 sequence.
// After ICW1 in cascade mode the controller consumes the next three data-port
// writes as ICW2 (vector offset), ICW3 (cascade wiring) and ICW4 (mode), in
// that order. Leaving ICW3 out makes the ICW4 byte land in ICW3 and the first
// mask byte land in ICW4, which misconfigures the controller.
void idt_init(void)
{
    // Disable interrupts while the PICs and the IDT are being reprogrammed
    __asm__ __volatile__("cli");

    // Print a message to indicate that IDT is being loaded
    nanos_printf("Loading IDT\n");

    // Initialize the IDT pointer
    idt_ptr.limit = sizeof(idt) - 1;
    idt_ptr.base = (uint32_t)&idt;

    // Set each IDT entry to the default handler
    for (int i = 0; i < IDT_ENTRIES; i++)
    {
        idt_set_gate(i, (uint32_t)isr_handler, 0x08, 0x8E);
    }

    // Route all sixteen IRQ vectors (32-47) to the IRQ handler, so that
    // IRQ 14 and 15 do not fall through to the halting default handler.
    for (int vector = 32; vector < 48; vector++)
    {
        idt_set_gate(vector, (uint32_t)irq_handler, 0x08, 0x8E);
    }

    // ICW1: start the initialization sequence and announce that ICW4 follows
    outb(PIC1_COMMAND, ICW1_INIT | ICW1_ICW4);
    io_wait();
    outb(PIC2_COMMAND, ICW1_INIT | ICW1_ICW4);
    io_wait();

    // ICW2: vector offsets. IRQ 0-7 -> 0x20-0x27, IRQ 8-15 -> 0x28-0x2F
    outb(PIC1_DATA, 0x20);
    io_wait();
    outb(PIC2_DATA, 0x28);
    io_wait();

    // ICW3: cascade wiring. Master has a slave on IR2 (bit mask 0x04);
    // the slave's cascade identity is 2.
    outb(PIC1_DATA, 0x04);
    io_wait();
    outb(PIC2_DATA, 0x02);
    io_wait();

    // ICW4: 8086/88 mode
    outb(PIC1_DATA, ICW4_8086);
    io_wait();
    outb(PIC2_DATA, ICW4_8086);
    io_wait();

    // Initialization is complete, so these writes now reach the mask
    // registers: unmask every IRQ line on both PICs.
    outb(PIC1_DATA, 0x0);
    outb(PIC2_DATA, 0x0);

    // Load IDT
    idt_load();

    // Enable interrupts
    __asm__ __volatile__("sti");
    nanos_printf("IDT loaded\n");
}
void idt_set_gate(uint8_t num, uint32_t base, uint16_t sel, uint8_t flags)
{
// Set the base address
idt[num].base_lo = base & 0xFFFF;
idt[num].base_hi = (base >> 16) & 0xFFFF;
// Set the selector
idt[num].sel = sel;
// Set the always0 field
idt[num].always0 = 0;
// Set the flags
idt[num].flags = flags;
}
And here is idt.h:
#ifndef IDT_H
#define IDT_H
#include <stdint.h>
// Number of entries in the IDT
#define IDT_ENTRIES 256
// Struct for IDT entry
// One 8-byte gate descriptor. Packed so the compiler inserts no padding
// between the fields.
struct idt_entry_t {
uint16_t base_lo; // Lower 16 bits of handler function address
uint16_t sel; // Kernel segment selector
uint8_t always0; // Must always be zero
uint8_t flags; // Flags for entry (present, privilege level, type)
uint16_t base_hi; // Upper 16 bits of handler function address
} __attribute__((packed));
// Struct for IDT pointer
// Operand for the lidt instruction: 16-bit limit followed by 32-bit base,
// packed so no padding separates them.
struct idt_ptr_t {
uint16_t limit; // Size of the IDT in bytes minus one (idt.c stores sizeof(idt) - 1)
uint32_t base; // Base address of IDT
} __attribute__((packed));
// Define macros for setting flags in IDT entries
#define IDT_PRESENT_BIT 0x80
#define IDT_RING0 0x00
#define IDT_RING3 0x60
#define IDT_INT_GATE 0x0E
#define IDT_TRAP_GATE 0x0F
#define IDT_SIZE 0x08
// Define an IRQ handler function
void irq_handler();
// Define an ISR handler function
void isr_handler();
// Define a function to load the IDT
void idt_load();
// Define a function to initialize the IDT
void idt_init(void);
// Define a function to set a gate in the IDT
void idt_set_gate(uint8_t num, uint32_t base, uint16_t sel, uint8_t flags);
#endif // IDT_H
The issue seems to lie within the IRQ handler itself not returning. I do send reset signals to both the master and slave PIC inside of irq_handler() which is inside of idt.c.
And yes, I do have an infinite loop at the end of my kernel_main():
// Kernel entry point: clear the screen, bring up the GDT and the IDT while
// reporting progress, then idle forever.
void kernel_main(void)
{
    clear_screen();
    nanos_printf("Hello NanOS!\n");

    gdt_init();
    nanos_printf("Initialized Global Descriptor Table.\n");

    idt_init();
    nanos_printf("Initialized Interrupt Descriptor Table.\n");

    // Nothing else to do; stay here so the function never returns.
    for (;;)
    {
    }
}
Problem:
I tried to click keys on the keyboard to receive a keyboard IRQ but I only receive one IRQ at the start. I dabbled in OSDEV a few years back and from what I remember the PICs receive more than one IRQ upon startup anyways.

Related

Analog measurement incorrect on Teensy 2.0++

I have a Joystick wired up to my Teensy 2.0++ and I want to read the analog values from it.
I took this implementation from PJRC:
static uint8_t aref = (1<<REFS0); // default to AREF = Vcc, this is a 5V Vcc Teensy
// Select the ADC reference used by later conversions. Only the top two bits
// of mode are kept.
void analogReference(uint8_t mode)
{
    const uint8_t reference_bits = mode & 0xC0;

    aref = reference_bits;
}
// Mux input
// Run one blocking ADC conversion on the given mux selector and return the
// raw result assembled from ADCH:ADCL.
// On AT90USB162 builds the whole body is replaced by `return 0`.
int16_t adc_read(uint8_t mux)
{
#if defined(__AVR_AT90USB162__)
return 0;
#else
uint8_t low;
ADCSRA = (1<<ADEN) | ADC_PRESCALER; // enable ADC
ADCSRB = (1<<ADHSM) | (mux & 0x20); // high speed mode; bit 5 of mux goes here instead of ADMUX
ADMUX = aref | (mux & 0x1F); // configure mux input: reference bits from aref plus the low five mux bits
ADCSRA = (1<<ADEN) | ADC_PRESCALER | (1<<ADSC); // start the conversion
while (ADCSRA & (1<<ADSC)) ; // wait for result (spin until ADSC reads back as 0)
low = ADCL; // must read LSB first
return (ADCH << 8) | low; // must read MSB only once!
#endif
}
// Arduino compatible pin input
// Convert an Arduino-style analog pin number to a mux selector and read it.
// Out-of-range pins, and builds for chips not listed below, yield 0.
int16_t analogRead(uint8_t pin)
{
#if defined(__AVR_ATmega32U4__)
    // Pin number -> mux selector, kept in program memory.
    static const uint8_t PROGMEM pin_to_mux[] = {
        0x00, 0x01, 0x04, 0x05, 0x06, 0x07,
        0x25, 0x24, 0x23, 0x22, 0x21, 0x20};

    if (pin < 12) {
        return adc_read(pgm_read_byte(pin_to_mux + pin));
    }
    return 0;
#elif defined(__AVR_AT90USB646__) || defined(__AVR_AT90USB1286__)
    // On these chips the pin number is used as the mux selector directly.
    if (pin < 8) {
        return adc_read(pin);
    }
    return 0;
#else
    return 0;
#endif
}
I have my X and Y pins wired up to F1 and F0, and I want to retrieve values with the following code:
// Linearly rescale x from the range [in_min, in_max] to [out_min, out_max]
// using integer arithmetic (multiply first, then divide). Same formula as
// Arduino's map().
long map(long x, long in_min, long in_max, long out_min, long out_max)
{
    const long offset = x - in_min;
    const long out_span = out_max - out_min;
    const long in_span = in_max - in_min;

    return offset * out_span / in_span + out_min;
}
// The AVR ADC result is 10 bits wide, so analogRead() yields 0..1023. Scaling
// from 0..65535 squeezed every reading into 0..3; scale from 0..1023 instead.
joy_ly = map(analogRead(0), 0, 1023, 0, 255);
joy_lx = map(analogRead(1), 0, 1023, 0, 255);
I measured my Joystick with a multimeter and it works perfectly (around 2.43V on center, 0V on min, and 5V on max), but the center value always ends up being very close to zero.
Is there anything I'm doing wrong?
NOTE: This is an at90usb1286 chip.
The ADC is 10 bits wide, so its maximum reading is 1023 (1024 distinct values), not 65535.

Atmel Studio- ATmega128 bootloader

I am trying to write a customized bootloader for ATmega AVRs. I wrote the code, and it works perfectly on small AVRs like the ATmega32A and ATmega8A. But when I try to use it on the ATmega128A, it writes nothing to the flash.
I'm sure Fuses are correct, ATmega103 mode is disabled, and the program starts at boot section, but it does nothing..
Before calling the function "boot_program_page" I set PORTC to turn some LEDs on, and afterwards I clear PORTC and the LEDs go off, so the code is executing to completion too.
The function I am using is an example provided in avr/boot.h.
It should write some data ( 0x00 actually ) in page 0 of flash memory..
here is my code :
#define F_CPU 8000000UL
#include <inttypes.h>
#include <avr/io.h>
#include <avr/boot.h>
#include <avr/interrupt.h>
#include <avr/pgmspace.h>
#include <util/delay.h>
void boot_program_page (uint32_t page, uint8_t *buf);
// Test driver: light the LEDs on PORTC, program flash page 0 with zeros,
// wait a second, switch the LEDs off and idle.
int main(void)
{
    uint8_t zeros[SPM_PAGESIZE] = {0};  // one flash page worth of 0x00
    uint32_t page_index = 0;
    // Byte address of the page to program.
    uint32_t page_addr = page_index * (uint32_t)SPM_PAGESIZE;

    DDRC = 0xFF;    // all PORTC pins as outputs
    PORTC = 0xFF;   // LEDs on: about to program

    boot_program_page(page_addr, zeros);
    _delay_ms(1000);

    PORTC = 0x00;   // LEDs off: programming call has returned
    for (;;)
    {
    }
}
// Erase one flash page and program it with SPM_PAGESIZE bytes from buf.
//   page: value passed unchanged to boot_page_erase/boot_page_write and used
//         as the base for boot_page_fill (the caller passes a byte address)
//   buf:  source data; exactly SPM_PAGESIZE bytes are read from it
// Interrupts are disabled for the duration and SREG is restored on exit.
void boot_program_page (uint32_t page, uint8_t *buf)
{
uint16_t i;
uint8_t sreg;
// Disable interrupts.
sreg = SREG;
cli();
// Wait for any EEPROM operation in progress before touching flash.
eeprom_busy_wait ();
boot_page_erase (page);
boot_spm_busy_wait (); // Wait until the memory is erased.
// Load the temporary page buffer one 16-bit word at a time.
for (i=0; i<SPM_PAGESIZE; i+=2)
{
// Set up little-endian word.
uint16_t w = *buf++;
w += (*buf++) << 8;
boot_page_fill (page + i, w);
}
boot_page_write (page); // Store buffer in flash page.
boot_spm_busy_wait(); // Wait until the memory is written.
// Reenable RWW-section again. We need this if we want to jump back
// to the application after bootloading.
boot_rww_enable ();
// Re-enable interrupts (if they were ever enabled).
SREG = sreg;
}

CreateFile Failed: 5

I'm new in windows driver.
I downloaded this sample and fixed nothing but a report descriptor like this.
// Vendor-defined HID report descriptor: one application collection with a
// single report ID carrying a feature, an input and an output item, each made
// of 8-bit fields whose count comes from the *_REPORT_SIZE_CB constants.
// The counts use the two-byte form of REPORT_COUNT (0x96), low byte first.
HID_REPORT_DESCRIPTOR G_DefaultReportDescriptor[] = {
0x06, 0x00, 0xFF, // USAGE_PAGE (Vendor Defined Usage Page)
0x09, 0x01, // USAGE (Vendor Usage 0x01)
0xA1, 0x01, // COLLECTION (Application)
0x85, CONTROL_FEATURE_REPORT_ID, // REPORT_ID (1)
0x09, 0x01, // USAGE (Vendor Usage 0x01)
0x15, 0x00, // LOGICAL_MINIMUM(0)
0x26, 0xff, 0x00, // LOGICAL_MAXIMUM(255)
0x75, 0x08, // REPORT_SIZE (0x08)
//0x95,FEATURE_REPORT_SIZE_CB, // REPORT_COUNT
0x96, (FEATURE_REPORT_SIZE_CB & 0xff), (FEATURE_REPORT_SIZE_CB >> 8), // REPORT_COUNT
0xB1, 0x00, // FEATURE (Data,Ary,Abs)
0x09, 0x01, // USAGE (Vendor Usage 0x01)
0x75, 0x08, // REPORT_SIZE (0x08)
//0x95,INPUT_REPORT_SIZE_CB, // REPORT_COUNT
0x96, (INPUT_REPORT_SIZE_CB & 0xff), (INPUT_REPORT_SIZE_CB >> 8), // REPORT_COUNT
0x81, 0x00, // INPUT (Data,Ary,Abs)
0x09, 0x01, // USAGE (Vendor Usage 0x01)
0x75, 0x08, // REPORT_SIZE (0x08)
//0x95,OUTPUT_REPORT_SIZE_CB, // REPORT_COUNT
0x96, (OUTPUT_REPORT_SIZE_CB & 0xff), (OUTPUT_REPORT_SIZE_CB >> 8), // REPORT_COUNT
0x91, 0x00, // OUTPUT (Data,Ary,Abs)
0xC0, // END_COLLECTION
};
to
// Mouse HID report descriptor. Report ID 1 carries three 1-bit buttons, five
// constant padding bits, then two signed 8-bit relative axes (X and Y).
HID_REPORT_DESCRIPTOR G_DefaultReportDescriptor[] = {
0x05,0x01, // USAGE_PAGE (Generic Desktop)
0x09,0x02, // USAGE (Mouse)
0xA1,0x01, // COLLECTION (Application)
0x85,0x01, // REPORT_ID (1)
0x09,0x01, // USAGE (Pointer)
0xA1,0x00, // COLLECTION (Physical)
0x05,0x09, // USAGE Page (Buttons)
0x19,0x01, // USAGE Minimum (01)
0x29,0x03, // USAGE Maximum (03)
0x15,0x00, // LOGICAL_MINIMUM(0)
0x25,0x01, // LOGICAL_MAXIMUM(1)
0x95,0x03, // REPORT_COUNT (3)
0x75,0x01, // REPORT_SIZE (1)
0x81,0x02, // Input (Data, Variable, Absolute)
0x95,0x01, // REPORT_COUNT (1)
0x75,0x05, // REPORT_SIZE (5)
0x81,0x03, // Input (Constant) - pads the button bits to a full byte
0x05,0x01, // USAGE_PAGE (Generic Desktop)
0x09,0x30, // Usage (X)
0x09,0x31, // Usage (Y)
0x15,0x81, // LOGICAL_MINIMUM(-127)
0x25,0x7F, // LOGICAL_MAXIMUM(127)
0x75,0x08, // REPORT_SIZE (8)
0x95,0x02, // REPORT_COUNT (2)
0x81,0x06, // Input (Data, Variable, Relative)
0xC0, // END_COLLECTION (Physical)
0xC0 // END_COLLECTION (Application)
};
The results:
1. Successfully installed as a HID mouse.
2. Running testApp, CreateFile Failed like this.
....looking for our HID device
Error: CreateFile failed: 5
Failure: Could not find our HID device
Please help me. I couldn't figure out why this happened.

rdpmc in user mode does not work even with PCE set

Based on the Wikipedia entry as well as the Intel manual, rdpmc should be available to user-mode processes as long as bit 8 of CR4 is set. However, I am still running into general protection error when trying to run rdpmc from userspace even with that bit set.
I am running on an 8-core Intel X3470 on kernel 2.6.32-279.el6.x86_64.
Here is the user-mode program I am trying to execute:
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>
#include <sched.h>
#include <assert.h>
// Execute rdpmc for the counter index given in ecx and return the result as
// a single 64-bit value (edx is the upper half, eax the lower half).
uint64_t
read_pmc(int ecx)
{
    unsigned int lo, hi;

    __asm __volatile("rdpmc" : "=a"(lo), "=d"(hi) : "c"(ecx));
    return (((uint64_t)hi) << 32) | ((uint64_t)lo);
}
// Pin the process to the CPU given as av[1], read performance counter av[2]
// with rdpmc and print its value.
// Exits with EXIT_FAILURE on a usage error, an out-of-range CPU id, or when
// the affinity cannot be set.
int main(int ac, char **av)
{
    cpu_set_t cpuset;
    unsigned int c;
    int i;

    if (ac != 3) {
        fprintf(stderr, "usage: %s cpu-id pmc-num\n", av[0]);
        exit(EXIT_FAILURE);
    }
    i = atoi(av[1]);
    c = atoi(av[2]);

    // CPU_SET must not be given an index outside the set.
    if (i < 0 || i >= CPU_SETSIZE) {
        fprintf(stderr, "%s: cpu-id %d out of range\n", av[0], i);
        exit(EXIT_FAILURE);
    }
    CPU_ZERO(&cpuset);
    CPU_SET(i, &cpuset);

    // Do the call outside assert(): with NDEBUG defined an assert() argument
    // is not evaluated at all, which would silently skip the pinning.
    if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
        perror("sched_setaffinity");
        exit(EXIT_FAILURE);
    }

    // PRIu64 matches uint64_t on every target; "%lu" only does where
    // unsigned long happens to be 64 bits wide.
    printf("%" PRIu64 "\n", read_pmc(c));
    return 0;
}
Here is the kernel module which sets the bit and reads out CR4 so I can manually verify that the bit has been set.
/*
* Enable PMC in user mode.
*/
#include <linux/module.h>
#include <linux/kernel.h>
// Module entry point: ORs a bit into CR4 on the CPU running this code, then
// reads CR4 back and logs it so the result can be checked by hand.
// NOTE(review): the comment below says "Bit 8" but the mask is (1 << 7). The
// text following this listing reports that replacing it with (1 << 8) is what
// makes rdpmc work from user mode.
// NOTE(review): runs on a single CPU only; other cores are left untouched.
int init_module(void)
{
typedef long unsigned int uint64_t;
uint64_t output;
// Set CR4, Bit 8 to enable PMC
// rax is saved and restored by hand around the read-modify-write of CR4.
__asm__("push %rax\n\t"
"mov %cr4,%rax;\n\t"
"or $(1 << 7),%rax;\n\t"
"mov %rax,%cr4;\n\t"
"wbinvd\n\t"
"pop %rax"
);
// Read back CR4 to check the bit.
__asm__("\t mov %%cr4,%0" : "=r"(output));
printk(KERN_INFO "%lu", output);
return 0;
}
// Module exit point: clears in CR4 the same bit that init_module() set, on
// the CPU running this code.
// NOTE(review): uses the same (1 << 7) mask as init_module(); see the text
// following this listing, which replaces it with (1 << 8).
void cleanup_module(void)
{
// Build the inverted mask in rbx, AND it into CR4; rax and rbx are saved and
// restored by hand.
__asm__("push %rax\n\t"
"push %rbx\n\t"
"mov %cr4,%rax;\n\t"
"mov $(1 << 7), %rbx\n\t"
"not %rbx\n\t"
"and %rbx, %rax;\n\t"
"mov %rax,%cr4;\n\t"
"wbinvd\n\t"
"pop %rbx\n\t"
"pop %rax\n\t"
);
}
Apparently, when Intel says Bit 8, they are referring to the 9th bit from the right, since their indexing begins at 0. Replacing $(1 << 7) with $(1 << 8) globally resolves the issue, and allows rdpmc to be called from user mode.
Here is the updated kernel module, also using on_each_cpu to make sure that it is set on every core.
/*
* Read PMC in kernel mode.
*/
#include <linux/module.h> /* Needed by all modules */
#include <linux/kernel.h> /* Needed for KERN_INFO */
/* Read CR4 on the current CPU and log its value in decimal. */
static void printc4(void) {
    unsigned long cr4_value;

    /* Copy CR4 into a general-purpose register chosen by the compiler. */
    __asm__("\t mov %%cr4,%0" : "=r"(cr4_value));
    printk(KERN_INFO "%lu", cr4_value);
}
/*
 * Per-CPU callback (run through on_each_cpu by init_module): sets bit 8 of
 * CR4 on the executing CPU, then logs the CPU number and the new CR4 value.
 * info is unused.
 */
static void setc4b8(void * info) {
// Set CR4, Bit 8 (9th bit from the right) to enable
// rax is saved and restored by hand around the read-modify-write of CR4.
__asm__("push %rax\n\t"
"mov %cr4,%rax;\n\t"
"or $(1 << 8),%rax;\n\t"
"mov %rax,%cr4;\n\t"
"wbinvd\n\t"
"pop %rax"
);
// Check which CPU we are on:
printk(KERN_INFO "Ran on Processor %d", smp_processor_id());
printc4();
}
/*
 * Per-CPU callback (run through on_each_cpu by cleanup_module): logs CR4,
 * clears bit 8 of CR4 on the executing CPU, then logs the CPU number.
 * info is unused.
 */
static void clearc4b8(void * info) {
// Log CR4 as it is before the bit is cleared.
printc4();
// Build ~(1 << 8) in rbx and AND it into CR4; rax and rbx are saved and
// restored by hand.
__asm__("push %rax\n\t"
"push %rbx\n\t"
"mov %cr4,%rax;\n\t"
"mov $(1 << 8), %rbx\n\t"
"not %rbx\n\t"
"and %rbx, %rax;\n\t"
"mov %rax,%cr4;\n\t"
"wbinvd\n\t"
"pop %rbx\n\t"
"pop %rax\n\t"
);
printk(KERN_INFO "Ran on Processor %d", smp_processor_id());
}
/*
 * Module entry point: set CR4 bit 8 on every CPU.
 * wait=1 makes on_each_cpu() return only after setc4b8 has finished on all
 * CPUs, so the bit is guaranteed to be set everywhere by the time module
 * loading completes and a user program tries rdpmc.
 */
int init_module(void)
{
    on_each_cpu(setc4b8, NULL, 1);
    return 0;
}
/*
 * Module exit point: clear CR4 bit 8 on every CPU.
 * wait=1 is required here: once this function returns the module's code may
 * be freed, so clearc4b8 must have finished on all CPUs before that happens.
 */
void cleanup_module(void)
{
    on_each_cpu(clearc4b8, NULL, 1);
}
Echoing "2" to /sys/bus/event_source/devices/cpu/rdpmc allows user processes
to access performance counters via the rdpmc instruction.
Note that the behaviour has changed. Prior to 4.0, "1" meant "enabled"
while "0" meant "disabled". Now "1" means allowed only for processes that have active perf events. More details: http://man7.org/linux/man-pages/man2/perf_event_open.2.html

Userland interrupt timer access such as via KeQueryInterruptTime (or similar)

Is there a "Nt" or similar (i.e. non-kernelmode-driver) function equivalent for KeQueryInterruptTime or anything similar? There seems to be no such thing as NtQueryInterruptTime, at least I've not found it.
What I want is some kind of reasonably accurate and reliable, monotonic timer (thus not QPC) which is reasonably efficient and doesn't have surprises as an overflowing 32-bit counter, and no unnecessary "smartness", no time zones, or complicated structures.
So ideally, I want something like timeGetTime with a 64 bit value. It doesn't even have to be the same timer.
There exists GetTickCount64 starting with Vista, which would be acceptable as such, but I'd not like to break XP support only for such a stupid reason.
Reading the quadword at 0x7FFE0008 as indicated here ... well, works ... and it proves that indeed the actual internal counter is 64 bits under XP (it's also as fast as it could possibly get), but meh... let's not talk about what a kind of nasty hack it is to read some unknown, hardcoded memory location.
There must certainly be something in between calling an artificially stupefied (scaling a 64 bit counter down to 32 bits) high-level API function and reading a raw memory address?
Here's an example of a thread-safe wrapper for GetTickCount() extending the tick count value to 64 bits and in that being equivalent to GetTickCount64().
To avoid undesired counter roll overs, make sure to call this function a few times every 49.7 days. You can even have a dedicated thread whose only purpose would be to call this function and then sleep some 20 days in an infinite loop.
ULONGLONG MyGetTickCount64(void)
{
static volatile LONGLONG Count = 0;
LONGLONG curCount1, curCount2;
LONGLONG tmp;
curCount1 = InterlockedCompareExchange64(&Count, 0, 0);
curCount2 = curCount1 & 0xFFFFFFFF00000000;
curCount2 |= GetTickCount();
if ((ULONG)curCount2 < (ULONG)curCount1)
{
curCount2 += 0x100000000;
}
tmp = InterlockedCompareExchange64(&Count, curCount2, curCount1);
if (tmp == curCount1)
{
return curCount2;
}
else
{
return tmp;
}
}
EDIT: And here's a complete application that tests MyGetTickCount64().
// Compiled with Open Watcom C 1.9: wcl386.exe /we /wx /q gettick.c
#include <windows.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
//
// The below code is an ugly implementation of InterlockedCompareExchange64()
// that is apparently missing in Open Watcom C 1.9.
// It must work with MSVC++ too, however.
//
// Raw 32-bit x86 machine code for a cdecl routine taking three pointers
// (Dest, Exch, Comp). It loads *Comp into edx:eax and *Exch into ecx:ebx,
// runs lock cmpxchg8b on *Dest and returns with the result in edx:eax.
// The array is made executable at run time and called through Cmpxchg8b.
UINT8 Cmpxchg8bData[] =
{
0x55, // push ebp
0x89, 0xE5, // mov ebp, esp
0x57, // push edi
0x51, // push ecx
0x53, // push ebx
0x8B, 0x7D, 0x10, // mov edi, [ebp + 0x10]
0x8B, 0x07, // mov eax, [edi]
0x8B, 0x57, 0x04, // mov edx, [edi + 0x4]
0x8B, 0x7D, 0x0C, // mov edi, [ebp + 0xc]
0x8B, 0x1F, // mov ebx, [edi]
0x8B, 0x4F, 0x04, // mov ecx, [edi + 0x4]
0x8B, 0x7D, 0x08, // mov edi, [ebp + 0x8]
0xF0, // lock:
0x0F, 0xC7, 0x0F, // cmpxchg8b [edi]
0x5B, // pop ebx
0x59, // pop ecx
0x5F, // pop edi
0x5D, // pop ebp
0xC3 // ret
};
LONGLONG (__cdecl *Cmpxchg8b)(LONGLONG volatile* Dest, LONGLONG* Exch, LONGLONG* Comp) =
(LONGLONG (__cdecl *)(LONGLONG volatile*, LONGLONG*, LONGLONG*))Cmpxchg8bData;
// Stand-in for InterlockedCompareExchange64(): forwards to the machine-code
// routine behind Cmpxchg8b, passing Exchange and Comparand by address, and
// returns whatever that routine returns.
LONGLONG MyInterlockedCompareExchange64(LONGLONG volatile* Destination,
                                        LONGLONG Exchange,
                                        LONGLONG Comparand)
{
    LONGLONG previous = Cmpxchg8b(Destination, &Exchange, &Comparand);

    return previous;
}
#ifdef InterlockedCompareExchange64
#undef InterlockedCompareExchange64
#endif
#define InterlockedCompareExchange64(Destination, Exchange, Comparand) \
MyInterlockedCompareExchange64(Destination, Exchange, Comparand)
//
// This stuff makes a thread-safe printf().
// We don't want characters output by one thread to be mixed
// with characters output by another. We want printf() to be
// "atomic".
// We use a critical section around vprintf() to achieve "atomicity".
//
static CRITICAL_SECTION PrintfCriticalSection;
// printf() serialized through PrintfCriticalSection so that output from
// different threads is not interleaved. Returns vprintf()'s result.
int ts_printf(const char* Format, ...)
{
    va_list args;
    int written;

    va_start(args, Format);
    EnterCriticalSection(&PrintfCriticalSection);
    written = vprintf(Format, args);
    LeaveCriticalSection(&PrintfCriticalSection);
    va_end(args);

    return written;
}
#define TICK_COUNT_10MS_INCREMENT 0x800000
//
// This is the simulated tick counter.
// Its low 32 bits are going to be returned by
// our, simulated, GetTickCount().
//
// TICK_COUNT_10MS_INCREMENT is what the counter is
// incremented by every time. The value is so chosen
// that the counter quickly overflows in its
// low 32 bits.
//
static volatile LONGLONG SimulatedTickCount = 0;
//
// This is our simulated 32-bit GetTickCount()
// that returns a count that often overflows.
//
ULONG SimulatedGetTickCount(void)
{
return (ULONG)SimulatedTickCount;
}
//
// This thread function will increment the simulated tick counter
// whose value's low 32 bits we'll be reading in SimulatedGetTickCount().
//
// Thread body that drives the simulated tick counter: every 10 ms it adds
// TICK_COUNT_10MS_INCREMENT to SimulatedTickCount. Never returns in practice.
DWORD WINAPI SimulatedTickThread(LPVOID lpParameter)
{
    UNREFERENCED_PARAMETER(lpParameter);
    for (;;)
    {
        LONGLONG c;
        Sleep(10);
        // Get the counter value, add TICK_COUNT_10MS_INCREMENT to it and
        // store the result back. The compare-exchange only stores if the
        // counter still holds c, so retry until the update goes through.
        do
        {
            c = InterlockedCompareExchange64(&SimulatedTickCount, 0, 0);
        } while (InterlockedCompareExchange64(&SimulatedTickCount, c + TICK_COUNT_10MS_INCREMENT, c) != c);
    }
    return 0;
}
volatile LONG CountOfObserved32bitOverflows = 0;
volatile LONG CountOfObservedUpdateRaces = 0;
//
// This prints statistics that includes the true 64-bit value of
// SimulatedTickCount that we can't get from SimulatedGetTickCount() as it
// returns only its lower 32 bits.
//
// The stats also include:
// - the number of times that MyGetTickCount64() observes an overflow of
// SimulatedGetTickCount()
// - the number of times MyGetTickCount64() fails to update its internal
// counter because of a concurrent update in another thread.
//
void PrintStats(void)
{
LONGLONG true64bitCounter = InterlockedCompareExchange64(&SimulatedTickCount, 0, 0);
ts_printf(" 0x%08X`%08X <- true 64-bit count; ovfs: ~%d; races: %d\n",
(ULONG)(true64bitCounter >> 32),
(ULONG)true64bitCounter,
CountOfObserved32bitOverflows,
CountOfObservedUpdateRaces);
}
//
// This is our poor man's implementation of GetTickCount64()
// on top of GetTickCount().
//
// It's thread safe.
//
// When used with actual GetTickCount() instead of SimulatedGetTickCount()
// it must be called at least a few times in 49.7 days to ensure that
// it doesn't miss any overflows in GetTickCount()'s return value.
//
// Instrumented variant for the test program: same algorithm as the plain
// wrapper, but it reads SimulatedGetTickCount() and counts observed 32-bit
// overflows and lost update races.
ULONGLONG MyGetTickCount64(void)
{
static volatile LONGLONG Count = 0;
LONGLONG curCount1, curCount2;
LONGLONG tmp;
// Atomic 64-bit read: compare-exchanging 0 for 0 never alters the value.
curCount1 = InterlockedCompareExchange64(&Count, 0, 0);
// Keep the stored upper half, take the lower half from the live counter.
curCount2 = curCount1 & 0xFFFFFFFF00000000;
curCount2 |= SimulatedGetTickCount();
// Lower half went backwards, so the 32-bit counter wrapped: carry into the upper half.
if ((ULONG)curCount2 < (ULONG)curCount1)
{
curCount2 += 0x100000000;
InterlockedIncrement(&CountOfObserved32bitOverflows);
}
// Publish only if Count is unchanged since it was read.
tmp = InterlockedCompareExchange64(&Count, curCount2, curCount1);
if (tmp != curCount1)
{
// Another thread updated Count first; return its value instead.
curCount2 = tmp;
InterlockedIncrement(&CountOfObservedUpdateRaces);
}
return curCount2;
}
//
// This is an error counter. If a thread that uses MyGetTickCount64() notices
// any problem with what MyGetTickCount64() returns, it bumps up this error
// counter and stops. If one of threads sees a non-zero value in this
// counter due to an error in another thread, it stops as well.
//
volatile LONG Error = 0;
//
// This is a thread function that will be using MyGetTickCount64(),
// validating its return value and printing some stats once in a while.
//
// This function is meant to execute concurrently in multiple threads
// to create race conditions inside of MyGetTickCount64() and test it.
//
// Worker loop: calls MyGetTickCount64() repeatedly, checks each value against
// the previous one and stops (setting Error) on a backwards step or on a jump
// of 0x100000000 or more. lpParameter carries the thread number.
// ticks[0] = previous value, ticks[1] = latest value,
// ticks[2] = value at the last throttle sleep, ticks[3] = value at the last print.
DWORD WINAPI TickUserThread(LPVOID lpParameter)
{
// NOTE(review): pointer-to-DWORD cast; presumably fine for the 32-bit build this test targets — confirm.
DWORD user = (DWORD)lpParameter; // thread number
ULONGLONG ticks[4];
ticks[3] = ticks[2] = ticks[1] = MyGetTickCount64();
while (!Error)
{
ticks[0] = ticks[1];
ticks[1] = MyGetTickCount64();
// Every ~100 ms sleep a little (slightly lowers CPU load, to about 90%)
if (ticks[1] > ticks[2] + TICK_COUNT_10MS_INCREMENT * 10L)
{
ticks[2] = ticks[1];
Sleep(1 + rand() % 20);
}
// Every ~1000 ms print the last value from MyGetTickCount64().
// Thread 1 also prints stats here.
if (ticks[1] > ticks[3] + TICK_COUNT_10MS_INCREMENT * 100L)
{
ticks[3] = ticks[1];
ts_printf("%u:0x%08X`%08X\n", user, (ULONG)(ticks[1] >> 32), (ULONG)ticks[1]);
if (user == 1)
{
PrintStats();
}
}
// The counter must never step backwards.
if (ticks[0] > ticks[1])
{
ts_printf("%u:Non-monotonic tick counts: 0x%016llX > 0x%016llX!\n",
user,
ticks[0],
ticks[1]);
PrintStats();
InterlockedIncrement(&Error);
return -1;
}
// A jump of a whole 32-bit period or more between two consecutive reads is treated as an error.
else if (ticks[0] + 0x100000000 <= ticks[1])
{
ts_printf("%u:Too big tick count jump: 0x%016llX -> 0x%016llX!\n",
user,
ticks[0],
ticks[1]);
PrintStats();
InterlockedIncrement(&Error);
return -1;
}
Sleep(0); // be nice, yield to other threads.
}
return 0;
}
//
// This prints stats upon Ctrl+C and terminates the program.
//
// Console control handler: prints the statistics on Ctrl+C. Always returns
// FALSE.
BOOL WINAPI ConsoleEventHandler(DWORD Event)
{
    switch (Event)
    {
    case CTRL_C_EVENT:
        PrintStats();
        break;
    default:
        break;
    }
    return FALSE;
}
int main(void)
{
HANDLE simulatedTickThreadHandle;
HANDLE tickUserThreadHandle;
DWORD dummy;
// This is for the missing InterlockedCompareExchange64() workaround.
VirtualProtect(Cmpxchg8bData, sizeof(Cmpxchg8bData), PAGE_EXECUTE_READWRITE, &dummy);
InitializeCriticalSection(&PrintfCriticalSection);
if (!SetConsoleCtrlHandler(&ConsoleEventHandler, TRUE))
{
ts_printf("SetConsoleCtrlHandler(&ConsoleEventHandler) failed with error 0x%X\n", GetLastError());
return -1;
}
// Start the tick simulator thread.
simulatedTickThreadHandle = CreateThread(NULL, 0, &SimulatedTickThread, NULL, 0, NULL);
if (simulatedTickThreadHandle == NULL)
{
ts_printf("CreateThread(&SimulatedTickThread) failed with error 0x%X\n", GetLastError());
return -1;
}
// Start one thread that'll be using MyGetTickCount64().
tickUserThreadHandle = CreateThread(NULL, 0, &TickUserThread, (LPVOID)2, 0, NULL);
if (tickUserThreadHandle == NULL)
{
ts_printf("CreateThread(&TickUserThread) failed with error 0x%X\n", GetLastError());
return -1;
}
// The other thread using MyGetTickCount64() will be the main thread.
TickUserThread((LPVOID)1);
//
// The app terminates upon any error condition detected in TickUserThread()
// in any of the threads or by Ctrl+C.
//
return 0;
}
As a test I've been running this test app under Windows XP for 5+ hours on an otherwise idle machine that has 2 CPUs (idle, to avoid potential long starvation times and therefore avoid missing counter overflows that occur every 5 seconds) and it's still doing well.
Here's the latest output from the console:
2:0x00000E1B`C8800000
1:0x00000E1B`FA800000
0x00000E1B`FA800000 <- true 64-bit count; ovfs: ~3824; races: 110858
As you can see, MyGetTickCount64() has observed 3824 32-bit overflows and failed to update the value of Count with its second InterlockedCompareExchange64() 110858 times. So, overflows indeed occur and the last number means that the variable is, in fact, being concurrently updated by the two threads.
You can also see that the 64-bit tick counts that the two threads receive from MyGetTickCount64() in TickUserThread() don't have anything missing in the top 32 bits and are pretty close to the actual 64-bit tick count in SimulatedTickCount, whose 32 low bits are returned by SimulatedGetTickCount(). 0x00000E1BC8800000 is visually behind 0x00000E1BFA800000 due to thread scheduling and infrequent stat prints, it's behind by exactly 100*TICK_COUNT_10MS_INCREMENT, or 1 second. Internally, of course, the difference is much smaller.
Now, on availability of InterlockedCompareExchange64()... It's a bit odd that it's officially available since Windows Vista and Windows Server 2003. Server 2003 is in fact built from the same code base as Windows XP.
But the most important thing here is that this function is built on top of the Pentium CMPXCHG8B instruction that's been available since 1998 or earlier (1), (2). And I can see this instruction in my Windows XP's (SP3) binaries. It's in ntkrnlpa.exe/ntoskrnl.exe (the kernel) and ntdll.dll (the DLL that exports kernel's NtXxxx() functions that everything's built upon). Look for a byte sequence of 0xF0, 0x0F, 0xC7 and disassemble the code around that place to see that these bytes aren't there coincidentally.
You can check availability of this instruction through the CPUID instruction (EDX bit 8 of CPUID function 0x00000001 and function 0x80000001) and refuse to run instead of crashing if the instruction isn't there, but these days you're unlikely to find a machine that doesn't support this instruction. If you do, it won't be a good machine for Windows XP and probably your application as well anyways.
Thanks to Google Books which kindly offered the relevant literature for free, I came up with an easy and fast implementation of GetTickCount64 which works perfectly well on pre-Vista systems too (and it still is somewhat less nasty than reading a value from a hardcoded memory address).
It is in fact as easy as calling interrupt 0x2A, which maps to KiGetTickCount. In GCC inline assembly, this gives:
// Issue software interrupt 0x2a and return the 64-bit value it leaves in
// edx:eax (the "=A" constraint).
static __inline__ __attribute__((always_inline)) unsigned long long get_tick_count64()
{
    unsigned long long ticks;

    __asm__ __volatile__ ("int $0x2a" : "=A"(ticks) : : );
    return ticks;
}
Due to the way KiGetTickCount works, the function should probably better be called GetTickCount46, as it performs a right shift by 18, returning 46 bits, not 64. Though the same is true for the original Vista version, too.
Note that KiGetTickCount clobbers edx, this is relevant if you plan to implement your own faster implementation of the 32-bit version (must add edx to the clobber list in that case!).
Here's another approach, a variant of Alex's wrapper but using only 32-bit interlocks. It only actually returns a 60-bit number, but that's still good for about thirty-six million years. :-)
It does need to be called more often, at least once every three days. That shouldn't normally be a major drawback.
// Extended tick counter that needs only 32-bit interlocked operations.
// The static `count` holds the bits above the low 28 bits of the result; its
// low four bits mirror the top four bits ("zone") of GetTickCount() as last
// observed. The result is (count << 28) + (low 28 bits of GetTickCount()).
// Returns 0xFFFFFFFF if the zone advanced by more than one step since the
// last update, i.e. the function was not called often enough.
ULONGLONG MyTickCount64(void)
{
// 0xFFFFFFFF marks "never written".
static volatile DWORD count = 0xFFFFFFFF;
DWORD previous_count, current_tick32, previous_count_zone, current_tick32_zone;
ULONGLONG current_tick64;
// Atomic read: compare-exchanging 0 for 0 never alters the value.
previous_count = InterlockedCompareExchange(&count, 0, 0);
current_tick32 = GetTickCount();
if (previous_count == 0xFFFFFFFF)
{
// count has never been written
DWORD initial_count;
initial_count = current_tick32 >> 28;
previous_count = InterlockedCompareExchange(&count, initial_count, 0xFFFFFFFF);
if (previous_count == 0xFFFFFFFF)
{ // This thread wrote the initial value for count
previous_count = initial_count;
}
else if (previous_count != initial_count)
{ // Another thread wrote the initial value for count,
// and it differs from the one we calculated
current_tick32 = GetTickCount();
}
}
previous_count_zone = previous_count & 15;
current_tick32_zone = current_tick32 >> 28;
if (current_tick32_zone == previous_count_zone)
{
// The top four bits of the 32-bit tick count haven't changed since count was last written.
current_tick64 = previous_count;
current_tick64 <<= 28;
current_tick64 += current_tick32 & 0x0FFFFFFF;
return current_tick64;
}
// Second clause covers the zone wrapping from 15 back to 0.
if (current_tick32_zone == previous_count_zone + 1 || (current_tick32_zone == 0 && previous_count_zone == 15))
{
// The top four bits of the 32-bit tick count have been incremented since count was last written.
// The exchange result is ignored: the returned value is previous_count + 1 whether or not this thread's store happened.
InterlockedCompareExchange(&count, previous_count + 1, previous_count);
current_tick64 = previous_count + 1;
current_tick64 <<= 28;
current_tick64 += current_tick32 & 0x0FFFFFFF;
return current_tick64;
}
// Oops, we weren't called often enough, we're stuck
return 0xFFFFFFFF;
}

Resources